10 #include "tree-walk.h"
12 #include "list-objects.h"
13 #include "list-objects-filter.h"
14 #include "list-objects-filter-options.h"
17 #include "object-store.h"
19 /* Remember to update object flag allocation in object.h */
21 * FILTER_SHOWN_BUT_REVISIT -- we set this bit on tree objects
22 * that have been shown, but should be revisited if they appear
23 * in the traversal (until we mark it SEEN). This is a way to
24 * let us silently de-dup calls to show() in the caller. This
25 * is subtly different from the "revision.h:SHOWN" and the
26 * "object-name.c:ONELINE_SEEN" bits. And also different from
27 * the non-de-dup usage in pack-bitmap.c
29 #define FILTER_SHOWN_BUT_REVISIT (1<<21)
32 struct filter
*filter
;
35 struct object_id skip_tree
;
36 unsigned is_skipping_tree
: 1;
40 enum list_objects_filter_result (*filter_object_fn
)(
42 enum list_objects_filter_situation filter_situation
,
50 * Optional. If this function is supplied and the filter needs
51 * to collect omits, then this function is called once before
54 * This is required because the following two conditions hold:
56 * a. A tree filter can add and remove objects as an object
58 * b. A combine filter's omit set is the union of all its
59 * subfilters, which may include tree: filters.
61 * As such, the omits sets must be separate sets, and can only
62 * be unioned after the traversal is completed.
64 void (*finalize_omits_fn
)(struct oidset
*omits
, void *filter_data
);
66 void (*free_fn
)(void *filter_data
);
70 /* If non-NULL, the filter collects a list of the omitted OIDs here. */
74 static enum list_objects_filter_result
filter_blobs_none(
75 struct repository
*r UNUSED
,
76 enum list_objects_filter_situation filter_situation
,
78 const char *pathname UNUSED
,
79 const char *filename UNUSED
,
81 void *filter_data_ UNUSED
)
83 switch (filter_situation
) {
85 BUG("unknown filter_situation: %d", filter_situation
);
88 assert(obj
->type
== OBJ_TAG
);
89 /* always include all tag objects */
90 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
93 assert(obj
->type
== OBJ_COMMIT
);
94 /* always include all commit objects */
95 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
98 assert(obj
->type
== OBJ_TREE
);
99 /* always include all tree objects */
100 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
103 assert(obj
->type
== OBJ_TREE
);
107 assert(obj
->type
== OBJ_BLOB
);
108 assert((obj
->flags
& SEEN
) == 0);
111 oidset_insert(omits
, &obj
->oid
);
112 return LOFR_MARK_SEEN
; /* but not LOFR_DO_SHOW (hard omit) */
116 static void filter_blobs_none__init(
117 struct list_objects_filter_options
*filter_options UNUSED
,
118 struct filter
*filter
)
120 filter
->filter_object_fn
= filter_blobs_none
;
121 filter
->free_fn
= free
;
125 * A filter for list-objects to omit trees and blobs at or beyond a given depth.
126 * Can OPTIONALLY collect a list of the omitted OIDs.
128 struct filter_trees_depth_data
{
130 * Maps trees to the minimum depth at which they were seen. It is not
131 * necessary to re-traverse a tree at deeper or equal depths than it has
132 * already been traversed.
134 * We can't use LOFR_MARK_SEEN for tree objects since this will prevent
135 * it from being traversed at shallower depths.
137 struct oidmap seen_at_depth
;
139 unsigned long exclude_depth
;
140 unsigned long current_depth
;
143 struct seen_map_entry
{
144 struct oidmap_entry base
;
148 /* Returns 1 if the oid was already in the omits set when this function was called. */
149 static int filter_trees_update_omits(
151 struct oidset
*omits
,
158 return oidset_remove(omits
, &obj
->oid
);
160 return oidset_insert(omits
, &obj
->oid
);
163 static enum list_objects_filter_result
filter_trees_depth(
164 struct repository
*r UNUSED
,
165 enum list_objects_filter_situation filter_situation
,
167 const char *pathname UNUSED
,
168 const char *filename UNUSED
,
169 struct oidset
*omits
,
172 struct filter_trees_depth_data
*filter_data
= filter_data_
;
173 struct seen_map_entry
*seen_info
;
174 int include_it
= filter_data
->current_depth
<
175 filter_data
->exclude_depth
;
180 * Note that we do not use _MARK_SEEN in order to allow re-traversal in
181 * case we encounter a tree or blob again at a shallower depth.
184 switch (filter_situation
) {
186 BUG("unknown filter_situation: %d", filter_situation
);
189 assert(obj
->type
== OBJ_TAG
);
190 /* always include all tag objects */
191 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
194 assert(obj
->type
== OBJ_COMMIT
);
195 /* always include all commit objects */
196 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
199 assert(obj
->type
== OBJ_TREE
);
200 filter_data
->current_depth
--;
204 filter_trees_update_omits(obj
, omits
, include_it
);
205 return include_it
? LOFR_MARK_SEEN
| LOFR_DO_SHOW
: LOFR_ZERO
;
207 case LOFS_BEGIN_TREE
:
208 seen_info
= oidmap_get(
209 &filter_data
->seen_at_depth
, &obj
->oid
);
211 CALLOC_ARRAY(seen_info
, 1);
212 oidcpy(&seen_info
->base
.oid
, &obj
->oid
);
213 seen_info
->depth
= filter_data
->current_depth
;
214 oidmap_put(&filter_data
->seen_at_depth
, seen_info
);
218 filter_data
->current_depth
>= seen_info
->depth
;
222 filter_res
= LOFR_SKIP_TREE
;
224 int been_omitted
= filter_trees_update_omits(
225 obj
, omits
, include_it
);
226 seen_info
->depth
= filter_data
->current_depth
;
229 filter_res
= LOFR_DO_SHOW
;
230 else if (omits
&& !been_omitted
)
232 * Must update omit information of children
233 * recursively; they have not been omitted yet.
235 filter_res
= LOFR_ZERO
;
237 filter_res
= LOFR_SKIP_TREE
;
240 filter_data
->current_depth
++;
245 static void filter_trees_free(void *filter_data
) {
246 struct filter_trees_depth_data
*d
= filter_data
;
249 oidmap_free(&d
->seen_at_depth
, 1);
253 static void filter_trees_depth__init(
254 struct list_objects_filter_options
*filter_options
,
255 struct filter
*filter
)
257 struct filter_trees_depth_data
*d
= xcalloc(1, sizeof(*d
));
258 oidmap_init(&d
->seen_at_depth
, 0);
259 d
->exclude_depth
= filter_options
->tree_exclude_depth
;
260 d
->current_depth
= 0;
262 filter
->filter_data
= d
;
263 filter
->filter_object_fn
= filter_trees_depth
;
264 filter
->free_fn
= filter_trees_free
;
268 * A filter for list-objects to omit large blobs.
269 * And to OPTIONALLY collect a list of the omitted OIDs.
271 struct filter_blobs_limit_data
{
272 unsigned long max_bytes
;
275 static enum list_objects_filter_result
filter_blobs_limit(
276 struct repository
*r
,
277 enum list_objects_filter_situation filter_situation
,
279 const char *pathname UNUSED
,
280 const char *filename UNUSED
,
281 struct oidset
*omits
,
284 struct filter_blobs_limit_data
*filter_data
= filter_data_
;
285 unsigned long object_length
;
288 switch (filter_situation
) {
290 BUG("unknown filter_situation: %d", filter_situation
);
293 assert(obj
->type
== OBJ_TAG
);
294 /* always include all tag objects */
295 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
298 assert(obj
->type
== OBJ_COMMIT
);
299 /* always include all commit objects */
300 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
302 case LOFS_BEGIN_TREE
:
303 assert(obj
->type
== OBJ_TREE
);
304 /* always include all tree objects */
305 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
308 assert(obj
->type
== OBJ_TREE
);
312 assert(obj
->type
== OBJ_BLOB
);
313 assert((obj
->flags
& SEEN
) == 0);
315 t
= oid_object_info(r
, &obj
->oid
, &object_length
);
316 if (t
!= OBJ_BLOB
) { /* probably OBJ_NONE */
318 * We DO NOT have the blob locally, so we cannot
319 * apply the size filter criteria. Be conservative
320 * and force show it (and let the caller deal with
326 if (object_length
< filter_data
->max_bytes
)
330 oidset_insert(omits
, &obj
->oid
);
331 return LOFR_MARK_SEEN
; /* but not LOFR_DO_SHOW (hard omit) */
336 oidset_remove(omits
, &obj
->oid
);
337 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
340 static void filter_blobs_limit__init(
341 struct list_objects_filter_options
*filter_options
,
342 struct filter
*filter
)
344 struct filter_blobs_limit_data
*d
= xcalloc(1, sizeof(*d
));
345 d
->max_bytes
= filter_options
->blob_limit_value
;
347 filter
->filter_data
= d
;
348 filter
->filter_object_fn
= filter_blobs_limit
;
349 filter
->free_fn
= free
;
353 * A filter driven by a sparse-checkout specification to only
354 * include blobs that a sparse checkout would populate.
356 * The sparse-checkout spec can be loaded from a blob with the
357 * given OID or from a local pathname. We allow an OID because
358 * the repo may be bare or we may be doing the filtering on the
363 * default_match is the usual default include/exclude value that
364 * should be inherited as we recurse into directories based
365 * upon pattern matching of the directory itself or of a
366 * containing directory.
368 enum pattern_match_result default_match
;
371 * 1 if the directory (recursively) contains any provisionally
374 * 0 if everything (recursively) contained in this directory
375 * has been explicitly included (SHOWN) in the result and
376 * the directory may be short-cut later in the traversal.
378 unsigned child_prov_omit
: 1;
381 struct filter_sparse_data
{
382 struct pattern_list pl
;
385 struct frame
*array_frame
;
388 static enum list_objects_filter_result
filter_sparse(
389 struct repository
*r
,
390 enum list_objects_filter_situation filter_situation
,
392 const char *pathname
,
393 const char *filename
,
394 struct oidset
*omits
,
397 struct filter_sparse_data
*filter_data
= filter_data_
;
400 enum pattern_match_result match
;
402 switch (filter_situation
) {
404 BUG("unknown filter_situation: %d", filter_situation
);
407 assert(obj
->type
== OBJ_TAG
);
408 /* always include all tag objects */
409 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
412 assert(obj
->type
== OBJ_COMMIT
);
413 /* always include all commit objects */
414 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
416 case LOFS_BEGIN_TREE
:
417 assert(obj
->type
== OBJ_TREE
);
419 match
= path_matches_pattern_list(pathname
, strlen(pathname
),
420 filename
, &dtype
, &filter_data
->pl
,
422 if (match
== UNDECIDED
)
423 match
= filter_data
->array_frame
[filter_data
->nr
- 1].default_match
;
425 ALLOC_GROW(filter_data
->array_frame
, filter_data
->nr
+ 1,
427 filter_data
->array_frame
[filter_data
->nr
].default_match
= match
;
428 filter_data
->array_frame
[filter_data
->nr
].child_prov_omit
= 0;
432 * A directory with this tree OID may appear in multiple
433 * places in the tree. (Think of a directory move or copy,
434 * with no other changes, so the OID is the same, but the
435 * full pathnames of objects within this directory are new
436 * and may match is_excluded() patterns differently.)
437 * So we cannot mark this directory as SEEN (yet), since
438 * that will prevent process_tree() from revisiting this
439 * tree object with other pathname prefixes.
441 * Only _DO_SHOW the tree object the first time we visit
444 * We always show all tree objects. A future optimization
445 * may want to attempt to narrow this.
447 if (obj
->flags
& FILTER_SHOWN_BUT_REVISIT
)
449 obj
->flags
|= FILTER_SHOWN_BUT_REVISIT
;
453 assert(obj
->type
== OBJ_TREE
);
454 assert(filter_data
->nr
> 1);
456 frame
= &filter_data
->array_frame
[--filter_data
->nr
];
459 * Tell our parent directory if any of our children were
460 * provisionally omitted.
462 filter_data
->array_frame
[filter_data
->nr
- 1].child_prov_omit
|=
463 frame
->child_prov_omit
;
466 * If there are NO provisionally omitted child objects (ALL child
467 * objects in this folder were INCLUDED), then we can mark the
468 * folder as SEEN (so we will not have to revisit it again).
470 if (!frame
->child_prov_omit
)
471 return LOFR_MARK_SEEN
;
475 assert(obj
->type
== OBJ_BLOB
);
476 assert((obj
->flags
& SEEN
) == 0);
478 frame
= &filter_data
->array_frame
[filter_data
->nr
- 1];
481 match
= path_matches_pattern_list(pathname
, strlen(pathname
),
482 filename
, &dtype
, &filter_data
->pl
,
484 if (match
== UNDECIDED
)
485 match
= frame
->default_match
;
486 if (match
== MATCHED
) {
488 oidset_remove(omits
, &obj
->oid
);
489 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
493 * Provisionally omit it. We've already established that
494 * this pathname is not in the sparse-checkout specification
495 * with the CURRENT pathname, so we *WANT* to omit this blob.
497 * However, a pathname elsewhere in the tree may also
498 * reference this same blob, so we cannot reject it yet.
499 * Leave the LOFR_ bits unset so that if the blob appears
500 * again in the traversal, we will be asked again.
503 oidset_insert(omits
, &obj
->oid
);
506 * Remember that at least 1 blob in this tree was
507 * provisionally omitted. This prevents us from short
508 * cutting the tree in future iterations.
510 frame
->child_prov_omit
= 1;
516 static void filter_sparse_free(void *filter_data
)
518 struct filter_sparse_data
*d
= filter_data
;
519 clear_pattern_list(&d
->pl
);
520 free(d
->array_frame
);
524 static void filter_sparse_oid__init(
525 struct list_objects_filter_options
*filter_options
,
526 struct filter
*filter
)
528 struct filter_sparse_data
*d
= xcalloc(1, sizeof(*d
));
529 struct object_context oc
;
530 struct object_id sparse_oid
;
532 if (get_oid_with_context(the_repository
,
533 filter_options
->sparse_oid_name
,
534 GET_OID_BLOB
, &sparse_oid
, &oc
))
535 die(_("unable to access sparse blob in '%s'"),
536 filter_options
->sparse_oid_name
);
537 if (add_patterns_from_blob_to_list(&sparse_oid
, "", 0, &d
->pl
) < 0)
538 die(_("unable to parse sparse filter data in %s"),
539 oid_to_hex(&sparse_oid
));
541 ALLOC_GROW(d
->array_frame
, d
->nr
+ 1, d
->alloc
);
542 d
->array_frame
[d
->nr
].default_match
= 0; /* default to include */
543 d
->array_frame
[d
->nr
].child_prov_omit
= 0;
546 filter
->filter_data
= d
;
547 filter
->filter_object_fn
= filter_sparse
;
548 filter
->free_fn
= filter_sparse_free
;
552 * A filter for list-objects to show only objects of one given type.
553 * It does not collect a list of the omitted OIDs.
555 struct filter_object_type_data
{
556 enum object_type object_type
;
559 static enum list_objects_filter_result
filter_object_type(
560 struct repository
*r UNUSED
,
561 enum list_objects_filter_situation filter_situation
,
563 const char *pathname UNUSED
,
564 const char *filename UNUSED
,
565 struct oidset
*omits UNUSED
,
568 struct filter_object_type_data
*filter_data
= filter_data_
;
570 switch (filter_situation
) {
572 BUG("unknown filter_situation: %d", filter_situation
);
575 assert(obj
->type
== OBJ_TAG
);
576 if (filter_data
->object_type
== OBJ_TAG
)
577 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
578 return LOFR_MARK_SEEN
;
581 assert(obj
->type
== OBJ_COMMIT
);
582 if (filter_data
->object_type
== OBJ_COMMIT
)
583 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
584 return LOFR_MARK_SEEN
;
586 case LOFS_BEGIN_TREE
:
587 assert(obj
->type
== OBJ_TREE
);
590 * If we only want to show commits or tags, then there is no
591 * need to walk down trees.
593 if (filter_data
->object_type
== OBJ_COMMIT
||
594 filter_data
->object_type
== OBJ_TAG
)
595 return LOFR_SKIP_TREE
;
597 if (filter_data
->object_type
== OBJ_TREE
)
598 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
600 return LOFR_MARK_SEEN
;
603 assert(obj
->type
== OBJ_BLOB
);
605 if (filter_data
->object_type
== OBJ_BLOB
)
606 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
607 return LOFR_MARK_SEEN
;
614 static void filter_object_type__init(
615 struct list_objects_filter_options
*filter_options
,
616 struct filter
*filter
)
618 struct filter_object_type_data
*d
= xcalloc(1, sizeof(*d
));
619 d
->object_type
= filter_options
->object_type
;
621 filter
->filter_data
= d
;
622 filter
->filter_object_fn
= filter_object_type
;
623 filter
->free_fn
= free
;
626 /* A filter which only shows objects shown by all sub-filters. */
627 struct combine_filter_data
{
628 struct subfilter
*sub
;
632 static enum list_objects_filter_result
process_subfilter(
633 struct repository
*r
,
634 enum list_objects_filter_situation filter_situation
,
636 const char *pathname
,
637 const char *filename
,
638 struct subfilter
*sub
)
640 enum list_objects_filter_result result
;
643 * Check and update is_skipping_tree before oidset_contains so
644 * that is_skipping_tree gets unset even when the object is
645 * marked as seen. As of this writing, no filter uses
646 * LOFR_MARK_SEEN on trees that also uses LOFR_SKIP_TREE, so the
647 * ordering is only theoretically important. Be cautious if you
648 * change the order of the below checks and more filters have
651 if (sub
->is_skipping_tree
) {
652 if (filter_situation
== LOFS_END_TREE
&&
653 oideq(&obj
->oid
, &sub
->skip_tree
))
654 sub
->is_skipping_tree
= 0;
658 if (oidset_contains(&sub
->seen
, &obj
->oid
))
661 result
= list_objects_filter__filter_object(
662 r
, filter_situation
, obj
, pathname
, filename
, sub
->filter
);
664 if (result
& LOFR_MARK_SEEN
)
665 oidset_insert(&sub
->seen
, &obj
->oid
);
667 if (result
& LOFR_SKIP_TREE
) {
668 sub
->is_skipping_tree
= 1;
669 sub
->skip_tree
= obj
->oid
;
675 static enum list_objects_filter_result
filter_combine(
676 struct repository
*r
,
677 enum list_objects_filter_situation filter_situation
,
679 const char *pathname
,
680 const char *filename
,
681 struct oidset
*omits UNUSED
,
684 struct combine_filter_data
*d
= filter_data
;
685 enum list_objects_filter_result combined_result
=
686 LOFR_DO_SHOW
| LOFR_MARK_SEEN
| LOFR_SKIP_TREE
;
689 for (sub
= 0; sub
< d
->nr
; sub
++) {
690 enum list_objects_filter_result sub_result
= process_subfilter(
691 r
, filter_situation
, obj
, pathname
, filename
,
693 if (!(sub_result
& LOFR_DO_SHOW
))
694 combined_result
&= ~LOFR_DO_SHOW
;
695 if (!(sub_result
& LOFR_MARK_SEEN
))
696 combined_result
&= ~LOFR_MARK_SEEN
;
697 if (!d
->sub
[sub
].is_skipping_tree
)
698 combined_result
&= ~LOFR_SKIP_TREE
;
701 return combined_result
;
704 static void filter_combine__free(void *filter_data
)
706 struct combine_filter_data
*d
= filter_data
;
708 for (sub
= 0; sub
< d
->nr
; sub
++) {
709 list_objects_filter__free(d
->sub
[sub
].filter
);
710 oidset_clear(&d
->sub
[sub
].seen
);
711 if (d
->sub
[sub
].omits
.set
.size
)
712 BUG("expected oidset to be cleared already");
718 static void add_all(struct oidset
*dest
, struct oidset
*src
) {
719 struct oidset_iter iter
;
720 struct object_id
*src_oid
;
722 oidset_iter_init(src
, &iter
);
723 while ((src_oid
= oidset_iter_next(&iter
)) != NULL
)
724 oidset_insert(dest
, src_oid
);
727 static void filter_combine__finalize_omits(
728 struct oidset
*omits
,
731 struct combine_filter_data
*d
= filter_data
;
734 for (sub
= 0; sub
< d
->nr
; sub
++) {
735 add_all(omits
, &d
->sub
[sub
].omits
);
736 oidset_clear(&d
->sub
[sub
].omits
);
740 static void filter_combine__init(
741 struct list_objects_filter_options
*filter_options
,
742 struct filter
* filter
)
744 struct combine_filter_data
*d
= xcalloc(1, sizeof(*d
));
747 d
->nr
= filter_options
->sub_nr
;
748 CALLOC_ARRAY(d
->sub
, d
->nr
);
749 for (sub
= 0; sub
< d
->nr
; sub
++)
750 d
->sub
[sub
].filter
= list_objects_filter__init(
751 filter
->omits
? &d
->sub
[sub
].omits
: NULL
,
752 &filter_options
->sub
[sub
]);
754 filter
->filter_data
= d
;
755 filter
->filter_object_fn
= filter_combine
;
756 filter
->free_fn
= filter_combine__free
;
757 filter
->finalize_omits_fn
= filter_combine__finalize_omits
;
760 typedef void (*filter_init_fn
)(
761 struct list_objects_filter_options
*filter_options
,
762 struct filter
*filter
);
765 * Must match "enum list_objects_filter_choice".
767 static filter_init_fn s_filters
[] = {
769 filter_blobs_none__init
,
770 filter_blobs_limit__init
,
771 filter_trees_depth__init
,
772 filter_sparse_oid__init
,
773 filter_object_type__init
,
774 filter_combine__init
,
777 struct filter
*list_objects_filter__init(
778 struct oidset
*omitted
,
779 struct list_objects_filter_options
*filter_options
)
781 struct filter
*filter
;
782 filter_init_fn init_fn
;
784 assert((sizeof(s_filters
) / sizeof(s_filters
[0])) == LOFC__COUNT
);
789 if (filter_options
->choice
>= LOFC__COUNT
)
790 BUG("invalid list-objects filter choice: %d",
791 filter_options
->choice
);
793 init_fn
= s_filters
[filter_options
->choice
];
797 CALLOC_ARRAY(filter
, 1);
798 filter
->omits
= omitted
;
799 init_fn(filter_options
, filter
);
803 enum list_objects_filter_result
list_objects_filter__filter_object(
804 struct repository
*r
,
805 enum list_objects_filter_situation filter_situation
,
807 const char *pathname
,
808 const char *filename
,
809 struct filter
*filter
)
811 if (filter
&& (obj
->flags
& NOT_USER_GIVEN
))
812 return filter
->filter_object_fn(r
, filter_situation
, obj
,
815 filter
->filter_data
);
817 * No filter is active or user gave object explicitly. In this case,
818 * always show the object (except when LOFS_END_TREE, since this tree
819 * had already been shown when LOFS_BEGIN_TREE).
821 if (filter_situation
== LOFS_END_TREE
)
823 return LOFR_MARK_SEEN
| LOFR_DO_SHOW
;
826 void list_objects_filter__free(struct filter
*filter
)
830 if (filter
->finalize_omits_fn
&& filter
->omits
)
831 filter
->finalize_omits_fn(filter
->omits
, filter
->filter_data
);
832 filter
->free_fn(filter
->filter_data
);