/*
 * Tuning parameters for Bloom filters. A pointer to this struct is
 * taken by the key-filling, filter-building, filter-lookup and
 * membership-test functions declared in this file.
 */
struct bloom_filter_settings {
	/*
	 * The version of the hashing technique being used.
	 * Only version = 1 is currently supported, which is
	 * the seeded murmur3 hashing technique.
	 */
	uint32_t hash_version;

	/*
	 * The number of times a path is hashed, i.e. the
	 * number of bit positions that cumulatively
	 * determine whether a path is present in the
	 * Bloom filter.
	 *
	 * NOTE(review): this member was missing from the damaged
	 * text; it is restored because the comment above describes
	 * it and DEFAULT_BLOOM_FILTER_SETTINGS supplies four
	 * positional values. Confirm the member name.
	 */
	uint32_t num_hashes;

	/*
	 * The minimum number of bits per entry in the Bloom
	 * filter. If the filter contains 'n' entries, then
	 * filter size is the minimum number of 8-bit words
	 * that contain n*b bits.
	 */
	uint32_t bits_per_entry;

	/*
	 * The maximum number of changed paths per commit
	 * before declaring a Bloom filter to be too-large.
	 *
	 * Not written to the commit-graph file.
	 */
	uint32_t max_changed_paths;
};
/* Default value for bloom_filter_settings.max_changed_paths. */
#define DEFAULT_BLOOM_MAX_CHANGES 512

/*
 * Default initializer for struct bloom_filter_settings. The values are
 * positional, i.e. they are assigned in member declaration order; the
 * last one is the changed-paths cap defined above.
 */
#define DEFAULT_BLOOM_FILTER_SETTINGS { 1, 7, 10, DEFAULT_BLOOM_MAX_CHANGES }

/* Filter sizes are counted in 8-bit words. */
#define BITS_PER_WORD 8

/*
 * Size in bytes of three uint32_t values. The expansion is fully
 * parenthesized so that it behaves as a single operand inside larger
 * expressions (e.g. `len / BLOOMDATA_CHUNK_HEADER_SIZE`).
 */
#define BLOOMDATA_CHUNK_HEADER_SIZE (3 * sizeof(uint32_t))
47 * A bloom_filter struct represents a data segment to
48 * use when testing hash values. The 'len' member
49 * dictates how many entries are stored in
58 * A bloom_key represents the k hash values for a
59 * given string. These can be precomputed and
60 * stored in a bloom_key for re-use when testing
61 * against a bloom_filter. The number of hashes is
62 * given by the Bloom filter settings and is the same
63 * for all Bloom filters and keys interacting with
64 * the loaded version of the commit graph file and
65 * the Bloom data chunks.
/*
 * Calculate the murmur3 32-bit hash value for the given data
 * using the given seed.
 * Produces a uniformly distributed hash value.
 * Not considered to be cryptographically secure.
 * Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
 *
 * `data` points at the input and `len` gives its length; the hash is
 * returned as a uint32_t.
 */
uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len);
/*
 * Fill `key` for the string `data` according to `settings`.
 *
 * NOTE(review): presumably this precomputes the hash values for
 * `data` and stores them in `key` for later reuse against Bloom
 * filters -- confirm against the implementation. Neither `data` nor
 * `settings` is modified through the pointers passed here (both are
 * const-qualified).
 */
void fill_bloom_key(const char *data,
		    struct bloom_key *key,
		    const struct bloom_filter_settings *settings);
/*
 * Clear `key`.
 *
 * NOTE(review): presumably the counterpart of fill_bloom_key() that
 * releases whatever it stored in `key` -- confirm against the
 * implementation.
 */
void clear_bloom_key(struct bloom_key *key);
/*
 * Add `key` to `filter` according to `settings`.
 *
 * Only `filter` is writable here; `key` and `settings` are passed as
 * pointers to const.
 *
 * NOTE(review): presumably sets the bit positions derived from the
 * hash values in `key` -- confirm against the implementation.
 */
void add_key_to_filter(const struct bloom_key *key,
		       struct bloom_filter *filter,
		       const struct bloom_filter_settings *settings);
/*
 * Initialize the Bloom filter machinery. Takes no arguments and
 * returns nothing.
 *
 * NOTE(review): what state is initialized is not visible from this
 * declaration -- see the implementation.
 */
void init_bloom_filters(void);
/*
 * Status flags reported through the `computed` out-parameter of
 * get_or_compute_bloom_filter(). Each enumerator occupies its own
 * bit, so values can be combined with bitwise OR.
 *
 * NOTE(review): the meaning of each flag is inferred from its name
 * (e.g. BLOOM_TRUNC_LARGE presumably marks a filter that was
 * truncated for exceeding max_changed_paths) -- confirm against the
 * implementation.
 */
enum bloom_filter_computed {
	BLOOM_NOT_COMPUTED = (1 << 0),
	BLOOM_COMPUTED     = (1 << 1),
	BLOOM_TRUNC_LARGE  = (1 << 2),
	BLOOM_TRUNC_EMPTY  = (1 << 3),
};
/*
 * Return the Bloom filter for `c` in repository `r`.
 *
 * `settings` and `computed` may be NULL when `compute_if_not_present`
 * is 0; that is exactly how the get_bloom_filter() convenience macro
 * calls this function.
 *
 * NOTE(review): the second parameter was missing from the damaged
 * declaration. It is restored because get_bloom_filter(r, c) passes
 * five arguments with `c` in second position; the type
 * `struct commit *` is an assumption -- confirm.
 *
 * NOTE(review): presumably a missing filter is computed only when
 * `compute_if_not_present` is non-zero, and `*computed` (when
 * non-NULL) receives `enum bloom_filter_computed` flags describing
 * the outcome -- confirm against the implementation.
 */
struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
						 struct commit *c,
						 int compute_if_not_present,
						 const struct bloom_filter_settings *settings,
						 enum bloom_filter_computed *computed);
/*
 * Convenience wrapper around get_or_compute_bloom_filter(): passes
 * `r` and `c` through, with compute_if_not_present = 0 and NULL for
 * both `settings` and `computed`.
 */
#define get_bloom_filter(r, c) get_or_compute_bloom_filter( \
	(r), (c), 0, NULL, NULL)
/*
 * Test `key` against `filter` using `settings`. None of the three
 * arguments is modified (all are pointers to const).
 *
 * NOTE(review): the meaning of the int result is not visible from
 * this declaration; presumably non-zero means the key may be present
 * and zero means it is definitely absent, possibly with a distinct
 * value when no filter is available -- confirm against the
 * implementation before relying on it.
 */
int bloom_filter_contains(const struct bloom_filter *filter,
			  const struct bloom_key *key,
			  const struct bloom_filter_settings *settings);