Merge branch 'jk/arith-expansion-coding-guidelines'
[git/debian.git] / bloom.h
blobb935186425d36d501a5cae1229338327e39bf6d1
1 #ifndef BLOOM_H
2 #define BLOOM_H
/*
 * Forward declarations: this header only passes pointers to these
 * types, so their full definitions are not needed here.
 */
struct commit;
struct repository;
/*
 * Parameters that describe how Bloom filters are hashed and sized.
 */
struct bloom_filter_settings {
	/*
	 * The version of the hashing technique being used.
	 * We currently only support version = 1 which is
	 * the seeded murmur3 hashing technique implemented
	 * in bloom.c.
	 */
	uint32_t hash_version;

	/*
	 * The number of times a path is hashed, i.e. the
	 * number of bit positions that cumulatively
	 * determine whether a path is present in the
	 * Bloom filter.
	 */
	uint32_t num_hashes;

	/*
	 * The minimum number of bits per entry in the Bloom
	 * filter. If the filter contains 'n' entries, then
	 * filter size is the minimum number of 8-bit words
	 * that contain n*b bits, where 'b' is this value.
	 */
	uint32_t bits_per_entry;
};
/*
 * Brace initializer for a struct bloom_filter_settings, in member
 * order: hash_version = 1, num_hashes = 7, bits_per_entry = 10.
 */
#define DEFAULT_BLOOM_FILTER_SETTINGS { 1, 7, 10 }

/* Number of bits in one filter word (filters are sized in 8-bit words). */
#define BITS_PER_WORD 8

/*
 * Size in bytes of three uint32_t values.
 * NOTE(review): presumably the three settings fields stored at the
 * start of the Bloom data chunk -- confirm against the chunk writer.
 *
 * The expansion is parenthesized so that it behaves as a single
 * operand in expressions such as 'x / BLOOMDATA_CHUNK_HEADER_SIZE'.
 */
#define BLOOMDATA_CHUNK_HEADER_SIZE (3 * sizeof(uint32_t))
/*
 * A bloom_filter struct represents a data segment to
 * use when testing hash values. The 'len' member
 * dictates how many entries are stored in
 * 'data'.
 */
struct bloom_filter {
	unsigned char *data;
	size_t len;
};
/*
 * A bloom_key represents the k hash values for a
 * given string. These can be precomputed and
 * stored in a bloom_key for re-use when testing
 * against a bloom_filter. The number of hashes is
 * given by the Bloom filter settings and is the same
 * for all Bloom filters and keys interacting with
 * the loaded version of the commit graph file and
 * the Bloom data chunks.
 */
struct bloom_key {
	uint32_t *hashes;
};
/*
 * Calculate the murmur3 32-bit hash value for the given data
 * using the given seed.
 * Produces a uniformly distributed hash value.
 * Not considered to be cryptographically secure.
 * Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
 *
 * 'data' points at 'len' bytes to hash; the 32-bit hash is returned.
 */
uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len);
/*
 * Fill 'key' from the 'len' bytes at 'data', using 'settings'.
 *
 * NOTE(review): presumably stores settings->num_hashes hash values in
 * key->hashes; the implementation is not visible from this header, so
 * allocation and ownership of key->hashes should be confirmed there.
 */
void fill_bloom_key(const char *data,
		    size_t len,
		    struct bloom_key *key,
		    const struct bloom_filter_settings *settings);
/*
 * Add 'key' to 'filter' according to 'settings'. Only 'filter' is
 * writable through this interface; 'key' and 'settings' are read-only.
 *
 * NOTE(review): presumably sets the bit positions derived from
 * key->hashes in filter->data -- confirm against the implementation.
 */
void add_key_to_filter(const struct bloom_key *key,
		       struct bloom_filter *filter,
		       const struct bloom_filter_settings *settings);
/*
 * Initialize the Bloom filter machinery. Takes no arguments and
 * returns nothing.
 *
 * NOTE(review): what state is set up, and whether this must precede
 * get_bloom_filter(), is not visible from this header -- confirm.
 */
void init_bloom_filters(void);
/*
 * Return the Bloom filter associated with commit 'c' in repository 'r'.
 *
 * NOTE(review): judging by its name, a non-zero 'compute_if_not_present'
 * asks for the filter to be computed when none exists yet; the result
 * in the opposite case (possibly NULL) and the ownership of the
 * returned pointer are not visible from this header -- confirm.
 */
struct bloom_filter *get_bloom_filter(struct repository *r,
				      struct commit *c,
				      int compute_if_not_present);
/*
 * Test 'key' against 'filter' using 'settings'. None of the arguments
 * are modified through this interface (all are pointers to const).
 *
 * NOTE(review): the return convention is not visible from this header;
 * presumably non-zero means the key may be present and zero means it
 * is definitely absent -- confirm against the implementation.
 */
int bloom_filter_contains(const struct bloom_filter *filter,
			  const struct bloom_key *key,
			  const struct bloom_filter_settings *settings);
90 #endif