1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/rappor/bloom_filter.h"
7 #include "base/logging.h"
8 #include "third_party/smhasher/src/City.h"
// Hashes |str| with the given |seed| and returns the result as a uint32_t.
//
// The raw bytes of |str| (str.data(), str.size()) and |seed| are forwarded
// unchanged to CityHash64WithSeed.
//
// NOTE(review): the callee's name suggests a 64-bit result; if so, the
// uint32_t return type narrows it implicitly and only the low 32 bits are
// kept -- confirm against City.h.
14 uint32_t ComputeHash(const std::string
& str
, uint32_t seed
) {
15 // Using CityHash here because we have support for it in Dremel. Many hash
16 // functions, such as MD5, SHA1, or Murmur, would probably also work.
17 return CityHash64WithSeed(str
.data(), str
.size(), seed
);
// Constructs a BloomFilter.
//
// |bytes_size| must be greater than zero; this is enforced with a DCHECK.
// |hash_function_count| is stored in hash_function_count_.
// |hash_seed_offset| is stored in hash_seed_offset_.
//
// NOTE(review): presumably |bytes_size| also determines the size of bytes_
// (other methods take bytes_.size() as a modulus) -- confirm against the
// full initializer list.
22 BloomFilter::BloomFilter(uint32_t bytes_size
,
23 uint32_t hash_function_count
,
24 uint32_t hash_seed_offset
)
26 hash_function_count_(hash_function_count
),
27 hash_seed_offset_(hash_seed_offset
) {
// A zero-sized filter is rejected up front.
28 DCHECK_GT(bytes_size
, 0u);
// Destructor. The body is empty; no explicit cleanup is performed here.
31 BloomFilter::~BloomFilter() {}
// Sets the bits of the filter that correspond to |str|.
//
// For each i in [0, hash_function_count_), |str| is hashed with the seed
// hash_seed_offset_ + i and one bit of bytes_ is ORed in:
//   byte: (hash / 8) % bytes_.size()
//   bit:  hash % 8
33 void BloomFilter::SetString(const std::string
& str
) {
// First pass walks every element of bytes_.
// NOTE(review): presumably this resets each byte to zero so the filter
// reflects only |str| afterwards -- confirm.
34 for (size_t i
= 0; i
< bytes_
.size(); ++i
) {
// One hash, and therefore one bit, per configured hash function; the loop
// index is added to the seed offset to get a distinct seed each time.
37 for (size_t i
= 0; i
< hash_function_count_
; ++i
) {
38 uint32_t index
= ComputeHash(str
, hash_seed_offset_
+ i
);
39 // Note that the "bytes" are uint8_t, so they are always 8-bits.
// The modulo wraps the byte position into the bounds of bytes_.
40 uint32_t byte_index
= (index
/ 8) % bytes_
.size();
41 uint32_t bit_index
= index
% 8;
// OR keeps any bits already set by earlier iterations.
42 bytes_
[byte_index
] |= 1 << bit_index
;
// Test-only hook that takes explicit contents for the filter.
//
// |bytes| must have exactly as many elements as bytes_; this is enforced
// with a DCHECK.
//
// NOTE(review): presumably each element of |bytes| is copied into the
// matching position of bytes_ -- confirm.
46 void BloomFilter::SetBytesForTesting(const ByteVector
& bytes
) {
47 DCHECK_EQ(bytes_
.size(), bytes
.size());
// Walks bytes_ by index; the size check above means the same index is also
// in range for |bytes|.
48 for (size_t i
= 0; i
< bytes_
.size(); ++i
) {
// Computes the Bloom filter bits for |str| packed into a single uint64_t,
// without needing a BloomFilter object.
//
// |bytes_size| is the filter size in bytes and must be at most 8 (DCHECK) so
// that every bit position fits in the 64-bit result.
// |hash_function_count| is the number of hashes, i.e. the number of bits
// ORed in (distinct hashes may land on the same bit).
// |hash_seed_offset| is added to the loop index to form each hash seed.
//
// Bit position hash % (bytes_size * 8) is the same position that
// BloomFilter::SetString addresses as byte (hash / 8) % size, bit hash % 8.
//
// NOTE(review): only the upper bound of |bytes_size| is checked here. With
// bytes_size == 0, bits_size is 0 and `index % bits_size` is a modulo by
// zero -- confirm callers never pass 0, or add a DCHECK_GT.
55 uint64_t GetBloomBits(uint32_t bytes_size
,
56 uint32_t hash_function_count
,
57 uint32_t hash_seed_offset
,
58 const std::string
& str
) {
59 // Make sure result fits in uint64.
60 DCHECK_LE(bytes_size
, 8u);
// Total number of addressable bits in a filter of |bytes_size| bytes.
62 const uint32_t bits_size
= bytes_size
* 8;
63 for (size_t i
= 0; i
< hash_function_count
; ++i
) {
64 uint32_t index
= ComputeHash(str
, hash_seed_offset
+ i
);
// 1ULL keeps the shift in 64-bit arithmetic, so positions 32..63 are valid.
65 output
|= 1ULL << uint64_t(index
% bits_size
);
70 } // namespace internal