1 // Copyright (c) 2012-2017 The Bitcoin Core developers
2 // Distributed under the MIT software license, see the accompanying
3 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
7 #include <primitives/transaction.h>
9 #include <script/script.h>
10 #include <script/standard.h>
18 #define LN2SQUARED 0.4804530139182014246671025263266649717305529515945455
19 #define LN2 0.6931471805599453094172321214581765680755001343602552
21 CBloomFilter::CBloomFilter(const unsigned int nElements
, const double nFPRate
, const unsigned int nTweakIn
, unsigned char nFlagsIn
) :
23 * The ideal size for a bloom filter with a given number of elements and false positive rate is:
24 * - nElements * log(fp rate) / ln(2)^2
25 * We ignore filter parameters which will create a bloom filter larger than the protocol limits
27 vData(std::min((unsigned int)(-1 / LN2SQUARED
* nElements
* log(nFPRate
)), MAX_BLOOM_FILTER_SIZE
* 8) / 8),
29 * The ideal number of hash functions is filter size * ln(2) / number of elements
30 * Again, we ignore filter parameters which will create a bloom filter with more hash functions than the protocol limits
31 * See https://en.wikipedia.org/wiki/Bloom_filter for an explanation of these formulas
35 nHashFuncs(std::min((unsigned int)(vData
.size() * 8 / nElements
* LN2
), MAX_HASH_FUNCS
)),
41 // Private constructor used by CRollingBloomFilter
42 CBloomFilter::CBloomFilter(const unsigned int nElements
, const double nFPRate
, const unsigned int nTweakIn
) :
43 vData((unsigned int)(-1 / LN2SQUARED
* nElements
* log(nFPRate
)) / 8),
46 nHashFuncs((unsigned int)(vData
.size() * 8 / nElements
* LN2
)),
48 nFlags(BLOOM_UPDATE_NONE
)
52 inline unsigned int CBloomFilter::Hash(unsigned int nHashNum
, const std::vector
<unsigned char>& vDataToHash
) const
54 // 0xFBA4C795 chosen as it guarantees a reasonable bit difference between nHashNum values.
55 return MurmurHash3(nHashNum
* 0xFBA4C795 + nTweak
, vDataToHash
) % (vData
.size() * 8);
58 void CBloomFilter::insert(const std::vector
<unsigned char>& vKey
)
62 for (unsigned int i
= 0; i
< nHashFuncs
; i
++)
64 unsigned int nIndex
= Hash(i
, vKey
);
65 // Sets bit nIndex of vData
66 vData
[nIndex
>> 3] |= (1 << (7 & nIndex
));
71 void CBloomFilter::insert(const COutPoint
& outpoint
)
73 CDataStream
stream(SER_NETWORK
, PROTOCOL_VERSION
);
75 std::vector
<unsigned char> data(stream
.begin(), stream
.end());
79 void CBloomFilter::insert(const uint256
& hash
)
81 std::vector
<unsigned char> data(hash
.begin(), hash
.end());
85 bool CBloomFilter::contains(const std::vector
<unsigned char>& vKey
) const
91 for (unsigned int i
= 0; i
< nHashFuncs
; i
++)
93 unsigned int nIndex
= Hash(i
, vKey
);
94 // Checks bit nIndex of vData
95 if (!(vData
[nIndex
>> 3] & (1 << (7 & nIndex
))))
101 bool CBloomFilter::contains(const COutPoint
& outpoint
) const
103 CDataStream
stream(SER_NETWORK
, PROTOCOL_VERSION
);
105 std::vector
<unsigned char> data(stream
.begin(), stream
.end());
106 return contains(data
);
109 bool CBloomFilter::contains(const uint256
& hash
) const
111 std::vector
<unsigned char> data(hash
.begin(), hash
.end());
112 return contains(data
);
115 void CBloomFilter::clear()
117 vData
.assign(vData
.size(),0);
122 void CBloomFilter::reset(const unsigned int nNewTweak
)
128 bool CBloomFilter::IsWithinSizeConstraints() const
130 return vData
.size() <= MAX_BLOOM_FILTER_SIZE
&& nHashFuncs
<= MAX_HASH_FUNCS
;
133 bool CBloomFilter::IsRelevantAndUpdate(const CTransaction
& tx
)
136 // Match if the filter contains the hash of tx
137 // for finding tx when they appear in a block
142 const uint256
& hash
= tx
.GetHash();
146 for (unsigned int i
= 0; i
< tx
.vout
.size(); i
++)
148 const CTxOut
& txout
= tx
.vout
[i
];
149 // Match if the filter contains any arbitrary script data element in any scriptPubKey in tx
150 // If this matches, also add the specific output that was matched.
151 // This means clients don't have to update the filter themselves when a new relevant tx
152 // is discovered in order to find spending transactions, which avoids round-tripping and race conditions.
153 CScript::const_iterator pc
= txout
.scriptPubKey
.begin();
154 std::vector
<unsigned char> data
;
155 while (pc
< txout
.scriptPubKey
.end())
158 if (!txout
.scriptPubKey
.GetOp(pc
, opcode
, data
))
160 if (data
.size() != 0 && contains(data
))
163 if ((nFlags
& BLOOM_UPDATE_MASK
) == BLOOM_UPDATE_ALL
)
164 insert(COutPoint(hash
, i
));
165 else if ((nFlags
& BLOOM_UPDATE_MASK
) == BLOOM_UPDATE_P2PUBKEY_ONLY
)
168 std::vector
<std::vector
<unsigned char> > vSolutions
;
169 if (Solver(txout
.scriptPubKey
, type
, vSolutions
) &&
170 (type
== TX_PUBKEY
|| type
== TX_MULTISIG
))
171 insert(COutPoint(hash
, i
));
181 for (const CTxIn
& txin
: tx
.vin
)
183 // Match if the filter contains an outpoint tx spends
184 if (contains(txin
.prevout
))
187 // Match if the filter contains any arbitrary script data element in any scriptSig in tx
188 CScript::const_iterator pc
= txin
.scriptSig
.begin();
189 std::vector
<unsigned char> data
;
190 while (pc
< txin
.scriptSig
.end())
193 if (!txin
.scriptSig
.GetOp(pc
, opcode
, data
))
195 if (data
.size() != 0 && contains(data
))
203 void CBloomFilter::UpdateEmptyFull()
207 for (unsigned int i
= 0; i
< vData
.size(); i
++)
209 full
&= vData
[i
] == 0xff;
210 empty
&= vData
[i
] == 0;
216 CRollingBloomFilter::CRollingBloomFilter(const unsigned int nElements
, const double fpRate
)
218 double logFpRate
= log(fpRate
);
219 /* The optimal number of hash functions is log(fpRate) / log(0.5), but
220 * restrict it to the range 1-50. */
221 nHashFuncs
= std::max(1, std::min((int)round(logFpRate
/ log(0.5)), 50));
222 /* In this rolling bloom filter, we'll store between 2 and 3 generations of nElements / 2 entries. */
223 nEntriesPerGeneration
= (nElements
+ 1) / 2;
224 uint32_t nMaxElements
= nEntriesPerGeneration
* 3;
225 /* The maximum fpRate = pow(1.0 - exp(-nHashFuncs * nMaxElements / nFilterBits), nHashFuncs)
226 * => pow(fpRate, 1.0 / nHashFuncs) = 1.0 - exp(-nHashFuncs * nMaxElements / nFilterBits)
227 * => 1.0 - pow(fpRate, 1.0 / nHashFuncs) = exp(-nHashFuncs * nMaxElements / nFilterBits)
228 * => log(1.0 - pow(fpRate, 1.0 / nHashFuncs)) = -nHashFuncs * nMaxElements / nFilterBits
229 * => nFilterBits = -nHashFuncs * nMaxElements / log(1.0 - pow(fpRate, 1.0 / nHashFuncs))
230 * => nFilterBits = -nHashFuncs * nMaxElements / log(1.0 - exp(logFpRate / nHashFuncs))
232 uint32_t nFilterBits
= (uint32_t)ceil(-1.0 * nHashFuncs
* nMaxElements
/ log(1.0 - exp(logFpRate
/ nHashFuncs
)));
234 /* For each data element we need to store 2 bits. If both bits are 0, the
235 * bit is treated as unset. If the bits are (01), (10), or (11), the bit is
236 * treated as set in generation 1, 2, or 3 respectively.
237 * These bits are stored in separate integers: position P corresponds to bit
238 * (P & 63) of the integers data[(P >> 6) * 2] and data[(P >> 6) * 2 + 1]. */
239 data
.resize(((nFilterBits
+ 63) / 64) << 1);
243 /* Similar to CBloomFilter::Hash */
244 static inline uint32_t RollingBloomHash(unsigned int nHashNum
, uint32_t nTweak
, const std::vector
<unsigned char>& vDataToHash
) {
245 return MurmurHash3(nHashNum
* 0xFBA4C795 + nTweak
, vDataToHash
);
248 void CRollingBloomFilter::insert(const std::vector
<unsigned char>& vKey
)
250 if (nEntriesThisGeneration
== nEntriesPerGeneration
) {
251 nEntriesThisGeneration
= 0;
253 if (nGeneration
== 4) {
256 uint64_t nGenerationMask1
= 0 - (uint64_t)(nGeneration
& 1);
257 uint64_t nGenerationMask2
= 0 - (uint64_t)(nGeneration
>> 1);
258 /* Wipe old entries that used this generation number. */
259 for (uint32_t p
= 0; p
< data
.size(); p
+= 2) {
260 uint64_t p1
= data
[p
], p2
= data
[p
+ 1];
261 uint64_t mask
= (p1
^ nGenerationMask1
) | (p2
^ nGenerationMask2
);
263 data
[p
+ 1] = p2
& mask
;
266 nEntriesThisGeneration
++;
268 for (int n
= 0; n
< nHashFuncs
; n
++) {
269 uint32_t h
= RollingBloomHash(n
, nTweak
, vKey
);
271 uint32_t pos
= (h
>> 6) % data
.size();
272 /* The lowest bit of pos is ignored, and set to zero for the first bit, and to one for the second. */
273 data
[pos
& ~1] = (data
[pos
& ~1] & ~(((uint64_t)1) << bit
)) | ((uint64_t)(nGeneration
& 1)) << bit
;
274 data
[pos
| 1] = (data
[pos
| 1] & ~(((uint64_t)1) << bit
)) | ((uint64_t)(nGeneration
>> 1)) << bit
;
278 void CRollingBloomFilter::insert(const uint256
& hash
)
280 std::vector
<unsigned char> vData(hash
.begin(), hash
.end());
284 bool CRollingBloomFilter::contains(const std::vector
<unsigned char>& vKey
) const
286 for (int n
= 0; n
< nHashFuncs
; n
++) {
287 uint32_t h
= RollingBloomHash(n
, nTweak
, vKey
);
289 uint32_t pos
= (h
>> 6) % data
.size();
290 /* If the relevant bit is not set in either data[pos & ~1] or data[pos | 1], the filter does not contain vKey */
291 if (!(((data
[pos
& ~1] | data
[pos
| 1]) >> bit
) & 1)) {
298 bool CRollingBloomFilter::contains(const uint256
& hash
) const
300 std::vector
<unsigned char> vData(hash
.begin(), hash
.end());
301 return contains(vData
);
304 void CRollingBloomFilter::reset()
306 nTweak
= GetRand(std::numeric_limits
<unsigned int>::max());
307 nEntriesThisGeneration
= 0;
309 for (std::vector
<uint64_t>::iterator it
= data
.begin(); it
!= data
.end(); it
++) {