Fixed issue #546: Exporting without specifying a zip filename sends all zip output...
[TortoiseGit.git] / src / Utils / QuickHash.h
blob864bddfa4e1236bf91eccab111caed799b668573
1 // TortoiseSVN - a Windows shell extension for easy version control
3 // Copyright (C) 2007-2007 - TortoiseSVN
5 // This program is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU General Public License
7 // as published by the Free Software Foundation; either version 2
8 // of the License, or (at your option) any later version.
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software Foundation,
17 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 #pragma once
22 /**
23 * A quick linear (array) hash index class. It requires HF to
24 * provide the following interface:
26 * value_type data type of the hash
27 * index_type type of the index to store the hash
28 * NO_INDEX an index type to mark empty buckets
30 * operator() value_type -> size_t hash function
31 * value() index_type -> value_type
32 * equal() (value_type, index_type) -> bool
34 * The capacity approximately doubles by each rehash().
35 * Only insertion and lookup are provided. Collisions are
36 * resolved using linear probing.
38 * Use statistics() to monitor the cache performance.
40 template<class HF>
41 class quick_hash
43 public:
45 typedef typename HF::value_type value_type;
46 typedef typename HF::index_type index_type;
48 enum {NO_INDEX = (index_type)(HF::NO_INDEX)};
50 struct statistics_t
52 size_t capacity;
53 size_t used;
54 size_t collisions;
55 size_t max_path;
56 size_t collision_path_sum;
58 statistics_t()
59 : capacity (1)
60 , used (0)
61 , collisions (0)
62 , max_path (1)
63 , collision_path_sum (0)
68 private:
70 class prime_grower
72 private:
74 static const size_t primes[31];
75 statistics_t statistics;
76 size_t index;
78 public:
80 prime_grower()
81 : index (0)
82 , statistics()
84 statistics.capacity = primes[index];
87 size_t capacity() const
89 return statistics.capacity;
92 size_t size() const
94 return statistics.used;
97 size_t collisions() const
99 return statistics.collisions;
102 void inserted_cleanly()
104 ++statistics.used;
107 void inserted_collision (size_t path_size)
109 ++statistics.used;
110 ++statistics.collisions;
111 statistics.collision_path_sum += path_size;
112 if (statistics.max_path <= path_size)
113 statistics.max_path = path_size + 1;
116 void grow()
118 statistics.capacity = primes[++index];
119 statistics.collisions = 0;
120 statistics.used = 0;
121 statistics.collision_path_sum = 0;
122 statistics.max_path = 1;
125 size_t map (size_t hash_value) const
127 return hash_value % capacity();
130 size_t next (size_t index) const
132 return (index + 1381000000) % capacity();
135 const statistics_t& get_statistics() const
137 return statistics;
141 index_type* data;
142 prime_grower grower;
143 HF hf;
145 private:
147 /// check if we're allowed to add new entries to the hash
148 /// without re-hashing.
149 bool should_grow() const
151 // grow, if there are many collisions
152 // or capacity is almost exceeded
153 // There must also be at least one empty entry
155 return grower.size() + grower.collisions() + 1 >= grower.capacity();
158 /// initialize the new array before re-hashing
159 void create_data()
161 size_t new_capacity = grower.capacity();
163 data = new index_type[new_capacity];
164 stdext::unchecked_fill_n (data, new_capacity, NO_INDEX);
167 /// add a value to the hash
168 /// (must not be in it already; hash must not be full)
169 void internal_insert (const value_type& value, index_type index)
171 // first try: un-collisioned insertion
173 size_t bucket = grower.map (hf (value));
174 index_type* target = data + bucket;
176 if (*target == NO_INDEX)
178 *target = index;
179 grower.inserted_cleanly();
181 return;
184 // collision -> look for an empty bucket
186 size_t collision_path_size = 0;
189 bucket = grower.next (bucket);
190 target = data + bucket;
191 ++collision_path_size;
193 while (*target != NO_INDEX);
195 // insert collisioned item
197 *target = index;
198 grower.inserted_collision (collision_path_size);
201 void rehash (index_type* old_data, size_t old_data_size)
203 create_data();
205 for (size_t i = 0; i < old_data_size; ++i)
207 index_type index = old_data[i];
208 if (index != NO_INDEX)
209 internal_insert (hf.value (index), index);
212 delete[] old_data;
215 public:
217 /// construction / destruction
218 quick_hash (const HF& hash_function)
219 : data(NULL)
220 , hf (hash_function)
221 , grower()
223 create_data();
226 quick_hash (const quick_hash& rhs)
227 : data (NULL)
228 , hf (rhs.hf)
229 , grower (rhs.grower)
231 create_data();
232 operator= (rhs);
235 ~quick_hash()
237 delete[] data;
240 /// find the bucket containing the desired value;
241 /// return NO_INDEX if not contained in hash
242 index_type find (const value_type& value) const
244 size_t bucket = grower.map (hf (value));
245 index_type index = data[bucket];
247 while (index != NO_INDEX)
249 // found?
251 if (hf.equal (value, index))
252 break;
254 // collision -> look at next bucket position
256 bucket = grower.next (bucket);
257 index = data[bucket];
260 // either found or not in hash
262 return index;
265 void insert (const value_type& value, index_type index)
267 assert (find (value) == NO_INDEX);
269 if (should_grow())
270 reserve (grower.capacity()+1);
272 internal_insert (value, index);
275 void reserve (size_t min_bucket_count)
277 if (size_t(-1) / sizeof (index_type[4]) > min_bucket_count)
278 min_bucket_count *= 2;
280 index_type* old_data = data;
281 size_t old_data_size = grower.capacity();
283 while (grower.capacity() < min_bucket_count)
284 grower.grow();
286 if (grower.capacity() != old_data_size)
287 rehash (old_data, old_data_size);
290 /// assignment
292 quick_hash& operator=(const quick_hash& rhs)
294 if (grower.capacity() != rhs.grower.capacity())
296 delete[] data;
297 data = new index_type [rhs.grower.capacity()];
300 grower = rhs.grower;
302 stdext::unchecked_copy (rhs.data, rhs.data + rhs.grower.capacity(), data);
304 return *this;
307 /// get rid of all entries
309 void clear()
311 if (grower.size() > 0)
313 delete[] data;
314 grower = prime_grower();
315 create_data();
319 /// efficiently exchange two containers
321 void swap (quick_hash& rhs)
323 std::swap (data, rhs.data);
325 prime_grower temp = grower;
326 grower = rhs.grower;
327 rhs.grower = temp;
330 /// read cache performance statistics
331 const statistics_t& statistics() const
333 return grower.get_statistics();
337 template<class HF>
338 const size_t quick_hash<HF>::prime_grower::primes[31] =
339 {1, 3, 7, 17,
340 31, 67, 127, 257,
341 509, 1021, 2053, 4099,
342 8191, 16381, 32771, 65537,
343 131071, 262147, 524287, 1048573,
344 2097143, 4194301, 8388617, 16777213,
345 33554467, 67108859, 134217757, 268435459,
346 536870909, 1073741827};