// TortoiseSVN - a Windows shell extension for easy version control

// Copyright (C) 2007-2007 - TortoiseSVN

// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software Foundation,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

/**
 * A quick linear (array) hash index class. It requires HF to
 * provide the following interface:
 *
 * value_type     data type of the hashed values
 * index_type     type of the index stored for each value
 * NO_INDEX       an index value used to mark empty buckets
 *
 * operator()     value_type -> size_t hash function
 * value()        index_type -> value_type
 * equal()        (value_type, index_type) -> bool
 *
 * The capacity approximately doubles with each rehash().
 * Only insertion and lookup are provided. Collisions are
 * resolved using linear probing.
 *
 * Use statistics() to monitor the cache performance.
 */

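// Illustrative sketch only, not part of the original header: a minimal HF
// policy that satisfies the interface described above. The name
// "example_hash_function" and its backing std::vector are hypothetical; the
// values live in an external vector and the hash stores vector positions
// as index_type.

#include <string>
#include <vector>

struct example_hash_function
{
    /// data type of the hashed values

    typedef std::string value_type;

    /// type of the index stored for each value

    typedef unsigned index_type;

    /// marks empty buckets

    enum {NO_INDEX = (unsigned)-1};

    /// the container that owns the actual values;
    /// the hash only stores positions into it

    const std::vector<std::string>* values;

    explicit example_hash_function (const std::vector<std::string>* v)
        : values (v)
    {
    }

    /// value_type -> size_t hash function (simple multiplicative hash)

    size_t operator() (const value_type& value) const
    {
        size_t result = 0;
        for (size_t i = 0; i < value.size(); ++i)
            result = result * 33 + static_cast<unsigned char> (value[i]);

        return result;
    }

    /// index_type -> value_type

    value_type value (index_type index) const
    {
        return (*values)[index];
    }

    /// (value_type, index_type) -> bool

    bool equal (const value_type& value, index_type index) const
    {
        return (*values)[index] == value;
    }
};
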
template<class HF>
class quick_hash
{
public:

    typedef typename HF::value_type value_type;
    typedef typename HF::index_type index_type;

    enum {NO_INDEX = (index_type)(HF::NO_INDEX)};

    /// cache usage statistics as returned by statistics()

    struct statistics_t
    {
        size_t capacity;
        size_t used;
        size_t collisions;
        size_t collision_path_sum;
        size_t max_path;

        statistics_t()
            : capacity (0)
            , used (0)
            , collisions (0)
            , collision_path_sum (0)
            , max_path (1)
        {
        }
    };

private:

    /// capacity management: steps through a table of prime
    /// capacities and collects the usage statistics

    class prime_grower
    {
    private:

        /// the capacities to step through when growing

        static const size_t primes[31];

        /// accumulated usage info

        statistics_t statistics;

        /// current position within primes[]

        size_t index;

    public:

        /// construction: start at the smallest capacity

        prime_grower()
            : index (0)
        {
            statistics.capacity = primes[index];
        }

        /// current number of buckets

        size_t capacity() const
        {
            return statistics.capacity;
        }

        /// number of entries in the hash

        size_t size() const
        {
            return statistics.used;
        }

        /// number of insertions that collided

        size_t collisions() const
        {
            return statistics.collisions;
        }

        /// update statistics for an insertion that hit an empty bucket

        void inserted_cleanly()
        {
            ++statistics.used;
        }

        /// update statistics for an insertion that had to probe
        /// path_size additional buckets

        void inserted_collision (size_t path_size)
        {
            ++statistics.used;

            ++statistics.collisions;
            statistics.collision_path_sum += path_size;
            if (statistics.max_path <= path_size)
                statistics.max_path = path_size + 1;
        }

        /// step to the next capacity level and reset the statistics;
        /// the entries get re-counted during the subsequent re-hash

        void grow()
        {
            statistics.capacity = primes[++index];
            statistics.collisions = 0;
            statistics.used = 0;
            statistics.collision_path_sum = 0;
            statistics.max_path = 1;
        }

        /// map a hash value onto a bucket index

        size_t map (size_t hash_value) const
        {
            return hash_value % capacity();
        }

        /// next bucket to probe after a collision: advance by a
        /// fixed stride modulo the (prime) capacity

        size_t next (size_t index) const
        {
            return (index + 1381000000) % capacity();
        }

        /// access the accumulated usage statistics

        const statistics_t& get_statistics() const
        {
            return statistics;
        }
    };

    /// the hash function provider

    HF hf;

    /// capacity and statistics management

    prime_grower grower;

    /// the bucket array holding the indices; empty buckets contain NO_INDEX

    index_type* data;

    /// check whether the hash has to grow (re-hash) before
    /// further entries can be added

    bool should_grow() const
    {
        // grow, if there are many collisions
        // or capacity is almost exceeded.
        // There must always remain at least one empty bucket.

        return grower.size() + grower.collisions() + 1 >= grower.capacity();
    }

    /// initialize the new array before re-hashing

        size_t new_capacity = grower.capacity();

        data = new index_type [new_capacity];
        stdext::unchecked_fill_n (data, new_capacity, NO_INDEX);

    /// add a value to the hash
    /// (must not be in it already; hash must not be full)

    void internal_insert (const value_type& value, index_type index)
    {
        // first try: un-collisioned insertion

        size_t bucket = grower.map (hf (value));
        index_type* target = data + bucket;

        if (*target == NO_INDEX)
        {
            *target = index;
            grower.inserted_cleanly();

            return;
        }

        // collision -> look for an empty bucket

        size_t collision_path_size = 0;
        do
        {
            bucket = grower.next (bucket);
            target = data + bucket;
            ++collision_path_size;
        }
        while (*target != NO_INDEX);

        // insert collisioned item

        *target = index;
        grower.inserted_collision (collision_path_size);
    }

    /// re-insert all entries of the old bucket array

    void rehash (index_type* old_data, size_t old_data_size)
    {
        for (size_t i = 0; i < old_data_size; ++i)
        {
            index_type index = old_data[i];
            if (index != NO_INDEX)
                internal_insert (hf.value (index), index);
        }
    }

public:

    /// construction / destruction

    quick_hash (const HF& hash_function)
        : hf (hash_function)
        , data (new index_type [grower.capacity()])
    {
        stdext::unchecked_fill_n (data, grower.capacity(), NO_INDEX);
    }

    quick_hash (const quick_hash& rhs)
        : hf (rhs.hf)
        , grower (rhs.grower)
        , data (new index_type [rhs.grower.capacity()])
    {
        stdext::unchecked_copy (rhs.data, rhs.data + rhs.grower.capacity(), data);
    }

    /// find the bucket containing the desired value;
    /// return NO_INDEX if not contained in hash

    index_type find (const value_type& value) const
    {
        size_t bucket = grower.map (hf (value));
        index_type index = data[bucket];

        while (index != NO_INDEX)
        {
            // found?

            if (hf.equal (value, index))
                break;

            // collision -> look at next bucket position

            bucket = grower.next (bucket);
            index = data[bucket];
        }

        // either found or not in hash

        return index;
    }

    /// add a new value to the hash
    /// (it must not be contained already)

    void insert (const value_type& value, index_type index)
    {
        assert (find (value) == NO_INDEX);

        if (should_grow())
            reserve (grower.capacity()+1);

        internal_insert (value, index);
    }

    /// grow the bucket array to hold at least min_bucket_count
    /// entries and re-hash the existing content

    void reserve (size_t min_bucket_count)
    {
        // reserve some headroom: double the request as long as
        // the doubled array size remains allocatable

        if (size_t(-1) / sizeof (index_type[4]) > min_bucket_count)
            min_bucket_count *= 2;

        index_type* old_data = data;
        size_t old_data_size = grower.capacity();

        while (grower.capacity() < min_bucket_count)
            grower.grow();

        if (grower.capacity() != old_data_size)
            rehash (old_data, old_data_size);
    }

    /// assignment

    quick_hash& operator=(const quick_hash& rhs)
    {
        if (grower.capacity() != rhs.grower.capacity())
        {
            delete[] data;
            data = new index_type [rhs.grower.capacity()];
        }

        grower = rhs.grower;

        stdext::unchecked_copy (rhs.data, rhs.data + rhs.grower.capacity(), data);

        return *this;
    }

    /// get rid of all entries

    void clear()
    {
        if (grower.size() > 0)
        {
            delete[] data;
            grower = prime_grower();

            data = new index_type [grower.capacity()];
            stdext::unchecked_fill_n (data, grower.capacity(), NO_INDEX);
        }
    }

    /// efficiently exchange two containers

    void swap (quick_hash& rhs)
    {
        std::swap (data, rhs.data);

        prime_grower temp = grower;
        grower = rhs.grower;
        rhs.grower = temp;
    }

    /// read cache performance statistics

    const statistics_t& statistics() const
    {
        return grower.get_statistics();
    }
};

/// the capacity levels we step through when growing

template<class HF>
const size_t quick_hash<HF>::prime_grower::primes[31] =

         509,        1021,       2053,       4099,
         8191,       16381,      32771,      65537,
         131071,     262147,     524287,     1048573,
         2097143,    4194301,    8388617,    16777213,
         33554467,   67108859,   134217757,  268435459,
         536870909,  1073741827};

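// Illustrative usage sketch, not part of the original header. It relies on
// the hypothetical example_hash_function policy shown near the top of this
// file and builds a reverse lookup (string -> vector position). The function
// name quick_hash_usage_example is likewise hypothetical.

#include <iostream>

inline void quick_hash_usage_example()
{
    std::vector<std::string> words;
    words.push_back ("trunk");
    words.push_back ("branches");
    words.push_back ("tags");

    example_hash_function hash_function (&words);
    quick_hash<example_hash_function> lookup (hash_function);

    // index every word by its position within the vector

    for (size_t i = 0; i < words.size(); ++i)
        lookup.insert (words[i], static_cast<unsigned> (i));

    // constant-time reverse lookup; NO_INDEX marks "not found"

    std::cout << lookup.find ("tags") << std::endl;     // prints 2
    if (lookup.find ("unknown") == quick_hash<example_hash_function>::NO_INDEX)
        std::cout << "not found" << std::endl;

    // monitor hash efficiency

    std::cout << lookup.statistics().collisions << std::endl;
}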