initial
[prop.git] / include / AD / hash / dhash.h
blob065777181b3d227968b542ccf905b31a6d81d2ad
1 //////////////////////////////////////////////////////////////////////////////
2 // NOTICE:
3 //
4 // ADLib, Prop and their related set of tools and documentation are in the
5 // public domain. The author(s) of this software reserve no copyrights on
6 // the source code and any code generated using the tools. You are encouraged
7 // to use ADLib and Prop to develop software, in both academic and commercial
8 // settings, and are free to incorporate any part of ADLib and Prop into
9 // your programs.
11 // Although you are under no obligation to do so, we strongly recommend that
12 // you give away all software developed using our tools.
14 // We also ask that credit be given to us when ADLib and/or Prop are used in
15 // your programs, and that this notice be preserved intact in all the source
16 // code.
18 // This software is still under development and we welcome any suggestions
19 // and help from the users.
21 // Allen Leung
22 // 1994
23 //////////////////////////////////////////////////////////////////////////////
25 #ifndef hash_table_with_double_hashing_h
26 #define hash_table_with_double_hashing_h
28 ////////////////////////////////////////////////////////////////////////////
29 // Class DHashTable implements a hash table using a double hashing
30 // scheme\cite{Algorithms}.
31 ////////////////////////////////////////////////////////////////////////////
33 #include <string.h>
34 #include <AD/generic/ordering.h>
36 ////////////////////////////////////////////////////////////////////////////
37 // Class |DHashTable| is parameterized with the class of the
38 // key and the class of the value. Furthermore, the functions
39 // unsigned int hash(const K&); and
40 // Bool equal(const K&, const K&);
41 // must be defined by the client that uses this template.
42 ////////////////////////////////////////////////////////////////////////////
43 template <class K, class V>
44 class DHashTable {
46 K * keys; // the array of keys
47 V * values; // the array of values
48 char * status; // status of cell
49 int table_size; // size of the array
50 int elem_count; // number of elements
51 double max_load_ratio; // maximum load ratio (> 0 && < 1)
52 double growth_ratio; // amount to expand when resizing
53 int max_load; // maximum elements before resizing
55 public:
56 ////////////////////////////////////////////////////////////////////
57 // Constructor and destructor
58 ////////////////////////////////////////////////////////////////////
59 DHashTable(int initial_size = 32,
60 double max_load_ratio = 0.0,
61 double growth_ratio = 2.0);
62 ~DHashTable();
64 ////////////////////////////////////////////////////////////////////
65 // Assignment
66 ////////////////////////////////////////////////////////////////////
67 void operator = (const DHashTable&);
69 ////////////////////////////////////////////////////////////////////
70 // Selectors
71 ////////////////////////////////////////////////////////////////////
72 inline int capacity() const { return table_size; } // current capacity
73 inline int size() const { return elem_count; } // number of elements
74 inline Bool is_empty() const { return elem_count == 0; }
75 inline Bool is_full() const { return elem_count == table_size; }
76 inline Bool contains(const K& k) const { return lookup(k) != 0; }
77 inline const V& operator [] (const K& k) const { return value(lookup(k)); }
78 inline V& operator [] (const K& k) { return value(lookup(k)); }
80 ////////////////////////////////////////////////////////////////////
81 // Insertion and deletion.
82 ////////////////////////////////////////////////////////////////////
83 void clear(); // clears out the hash table
84 Ix lookup(const K&) const; // lookup entry by key
85 Ix insert(const K&, const V&); // insert a new entry
86 Bool remove(const K&); // remove an old entry
88 ////////////////////////////////////////////////////////////////////
89 // Iteration:
90 // first() start the iteration
91 // next() get index to the next element; or 0 if none
92 // key() get the key on index
93 // value() get the value on index
94 // Implementation note: Ix's are represented internally as 1-based
95 // array index.
96 ////////////////////////////////////////////////////////////////////
97 inline Ix first() const { return find_next(0); }
98 inline Ix next(Ix i) const { return find_next((int)i); }
99 inline const K& key(Ix i) const { return keys[(int)i-1]; }
100 inline const V& value(Ix i) const { return values[(int)i-1]; }
101 inline V& value(Ix i) { return values[(int)i-1]; }
103 ////////////////////////////////////////////////////////////////////
104 // Resize the hash table
105 ////////////////////////////////////////////////////////////////////
106 void resize(int new_size = 0);
108 private:
109 ////////////////////////////////////////////////////////////////////
110 // Addition implementation methods
111 ////////////////////////////////////////////////////////////////////
112 inline Ix find_next(int i) const
113 { while (i < table_size) if (status[i++] == Cell_used) return (Ix)i;
114 return (Ix)0;
118 //////////////////////////////////////////////////////////////////////////
119 // Implementation of the template methods
120 //////////////////////////////////////////////////////////////////////////
122 //////////////////////////////////////////////////////////////////////////
123 // Create a new table.
124 // Implementation note: each end of each chain of the buckets are
125 // linked to the next. This makes it possible to find the next entry
126 // during iteration quickly.
127 //////////////////////////////////////////////////////////////////////////
128 template <class K, class V>
129 DHashTable<K,V>::DHashTable
130 (int size, double maximum_load_ratio, double growth)
131 : table_size(size), keys(new K [size]), values(new V [size]),
132 status(new char [size])
133 { clear();
134 if (maximum_load_ratio >= 0.9 || maximum_load_ratio <= 0.1)
135 max_load_ratio = .7;
136 else
137 max_load_ratio = maximum_load_ratio;
138 if (growth <= 1.2 || growth >= 5.0) growth_ratio = 2.0;
139 else growth_ratio = growth;
140 max_load = (int)(max_load_ratio * size);
141 if (max_load >= size) max_load = size - 1;
144 //////////////////////////////////////////////////////////////////////////
145 // Destroy a table
146 //////////////////////////////////////////////////////////////////////////
147 template <class K, class V>
148 DHashTable<K,V>::~DHashTable()
149 { delete [] keys; delete [] values; delete [] status; }
151 //////////////////////////////////////////////////////////////////////////
152 // Assignment
153 //////////////////////////////////////////////////////////////////////////
154 template <class K, class V>
155 void DHashTable<K,V>::operator = (const DHashTable<K,V>& t)
156 { if (this != &t) {
157 delete [] keys; delete [] values; delete [] status;
158 elem_count = t.elem_count;
159 table_size = t.table_size;
160 keys = new K [table_size];
161 values = new V [table_size];
162 status = new char [table_size];
163 for (int i = 0; i < table_size; i++) {
164 if ((status[i] = t.status[i]) == Cell_used) {
165 keys[i] = t.keys[i]; values[i] = t.values[i];
171 //////////////////////////////////////////////////////////////////////////
172 // Clear a table.
173 // We'll traverse thru all the buckets and delete each one iteratively.
174 //////////////////////////////////////////////////////////////////////////
175 template <class K, class V>
176 void DHashTable<K,V>::clear()
177 { memset(status,0,table_size); elem_count = 0; }
179 //////////////////////////////////////////////////////////////////////////
180 // Lookup an entry by key; if the entry is not found, return (Ix)0.
181 //////////////////////////////////////////////////////////////////////////
182 template <class K, class V>
183 Ix DHashTable<K,V>::lookup(const K& key) const
184 { unsigned int h = hash(key);
185 unsigned int i = h % table_size;
186 unsigned int inc = 0; // increment
187 unsigned int last;
189 ////////////////////////////////////////////////////////////////////
190 // Since the size of the hash table is not necessary a prime,
191 // care must be taken so that each probing sequence covers the
192 // entire table. The simple strategy of computing new location as
193 // i = (i + inc) % table_size
194 // cannot be used.
195 ////////////////////////////////////////////////////////////////////
196 for (;;) {
197 switch (status[i]) {
198 case Cell_unused: return (Ix)0;
199 case Cell_used: if (equal(key,keys[i])) return (Ix)(i+1);
201 ////////////////////////////////////////////////////////////////////
202 // Compute increment only if the initial probe fails.
203 ////////////////////////////////////////////////////////////////////
204 if (inc == 0) {
205 // recycle those higher order bits of hash value
206 inc = ( h / table_size ) % table_size;
207 if (inc == 0) inc = 1; // use linear probing if all else fails
208 last = i;
210 i += inc;
211 if (i >= table_size) i -= table_size;
212 if (i == last) { last = ++i; }
216 //////////////////////////////////////////////////////////////////////////
217 // Insert a new entry; there are two different cases of behavior:
218 // (1) If the key doesn't already exists, new key/value pair will be
219 // inserted into the table.
220 // (2) If the key already exists, then the old value will be overwritten
221 // by the new value.
222 // Also, if the number of elements have exceeded the maximum load,
223 // the table will be automatically resized.
224 //////////////////////////////////////////////////////////////////////////
225 template <class K, class V>
226 Ix DHashTable<K,V>::insert(const K& key, const V& value)
228 /////////////////////////////////////////////////////////////////////
229 // Make sure we have at least one unused cell.
230 /////////////////////////////////////////////////////////////////////
231 if (elem_count >= max_load) resize();
232 unsigned int h = hash(key);
233 unsigned int i = h % table_size;
234 unsigned int inc = 0;
235 unsigned int last;
236 int deleted;
238 /////////////////////////////////////////////////////////////////////
239 // Loop until one of the following:
240 // (1) The key is found; in which case the value is updated.
241 // (2) An unused cell is found; then we'll use the first
242 // deleted cell found along the way. If there is none,
243 // we'll use the unused cell. This is done to minimize
244 // the effect of contamination.
245 /////////////////////////////////////////////////////////////////////
246 for (deleted = -1;;) {
247 switch (status[i]) {
248 case Cell_deleted: if (deleted < 0) deleted = i; break;
249 case Cell_unused: goto found;
250 case Cell_used: if (equal(key,keys[i]))
251 { values[i] = value; return (Ix)(i+1); }
253 if (inc == 0) {
254 // recycle those higher order bits of hash value
255 inc = ( h / table_size ) % table_size;
256 if (inc == 0) inc = 1; // use linear probing if all else fails
257 last = i;
259 i += inc;
260 if (i >= table_size) i -= table_size;
261 if (i == last) { last = ++i; }
263 found:
264 if (deleted >= 0) i = deleted; elem_count++;
265 keys[i] = key; values[i] = value; status[i] = Cell_used;
266 return (Ix)(i+1);
269 //////////////////////////////////////////////////////////////////////////
270 // Resizing the hash table. All entries are completed rehashed.
271 //////////////////////////////////////////////////////////////////////////
272 template <class K, class V>
273 void DHashTable<K,V>::resize(int new_size)
274 { if (new_size <= elem_count) new_size = (int)(table_size * growth_ratio);
276 char * new_status = new char [ new_size ];
277 K * new_keys = new K [ new_size ];
278 V * new_values = new V [ new_size ];
279 memset(new_status,0,new_size);
281 //////////////////////////////////////////////////////////////////
282 // Rehash all used cells one by one. Notice that since all keys
283 // are unique, we don't have to do any comparison.
284 //////////////////////////////////////////////////////////////////
285 for (int i = 0; i < table_size; i++) {
286 if (status[i] == Cell_used) {
287 unsigned int h = hash(keys[i]);
288 int j = h % new_size;
289 unsigned int inc = 0, last;
290 for (;;) {
291 if (new_status[j] != Cell_used) {
292 new_keys[j] = keys[i]; new_values[j] = values[i];
293 new_status[j] = Cell_used; break;
295 if (inc == 0) {
296 inc = ( h / new_size ) % new_size;
297 if (inc == 0) inc = 1; last = j;
299 j += inc;
300 if (j >= new_size) j -= new_size;
301 if (j == last) { last = ++j; }
305 delete [] keys; delete [] values; delete [] status;
306 keys = new_keys; values = new_values; status = new_status;
307 table_size = new_size;
308 max_load = (int)(max_load_ratio * table_size);
311 //////////////////////////////////////////////////////////////////////////
312 // Remove an entry from the table; there are two different cases:
313 // (1) If the key exists within the table, the key/value pair will be
314 // removed; otherwise
315 // (2) The table will be unaltered.
316 // If the removal operation successfully deletes the entry,
317 // we'll also return true to the client.
318 //////////////////////////////////////////////////////////////////////////
319 template <class K, class V>
320 Bool DHashTable<K,V>::remove(const K& key)
321 { Ix i;
322 ///////////////////////////////////////////////////////////////////
323 // We'll just call lookup() to do the dirty work of locating the
324 // appropriate entry.
325 ///////////////////////////////////////////////////////////////////
326 if ((i = lookup(key))) {
327 elem_count--; status[(int)i-1] = Cell_deleted; return true;
328 } else return false;
331 #endif