4 * Copyright (c) 2011 Pacman Development Team <pacman-dev@archlinux.org>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
24 /* List of primes for possible sizes of hash tables.
26 * The maximum table size is the last prime under 1,000,000. That is
27 * more than an order of magnitude greater than the number of packages
28 * in any Linux distribution.
30 static const size_t prime_list
[] =
32 11ul, 13ul, 17ul, 19ul, 23ul, 29ul, 31ul, 37ul, 41ul, 43ul, 47ul,
33 53ul, 59ul, 61ul, 67ul, 71ul, 73ul, 79ul, 83ul, 89ul, 97ul, 103ul,
34 109ul, 113ul, 127ul, 137ul, 139ul, 149ul, 157ul, 167ul, 179ul, 193ul,
35 199ul, 211ul, 227ul, 241ul, 257ul, 277ul, 293ul, 313ul, 337ul, 359ul,
36 383ul, 409ul, 439ul, 467ul, 503ul, 541ul, 577ul, 619ul, 661ul, 709ul,
37 761ul, 823ul, 887ul, 953ul, 1031ul, 1109ul, 1193ul, 1289ul, 1381ul,
38 1493ul, 1613ul, 1741ul, 1879ul, 2029ul, 2179ul, 2357ul, 2549ul,
39 2753ul, 2971ul, 3209ul, 3469ul, 3739ul, 4027ul, 4349ul, 4703ul,
40 5087ul, 5503ul, 5953ul, 6427ul, 6949ul, 7517ul, 8123ul, 8783ul,
41 9497ul, 10273ul, 11113ul, 12011ul, 12983ul, 14033ul, 15173ul,
42 16411ul, 17749ul, 19183ul, 20753ul, 22447ul, 24281ul, 26267ul,
43 28411ul, 30727ul, 33223ul, 35933ul, 38873ul, 42043ul, 45481ul,
44 49201ul, 53201ul, 57557ul, 62233ul, 67307ul, 72817ul, 78779ul,
45 85229ul, 92203ul, 99733ul, 107897ul, 116731ul, 126271ul, 136607ul,
46 147793ul, 159871ul, 172933ul, 187091ul, 202409ul, 218971ul, 236897ul,
47 256279ul, 277261ul, 299951ul, 324503ul, 351061ul, 379787ul, 410857ul,
48 444487ul, 480881ul, 520241ul, 562841ul, 608903ul, 658753ul, 712697ul,
49 771049ul, 834181ul, 902483ul, 976369ul
52 /* Allocate a hash table with at least "size" buckets */
53 pmpkghash_t
*_alpm_pkghash_create(size_t size
)
55 pmpkghash_t
*hash
= NULL
;
58 MALLOC(hash
, sizeof(pmpkghash_t
), RET_ERR(PM_ERR_MEMORY
, NULL
));
64 loopsize
= sizeof(prime_list
) / sizeof(*prime_list
);
65 for(i
= 0; i
< loopsize
; i
++) {
66 if(prime_list
[i
] > size
) {
67 hash
->buckets
= prime_list
[i
];
72 if(hash
->buckets
< size
) {
73 _alpm_log(PM_LOG_ERROR
, _("database larger than maximum size\n"));
78 CALLOC(hash
->hash_table
, hash
->buckets
, sizeof(alpm_list_t
*), \
79 free(hash
); RET_ERR(PM_ERR_MEMORY
, NULL
));
84 static size_t get_hash_position(unsigned long name_hash
, pmpkghash_t
*hash
)
88 position
= name_hash
% hash
->buckets
;
90 /* collision resolution using open addressing with linear probing */
91 while(hash
->hash_table
[position
] != NULL
) {
92 position
= (position
+ 1) % hash
->buckets
;
98 /* Expand the hash table size to the next increment and rebin the entries */
99 static pmpkghash_t
*rehash(pmpkghash_t
*oldhash
)
101 pmpkghash_t
*newhash
;
102 size_t newsize
, position
, i
;
104 /* Hash tables will need resized in two cases:
105 * - adding packages to the local database
106 * - poor estimation of the number of packages in sync database
108 * For small hash tables sizes (<500) the increase in size is by a
109 * minimum of a factor of 2 for optimal rehash efficiency. For
110 * larger database sizes, this increase is reduced to avoid excess
111 * memory allocation as both scenarios requiring a rehash should not
112 * require a table size increase that large. */
113 if(oldhash
->buckets
< 500) {
114 newsize
= oldhash
->buckets
* 2;
115 } else if(oldhash
->buckets
< 2000) {
116 newsize
= oldhash
->buckets
* 3 / 2;
117 } else if(oldhash
->buckets
< 5000) {
118 newsize
= oldhash
->buckets
* 4 / 3;
120 newsize
= oldhash
->buckets
+ 1;
123 newhash
= _alpm_pkghash_create(newsize
);
124 if(newhash
== NULL
) {
125 /* creation of newhash failed, stick with old one... */
129 newhash
->list
= oldhash
->list
;
130 oldhash
->list
= NULL
;
132 for(i
= 0; i
< oldhash
->buckets
; i
++) {
133 if(oldhash
->hash_table
[i
] != NULL
) {
134 pmpkg_t
*package
= oldhash
->hash_table
[i
]->data
;
136 position
= get_hash_position(package
->name_hash
, newhash
);
138 newhash
->hash_table
[position
] = oldhash
->hash_table
[i
];
139 oldhash
->hash_table
[i
] = NULL
;
143 newhash
->entries
= oldhash
->entries
;
145 _alpm_pkghash_free(oldhash
);
150 static pmpkghash_t
*pkghash_add_pkg(pmpkghash_t
*hash
, pmpkg_t
*pkg
, int sorted
)
155 if(pkg
== NULL
|| hash
== NULL
) {
159 if((hash
->entries
+ 1) / MAX_HASH_LOAD
> hash
->buckets
) {
163 position
= get_hash_position(pkg
->name_hash
, hash
);
165 ptr
= calloc(1, sizeof(alpm_list_t
));
174 hash
->hash_table
[position
] = ptr
;
176 hash
->list
= alpm_list_join(hash
->list
, ptr
);
178 hash
->list
= alpm_list_mmerge(hash
->list
, ptr
, _alpm_pkg_cmp
);
185 pmpkghash_t
*_alpm_pkghash_add(pmpkghash_t
*hash
, pmpkg_t
*pkg
)
187 return pkghash_add_pkg(hash
, pkg
, 0);
190 pmpkghash_t
*_alpm_pkghash_add_sorted(pmpkghash_t
*hash
, pmpkg_t
*pkg
)
192 return pkghash_add_pkg(hash
, pkg
, 1);
195 static size_t move_one_entry(pmpkghash_t
*hash
, size_t start
, size_t end
)
197 /* Iterate backwards from 'end' to 'start', seeing if any of the items
198 * would hash to 'start'. If we find one, we move it there and break. If
199 * we get all the way back to position and find none that hash to it, we
200 * also end iteration. Iterating backwards helps prevent needless shuffles;
201 * we will never need to move more than one item per function call. The
202 * return value is our current iteration location; if this is equal to
203 * 'start' we can stop this madness. */
204 while(end
!= start
) {
205 alpm_list_t
*i
= hash
->hash_table
[end
];
206 pmpkg_t
*info
= i
->data
;
207 size_t new_position
= get_hash_position(info
->name_hash
, hash
);
209 if(new_position
== start
) {
210 hash
->hash_table
[start
] = i
;
211 hash
->hash_table
[end
] = NULL
;
215 /* the odd math ensures we are always positive, e.g.
216 * e.g. (0 - 1) % 47 == -1
217 * e.g. (47 + 0 - 1) % 47 == 46 */
218 end
= (hash
->buckets
+ end
- 1) % hash
->buckets
;
224 * @brief Remove a package from a pkghash.
226 * @param hash the hash to remove the package from
227 * @param pkg the package we are removing
228 * @param data output parameter containing the removed item
230 * @return the resultant hash
232 pmpkghash_t
*_alpm_pkghash_remove(pmpkghash_t
*hash
, pmpkg_t
*pkg
,
242 if(pkg
== NULL
|| hash
== NULL
) {
246 position
= pkg
->name_hash
% hash
->buckets
;
247 while((i
= hash
->hash_table
[position
]) != NULL
) {
248 pmpkg_t
*info
= i
->data
;
250 if(info
->name_hash
== pkg
->name_hash
&&
251 strcmp(info
->name
, pkg
->name
) == 0) {
254 /* remove from list and hash */
255 hash
->list
= alpm_list_remove_item(hash
->list
, i
);
259 hash
->hash_table
[position
] = NULL
;
263 /* Potentially move entries following removed entry to keep open
264 * addressing collision resolution working. We start by finding the
265 * next null bucket to know how far we have to look. */
266 stop
= (position
+ 1) % hash
->buckets
;
267 while(hash
->hash_table
[stop
] != NULL
&& stop
!= position
) {
268 stop
= (stop
+ 1) % hash
->buckets
;
270 stop
= (hash
->buckets
+ stop
- 1) % hash
->buckets
;
272 /* We now search backwards from stop to position. If we find an
273 * item that now hashes to position, we will move it, and then try
274 * to plug the new hole we just opened up, until we finally don't
276 while((prev
= move_one_entry(hash
, position
, stop
)) != position
) {
283 position
= (position
+ 1) % hash
->buckets
;
289 void _alpm_pkghash_free(pmpkghash_t
*hash
)
293 for(i
= 0; i
< hash
->buckets
; i
++) {
294 free(hash
->hash_table
[i
]);
296 free(hash
->hash_table
);
301 pmpkg_t
*_alpm_pkghash_find(pmpkghash_t
*hash
, const char *name
)
304 unsigned long name_hash
;
307 if(name
== NULL
|| hash
== NULL
) {
311 name_hash
= _alpm_hash_sdbm(name
);
313 position
= name_hash
% hash
->buckets
;
315 while((lp
= hash
->hash_table
[position
]) != NULL
) {
316 pmpkg_t
*info
= lp
->data
;
318 if(info
->name_hash
== name_hash
&& strcmp(info
->name
, name
) == 0) {
322 position
= (position
+ 1) % hash
->buckets
;
328 /* vim: set ts=2 sw=2 noet: */