Update.
[glibc.git] / db2 / hash / hash.c
blobc08495378e68de83e15dbe4570ac8bbb2f644808
1 /*-
2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997
5 * Sleepycat Software. All rights reserved.
6 */
7 /*
8 * Copyright (c) 1990, 1993, 1994
9 * Margo Seltzer. All rights reserved.
12 * Copyright (c) 1990, 1993, 1994
13 * The Regents of the University of California. All rights reserved.
15 * This code is derived from software contributed to Berkeley by
16 * Margo Seltzer.
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
20 * are met:
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
26 * 3. All advertising materials mentioning features or use of this software
27 * must display the following acknowledgement:
28 * This product includes software developed by the University of
29 * California, Berkeley and its contributors.
30 * 4. Neither the name of the University nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
34 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44 * SUCH DAMAGE.
47 #include "config.h"
49 #ifndef lint
50 static const char sccsid[] = "@(#)hash.c 10.33 (Sleepycat) 11/2/97";
51 #endif /* not lint */
53 #ifndef NO_SYSTEM_INCLUDES
54 #include <sys/types.h>
55 #include <sys/stat.h>
57 #include <errno.h>
58 #include <fcntl.h>
59 #include <stdio.h>
60 #include <stdlib.h>
61 #include <string.h>
62 #include <unistd.h>
63 #endif
65 #include "shqueue.h"
66 #include "db_int.h"
67 #include "db_page.h"
68 #include "db_am.h"
69 #include "db_ext.h"
70 #include "hash.h"
71 #include "log.h"
73 static int __ham_c_close __P((DBC *));
74 static int __ham_c_del __P((DBC *, int));
75 static int __ham_c_get __P((DBC *, DBT *, DBT *, int));
76 static int __ham_c_put __P((DBC *, DBT *, DBT *, int));
77 static int __ham_c_init __P((DB *, DB_TXN *, DBC **));
78 static int __ham_cursor __P((DB *, DB_TXN *, DBC **));
79 static int __ham_delete __P((DB *, DB_TXN *, DBT *, int));
80 static int __ham_dup_return __P((HTAB *, HASH_CURSOR *, DBT *, int));
81 static int __ham_get __P((DB *, DB_TXN *, DBT *, DBT *, int));
82 static void __ham_init_htab __P((HTAB *, u_int));
83 static int __ham_lookup __P((HTAB *,
84 HASH_CURSOR *, const DBT *, u_int32_t, db_lockmode_t));
85 static int __ham_overwrite __P((HTAB *, HASH_CURSOR *, DBT *));
86 static int __ham_put __P((DB *, DB_TXN *, DBT *, DBT *, int));
87 static int __ham_sync __P((DB *, int));
89 /************************** INTERFACE ROUTINES ***************************/
90 /* OPEN/CLOSE */
93 * __ham_open --
95 * PUBLIC: int __ham_open __P((DB *, DB_INFO *));
97 int
98 __ham_open(dbp, dbinfo)
99 DB *dbp;
100 DB_INFO *dbinfo;
102 DB_ENV *dbenv;
103 DBC *curs;
104 HTAB *hashp;
105 int file_existed, ret;
107 dbenv = dbp->dbenv;
109 if ((hashp = (HTAB *)__db_calloc(1, sizeof(HTAB))) == NULL)
110 return (ENOMEM);
111 hashp->dbp = dbp;
113 /* Set the hash function if specified by the user. */
114 if (dbinfo != NULL && dbinfo->h_hash != NULL)
115 hashp->hash = dbinfo->h_hash;
118 * Initialize the remaining fields of the dbp. The type, close and
119 * fd functions are all set in db_open.
121 dbp->internal = hashp;
122 dbp->cursor = __ham_cursor;
123 dbp->del = __ham_delete;
124 dbp->get = __ham_get;
125 dbp->put = __ham_put;
126 dbp->sync = __ham_sync;
128 /* If locking is turned on, lock the meta data page. */
129 if (F_ISSET(dbp, DB_AM_LOCKING)) {
130 dbp->lock.pgno = BUCKET_INVALID;
131 if ((ret = lock_get(dbenv->lk_info, dbp->locker,
132 0, &dbp->lock_dbt, DB_LOCK_READ, &hashp->hlock)) != 0) {
133 if (ret < 0)
134 ret = EAGAIN;
135 goto out;
140 * Now, we can try to read the meta-data page and figure out
141 * if we set up locking and get the meta-data page properly.
142 * If this is a new file, initialize it, and put it back dirty.
144 if ((ret = __ham_get_page(hashp->dbp, 0, (PAGE **)&hashp->hdr)) != 0)
145 goto out;
147 /* Initialize the hashp structure */
148 if (hashp->hdr->magic == DB_HASHMAGIC) {
149 file_existed = 1;
150 /* File exists, verify the data in the header. */
151 if (hashp->hash == NULL)
152 hashp->hash =
153 hashp->hdr->version < 5 ? __ham_func4 : __ham_func5;
154 if (hashp->hash(CHARKEY, sizeof(CHARKEY)) !=
155 hashp->hdr->h_charkey) {
156 __db_err(hashp->dbp->dbenv,
157 "hash: incompatible hash function");
158 ret = EINVAL;
159 goto out;
161 if (F_ISSET(hashp->hdr, DB_HASH_DUP))
162 F_SET(dbp, DB_AM_DUP);
163 } else {
165 * File does not exist, we must initialize the header. If
166 * locking is enabled that means getting a write lock first.
168 file_existed = 0;
169 if (F_ISSET(dbp, DB_AM_LOCKING) &&
170 ((ret = lock_put(dbenv->lk_info, hashp->hlock)) != 0 ||
171 (ret = lock_get(dbenv->lk_info, dbp->locker, 0,
172 &dbp->lock_dbt, DB_LOCK_WRITE, &hashp->hlock)) != 0)) {
173 if (ret < 0)
174 ret = EAGAIN;
175 goto out;
178 hashp->hdr->ffactor =
179 dbinfo != NULL && dbinfo->h_ffactor ? dbinfo->h_ffactor : 0;
180 __ham_init_htab(hashp, dbinfo != NULL ? dbinfo->h_nelem : 0);
181 if (F_ISSET(dbp, DB_AM_DUP))
182 F_SET(hashp->hdr, DB_HASH_DUP);
183 if ((ret = __ham_dirty_page(hashp, (PAGE *)hashp->hdr)) != 0)
184 goto out;
187 /* Initialize the default cursor. */
188 __ham_c_init(dbp, NULL, &curs);
189 TAILQ_INSERT_TAIL(&dbp->curs_queue, curs, links);
191 /* Allocate memory for our split buffer. */
192 if ((hashp->split_buf = (PAGE *)__db_malloc(dbp->pgsize)) == NULL) {
193 ret = ENOMEM;
194 goto out;
197 #ifdef NO_STATISTICS_FOR_DB_ERR
198 __db_err(dbp->dbenv,
199 "%s%lx\n%s%ld\n%s%ld\n%s%ld\n%s%ld\n%s0x%lx\n%s0x%lx\n%s%ld\n%s%ld\n%s0x%lx",
200 "TABLE POINTER ", (long)hashp,
201 "BUCKET SIZE ", (long)hashp->hdr->pagesize,
202 "FILL FACTOR ", (long)hashp->hdr->ffactor,
203 "MAX BUCKET ", (long)hashp->hdr->max_bucket,
204 "OVFL POINT ", (long)hashp->hdr->ovfl_point,
205 "LAST FREED ", (long)hashp->hdr->last_freed,
206 "HIGH MASK ", (long)hashp->hdr->high_mask,
207 "LOW MASK ", (long)hashp->hdr->low_mask,
208 "NELEM ", (long)hashp->hdr->nelem,
209 "FLAGS ", (long)hashp->hdr->flags);
210 #endif
212 /* Release the meta data page */
213 (void)__ham_put_page(hashp->dbp, (PAGE *)hashp->hdr, 0);
214 if (F_ISSET(dbp, DB_AM_LOCKING) &&
215 (ret = lock_put(dbenv->lk_info, hashp->hlock)) != 0) {
216 if (ret < 0)
217 ret = EAGAIN;
218 goto out;
221 hashp->hlock = 0;
222 hashp->hdr = NULL;
223 /* Sync the file so that we know that the meta data goes to disk. */
224 if (!file_existed && (ret = dbp->sync(dbp, 0)) != 0)
225 goto out;
226 return (0);
228 out: (void)__ham_close(dbp);
229 return (ret);
233 * PUBLIC: int __ham_close __P((DB *));
236 __ham_close(dbp)
237 DB *dbp;
239 HTAB *hashp;
240 int ret, t_ret;
242 DEBUG_LWRITE(dbp, NULL, "ham_close", NULL, NULL, 0);
243 hashp = (HTAB *)dbp->internal;
244 ret = 0;
246 /* Free the split page. */
247 if (hashp->split_buf)
248 FREE(hashp->split_buf, dbp->pgsize);
250 if (hashp->hdr && (t_ret = __ham_put_page(hashp->dbp,
251 (PAGE *)hashp->hdr, 0)) != 0 && ret == 0)
252 ret = t_ret;
253 if (hashp->hlock && (t_ret = lock_put(hashp->dbp->dbenv->lk_info,
254 hashp->hlock)) != 0 && ret == 0)
255 ret = t_ret;
257 FREE(hashp, sizeof(HTAB));
258 dbp->internal = NULL;
259 return (ret);
262 /************************** LOCAL CREATION ROUTINES **********************/
264 * Returns 0 on No Error
266 static void
267 __ham_init_htab(hashp, nelem)
268 HTAB *hashp;
269 u_int nelem;
271 int32_t l2, nbuckets;
273 hashp->hdr->nelem = 0;
274 hashp->hdr->pagesize = hashp->dbp->pgsize;
275 ZERO_LSN(hashp->hdr->lsn);
276 hashp->hdr->magic = DB_HASHMAGIC;
277 hashp->hdr->version = DB_HASHVERSION;
278 if (hashp->hash == NULL)
279 hashp->hash =
280 hashp->hdr->version < 5 ? __ham_func4 : __ham_func5;
281 hashp->hdr->h_charkey = hashp->hash(CHARKEY, sizeof(CHARKEY));
282 if (nelem != 0 && hashp->hdr->ffactor != 0) {
283 nelem = (nelem - 1) / hashp->hdr->ffactor + 1;
284 l2 = __db_log2(nelem > 2 ? nelem : 2);
285 } else
286 l2 = 2;
288 nbuckets = 1 << l2;
290 hashp->hdr->spares[l2] = 0;
291 hashp->hdr->spares[l2 + 1] = 0;
292 hashp->hdr->ovfl_point = l2;
293 hashp->hdr->last_freed = PGNO_INVALID;
295 hashp->hdr->max_bucket = hashp->hdr->high_mask = nbuckets - 1;
296 hashp->hdr->low_mask = (nbuckets >> 1) - 1;
297 memcpy(hashp->hdr->uid, hashp->dbp->lock.fileid, DB_FILE_ID_LEN);
300 /********************** DESTROY/CLOSE ROUTINES ************************/
304 * Write modified pages to disk
306 * Returns:
307 * 0 == OK
308 * -1 ERROR
310 static int
311 __ham_sync(dbp, flags)
312 DB *dbp;
313 int flags;
315 int ret;
317 DEBUG_LWRITE(dbp, NULL, "ham_sync", NULL, NULL, flags);
318 if ((ret = __db_syncchk(dbp, flags)) != 0)
319 return (ret);
320 if (F_ISSET(dbp, DB_AM_RDONLY))
321 return (0);
323 if ((ret = memp_fsync(dbp->mpf)) == DB_INCOMPLETE)
324 ret = 0;
326 return (ret);
329 /*******************************SEARCH ROUTINES *****************************/
331 * All the access routines return
333 * Returns:
334 * 0 on SUCCESS
335 * 1 to indicate an external ERROR (i.e. key not found, etc)
336 * -1 to indicate an internal ERROR (i.e. out of memory, etc)
339 static int
340 __ham_get(dbp, txn, key, data, flags)
341 DB *dbp;
342 DB_TXN *txn;
343 DBT *key;
344 DBT *data;
345 int flags;
347 DB *ldbp;
348 DBC *cp;
349 HTAB *hashp;
350 HASH_CURSOR *hcp;
351 int ret, t_ret;
353 DEBUG_LREAD(dbp, txn, "ham_get", key, NULL, flags);
354 if ((ret = __db_getchk(dbp, key, data, flags)) != 0)
355 return (ret);
357 ldbp = dbp;
358 if (F_ISSET(dbp, DB_AM_THREAD) &&
359 (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)
360 return (ret);
362 hashp = (HTAB *)ldbp->internal;
363 SET_LOCKER(ldbp, txn);
364 GET_META(ldbp, hashp);
365 cp = TAILQ_FIRST(&ldbp->curs_queue);
367 hashp->hash_accesses++;
368 hcp = (HASH_CURSOR *)TAILQ_FIRST(&ldbp->curs_queue)->internal;
369 if ((ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_READ)) == 0)
370 if (F_ISSET(hcp, H_OK))
371 ret = __ham_dup_return(hashp, hcp, data, DB_FIRST);
372 else /* Key was not found */
373 ret = DB_NOTFOUND;
375 if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)
376 ret = t_ret;
377 RELEASE_META(ldbp, hashp);
378 if (F_ISSET(dbp, DB_AM_THREAD))
379 __db_puthandle(ldbp);
380 return (ret);
383 static int
384 __ham_put(dbp, txn, key, data, flags)
385 DB *dbp;
386 DB_TXN *txn;
387 DBT *key;
388 DBT *data;
389 int flags;
391 DB *ldbp;
392 HTAB *hashp;
393 HASH_CURSOR *hcp;
394 DBT tmp_val, *myval;
395 int ret, t_ret;
396 u_int32_t nbytes;
398 DEBUG_LWRITE(dbp, txn, "ham_put", key, data, flags);
399 if ((ret = __db_putchk(dbp, key, data,
400 flags, F_ISSET(dbp, DB_AM_RDONLY), F_ISSET(dbp, DB_AM_DUP))) != 0)
401 return (ret);
403 ldbp = dbp;
404 if (F_ISSET(dbp, DB_AM_THREAD) &&
405 (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)
406 return (ret);
408 hashp = (HTAB *)ldbp->internal;
409 SET_LOCKER(ldbp, txn);
410 GET_META(ldbp, hashp);
411 hcp = TAILQ_FIRST(&ldbp->curs_queue)->internal;
413 nbytes = (ISBIG(hashp, key->size) ? HOFFPAGE_PSIZE :
414 HKEYDATA_PSIZE(key->size)) +
415 (ISBIG(hashp, data->size) ? HOFFPAGE_PSIZE :
416 HKEYDATA_PSIZE(data->size));
418 hashp->hash_accesses++;
419 ret = __ham_lookup(hashp, hcp, key, nbytes, DB_LOCK_WRITE);
421 if (ret == DB_NOTFOUND) {
422 ret = 0;
423 if (hcp->seek_found_page != PGNO_INVALID &&
424 hcp->seek_found_page != hcp->pgno) {
425 if ((ret = __ham_item_done(hashp, hcp, 0)) != 0)
426 goto out;
427 hcp->pgno = hcp->seek_found_page;
428 hcp->bndx = NDX_INVALID;
431 if (F_ISSET(data, DB_DBT_PARTIAL) && data->doff != 0) {
433 * Doing a partial put, but the key does not exist
434 * and we are not beginning the write at 0. We
435 * must create a data item padded up to doff and
436 * then write the new bytes represented by val.
438 ret = __ham_init_dbt(&tmp_val, data->size + data->doff,
439 &hcp->big_data, &hcp->big_datalen);
440 if (ret == 0) {
441 memset(tmp_val.data, 0, data->doff);
442 memcpy((u_int8_t *)tmp_val.data + data->doff,
443 data->data, data->size);
444 myval = &tmp_val;
446 } else
447 myval = (DBT *)data;
449 if (ret == 0)
450 ret = __ham_add_el(hashp, hcp, key, myval, H_KEYDATA);
451 } else if (ret == 0 && F_ISSET(hcp, H_OK)) {
452 if (flags == DB_NOOVERWRITE)
453 ret = DB_KEYEXIST;
454 else if (F_ISSET(ldbp, DB_AM_DUP))
455 ret = __ham_add_dup(hashp, hcp, data, DB_KEYLAST);
456 else
457 ret = __ham_overwrite(hashp, hcp, data);
460 /* Free up all the cursor pages. */
461 if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
462 ret = t_ret;
463 /* Now check if we have to grow. */
464 out: if (ret == 0 && F_ISSET(hcp, H_EXPAND)) {
465 ret = __ham_expand_table(hashp);
466 F_CLR(hcp, H_EXPAND);
469 if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
470 ret = t_ret;
471 RELEASE_META(ldbp, hashp);
472 if (F_ISSET(dbp, DB_AM_THREAD))
473 __db_puthandle(ldbp);
474 return (ret);
477 static int
478 __ham_cursor(dbp, txnid, dbcp)
479 DB *dbp;
480 DB_TXN *txnid;
481 DBC **dbcp;
483 int ret;
485 DEBUG_LWRITE(dbp, txnid, "ham_cursor", NULL, NULL, 0);
486 if ((ret = __ham_c_init(dbp, txnid, dbcp)) != 0)
487 return (ret);
489 DB_THREAD_LOCK(dbp);
490 TAILQ_INSERT_TAIL(&dbp->curs_queue, *dbcp, links);
491 DB_THREAD_UNLOCK(dbp);
492 return (ret);
495 static int
496 __ham_c_init(dbp, txnid, dbcp)
497 DB *dbp;
498 DB_TXN *txnid;
499 DBC **dbcp;
501 DBC *db_curs;
502 HASH_CURSOR *new_curs;
504 if ((db_curs = (DBC *)__db_calloc(sizeof(DBC), 1)) == NULL)
505 return (ENOMEM);
507 if ((new_curs =
508 (HASH_CURSOR *)__db_calloc(sizeof(struct cursor_t), 1)) == NULL) {
509 FREE(db_curs, sizeof(DBC));
510 return (ENOMEM);
513 db_curs->internal = new_curs;
514 db_curs->c_close = __ham_c_close;
515 db_curs->c_del = __ham_c_del;
516 db_curs->c_get = __ham_c_get;
517 db_curs->c_put = __ham_c_put;
518 db_curs->txn = txnid;
519 db_curs->dbp = dbp;
521 new_curs->db_cursor = db_curs;
522 __ham_item_init(new_curs);
524 if (dbcp != NULL)
525 *dbcp = db_curs;
526 return (0);
529 static int
530 __ham_delete(dbp, txn, key, flags)
531 DB *dbp;
532 DB_TXN *txn;
533 DBT *key;
534 int flags;
536 DB *ldbp;
537 HTAB *hashp;
538 HASH_CURSOR *hcp;
539 int ret, t_ret;
541 DEBUG_LWRITE(dbp, txn, "ham_delete", key, NULL, flags);
542 if ((ret = __db_delchk(dbp, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0)
543 return (ret);
545 ldbp = dbp;
546 if (F_ISSET(dbp, DB_AM_THREAD) &&
547 (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)
548 return (ret);
549 hashp = (HTAB *)ldbp->internal;
550 SET_LOCKER(ldbp, txn);
551 GET_META(ldbp, hashp);
552 hcp = TAILQ_FIRST(&ldbp->curs_queue)->internal;
554 hashp->hash_accesses++;
555 if ((ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_WRITE)) == 0)
556 if (F_ISSET(hcp, H_OK))
557 ret = __ham_del_pair(hashp, hcp, 1);
558 else
559 ret = DB_NOTFOUND;
561 if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
562 ret = t_ret;
563 RELEASE_META(ldbp, hashp);
564 if (F_ISSET(dbp, DB_AM_THREAD))
565 __db_puthandle(ldbp);
566 return (ret);
569 /* ****************** CURSORS ********************************** */
570 static int
571 __ham_c_close(cursor)
572 DBC *cursor;
574 DB *ldbp;
575 int ret;
577 DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_close", NULL, NULL, 0);
579 * If the pagep, dpagep, and lock fields of the cursor are all NULL,
580 * then there really isn't a need to get a handle here. However,
581 * the normal case is that at least one of those fields is non-NULL,
582 * and putting those checks in here would couple the ham_item_done
583 * functionality with cursor close which would be pretty disgusting.
584 * Instead, we pay the overhead here of always getting the handle.
586 ldbp = cursor->dbp;
587 if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
588 (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
589 return (ret);
591 ret = __ham_c_iclose(ldbp, cursor);
593 if (F_ISSET(ldbp, DB_AM_THREAD))
594 __db_puthandle(ldbp);
595 return (ret);
598 * __ham_c_iclose --
600 * Internal cursor close routine; assumes it is being passed the correct
601 * handle, rather than getting and putting a handle.
603 * PUBLIC: int __ham_c_iclose __P((DB *, DBC *));
606 __ham_c_iclose(dbp, dbc)
607 DB *dbp;
608 DBC *dbc;
610 HASH_CURSOR *hcp;
611 HTAB *hashp;
612 int ret;
614 hashp = (HTAB *)dbp->internal;
615 hcp = (HASH_CURSOR *)dbc->internal;
616 ret = __ham_item_done(hashp, hcp, 0);
618 if (hcp->big_key)
619 FREE(hcp->big_key, hcp->big_keylen);
620 if (hcp->big_data)
621 FREE(hcp->big_data, hcp->big_datalen);
624 * All cursors (except the default ones) are linked off the master.
625 * Therefore, when we close the cursor, we have to remove it from
626 * the master, not the local one.
627 * XXX I am always removing from the master; what about local cursors?
629 DB_THREAD_LOCK(dbc->dbp);
630 TAILQ_REMOVE(&dbc->dbp->curs_queue, dbc, links);
631 DB_THREAD_UNLOCK(dbc->dbp);
633 FREE(hcp, sizeof(HASH_CURSOR));
634 FREE(dbc, sizeof(DBC));
636 return (ret);
639 static int
640 __ham_c_del(cursor, flags)
641 DBC *cursor;
642 int flags;
644 DB *ldbp;
645 HTAB *hashp;
646 HASH_CURSOR *hcp;
647 HASH_CURSOR save_curs;
648 db_pgno_t ppgno, chg_pgno;
649 int ret, t_ret;
651 DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_del", NULL, NULL, flags);
652 ldbp = cursor->dbp;
653 if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
654 (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
655 return (ret);
656 hashp = (HTAB *)ldbp->internal;
657 hcp = (HASH_CURSOR *)cursor->internal;
658 save_curs = *hcp;
659 if ((ret = __db_cdelchk(ldbp, flags,
660 F_ISSET(ldbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0)
661 return (ret);
662 if (F_ISSET(hcp, H_DELETED))
663 return (DB_NOTFOUND);
665 SET_LOCKER(hashp->dbp, cursor->txn);
666 GET_META(hashp->dbp, hashp);
667 hashp->hash_accesses++;
668 if ((ret = __ham_get_cpage(hashp, hcp, DB_LOCK_WRITE)) != 0)
669 goto out;
670 if (F_ISSET(hcp, H_ISDUP) && hcp->dpgno != PGNO_INVALID) {
672 * We are about to remove a duplicate from offpage.
674 * There are 4 cases.
675 * 1. We will remove an item on a page, but there are more
676 * items on that page.
677 * 2. We will remove the last item on a page, but there is a
678 * following page of duplicates.
679 * 3. We will remove the last item on a page, this page was the
680 * last page in a duplicate set, but there were dups before
681 * it.
682 * 4. We will remove the last item on a page, removing the last
683 * duplicate.
684 * In case 1 hcp->dpagep is unchanged.
685 * In case 2 hcp->dpagep comes back pointing to the next dup
686 * page.
687 * In case 3 hcp->dpagep comes back NULL.
688 * In case 4 hcp->dpagep comes back NULL.
690 * Case 4 results in deleting the pair off the master page.
691 * The normal code for doing this knows how to delete the
692 * duplicates, so we will handle this case in the normal code.
694 ppgno = PREV_PGNO(hcp->dpagep);
695 if (ppgno == PGNO_INVALID &&
696 NEXT_PGNO(hcp->dpagep) == PGNO_INVALID &&
697 NUM_ENT(hcp->dpagep) == 1)
698 goto normal;
700 /* Remove item from duplicate page. */
701 chg_pgno = hcp->dpgno;
702 if ((ret = __db_drem(hashp->dbp,
703 &hcp->dpagep, hcp->dndx, __ham_del_page)) != 0)
704 goto out;
706 if (hcp->dpagep == NULL) {
707 if (ppgno != PGNO_INVALID) { /* Case 3 */
708 hcp->dpgno = ppgno;
709 if ((ret = __ham_get_cpage(hashp, hcp,
710 DB_LOCK_READ)) != 0)
711 goto out;
712 hcp->dndx = NUM_ENT(hcp->dpagep);
713 F_SET(hcp, H_DELETED);
714 } else { /* Case 4 */
715 ret = __ham_del_pair(hashp, hcp, 1);
716 hcp->dpgno = PGNO_INVALID;
718 * Delpair updated the cursor queue, so we
719 * don't have to do that here.
721 chg_pgno = PGNO_INVALID;
723 } else if (PGNO(hcp->dpagep) != hcp->dpgno) {
724 hcp->dndx = 0; /* Case 2 */
725 hcp->dpgno = PGNO(hcp->dpagep);
726 if (ppgno == PGNO_INVALID)
727 memcpy(HOFFDUP_PGNO(P_ENTRY(hcp->pagep,
728 H_DATAINDEX(hcp->bndx))),
729 &hcp->dpgno, sizeof(db_pgno_t));
730 F_SET(hcp, H_DELETED);
731 } else /* Case 1 */
732 F_SET(hcp, H_DELETED);
733 if (chg_pgno != PGNO_INVALID)
734 __ham_c_update(hashp, hcp, chg_pgno, 0, 0, 1);
735 } else if (F_ISSET(hcp, H_ISDUP)) { /* on page */
736 if (hcp->dup_off == 0 && DUP_SIZE(hcp->dup_len) ==
737 LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx))
738 ret = __ham_del_pair(hashp, hcp, 1);
739 else {
740 DBT repldbt;
742 repldbt.flags = 0;
743 F_SET(&repldbt, DB_DBT_PARTIAL);
744 repldbt.doff = hcp->dup_off;
745 repldbt.dlen = DUP_SIZE(hcp->dup_len);
746 repldbt.size = 0;
747 ret = __ham_replpair(hashp, hcp, &repldbt, 0);
748 hcp->dup_tlen -= DUP_SIZE(hcp->dup_len);
749 F_SET(hcp, H_DELETED);
750 __ham_c_update(hashp, hcp, hcp->pgno,
751 DUP_SIZE(hcp->dup_len), 0, 1);
754 } else
755 /* Not a duplicate */
756 normal: ret = __ham_del_pair(hashp, hcp, 1);
758 out: if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
759 t_ret = ret;
760 if (ret != 0)
761 *hcp = save_curs;
762 RELEASE_META(hashp->dbp, hashp);
763 if (F_ISSET(cursor->dbp, DB_AM_THREAD))
764 __db_puthandle(ldbp);
765 return (ret);
768 static int
769 __ham_c_get(cursor, key, data, flags)
770 DBC *cursor;
771 DBT *key;
772 DBT *data;
773 int flags;
775 DB *ldbp;
776 HTAB *hashp;
777 HASH_CURSOR *hcp, save_curs;
778 int get_key, ret, t_ret;
780 DEBUG_LREAD(cursor->dbp, cursor->txn, "ham_c_get",
781 flags == DB_SET || flags == DB_SET_RANGE ? key : NULL,
782 NULL, flags);
783 ldbp = cursor->dbp;
784 if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
785 (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
786 return (ret);
787 hashp = (HTAB *)(ldbp->internal);
788 hcp = (HASH_CURSOR *)cursor->internal;
789 save_curs = *hcp;
790 if ((ret =
791 __db_cgetchk(hashp->dbp, key, data, flags, IS_VALID(hcp))) != 0)
792 return (ret);
794 SET_LOCKER(hashp->dbp, cursor->txn);
795 GET_META(hashp->dbp, hashp);
796 hashp->hash_accesses++;
798 hcp->seek_size = 0;
800 ret = 0;
801 get_key = 1;
802 switch (flags) {
803 case DB_PREV:
804 if (hcp->bucket != BUCKET_INVALID) {
805 ret = __ham_item_prev(hashp, hcp, DB_LOCK_READ);
806 break;
808 /* FALL THROUGH */
809 case DB_LAST:
810 ret = __ham_item_last(hashp, hcp, DB_LOCK_READ);
811 break;
812 case DB_FIRST:
813 ret = __ham_item_first(hashp, hcp, DB_LOCK_READ);
814 break;
815 case DB_NEXT:
816 if (hcp->bucket == BUCKET_INVALID)
817 hcp->bucket = 0;
818 ret = __ham_item_next(hashp, hcp, DB_LOCK_READ);
819 break;
820 case DB_SET:
821 case DB_SET_RANGE:
822 ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_READ);
823 get_key = 0;
824 break;
825 case DB_CURRENT:
826 if (F_ISSET(hcp, H_DELETED)) {
827 ret = DB_KEYEMPTY;
828 goto out;
831 ret = __ham_item(hashp, hcp, DB_LOCK_READ);
832 break;
836 * Must always enter this loop to do error handling and
837 * check for big key/data pair.
839 while (1) {
840 if (ret != 0 && ret != DB_NOTFOUND)
841 goto out1;
842 else if (F_ISSET(hcp, H_OK)) {
843 /* Get the key. */
844 if (get_key && (ret = __db_ret(hashp->dbp, hcp->pagep,
845 H_KEYINDEX(hcp->bndx), key, &hcp->big_key,
846 &hcp->big_keylen)) != 0)
847 goto out1;
849 ret = __ham_dup_return(hashp, hcp, data, flags);
850 break;
851 } else if (!F_ISSET(hcp, H_NOMORE)) {
852 abort();
853 break;
857 * Ran out of entries in a bucket; change buckets.
859 switch (flags) {
860 case DB_LAST:
861 case DB_PREV:
862 ret = __ham_item_done(hashp, hcp, 0);
863 if (hcp->bucket == 0) {
864 ret = DB_NOTFOUND;
865 goto out1;
867 hcp->bucket--;
868 hcp->bndx = NDX_INVALID;
869 if (ret == 0)
870 ret = __ham_item_prev(hashp,
871 hcp, DB_LOCK_READ);
872 break;
873 case DB_FIRST:
874 case DB_NEXT:
875 ret = __ham_item_done(hashp, hcp, 0);
876 hcp->bndx = NDX_INVALID;
877 hcp->bucket++;
878 hcp->pgno = PGNO_INVALID;
879 hcp->pagep = NULL;
880 if (hcp->bucket > hashp->hdr->max_bucket) {
881 ret = DB_NOTFOUND;
882 goto out1;
884 if (ret == 0)
885 ret = __ham_item_next(hashp,
886 hcp, DB_LOCK_READ);
887 break;
888 case DB_SET:
889 case DB_SET_RANGE:
890 /* Key not found. */
891 ret = DB_NOTFOUND;
892 goto out1;
895 out1: if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)
896 t_ret = ret;
897 out: if (ret)
898 *hcp = save_curs;
899 RELEASE_META(hashp->dbp, hashp);
900 if (F_ISSET(cursor->dbp, DB_AM_THREAD))
901 __db_puthandle(ldbp);
902 return (ret);
905 static int
906 __ham_c_put(cursor, key, data, flags)
907 DBC *cursor;
908 DBT *key;
909 DBT *data;
910 int flags;
912 DB *ldbp;
913 HTAB *hashp;
914 HASH_CURSOR *hcp, save_curs;
915 int ret, t_ret;
916 u_int32_t nbytes;
918 DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_put",
919 flags == DB_KEYFIRST || flags == DB_KEYLAST ? key : NULL,
920 NULL, flags);
921 ldbp = cursor->dbp;
922 if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
923 (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
924 return (ret);
925 hashp = (HTAB *)(ldbp->internal);
926 hcp = (HASH_CURSOR *)cursor->internal;
927 save_curs = *hcp;
929 if ((ret = __db_cputchk(hashp->dbp, key, data, flags,
930 F_ISSET(ldbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0)
931 return (ret);
932 if (F_ISSET(hcp, H_DELETED))
933 return (DB_NOTFOUND);
935 SET_LOCKER(hashp->dbp, cursor->txn);
936 GET_META(hashp->dbp, hashp);
937 ret = 0;
939 switch (flags) {
940 case DB_KEYLAST:
941 case DB_KEYFIRST:
942 nbytes = (ISBIG(hashp, key->size) ? HOFFPAGE_PSIZE :
943 HKEYDATA_PSIZE(key->size)) +
944 (ISBIG(hashp, data->size) ? HOFFPAGE_PSIZE :
945 HKEYDATA_PSIZE(data->size));
946 ret = __ham_lookup(hashp, hcp, key, nbytes, DB_LOCK_WRITE);
947 break;
948 case DB_BEFORE:
949 case DB_AFTER:
950 case DB_CURRENT:
951 ret = __ham_item(hashp, hcp, DB_LOCK_WRITE);
952 break;
955 if (ret == 0) {
956 if (flags == DB_CURRENT && !F_ISSET(ldbp, DB_AM_DUP))
957 ret = __ham_overwrite(hashp, hcp, data);
958 else
959 ret = __ham_add_dup(hashp, hcp, data, flags);
962 if (ret == 0 && F_ISSET(hcp, H_EXPAND)) {
963 ret = __ham_expand_table(hashp);
964 F_CLR(hcp, H_EXPAND);
967 if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
968 ret = t_ret;
969 if (ret != 0)
970 *hcp = save_curs;
971 RELEASE_META(hashp->dbp, hashp);
972 if (F_ISSET(cursor->dbp, DB_AM_THREAD))
973 __db_puthandle(ldbp);
974 return (ret);
977 /********************************* UTILITIES ************************/
980 * __ham_expand_table --
982 * PUBLIC: int __ham_expand_table __P((HTAB *));
985 __ham_expand_table(hashp)
986 HTAB *hashp;
988 DB_LSN new_lsn;
989 u_int32_t old_bucket, new_bucket, spare_ndx;
990 int ret;
992 ret = 0;
993 DIRTY_META(hashp, ret);
994 if (ret)
995 return (ret);
998 * If the split point is about to increase, make sure that we
999 * have enough extra pages. The calculation here is weird.
1000 * We'd like to do this after we've upped max_bucket, but it's
1001 * too late then because we've logged the meta-data split. What
1002 * we'll do between then and now is increment max bucket and then
1003 * see what the log of one greater than that is; here we have to
1004 * look at the log of max + 2. VERY NASTY STUFF.
1006 if (__db_log2(hashp->hdr->max_bucket + 2) > hashp->hdr->ovfl_point) {
1008 * We are about to shift the split point. Make sure that
1009 * if the next doubling is going to be big (more than 8
1010 * pages), we have some extra pages around.
1012 if (hashp->hdr->max_bucket + 1 >= 8 &&
1013 hashp->hdr->spares[hashp->hdr->ovfl_point] <
1014 hashp->hdr->spares[hashp->hdr->ovfl_point - 1] +
1015 hashp->hdr->ovfl_point + 1)
1016 __ham_init_ovflpages(hashp);
1019 /* Now we can log the meta-data split. */
1020 if (DB_LOGGING(hashp->dbp)) {
1021 if ((ret = __ham_splitmeta_log(hashp->dbp->dbenv->lg_info,
1022 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
1023 hashp->dbp->log_fileid,
1024 hashp->hdr->max_bucket, hashp->hdr->ovfl_point,
1025 hashp->hdr->spares[hashp->hdr->ovfl_point],
1026 &hashp->hdr->lsn)) != 0)
1027 return (ret);
1029 hashp->hdr->lsn = new_lsn;
1032 hashp->hash_expansions++;
1033 new_bucket = ++hashp->hdr->max_bucket;
1034 old_bucket = (hashp->hdr->max_bucket & hashp->hdr->low_mask);
1037 * If the split point is increasing, copy the current contents
1038 * of the spare split bucket to the next bucket.
1040 spare_ndx = __db_log2(hashp->hdr->max_bucket + 1);
1041 if (spare_ndx > hashp->hdr->ovfl_point) {
1042 hashp->hdr->spares[spare_ndx] =
1043 hashp->hdr->spares[hashp->hdr->ovfl_point];
1044 hashp->hdr->ovfl_point = spare_ndx;
1047 if (new_bucket > hashp->hdr->high_mask) {
1048 /* Starting a new doubling */
1049 hashp->hdr->low_mask = hashp->hdr->high_mask;
1050 hashp->hdr->high_mask = new_bucket | hashp->hdr->low_mask;
1053 if (BUCKET_TO_PAGE(hashp, new_bucket) > MAX_PAGES(hashp)) {
1054 __db_err(hashp->dbp->dbenv,
1055 "hash: Cannot allocate new bucket. Pages exhausted.");
1056 return (ENOSPC);
1059 /* Relocate records to the new bucket */
1060 return (__ham_split_page(hashp, old_bucket, new_bucket));
1064 * PUBLIC: u_int32_t __ham_call_hash __P((HTAB *, u_int8_t *, int32_t));
1066 u_int32_t
1067 __ham_call_hash(hashp, k, len)
1068 HTAB *hashp;
1069 u_int8_t *k;
1070 int32_t len;
1072 u_int32_t n, bucket;
1074 n = (u_int32_t)hashp->hash(k, len);
1075 bucket = n & hashp->hdr->high_mask;
1076 if (bucket > hashp->hdr->max_bucket)
1077 bucket = bucket & hashp->hdr->low_mask;
1078 return (bucket);
1082 * Check for duplicates, and call __db_ret appropriately. Release
1083 * everything held by the cursor.
1085 static int
1086 __ham_dup_return(hashp, hcp, val, flags)
1087 HTAB *hashp;
1088 HASH_CURSOR *hcp;
1089 DBT *val;
1090 int flags;
1092 PAGE *pp;
1093 DBT *myval, tmp_val;
1094 db_indx_t ndx;
1095 db_pgno_t pgno;
1096 u_int8_t *hk, type;
1097 int indx, ret;
1098 db_indx_t len;
1100 /* Check for duplicate and return the first one. */
1101 ndx = H_DATAINDEX(hcp->bndx);
1102 type = HPAGE_TYPE(hcp->pagep, ndx);
1103 pp = hcp->pagep;
1104 myval = val;
1107 * There are 3 cases:
1108 * 1. We are not in duplicate, simply call db_ret.
1109 * 2. We are looking at keys and stumbled onto a duplicate.
1110 * 3. We are in the middle of a duplicate set. (ISDUP set)
1114 * Here we check for the case where we just stumbled onto a
1115 * duplicate. In this case, we do initialization and then
1116 * let the normal duplicate code handle it.
1118 if (!F_ISSET(hcp, H_ISDUP))
1119 if (type == H_DUPLICATE) {
1120 F_SET(hcp, H_ISDUP);
1121 hcp->dup_tlen = LEN_HDATA(hcp->pagep,
1122 hashp->hdr->pagesize, hcp->bndx);
1123 hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
1124 if (flags == DB_LAST || flags == DB_PREV) {
1125 hcp->dndx = 0;
1126 hcp->dup_off = 0;
1127 do {
1128 memcpy(&len,
1129 HKEYDATA_DATA(hk) + hcp->dup_off,
1130 sizeof(db_indx_t));
1131 hcp->dup_off += DUP_SIZE(len);
1132 hcp->dndx++;
1133 } while (hcp->dup_off < hcp->dup_tlen);
1134 hcp->dup_off -= DUP_SIZE(len);
1135 hcp->dndx--;
1136 } else {
1137 memcpy(&len,
1138 HKEYDATA_DATA(hk), sizeof(db_indx_t));
1139 hcp->dup_off = 0;
1140 hcp->dndx = 0;
1142 hcp->dup_len = len;
1143 } else if (type == H_OFFDUP) {
1144 F_SET(hcp, H_ISDUP);
1145 memcpy(&pgno, HOFFDUP_PGNO(P_ENTRY(hcp->pagep, ndx)),
1146 sizeof(db_pgno_t));
1147 if (flags == DB_LAST || flags == DB_PREV) {
1148 indx = (int)hcp->dndx;
1149 if ((ret = __db_dend(hashp->dbp,
1150 pgno, &hcp->dpagep)) != 0)
1151 return (ret);
1152 hcp->dpgno = PGNO(hcp->dpagep);
1153 hcp->dndx = NUM_ENT(hcp->dpagep) - 1;
1154 } else if ((ret = __ham_next_cpage(hashp,
1155 hcp, pgno, 0, H_ISDUP)) != 0)
1156 return (ret);
1161 * Now, everything is initialized, grab a duplicate if
1162 * necessary.
1164 if (F_ISSET(hcp, H_ISDUP))
1165 if (hcp->dpgno != PGNO_INVALID) {
1166 pp = hcp->dpagep;
1167 ndx = hcp->dndx;
1168 } else {
1170 * Copy the DBT in case we are retrieving into
1171 * user memory and we need the parameters for
1172 * it.
1174 memcpy(&tmp_val, val, sizeof(*val));
1175 F_SET(&tmp_val, DB_DBT_PARTIAL);
1176 tmp_val.dlen = hcp->dup_len;
1177 tmp_val.doff = hcp->dup_off + sizeof(db_indx_t);
1178 myval = &tmp_val;
1183 * Finally, if we had a duplicate, pp, ndx, and myval should be
1184 * set appropriately.
1186 if ((ret = __db_ret(hashp->dbp, pp, ndx, myval, &hcp->big_data,
1187 &hcp->big_datalen)) != 0)
1188 return (ret);
1191 * In case we sent a temporary off to db_ret, set the real
1192 * return values.
1194 val->data = myval->data;
1195 val->size = myval->size;
1197 return (0);
1200 static int
1201 __ham_overwrite(hashp, hcp, nval)
1202 HTAB *hashp;
1203 HASH_CURSOR *hcp;
1204 DBT *nval;
1206 DBT *myval, tmp_val;
1207 u_int8_t *hk;
1209 if (F_ISSET(hashp->dbp, DB_AM_DUP))
1210 return (__ham_add_dup(hashp, hcp, nval, DB_KEYLAST));
1211 else if (!F_ISSET(nval, DB_DBT_PARTIAL)) {
1212 /* Put/overwrite */
1213 memcpy(&tmp_val, nval, sizeof(*nval));
1214 F_SET(&tmp_val, DB_DBT_PARTIAL);
1215 tmp_val.doff = 0;
1216 hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
1217 if (HPAGE_PTYPE(hk) == H_OFFPAGE)
1218 memcpy(&tmp_val.dlen,
1219 HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
1220 else
1221 tmp_val.dlen = LEN_HDATA(hcp->pagep,
1222 hashp->hdr->pagesize,hcp->bndx);
1223 myval = &tmp_val;
1224 } else /* Regular partial put */
1225 myval = nval;
1227 return (__ham_replpair(hashp, hcp, myval, 0));
1231 * Given a key and a cursor, sets the cursor to the page/ndx on which
1232 * the key resides. If the key is found, the cursor H_OK flag is set
1233 * and the pagep, bndx, pgno (dpagep, dndx, dpgno) fields are set.
1234 * If the key is not found, the H_OK flag is not set. If the sought
1235 * field is non-0, the pagep, bndx, pgno (dpagep, dndx, dpgno) fields
1236 * are set indicating where an add might take place. If it is 0,
1237 * non of the cursor pointer field are valid.
1239 static int
1240 __ham_lookup(hashp, hcp, key, sought, mode)
1241 HTAB *hashp;
1242 HASH_CURSOR *hcp;
1243 const DBT *key;
1244 u_int32_t sought;
1245 db_lockmode_t mode;
1247 db_pgno_t pgno;
1248 u_int32_t tlen;
1249 int match, ret, t_ret;
1250 u_int8_t *hk;
1253 * Set up cursor so that we're looking for space to add an item
1254 * as we cycle through the pages looking for the key.
1256 if ((ret = __ham_item_reset(hashp, hcp)) != 0)
1257 return (ret);
1258 hcp->seek_size = sought;
1260 hcp->bucket = __ham_call_hash(hashp, (u_int8_t *)key->data, key->size);
1261 while (1) {
1262 if ((ret = __ham_item_next(hashp, hcp, mode)) != 0)
1263 return (ret);
1265 if (F_ISSET(hcp, H_NOMORE))
1266 break;
1268 hk = H_PAIRKEY(hcp->pagep, hcp->bndx);
1269 switch (HPAGE_PTYPE(hk)) {
1270 case H_OFFPAGE:
1271 memcpy(&tlen, HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
1272 if (tlen == key->size) {
1273 memcpy(&pgno,
1274 HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
1275 match = __db_moff(hashp->dbp, key, pgno);
1276 if (match == 0) {
1277 F_SET(hcp, H_OK);
1278 return (0);
1281 break;
1282 case H_KEYDATA:
1283 if (key->size == LEN_HKEY(hcp->pagep,
1284 hashp->hdr->pagesize, hcp->bndx) &&
1285 memcmp(key->data,
1286 HKEYDATA_DATA(hk), key->size) == 0) {
1287 F_SET(hcp, H_OK);
1288 return (0);
1290 break;
1291 case H_DUPLICATE:
1292 case H_OFFDUP:
1294 * These are errors because keys are never
1295 * duplicated, only data items are.
1297 return (__db_pgfmt(hashp->dbp, PGNO(hcp->pagep)));
1299 hashp->hash_collisions++;
1303 * Item was not found, adjust cursor properly.
1306 if (sought != 0)
1307 return (ret);
1309 if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)
1310 ret = t_ret;
1311 return (ret);
1315 * Initialize a dbt using some possibly already allocated storage
1316 * for items.
1317 * PUBLIC: int __ham_init_dbt __P((DBT *, u_int32_t, void **, u_int32_t *));
1320 __ham_init_dbt(dbt, size, bufp, sizep)
1321 DBT *dbt;
1322 u_int32_t size;
1323 void **bufp;
1324 u_int32_t *sizep;
1326 memset(dbt, 0, sizeof(*dbt));
1327 if (*sizep < size) {
1328 if ((*bufp = (void *)(*bufp == NULL ?
1329 __db_malloc(size) : __db_realloc(*bufp, size))) == NULL) {
1330 *sizep = 0;
1331 return (ENOMEM);
1333 *sizep = size;
1335 dbt->data = *bufp;
1336 dbt->size = size;
1337 return (0);
1341 * Adjust the cursor after an insert or delete. The cursor passed is
1342 * the one that was operated upon; we just need to check any of the
1343 * others.
1345 * len indicates the length of the item added/deleted
1346 * add indicates if the item indicated by the cursor has just been
1347 * added (add == 1) or deleted (add == 0).
1348 * dup indicates if the addition occurred into a duplicate set.
1350 * PUBLIC: void __ham_c_update __P((HTAB *,
1351 * PUBLIC: HASH_CURSOR *, db_pgno_t, u_int32_t, int, int));
1353 void
1354 __ham_c_update(hashp, hcp, chg_pgno, len, add, dup)
1355 HTAB *hashp;
1356 HASH_CURSOR *hcp;
1357 db_pgno_t chg_pgno;
1358 u_int32_t len;
1359 int add;
1360 int dup;
1362 DBC *cp;
1363 HTAB *hp;
1364 HASH_CURSOR *lcp;
1365 int page_deleted;
1368 * Regular adds are always at the end of a given page,
1369 * so we never have to adjust anyone's cursor after
1370 * a regular add.
1372 if (!dup && add)
1373 return;
1376 * Determine if a page was deleted. If this is a regular update
1377 * (i.e., not dup) then the deleted page's number will be that in
1378 * chg_pgno, and the pgno in the cursor will be different. If this
1379 * was an onpage-duplicate, then the same conditions apply. If this
1380 * was an off-page duplicate, then we need to verify if hcp->dpgno
1381 * is the same (no delete) or different (delete) than chg_pgno.
1383 if (!dup || hcp->dpgno == PGNO_INVALID)
1384 page_deleted =
1385 chg_pgno != PGNO_INVALID && chg_pgno != hcp->pgno;
1386 else
1387 page_deleted =
1388 chg_pgno != PGNO_INVALID && chg_pgno != hcp->dpgno;
1390 hp = hcp->db_cursor->dbp->master->internal;
1391 DB_THREAD_LOCK(hp->dbp);
1393 for (cp = TAILQ_FIRST(&hp->dbp->curs_queue); cp != NULL;
1394 cp = TAILQ_NEXT(cp, links)) {
1395 if (cp->internal == hcp)
1396 continue;
1398 lcp = (HASH_CURSOR *)cp->internal;
1400 if (!dup && lcp->pgno != chg_pgno)
1401 continue;
1403 if (dup && F_ISSET(hcp, H_DELETED) && lcp->pgno != chg_pgno)
1404 continue;
1406 if (dup && !F_ISSET(hcp, H_DELETED) && lcp->dpgno != chg_pgno)
1407 continue;
1409 if (page_deleted) {
1410 if (dup) {
1411 lcp->dpgno = hcp->dpgno;
1412 lcp->dndx = hcp->dndx;
1413 } else {
1414 lcp->pgno = hcp->pgno;
1415 lcp->bndx = hcp->bndx;
1416 lcp->bucket = hcp->bucket;
1418 F_CLR(lcp, H_ISDUP);
1419 continue;
1422 if (!dup && lcp->bndx > hcp->bndx)
1423 lcp->bndx--;
1424 else if (!dup && lcp->bndx == hcp->bndx)
1425 F_SET(lcp, H_DELETED);
1426 else if (dup && lcp->bndx == hcp->bndx) {
1427 /* Assign dpgno in case there was page conversion. */
1428 lcp->dpgno = hcp->dpgno;
1429 if (add && lcp->dndx >= hcp->dndx )
1430 lcp->dndx++;
1431 else if (!add && lcp->dndx > hcp->dndx)
1432 lcp->dndx--;
1433 else if (!add && lcp->dndx == hcp->dndx)
1434 F_SET(lcp, H_DELETED);
1436 /* Now adjust on-page information. */
1437 if (lcp->dpgno == PGNO_INVALID)
1438 if (add) {
1439 lcp->dup_tlen += len;
1440 if (lcp->dndx > hcp->dndx)
1441 lcp->dup_off += len;
1442 } else {
1443 lcp->dup_tlen -= len;
1444 if (lcp->dndx > hcp->dndx)
1445 lcp->dup_off -= len;
1449 DB_THREAD_UNLOCK(hp->dbp);
1453 * __ham_hdup --
1454 * This function gets called when we create a duplicate handle for a
1455 * threaded DB. It should create the private part of the DB structure.
1456 * PUBLIC: int __ham_hdup __P((DB *, DB *));
1459 __ham_hdup(orig, new)
1460 DB *orig, *new;
1462 HTAB *hashp;
1463 DBC *curs;
1464 int ret;
1466 if ((hashp = (HTAB *)__db_malloc(sizeof(HTAB))) == NULL)
1467 return (ENOMEM);
1469 new->internal = hashp;
1471 hashp->dbp = new;
1472 hashp->hlock = 0;
1473 hashp->hdr = NULL;
1474 hashp->hash = ((HTAB *)orig->internal)->hash;
1475 if ((hashp->split_buf = (PAGE *)__db_malloc(orig->pgsize)) == NULL)
1476 return (ENOMEM);
1477 hashp->local_errno = 0;
1478 hashp->hash_accesses = 0;
1479 hashp->hash_collisions = 0;
1480 hashp->hash_expansions = 0;
1481 hashp->hash_overflows = 0;
1482 hashp->hash_bigpages = 0;
1483 /* Initialize the cursor queue. */
1484 ret = __ham_c_init(new, NULL, &curs);
1485 TAILQ_INSERT_TAIL(&new->curs_queue, curs, links);
1486 return (ret);