Update.
[glibc.git] / db2 / hash / hash.c
blob0265f1965986b3a1b9c5a180f9f054f2f8a21e11
1 /*-
2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997, 1998
5 * Sleepycat Software. All rights reserved.
6 */
7 /*
8 * Copyright (c) 1990, 1993, 1994
9 * Margo Seltzer. All rights reserved.
12 * Copyright (c) 1990, 1993, 1994
13 * The Regents of the University of California. All rights reserved.
15 * This code is derived from software contributed to Berkeley by
16 * Margo Seltzer.
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
20 * are met:
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
26 * 3. All advertising materials mentioning features or use of this software
27 * must display the following acknowledgement:
28 * This product includes software developed by the University of
29 * California, Berkeley and its contributors.
30 * 4. Neither the name of the University nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
34 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44 * SUCH DAMAGE.
47 #include "config.h"
49 #ifndef lint
50 static const char sccsid[] = "@(#)hash.c 10.45 (Sleepycat) 5/11/98";
51 #endif /* not lint */
53 #ifndef NO_SYSTEM_INCLUDES
54 #include <sys/types.h>
56 #include <errno.h>
57 #include <stdlib.h>
58 #include <string.h>
59 #endif
61 #include "db_int.h"
62 #include "shqueue.h"
63 #include "db_page.h"
64 #include "db_am.h"
65 #include "db_ext.h"
66 #include "hash.h"
67 #include "log.h"
69 static int __ham_c_close __P((DBC *));
70 static int __ham_c_del __P((DBC *, u_int32_t));
71 static int __ham_c_get __P((DBC *, DBT *, DBT *, u_int32_t));
72 static int __ham_c_put __P((DBC *, DBT *, DBT *, u_int32_t));
73 static int __ham_c_init __P((DB *, DB_TXN *, DBC **));
74 static int __ham_cursor __P((DB *, DB_TXN *, DBC **));
75 static int __ham_delete __P((DB *, DB_TXN *, DBT *, u_int32_t));
76 static int __ham_dup_return __P((HTAB *, HASH_CURSOR *, DBT *, u_int32_t));
77 static int __ham_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
78 static void __ham_init_htab __P((HTAB *, u_int32_t, u_int32_t));
79 static int __ham_lookup __P((HTAB *,
80 HASH_CURSOR *, const DBT *, u_int32_t, db_lockmode_t));
81 static int __ham_overwrite __P((HTAB *, HASH_CURSOR *, DBT *));
82 static int __ham_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
83 static int __ham_sync __P((DB *, u_int32_t));
85 /************************** INTERFACE ROUTINES ***************************/
86 /* OPEN/CLOSE */
89 * __ham_open --
91 * PUBLIC: int __ham_open __P((DB *, DB_INFO *));
93 int
94 __ham_open(dbp, dbinfo)
95 DB *dbp;
96 DB_INFO *dbinfo;
98 DB_ENV *dbenv;
99 DBC *curs;
100 HTAB *hashp;
101 int file_existed, ret;
103 dbenv = dbp->dbenv;
105 if ((hashp = (HTAB *)__db_calloc(1, sizeof(HTAB))) == NULL)
106 return (ENOMEM);
107 hashp->dbp = dbp;
109 /* Set the hash function if specified by the user. */
110 if (dbinfo != NULL && dbinfo->h_hash != NULL)
111 hashp->hash = dbinfo->h_hash;
114 * Initialize the remaining fields of the dbp. The type, close and
115 * fd functions are all set in db_open.
117 dbp->internal = hashp;
118 dbp->cursor = __ham_cursor;
119 dbp->del = __ham_delete;
120 dbp->get = __ham_get;
121 dbp->put = __ham_put;
122 dbp->sync = __ham_sync;
124 /* If locking is turned on, lock the meta data page. */
125 if (F_ISSET(dbp, DB_AM_LOCKING)) {
126 dbp->lock.pgno = BUCKET_INVALID;
127 if ((ret = lock_get(dbenv->lk_info, dbp->locker,
128 0, &dbp->lock_dbt, DB_LOCK_READ, &hashp->hlock)) != 0) {
129 if (ret < 0)
130 ret = EAGAIN;
131 goto out;
136 * Now, we can try to read the meta-data page and figure out
137 * if we set up locking and get the meta-data page properly.
138 * If this is a new file, initialize it, and put it back dirty.
140 if ((ret = __ham_get_page(hashp->dbp, 0, (PAGE **)&hashp->hdr)) != 0)
141 goto out;
143 /* Initialize the hashp structure */
144 if (hashp->hdr->magic == DB_HASHMAGIC) {
145 file_existed = 1;
146 /* File exists, verify the data in the header. */
147 if (hashp->hash == NULL)
148 hashp->hash =
149 hashp->hdr->version < 5 ? __ham_func4 : __ham_func5;
150 if (hashp->hash(CHARKEY, sizeof(CHARKEY)) !=
151 hashp->hdr->h_charkey) {
152 __db_err(hashp->dbp->dbenv,
153 "hash: incompatible hash function");
154 ret = EINVAL;
155 goto out;
157 if (F_ISSET(hashp->hdr, DB_HASH_DUP))
158 F_SET(dbp, DB_AM_DUP);
159 } else {
161 * File does not exist, we must initialize the header. If
162 * locking is enabled that means getting a write lock first.
164 file_existed = 0;
165 if (F_ISSET(dbp, DB_AM_LOCKING) &&
166 ((ret = lock_put(dbenv->lk_info, hashp->hlock)) != 0 ||
167 (ret = lock_get(dbenv->lk_info, dbp->locker, 0,
168 &dbp->lock_dbt, DB_LOCK_WRITE, &hashp->hlock)) != 0)) {
169 if (ret < 0)
170 ret = EAGAIN;
171 goto out;
174 __ham_init_htab(hashp,
175 dbinfo != NULL ? dbinfo->h_nelem : 0,
176 dbinfo != NULL ? dbinfo->h_ffactor : 0);
177 if (F_ISSET(dbp, DB_AM_DUP))
178 F_SET(hashp->hdr, DB_HASH_DUP);
179 if ((ret = __ham_dirty_page(hashp, (PAGE *)hashp->hdr)) != 0)
180 goto out;
183 /* Initialize the default cursor. */
184 __ham_c_init(dbp, NULL, &curs);
185 TAILQ_INSERT_TAIL(&dbp->curs_queue, curs, links);
187 /* Allocate memory for our split buffer. */
188 if ((hashp->split_buf = (PAGE *)__db_malloc(dbp->pgsize)) == NULL) {
189 ret = ENOMEM;
190 goto out;
193 #ifdef NO_STATISTICS_FOR_DB_ERR
194 __db_err(dbp->dbenv,
195 "%s%lx\n%s%ld\n%s%ld\n%s%ld\n%s%ld\n%s0x%lx\n%s0x%lx\n%s%ld\n%s%ld\n%s0x%lx",
196 "TABLE POINTER ", (long)hashp,
197 "BUCKET SIZE ", (long)hashp->hdr->pagesize,
198 "FILL FACTOR ", (long)hashp->hdr->ffactor,
199 "MAX BUCKET ", (long)hashp->hdr->max_bucket,
200 "OVFL POINT ", (long)hashp->hdr->ovfl_point,
201 "LAST FREED ", (long)hashp->hdr->last_freed,
202 "HIGH MASK ", (long)hashp->hdr->high_mask,
203 "LOW MASK ", (long)hashp->hdr->low_mask,
204 "NELEM ", (long)hashp->hdr->nelem,
205 "FLAGS ", (long)hashp->hdr->flags);
206 #endif
208 /* Release the meta data page */
209 (void)__ham_put_page(hashp->dbp, (PAGE *)hashp->hdr, 0);
210 if (F_ISSET(dbp, DB_AM_LOCKING) &&
211 (ret = lock_put(dbenv->lk_info, hashp->hlock)) != 0) {
212 if (ret < 0)
213 ret = EAGAIN;
214 goto out;
217 hashp->hlock = 0;
218 hashp->hdr = NULL;
219 /* Sync the file so that we know that the meta data goes to disk. */
220 if (!file_existed && (ret = dbp->sync(dbp, 0)) != 0)
221 goto out;
222 return (0);
224 out: (void)__ham_close(dbp);
225 return (ret);
229 * PUBLIC: int __ham_close __P((DB *));
232 __ham_close(dbp)
233 DB *dbp;
235 HTAB *hashp;
236 int ret, t_ret;
238 DEBUG_LWRITE(dbp, NULL, "ham_close", NULL, NULL, 0);
239 hashp = (HTAB *)dbp->internal;
240 ret = 0;
242 /* Free the split page. */
243 if (hashp->split_buf)
244 FREE(hashp->split_buf, dbp->pgsize);
246 if (hashp->hdr && (t_ret = __ham_put_page(hashp->dbp,
247 (PAGE *)hashp->hdr, 0)) != 0 && ret == 0)
248 ret = t_ret;
249 if (hashp->hlock && (t_ret = lock_put(hashp->dbp->dbenv->lk_info,
250 hashp->hlock)) != 0 && ret == 0)
251 ret = t_ret;
253 FREE(hashp, sizeof(HTAB));
254 dbp->internal = NULL;
255 return (ret);
258 /************************** LOCAL CREATION ROUTINES **********************/
260 * Returns 0 on No Error
262 static void
263 __ham_init_htab(hashp, nelem, ffactor)
264 HTAB *hashp;
265 u_int32_t nelem, ffactor;
267 int32_t l2, nbuckets;
269 memset(hashp->hdr, 0, sizeof(HASHHDR));
270 hashp->hdr->ffactor = ffactor;
271 hashp->hdr->pagesize = hashp->dbp->pgsize;
272 ZERO_LSN(hashp->hdr->lsn);
273 hashp->hdr->magic = DB_HASHMAGIC;
274 hashp->hdr->version = DB_HASHVERSION;
275 if (hashp->hash == NULL)
276 hashp->hash =
277 hashp->hdr->version < 5 ? __ham_func4 : __ham_func5;
278 hashp->hdr->h_charkey = hashp->hash(CHARKEY, sizeof(CHARKEY));
279 if (nelem != 0 && hashp->hdr->ffactor != 0) {
280 nelem = (nelem - 1) / hashp->hdr->ffactor + 1;
281 l2 = __db_log2(nelem > 2 ? nelem : 2);
282 } else
283 l2 = 2;
285 nbuckets = 1 << l2;
287 hashp->hdr->ovfl_point = l2;
288 hashp->hdr->last_freed = PGNO_INVALID;
290 hashp->hdr->max_bucket = hashp->hdr->high_mask = nbuckets - 1;
291 hashp->hdr->low_mask = (nbuckets >> 1) - 1;
292 memcpy(hashp->hdr->uid, hashp->dbp->lock.fileid, DB_FILE_ID_LEN);
295 /********************** DESTROY/CLOSE ROUTINES ************************/
299 * Write modified pages to disk
301 * Returns:
302 * 0 == OK
303 * -1 ERROR
305 static int
306 __ham_sync(dbp, flags)
307 DB *dbp;
308 u_int32_t flags;
310 int ret;
312 DEBUG_LWRITE(dbp, NULL, "ham_sync", NULL, NULL, flags);
313 if ((ret = __db_syncchk(dbp, flags)) != 0)
314 return (ret);
315 if (F_ISSET(dbp, DB_AM_RDONLY))
316 return (0);
318 if ((ret = memp_fsync(dbp->mpf)) == DB_INCOMPLETE)
319 ret = 0;
321 return (ret);
324 /*******************************SEARCH ROUTINES *****************************/
326 * All the access routines return
328 * Returns:
329 * 0 on SUCCESS
330 * 1 to indicate an external ERROR (i.e. key not found, etc)
331 * -1 to indicate an internal ERROR (i.e. out of memory, etc)
334 static int
335 __ham_get(dbp, txn, key, data, flags)
336 DB *dbp;
337 DB_TXN *txn;
338 DBT *key;
339 DBT *data;
340 u_int32_t flags;
342 DB *ldbp;
343 HTAB *hashp;
344 HASH_CURSOR *hcp;
345 int ret, t_ret;
347 DEBUG_LREAD(dbp, txn, "ham_get", key, NULL, flags);
348 if ((ret = __db_getchk(dbp, key, data, flags)) != 0)
349 return (ret);
351 ldbp = dbp;
352 if (F_ISSET(dbp, DB_AM_THREAD) &&
353 (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)
354 return (ret);
356 hashp = (HTAB *)ldbp->internal;
357 SET_LOCKER(ldbp, txn);
358 GET_META(ldbp, hashp);
360 hashp->hash_accesses++;
361 hcp = (HASH_CURSOR *)TAILQ_FIRST(&ldbp->curs_queue)->internal;
362 if ((ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_READ)) == 0) {
363 if (F_ISSET(hcp, H_OK))
364 ret = __ham_dup_return(hashp, hcp, data, DB_FIRST);
365 else /* Key was not found */
366 ret = DB_NOTFOUND;
369 if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)
370 ret = t_ret;
371 RELEASE_META(ldbp, hashp);
372 if (F_ISSET(dbp, DB_AM_THREAD))
373 __db_puthandle(ldbp);
374 return (ret);
377 static int
378 __ham_put(dbp, txn, key, data, flags)
379 DB *dbp;
380 DB_TXN *txn;
381 DBT *key;
382 DBT *data;
383 u_int32_t flags;
385 DB *ldbp;
386 DBT tmp_val, *myval;
387 HASH_CURSOR *hcp;
388 HTAB *hashp;
389 u_int32_t nbytes;
390 int ret, t_ret;
392 DEBUG_LWRITE(dbp, txn, "ham_put", key, data, flags);
393 if ((ret = __db_putchk(dbp, key, data,
394 flags, F_ISSET(dbp, DB_AM_RDONLY), F_ISSET(dbp, DB_AM_DUP))) != 0)
395 return (ret);
397 ldbp = dbp;
398 if (F_ISSET(dbp, DB_AM_THREAD) &&
399 (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)
400 return (ret);
402 hashp = (HTAB *)ldbp->internal;
403 SET_LOCKER(ldbp, txn);
404 GET_META(ldbp, hashp);
405 hcp = TAILQ_FIRST(&ldbp->curs_queue)->internal;
407 nbytes = (ISBIG(hashp, key->size) ? HOFFPAGE_PSIZE :
408 HKEYDATA_PSIZE(key->size)) +
409 (ISBIG(hashp, data->size) ? HOFFPAGE_PSIZE :
410 HKEYDATA_PSIZE(data->size));
412 hashp->hash_accesses++;
413 ret = __ham_lookup(hashp, hcp, key, nbytes, DB_LOCK_WRITE);
415 if (ret == DB_NOTFOUND) {
416 ret = 0;
417 if (hcp->seek_found_page != PGNO_INVALID &&
418 hcp->seek_found_page != hcp->pgno) {
419 if ((ret = __ham_item_done(hashp, hcp, 0)) != 0)
420 goto out;
421 hcp->pgno = hcp->seek_found_page;
422 hcp->bndx = NDX_INVALID;
425 if (F_ISSET(data, DB_DBT_PARTIAL) && data->doff != 0) {
427 * Doing a partial put, but the key does not exist
428 * and we are not beginning the write at 0. We
429 * must create a data item padded up to doff and
430 * then write the new bytes represented by val.
432 ret = __ham_init_dbt(&tmp_val, data->size + data->doff,
433 &hcp->big_data, &hcp->big_datalen);
434 if (ret == 0) {
435 memset(tmp_val.data, 0, data->doff);
436 memcpy((u_int8_t *)tmp_val.data + data->doff,
437 data->data, data->size);
438 myval = &tmp_val;
440 } else
441 myval = (DBT *)data;
443 if (ret == 0)
444 ret = __ham_add_el(hashp, hcp, key, myval, H_KEYDATA);
445 } else if (ret == 0 && F_ISSET(hcp, H_OK)) {
446 if (flags == DB_NOOVERWRITE)
447 ret = DB_KEYEXIST;
448 else if (F_ISSET(ldbp, DB_AM_DUP))
449 ret = __ham_add_dup(hashp, hcp, data, DB_KEYLAST);
450 else
451 ret = __ham_overwrite(hashp, hcp, data);
454 /* Free up all the cursor pages. */
455 if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
456 ret = t_ret;
457 /* Now check if we have to grow. */
458 out: if (ret == 0 && F_ISSET(hcp, H_EXPAND)) {
459 ret = __ham_expand_table(hashp);
460 F_CLR(hcp, H_EXPAND);
463 if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
464 ret = t_ret;
465 RELEASE_META(ldbp, hashp);
466 if (F_ISSET(dbp, DB_AM_THREAD))
467 __db_puthandle(ldbp);
468 return (ret);
471 static int
472 __ham_cursor(dbp, txnid, dbcp)
473 DB *dbp;
474 DB_TXN *txnid;
475 DBC **dbcp;
477 int ret;
479 DEBUG_LWRITE(dbp, txnid, "ham_cursor", NULL, NULL, 0);
480 if ((ret = __ham_c_init(dbp, txnid, dbcp)) != 0)
481 return (ret);
483 DB_THREAD_LOCK(dbp);
484 TAILQ_INSERT_TAIL(&dbp->curs_queue, *dbcp, links);
485 DB_THREAD_UNLOCK(dbp);
486 return (ret);
489 static int
490 __ham_c_init(dbp, txnid, dbcp)
491 DB *dbp;
492 DB_TXN *txnid;
493 DBC **dbcp;
495 DBC *db_curs;
496 HASH_CURSOR *new_curs;
498 if ((db_curs = (DBC *)__db_calloc(sizeof(DBC), 1)) == NULL)
499 return (ENOMEM);
501 if ((new_curs =
502 (HASH_CURSOR *)__db_calloc(sizeof(struct cursor_t), 1)) == NULL) {
503 FREE(db_curs, sizeof(DBC));
504 return (ENOMEM);
507 db_curs->internal = new_curs;
508 db_curs->c_close = __ham_c_close;
509 db_curs->c_del = __ham_c_del;
510 db_curs->c_get = __ham_c_get;
511 db_curs->c_put = __ham_c_put;
512 db_curs->txn = txnid;
513 db_curs->dbp = dbp;
515 new_curs->db_cursor = db_curs;
516 __ham_item_init(new_curs);
518 if (dbcp != NULL)
519 *dbcp = db_curs;
520 return (0);
523 static int
524 __ham_delete(dbp, txn, key, flags)
525 DB *dbp;
526 DB_TXN *txn;
527 DBT *key;
528 u_int32_t flags;
530 DB *ldbp;
531 HTAB *hashp;
532 HASH_CURSOR *hcp;
533 int ret, t_ret;
535 DEBUG_LWRITE(dbp, txn, "ham_delete", key, NULL, flags);
536 if ((ret =
537 __db_delchk(dbp, key, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0)
538 return (ret);
540 ldbp = dbp;
541 if (F_ISSET(dbp, DB_AM_THREAD) &&
542 (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)
543 return (ret);
544 hashp = (HTAB *)ldbp->internal;
545 SET_LOCKER(ldbp, txn);
546 GET_META(ldbp, hashp);
547 hcp = TAILQ_FIRST(&ldbp->curs_queue)->internal;
549 hashp->hash_accesses++;
550 if ((ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_WRITE)) == 0) {
551 if (F_ISSET(hcp, H_OK))
552 ret = __ham_del_pair(hashp, hcp, 1);
553 else
554 ret = DB_NOTFOUND;
557 if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
558 ret = t_ret;
559 RELEASE_META(ldbp, hashp);
560 if (F_ISSET(dbp, DB_AM_THREAD))
561 __db_puthandle(ldbp);
562 return (ret);
565 /* ****************** CURSORS ********************************** */
566 static int
567 __ham_c_close(cursor)
568 DBC *cursor;
570 DB *ldbp;
571 int ret;
573 DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_close", NULL, NULL, 0);
575 * If the pagep, dpagep, and lock fields of the cursor are all NULL,
576 * then there really isn't a need to get a handle here. However,
577 * the normal case is that at least one of those fields is non-NULL,
578 * and putting those checks in here would couple the ham_item_done
579 * functionality with cursor close which would be pretty disgusting.
580 * Instead, we pay the overhead here of always getting the handle.
582 ldbp = cursor->dbp;
583 if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
584 (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
585 return (ret);
587 ret = __ham_c_iclose(ldbp, cursor);
589 if (F_ISSET(ldbp, DB_AM_THREAD))
590 __db_puthandle(ldbp);
591 return (ret);
594 * __ham_c_iclose --
596 * Internal cursor close routine; assumes it is being passed the correct
597 * handle, rather than getting and putting a handle.
599 * PUBLIC: int __ham_c_iclose __P((DB *, DBC *));
602 __ham_c_iclose(dbp, dbc)
603 DB *dbp;
604 DBC *dbc;
606 HASH_CURSOR *hcp;
607 HTAB *hashp;
608 int ret;
610 hashp = (HTAB *)dbp->internal;
611 hcp = (HASH_CURSOR *)dbc->internal;
612 ret = __ham_item_done(hashp, hcp, 0);
614 if (hcp->big_key)
615 FREE(hcp->big_key, hcp->big_keylen);
616 if (hcp->big_data)
617 FREE(hcp->big_data, hcp->big_datalen);
620 * All cursors (except the default ones) are linked off the master.
621 * Therefore, when we close the cursor, we have to remove it from
622 * the master, not the local one.
623 * XXX I am always removing from the master; what about local cursors?
625 DB_THREAD_LOCK(dbc->dbp);
626 TAILQ_REMOVE(&dbc->dbp->curs_queue, dbc, links);
627 DB_THREAD_UNLOCK(dbc->dbp);
629 FREE(hcp, sizeof(HASH_CURSOR));
630 FREE(dbc, sizeof(DBC));
632 return (ret);
635 static int
636 __ham_c_del(cursor, flags)
637 DBC *cursor;
638 u_int32_t flags;
640 DB *ldbp;
641 HASH_CURSOR *hcp;
642 HASH_CURSOR save_curs;
643 HTAB *hashp;
644 db_pgno_t ppgno, chg_pgno;
645 int ret, t_ret;
647 DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_del", NULL, NULL, flags);
648 ldbp = cursor->dbp;
649 if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
650 (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
651 return (ret);
652 hashp = (HTAB *)ldbp->internal;
653 hcp = (HASH_CURSOR *)cursor->internal;
654 save_curs = *hcp;
655 if ((ret = __db_cdelchk(ldbp, flags,
656 F_ISSET(ldbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0)
657 return (ret);
658 if (F_ISSET(hcp, H_DELETED))
659 return (DB_NOTFOUND);
661 SET_LOCKER(hashp->dbp, cursor->txn);
662 GET_META(hashp->dbp, hashp);
663 hashp->hash_accesses++;
664 if ((ret = __ham_get_cpage(hashp, hcp, DB_LOCK_WRITE)) != 0)
665 goto out;
666 if (F_ISSET(hcp, H_ISDUP) && hcp->dpgno != PGNO_INVALID) {
668 * We are about to remove a duplicate from offpage.
670 * There are 4 cases.
671 * 1. We will remove an item on a page, but there are more
672 * items on that page.
673 * 2. We will remove the last item on a page, but there is a
674 * following page of duplicates.
675 * 3. We will remove the last item on a page, this page was the
676 * last page in a duplicate set, but there were dups before
677 * it.
678 * 4. We will remove the last item on a page, removing the last
679 * duplicate.
680 * In case 1 hcp->dpagep is unchanged.
681 * In case 2 hcp->dpagep comes back pointing to the next dup
682 * page.
683 * In case 3 hcp->dpagep comes back NULL.
684 * In case 4 hcp->dpagep comes back NULL.
686 * Case 4 results in deleting the pair off the master page.
687 * The normal code for doing this knows how to delete the
688 * duplicates, so we will handle this case in the normal code.
690 ppgno = PREV_PGNO(hcp->dpagep);
691 if (ppgno == PGNO_INVALID &&
692 NEXT_PGNO(hcp->dpagep) == PGNO_INVALID &&
693 NUM_ENT(hcp->dpagep) == 1)
694 goto normal;
696 /* Remove item from duplicate page. */
697 chg_pgno = hcp->dpgno;
698 if ((ret = __db_drem(hashp->dbp,
699 &hcp->dpagep, hcp->dndx, __ham_del_page)) != 0)
700 goto out;
702 if (hcp->dpagep == NULL) {
703 if (ppgno != PGNO_INVALID) { /* Case 3 */
704 hcp->dpgno = ppgno;
705 if ((ret = __ham_get_cpage(hashp, hcp,
706 DB_LOCK_READ)) != 0)
707 goto out;
708 hcp->dndx = NUM_ENT(hcp->dpagep);
709 F_SET(hcp, H_DELETED);
710 } else { /* Case 4 */
711 ret = __ham_del_pair(hashp, hcp, 1);
712 hcp->dpgno = PGNO_INVALID;
714 * Delpair updated the cursor queue, so we
715 * don't have to do that here.
717 chg_pgno = PGNO_INVALID;
719 } else if (PGNO(hcp->dpagep) != hcp->dpgno) {
720 hcp->dndx = 0; /* Case 2 */
721 hcp->dpgno = PGNO(hcp->dpagep);
722 if (ppgno == PGNO_INVALID)
723 memcpy(HOFFDUP_PGNO(P_ENTRY(hcp->pagep,
724 H_DATAINDEX(hcp->bndx))),
725 &hcp->dpgno, sizeof(db_pgno_t));
726 F_SET(hcp, H_DELETED);
727 } else /* Case 1 */
728 F_SET(hcp, H_DELETED);
729 if (chg_pgno != PGNO_INVALID)
730 __ham_c_update(hcp, chg_pgno, 0, 0, 1);
731 } else if (F_ISSET(hcp, H_ISDUP)) { /* on page */
732 if (hcp->dup_off == 0 && DUP_SIZE(hcp->dup_len) ==
733 LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx))
734 ret = __ham_del_pair(hashp, hcp, 1);
735 else {
736 DBT repldbt;
738 repldbt.flags = 0;
739 F_SET(&repldbt, DB_DBT_PARTIAL);
740 repldbt.doff = hcp->dup_off;
741 repldbt.dlen = DUP_SIZE(hcp->dup_len);
742 repldbt.size = 0;
743 ret = __ham_replpair(hashp, hcp, &repldbt, 0);
744 hcp->dup_tlen -= DUP_SIZE(hcp->dup_len);
745 F_SET(hcp, H_DELETED);
746 __ham_c_update(hcp, hcp->pgno,
747 DUP_SIZE(hcp->dup_len), 0, 1);
750 } else
751 /* Not a duplicate */
752 normal: ret = __ham_del_pair(hashp, hcp, 1);
754 out: if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
755 ret = t_ret;
756 if (ret != 0)
757 *hcp = save_curs;
758 RELEASE_META(hashp->dbp, hashp);
759 if (F_ISSET(cursor->dbp, DB_AM_THREAD))
760 __db_puthandle(ldbp);
761 return (ret);
764 static int
765 __ham_c_get(cursor, key, data, flags)
766 DBC *cursor;
767 DBT *key;
768 DBT *data;
769 u_int32_t flags;
771 DB *ldbp;
772 HTAB *hashp;
773 HASH_CURSOR *hcp, save_curs;
774 int get_key, ret, t_ret;
776 DEBUG_LREAD(cursor->dbp, cursor->txn, "ham_c_get",
777 flags == DB_SET || flags == DB_SET_RANGE ? key : NULL,
778 NULL, flags);
779 ldbp = cursor->dbp;
780 if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
781 (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
782 return (ret);
783 hashp = (HTAB *)(ldbp->internal);
784 hcp = (HASH_CURSOR *)cursor->internal;
785 save_curs = *hcp;
786 if ((ret =
787 __db_cgetchk(hashp->dbp, key, data, flags, IS_VALID(hcp))) != 0)
788 return (ret);
790 SET_LOCKER(hashp->dbp, cursor->txn);
791 GET_META(hashp->dbp, hashp);
792 hashp->hash_accesses++;
794 hcp->seek_size = 0;
796 ret = 0;
797 get_key = 1;
798 switch (flags) {
799 case DB_PREV:
800 if (hcp->bucket != BUCKET_INVALID) {
801 ret = __ham_item_prev(hashp, hcp, DB_LOCK_READ);
802 break;
804 /* FALLTHROUGH */
805 case DB_LAST:
806 ret = __ham_item_last(hashp, hcp, DB_LOCK_READ);
807 break;
808 case DB_FIRST:
809 ret = __ham_item_first(hashp, hcp, DB_LOCK_READ);
810 break;
811 case DB_NEXT:
812 if (hcp->bucket == BUCKET_INVALID)
813 hcp->bucket = 0;
814 ret = __ham_item_next(hashp, hcp, DB_LOCK_READ);
815 break;
816 case DB_SET:
817 case DB_SET_RANGE:
818 ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_READ);
819 get_key = 0;
820 break;
821 case DB_CURRENT:
822 if (F_ISSET(hcp, H_DELETED)) {
823 ret = DB_KEYEMPTY;
824 goto out;
827 ret = __ham_item(hashp, hcp, DB_LOCK_READ);
828 break;
832 * Must always enter this loop to do error handling and
833 * check for big key/data pair.
835 while (1) {
836 if (ret != 0 && ret != DB_NOTFOUND)
837 goto out1;
838 else if (F_ISSET(hcp, H_OK)) {
839 /* Get the key. */
840 if (get_key && (ret = __db_ret(hashp->dbp, hcp->pagep,
841 H_KEYINDEX(hcp->bndx), key, &hcp->big_key,
842 &hcp->big_keylen)) != 0)
843 goto out1;
845 ret = __ham_dup_return(hashp, hcp, data, flags);
846 break;
847 } else if (!F_ISSET(hcp, H_NOMORE)) {
848 abort();
849 break;
853 * Ran out of entries in a bucket; change buckets.
855 switch (flags) {
856 case DB_LAST:
857 case DB_PREV:
858 ret = __ham_item_done(hashp, hcp, 0);
859 if (hcp->bucket == 0) {
860 ret = DB_NOTFOUND;
861 goto out1;
863 hcp->bucket--;
864 hcp->bndx = NDX_INVALID;
865 if (ret == 0)
866 ret = __ham_item_prev(hashp,
867 hcp, DB_LOCK_READ);
868 break;
869 case DB_FIRST:
870 case DB_NEXT:
871 ret = __ham_item_done(hashp, hcp, 0);
872 hcp->bndx = NDX_INVALID;
873 hcp->bucket++;
874 hcp->pgno = PGNO_INVALID;
875 hcp->pagep = NULL;
876 if (hcp->bucket > hashp->hdr->max_bucket) {
877 ret = DB_NOTFOUND;
878 goto out1;
880 if (ret == 0)
881 ret = __ham_item_next(hashp,
882 hcp, DB_LOCK_READ);
883 break;
884 case DB_SET:
885 case DB_SET_RANGE:
886 /* Key not found. */
887 ret = DB_NOTFOUND;
888 goto out1;
891 out1: if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)
892 ret = t_ret;
893 out: if (ret)
894 *hcp = save_curs;
895 RELEASE_META(hashp->dbp, hashp);
896 if (F_ISSET(cursor->dbp, DB_AM_THREAD))
897 __db_puthandle(ldbp);
898 return (ret);
901 static int
902 __ham_c_put(cursor, key, data, flags)
903 DBC *cursor;
904 DBT *key;
905 DBT *data;
906 u_int32_t flags;
908 DB *ldbp;
909 HASH_CURSOR *hcp, save_curs;
910 HTAB *hashp;
911 u_int32_t nbytes;
912 int ret, t_ret;
914 DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_put",
915 flags == DB_KEYFIRST || flags == DB_KEYLAST ? key : NULL,
916 data, flags);
917 ldbp = cursor->dbp;
918 if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
919 (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
920 return (ret);
921 hashp = (HTAB *)(ldbp->internal);
922 hcp = (HASH_CURSOR *)cursor->internal;
923 save_curs = *hcp;
925 if ((ret = __db_cputchk(hashp->dbp, key, data, flags,
926 F_ISSET(ldbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0)
927 return (ret);
928 if (F_ISSET(hcp, H_DELETED))
929 return (DB_NOTFOUND);
931 SET_LOCKER(hashp->dbp, cursor->txn);
932 GET_META(hashp->dbp, hashp);
933 ret = 0;
935 switch (flags) {
936 case DB_KEYLAST:
937 case DB_KEYFIRST:
938 nbytes = (ISBIG(hashp, key->size) ? HOFFPAGE_PSIZE :
939 HKEYDATA_PSIZE(key->size)) +
940 (ISBIG(hashp, data->size) ? HOFFPAGE_PSIZE :
941 HKEYDATA_PSIZE(data->size));
942 ret = __ham_lookup(hashp, hcp, key, nbytes, DB_LOCK_WRITE);
943 break;
944 case DB_BEFORE:
945 case DB_AFTER:
946 case DB_CURRENT:
947 ret = __ham_item(hashp, hcp, DB_LOCK_WRITE);
948 break;
951 if (ret == 0) {
952 if (flags == DB_CURRENT && !F_ISSET(ldbp, DB_AM_DUP))
953 ret = __ham_overwrite(hashp, hcp, data);
954 else
955 ret = __ham_add_dup(hashp, hcp, data, flags);
958 if (ret == 0 && F_ISSET(hcp, H_EXPAND)) {
959 ret = __ham_expand_table(hashp);
960 F_CLR(hcp, H_EXPAND);
963 if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
964 ret = t_ret;
965 if (ret != 0)
966 *hcp = save_curs;
967 RELEASE_META(hashp->dbp, hashp);
968 if (F_ISSET(cursor->dbp, DB_AM_THREAD))
969 __db_puthandle(ldbp);
970 return (ret);
973 /********************************* UTILITIES ************************/
976 * __ham_expand_table --
978 * PUBLIC: int __ham_expand_table __P((HTAB *));
981 __ham_expand_table(hashp)
982 HTAB *hashp;
984 DB_LSN new_lsn;
985 u_int32_t old_bucket, new_bucket, spare_ndx;
986 int ret;
988 ret = 0;
989 DIRTY_META(hashp, ret);
990 if (ret)
991 return (ret);
994 * If the split point is about to increase, make sure that we
995 * have enough extra pages. The calculation here is weird.
996 * We'd like to do this after we've upped max_bucket, but it's
997 * too late then because we've logged the meta-data split. What
998 * we'll do between then and now is increment max bucket and then
999 * see what the log of one greater than that is; here we have to
1000 * look at the log of max + 2. VERY NASTY STUFF.
1002 if (__db_log2(hashp->hdr->max_bucket + 2) > hashp->hdr->ovfl_point) {
1004 * We are about to shift the split point. Make sure that
1005 * if the next doubling is going to be big (more than 8
1006 * pages), we have some extra pages around.
1008 if (hashp->hdr->max_bucket + 1 >= 8 &&
1009 hashp->hdr->spares[hashp->hdr->ovfl_point] <
1010 hashp->hdr->spares[hashp->hdr->ovfl_point - 1] +
1011 hashp->hdr->ovfl_point + 1)
1012 __ham_init_ovflpages(hashp);
1015 /* Now we can log the meta-data split. */
1016 if (DB_LOGGING(hashp->dbp)) {
1017 if ((ret = __ham_splitmeta_log(hashp->dbp->dbenv->lg_info,
1018 (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
1019 hashp->dbp->log_fileid,
1020 hashp->hdr->max_bucket, hashp->hdr->ovfl_point,
1021 hashp->hdr->spares[hashp->hdr->ovfl_point],
1022 &hashp->hdr->lsn)) != 0)
1023 return (ret);
1025 hashp->hdr->lsn = new_lsn;
1028 hashp->hash_expansions++;
1029 new_bucket = ++hashp->hdr->max_bucket;
1030 old_bucket = (hashp->hdr->max_bucket & hashp->hdr->low_mask);
1033 * If the split point is increasing, copy the current contents
1034 * of the spare split bucket to the next bucket.
1036 spare_ndx = __db_log2(hashp->hdr->max_bucket + 1);
1037 if (spare_ndx > hashp->hdr->ovfl_point) {
1038 hashp->hdr->spares[spare_ndx] =
1039 hashp->hdr->spares[hashp->hdr->ovfl_point];
1040 hashp->hdr->ovfl_point = spare_ndx;
1043 if (new_bucket > hashp->hdr->high_mask) {
1044 /* Starting a new doubling */
1045 hashp->hdr->low_mask = hashp->hdr->high_mask;
1046 hashp->hdr->high_mask = new_bucket | hashp->hdr->low_mask;
1049 if (BUCKET_TO_PAGE(hashp, new_bucket) > MAX_PAGES(hashp)) {
1050 __db_err(hashp->dbp->dbenv,
1051 "hash: Cannot allocate new bucket. Pages exhausted.");
1052 return (ENOSPC);
1055 /* Relocate records to the new bucket */
1056 return (__ham_split_page(hashp, old_bucket, new_bucket));
1060 * PUBLIC: u_int32_t __ham_call_hash __P((HTAB *, u_int8_t *, int32_t));
1062 u_int32_t
1063 __ham_call_hash(hashp, k, len)
1064 HTAB *hashp;
1065 u_int8_t *k;
1066 int32_t len;
1068 u_int32_t n, bucket;
1070 n = (u_int32_t)hashp->hash(k, len);
1071 bucket = n & hashp->hdr->high_mask;
1072 if (bucket > hashp->hdr->max_bucket)
1073 bucket = bucket & hashp->hdr->low_mask;
1074 return (bucket);
1078 * Check for duplicates, and call __db_ret appropriately. Release
1079 * everything held by the cursor.
1081 static int
1082 __ham_dup_return(hashp, hcp, val, flags)
1083 HTAB *hashp;
1084 HASH_CURSOR *hcp;
1085 DBT *val;
1086 u_int32_t flags;
1088 PAGE *pp;
1089 DBT *myval, tmp_val;
1090 db_indx_t ndx;
1091 db_pgno_t pgno;
1092 u_int8_t *hk, type;
1093 int ret;
1094 db_indx_t len;
1096 /* Check for duplicate and return the first one. */
1097 ndx = H_DATAINDEX(hcp->bndx);
1098 type = HPAGE_TYPE(hcp->pagep, ndx);
1099 pp = hcp->pagep;
1100 myval = val;
1103 * There are 3 cases:
1104 * 1. We are not in duplicate, simply call db_ret.
1105 * 2. We are looking at keys and stumbled onto a duplicate.
1106 * 3. We are in the middle of a duplicate set. (ISDUP set)
1110 * Here we check for the case where we just stumbled onto a
1111 * duplicate. In this case, we do initialization and then
1112 * let the normal duplicate code handle it.
1114 if (!F_ISSET(hcp, H_ISDUP)) {
1115 if (type == H_DUPLICATE) {
1116 F_SET(hcp, H_ISDUP);
1117 hcp->dup_tlen = LEN_HDATA(hcp->pagep,
1118 hashp->hdr->pagesize, hcp->bndx);
1119 hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
1120 if (flags == DB_LAST || flags == DB_PREV) {
1121 hcp->dndx = 0;
1122 hcp->dup_off = 0;
1123 do {
1124 memcpy(&len,
1125 HKEYDATA_DATA(hk) + hcp->dup_off,
1126 sizeof(db_indx_t));
1127 hcp->dup_off += DUP_SIZE(len);
1128 hcp->dndx++;
1129 } while (hcp->dup_off < hcp->dup_tlen);
1130 hcp->dup_off -= DUP_SIZE(len);
1131 hcp->dndx--;
1132 } else {
1133 memcpy(&len,
1134 HKEYDATA_DATA(hk), sizeof(db_indx_t));
1135 hcp->dup_off = 0;
1136 hcp->dndx = 0;
1138 hcp->dup_len = len;
1139 } else if (type == H_OFFDUP) {
1140 F_SET(hcp, H_ISDUP);
1141 memcpy(&pgno, HOFFDUP_PGNO(P_ENTRY(hcp->pagep, ndx)),
1142 sizeof(db_pgno_t));
1143 if (flags == DB_LAST || flags == DB_PREV) {
1144 if ((ret = __db_dend(hashp->dbp,
1145 pgno, &hcp->dpagep)) != 0)
1146 return (ret);
1147 hcp->dpgno = PGNO(hcp->dpagep);
1148 hcp->dndx = NUM_ENT(hcp->dpagep) - 1;
1149 } else if ((ret = __ham_next_cpage(hashp,
1150 hcp, pgno, 0, H_ISDUP)) != 0)
1151 return (ret);
1156 * Now, everything is initialized, grab a duplicate if
1157 * necessary.
1159 if (F_ISSET(hcp, H_ISDUP)) {
1160 if (hcp->dpgno != PGNO_INVALID) {
1161 pp = hcp->dpagep;
1162 ndx = hcp->dndx;
1163 } else {
1165 * Copy the DBT in case we are retrieving into
1166 * user memory and we need the parameters for
1167 * it.
1169 memcpy(&tmp_val, val, sizeof(*val));
1170 F_SET(&tmp_val, DB_DBT_PARTIAL);
1171 tmp_val.dlen = hcp->dup_len;
1172 tmp_val.doff = hcp->dup_off + sizeof(db_indx_t);
1173 myval = &tmp_val;
1178 * Finally, if we had a duplicate, pp, ndx, and myval should be
1179 * set appropriately.
1181 if ((ret = __db_ret(hashp->dbp, pp, ndx, myval, &hcp->big_data,
1182 &hcp->big_datalen)) != 0)
1183 return (ret);
1186 * In case we sent a temporary off to db_ret, set the real
1187 * return values.
1189 val->data = myval->data;
1190 val->size = myval->size;
1192 return (0);
1195 static int
1196 __ham_overwrite(hashp, hcp, nval)
1197 HTAB *hashp;
1198 HASH_CURSOR *hcp;
1199 DBT *nval;
1201 DBT *myval, tmp_val;
1202 u_int8_t *hk;
1204 if (F_ISSET(hashp->dbp, DB_AM_DUP))
1205 return (__ham_add_dup(hashp, hcp, nval, DB_KEYLAST));
1206 else if (!F_ISSET(nval, DB_DBT_PARTIAL)) {
1207 /* Put/overwrite */
1208 memcpy(&tmp_val, nval, sizeof(*nval));
1209 F_SET(&tmp_val, DB_DBT_PARTIAL);
1210 tmp_val.doff = 0;
1211 hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
1212 if (HPAGE_PTYPE(hk) == H_OFFPAGE)
1213 memcpy(&tmp_val.dlen,
1214 HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
1215 else
1216 tmp_val.dlen = LEN_HDATA(hcp->pagep,
1217 hashp->hdr->pagesize,hcp->bndx);
1218 myval = &tmp_val;
1219 } else /* Regular partial put */
1220 myval = nval;
1222 return (__ham_replpair(hashp, hcp, myval, 0));
1226 * Given a key and a cursor, sets the cursor to the page/ndx on which
1227 * the key resides. If the key is found, the cursor H_OK flag is set
1228 * and the pagep, bndx, pgno (dpagep, dndx, dpgno) fields are set.
1229 * If the key is not found, the H_OK flag is not set. If the sought
1230 * field is non-0, the pagep, bndx, pgno (dpagep, dndx, dpgno) fields
1231 * are set indicating where an add might take place. If it is 0,
1232 * non of the cursor pointer field are valid.
1234 static int
1235 __ham_lookup(hashp, hcp, key, sought, mode)
1236 HTAB *hashp;
1237 HASH_CURSOR *hcp;
1238 const DBT *key;
1239 u_int32_t sought;
1240 db_lockmode_t mode;
1242 db_pgno_t pgno;
1243 u_int32_t tlen;
1244 int match, ret, t_ret;
1245 u_int8_t *hk;
1248 * Set up cursor so that we're looking for space to add an item
1249 * as we cycle through the pages looking for the key.
1251 if ((ret = __ham_item_reset(hashp, hcp)) != 0)
1252 return (ret);
1253 hcp->seek_size = sought;
1255 hcp->bucket = __ham_call_hash(hashp, (u_int8_t *)key->data, key->size);
1256 while (1) {
1257 if ((ret = __ham_item_next(hashp, hcp, mode)) != 0)
1258 return (ret);
1260 if (F_ISSET(hcp, H_NOMORE))
1261 break;
1263 hk = H_PAIRKEY(hcp->pagep, hcp->bndx);
1264 switch (HPAGE_PTYPE(hk)) {
1265 case H_OFFPAGE:
1266 memcpy(&tlen, HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
1267 if (tlen == key->size) {
1268 memcpy(&pgno,
1269 HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
1270 match = __db_moff(hashp->dbp, key, pgno);
1271 if (match == 0) {
1272 F_SET(hcp, H_OK);
1273 return (0);
1276 break;
1277 case H_KEYDATA:
1278 if (key->size == LEN_HKEY(hcp->pagep,
1279 hashp->hdr->pagesize, hcp->bndx) &&
1280 memcmp(key->data,
1281 HKEYDATA_DATA(hk), key->size) == 0) {
1282 F_SET(hcp, H_OK);
1283 return (0);
1285 break;
1286 case H_DUPLICATE:
1287 case H_OFFDUP:
1289 * These are errors because keys are never
1290 * duplicated, only data items are.
1292 return (__db_pgfmt(hashp->dbp, PGNO(hcp->pagep)));
1294 hashp->hash_collisions++;
1298 * Item was not found, adjust cursor properly.
1301 if (sought != 0)
1302 return (ret);
1304 if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)
1305 ret = t_ret;
1306 return (ret);
1310 * Initialize a dbt using some possibly already allocated storage
1311 * for items.
1312 * PUBLIC: int __ham_init_dbt __P((DBT *, u_int32_t, void **, u_int32_t *));
1315 __ham_init_dbt(dbt, size, bufp, sizep)
1316 DBT *dbt;
1317 u_int32_t size;
1318 void **bufp;
1319 u_int32_t *sizep;
1321 memset(dbt, 0, sizeof(*dbt));
1322 if (*sizep < size) {
1323 if ((*bufp = (void *)(*bufp == NULL ?
1324 __db_malloc(size) : __db_realloc(*bufp, size))) == NULL) {
1325 *sizep = 0;
1326 return (ENOMEM);
1328 *sizep = size;
1330 dbt->data = *bufp;
1331 dbt->size = size;
1332 return (0);
1336 * Adjust the cursor after an insert or delete. The cursor passed is
1337 * the one that was operated upon; we just need to check any of the
1338 * others.
1340 * len indicates the length of the item added/deleted
1341 * add indicates if the item indicated by the cursor has just been
1342 * added (add == 1) or deleted (add == 0).
1343 * dup indicates if the addition occurred into a duplicate set.
1345 * PUBLIC: void __ham_c_update
1346 * PUBLIC: __P((HASH_CURSOR *, db_pgno_t, u_int32_t, int, int));
1348 void
1349 __ham_c_update(hcp, chg_pgno, len, add, is_dup)
1350 HASH_CURSOR *hcp;
1351 db_pgno_t chg_pgno;
1352 u_int32_t len;
1353 int add, is_dup;
1355 DBC *cp;
1356 HTAB *hp;
1357 HASH_CURSOR *lcp;
1358 int page_deleted;
1361 * Regular adds are always at the end of a given page, so we never
1362 * have to adjust anyone's cursor after a regular add.
1364 if (!is_dup && add)
1365 return;
1368 * Determine if a page was deleted. If this is a regular update
1369 * (i.e., not is_dup) then the deleted page's number will be that in
1370 * chg_pgno, and the pgno in the cursor will be different. If this
1371 * was an onpage-duplicate, then the same conditions apply. If this
1372 * was an off-page duplicate, then we need to verify if hcp->dpgno
1373 * is the same (no delete) or different (delete) than chg_pgno.
1375 if (!is_dup || hcp->dpgno == PGNO_INVALID)
1376 page_deleted =
1377 chg_pgno != PGNO_INVALID && chg_pgno != hcp->pgno;
1378 else
1379 page_deleted =
1380 chg_pgno != PGNO_INVALID && chg_pgno != hcp->dpgno;
1382 hp = hcp->db_cursor->dbp->master->internal;
1383 DB_THREAD_LOCK(hp->dbp);
1385 for (cp = TAILQ_FIRST(&hp->dbp->curs_queue); cp != NULL;
1386 cp = TAILQ_NEXT(cp, links)) {
1387 if (cp->internal == hcp)
1388 continue;
1390 lcp = (HASH_CURSOR *)cp->internal;
1392 if (!is_dup && lcp->pgno != chg_pgno)
1393 continue;
1395 if (is_dup) {
1396 if (F_ISSET(hcp, H_DELETED) && lcp->pgno != chg_pgno)
1397 continue;
1398 if (!F_ISSET(hcp, H_DELETED) && lcp->dpgno != chg_pgno)
1399 continue;
1402 if (page_deleted) {
1403 if (is_dup) {
1404 lcp->dpgno = hcp->dpgno;
1405 lcp->dndx = hcp->dndx;
1406 } else {
1407 lcp->pgno = hcp->pgno;
1408 lcp->bndx = hcp->bndx;
1409 lcp->bucket = hcp->bucket;
1411 F_CLR(lcp, H_ISDUP);
1412 continue;
1415 if (!is_dup && lcp->bndx > hcp->bndx)
1416 lcp->bndx--;
1417 else if (!is_dup && lcp->bndx == hcp->bndx)
1418 F_SET(lcp, H_DELETED);
1419 else if (is_dup && lcp->bndx == hcp->bndx) {
1420 /* Assign dpgno in case there was page conversion. */
1421 lcp->dpgno = hcp->dpgno;
1422 if (add && lcp->dndx >= hcp->dndx )
1423 lcp->dndx++;
1424 else if (!add && lcp->dndx > hcp->dndx)
1425 lcp->dndx--;
1426 else if (!add && lcp->dndx == hcp->dndx)
1427 F_SET(lcp, H_DELETED);
1429 /* Now adjust on-page information. */
1430 if (lcp->dpgno == PGNO_INVALID) {
1431 if (add) {
1432 lcp->dup_tlen += len;
1433 if (lcp->dndx > hcp->dndx)
1434 lcp->dup_off += len;
1435 } else {
1436 lcp->dup_tlen -= len;
1437 if (lcp->dndx > hcp->dndx)
1438 lcp->dup_off -= len;
1443 DB_THREAD_UNLOCK(hp->dbp);
1447 * __ham_hdup --
1448 * This function gets called when we create a duplicate handle for a
1449 * threaded DB. It should create the private part of the DB structure.
1451 * PUBLIC: int __ham_hdup __P((DB *, DB *));
1454 __ham_hdup(orig, new)
1455 DB *orig, *new;
1457 DBC *curs;
1458 HTAB *hashp;
1459 int ret;
1461 if ((hashp = (HTAB *)__db_malloc(sizeof(HTAB))) == NULL)
1462 return (ENOMEM);
1464 new->internal = hashp;
1466 hashp->dbp = new;
1467 hashp->hlock = 0;
1468 hashp->hdr = NULL;
1469 hashp->hash = ((HTAB *)orig->internal)->hash;
1470 if ((hashp->split_buf = (PAGE *)__db_malloc(orig->pgsize)) == NULL)
1471 return (ENOMEM);
1472 hashp->local_errno = 0;
1473 hashp->hash_accesses = 0;
1474 hashp->hash_collisions = 0;
1475 hashp->hash_expansions = 0;
1476 hashp->hash_overflows = 0;
1477 hashp->hash_bigpages = 0;
1478 /* Initialize the cursor queue. */
1479 ret = __ham_c_init(new, NULL, &curs);
1480 TAILQ_INSERT_TAIL(&new->curs_queue, curs, links);
1481 return (ret);