2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997, 1998
5 * Sleepycat Software. All rights reserved.
8 * Copyright (c) 1990, 1993, 1994
9 * Margo Seltzer. All rights reserved.
12 * Copyright (c) 1990, 1993, 1994
13 * The Regents of the University of California. All rights reserved.
15 * This code is derived from software contributed to Berkeley by
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
26 * 3. All advertising materials mentioning features or use of this software
27 * must display the following acknowledgement:
28 * This product includes software developed by the University of
29 * California, Berkeley and its contributors.
30 * 4. Neither the name of the University nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
34 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50 static const char sccsid
[] = "@(#)hash.c 10.45 (Sleepycat) 5/11/98";
53 #ifndef NO_SYSTEM_INCLUDES
54 #include <sys/types.h>
69 static int __ham_c_close
__P((DBC
*));
70 static int __ham_c_del
__P((DBC
*, u_int32_t
));
71 static int __ham_c_get
__P((DBC
*, DBT
*, DBT
*, u_int32_t
));
72 static int __ham_c_put
__P((DBC
*, DBT
*, DBT
*, u_int32_t
));
73 static int __ham_c_init
__P((DB
*, DB_TXN
*, DBC
**));
74 static int __ham_cursor
__P((DB
*, DB_TXN
*, DBC
**));
75 static int __ham_delete
__P((DB
*, DB_TXN
*, DBT
*, u_int32_t
));
76 static int __ham_dup_return
__P((HTAB
*, HASH_CURSOR
*, DBT
*, u_int32_t
));
77 static int __ham_get
__P((DB
*, DB_TXN
*, DBT
*, DBT
*, u_int32_t
));
78 static void __ham_init_htab
__P((HTAB
*, u_int32_t
, u_int32_t
));
79 static int __ham_lookup
__P((HTAB
*,
80 HASH_CURSOR
*, const DBT
*, u_int32_t
, db_lockmode_t
));
81 static int __ham_overwrite
__P((HTAB
*, HASH_CURSOR
*, DBT
*));
82 static int __ham_put
__P((DB
*, DB_TXN
*, DBT
*, DBT
*, u_int32_t
));
83 static int __ham_sync
__P((DB
*, u_int32_t
));
85 /************************** INTERFACE ROUTINES ***************************/
91 * PUBLIC: int __ham_open __P((DB *, DB_INFO *));
94 __ham_open(dbp
, dbinfo
)
101 int file_existed
, ret
;
105 if ((hashp
= (HTAB
*)__db_calloc(1, sizeof(HTAB
))) == NULL
)
109 /* Set the hash function if specified by the user. */
110 if (dbinfo
!= NULL
&& dbinfo
->h_hash
!= NULL
)
111 hashp
->hash
= dbinfo
->h_hash
;
114 * Initialize the remaining fields of the dbp. The type, close and
115 * fd functions are all set in db_open.
117 dbp
->internal
= hashp
;
118 dbp
->cursor
= __ham_cursor
;
119 dbp
->del
= __ham_delete
;
120 dbp
->get
= __ham_get
;
121 dbp
->put
= __ham_put
;
122 dbp
->sync
= __ham_sync
;
124 /* If locking is turned on, lock the meta data page. */
125 if (F_ISSET(dbp
, DB_AM_LOCKING
)) {
126 dbp
->lock
.pgno
= BUCKET_INVALID
;
127 if ((ret
= lock_get(dbenv
->lk_info
, dbp
->locker
,
128 0, &dbp
->lock_dbt
, DB_LOCK_READ
, &hashp
->hlock
)) != 0) {
136 * Now, we can try to read the meta-data page and figure out
137 * if we set up locking and get the meta-data page properly.
138 * If this is a new file, initialize it, and put it back dirty.
140 if ((ret
= __ham_get_page(hashp
->dbp
, 0, (PAGE
**)&hashp
->hdr
)) != 0)
143 /* Initialize the hashp structure */
144 if (hashp
->hdr
->magic
== DB_HASHMAGIC
) {
146 /* File exists, verify the data in the header. */
147 if (hashp
->hash
== NULL
)
149 hashp
->hdr
->version
< 5 ? __ham_func4
: __ham_func5
;
150 if (hashp
->hash(CHARKEY
, sizeof(CHARKEY
)) !=
151 hashp
->hdr
->h_charkey
) {
152 __db_err(hashp
->dbp
->dbenv
,
153 "hash: incompatible hash function");
157 if (F_ISSET(hashp
->hdr
, DB_HASH_DUP
))
158 F_SET(dbp
, DB_AM_DUP
);
161 * File does not exist, we must initialize the header. If
162 * locking is enabled that means getting a write lock first.
165 if (F_ISSET(dbp
, DB_AM_LOCKING
) &&
166 ((ret
= lock_put(dbenv
->lk_info
, hashp
->hlock
)) != 0 ||
167 (ret
= lock_get(dbenv
->lk_info
, dbp
->locker
, 0,
168 &dbp
->lock_dbt
, DB_LOCK_WRITE
, &hashp
->hlock
)) != 0)) {
174 __ham_init_htab(hashp
,
175 dbinfo
!= NULL
? dbinfo
->h_nelem
: 0,
176 dbinfo
!= NULL
? dbinfo
->h_ffactor
: 0);
177 if (F_ISSET(dbp
, DB_AM_DUP
))
178 F_SET(hashp
->hdr
, DB_HASH_DUP
);
179 if ((ret
= __ham_dirty_page(hashp
, (PAGE
*)hashp
->hdr
)) != 0)
183 /* Initialize the default cursor. */
184 __ham_c_init(dbp
, NULL
, &curs
);
185 TAILQ_INSERT_TAIL(&dbp
->curs_queue
, curs
, links
);
187 /* Allocate memory for our split buffer. */
188 if ((hashp
->split_buf
= (PAGE
*)__db_malloc(dbp
->pgsize
)) == NULL
) {
193 #ifdef NO_STATISTICS_FOR_DB_ERR
195 "%s%lx\n%s%ld\n%s%ld\n%s%ld\n%s%ld\n%s0x%lx\n%s0x%lx\n%s%ld\n%s%ld\n%s0x%lx",
196 "TABLE POINTER ", (long)hashp
,
197 "BUCKET SIZE ", (long)hashp
->hdr
->pagesize
,
198 "FILL FACTOR ", (long)hashp
->hdr
->ffactor
,
199 "MAX BUCKET ", (long)hashp
->hdr
->max_bucket
,
200 "OVFL POINT ", (long)hashp
->hdr
->ovfl_point
,
201 "LAST FREED ", (long)hashp
->hdr
->last_freed
,
202 "HIGH MASK ", (long)hashp
->hdr
->high_mask
,
203 "LOW MASK ", (long)hashp
->hdr
->low_mask
,
204 "NELEM ", (long)hashp
->hdr
->nelem
,
205 "FLAGS ", (long)hashp
->hdr
->flags
);
208 /* Release the meta data page */
209 (void)__ham_put_page(hashp
->dbp
, (PAGE
*)hashp
->hdr
, 0);
210 if (F_ISSET(dbp
, DB_AM_LOCKING
) &&
211 (ret
= lock_put(dbenv
->lk_info
, hashp
->hlock
)) != 0) {
219 /* Sync the file so that we know that the meta data goes to disk. */
220 if (!file_existed
&& (ret
= dbp
->sync(dbp
, 0)) != 0)
224 out
: (void)__ham_close(dbp
);
229 * PUBLIC: int __ham_close __P((DB *));
238 DEBUG_LWRITE(dbp
, NULL
, "ham_close", NULL
, NULL
, 0);
239 hashp
= (HTAB
*)dbp
->internal
;
242 /* Free the split page. */
243 if (hashp
->split_buf
)
244 FREE(hashp
->split_buf
, dbp
->pgsize
);
246 if (hashp
->hdr
&& (t_ret
= __ham_put_page(hashp
->dbp
,
247 (PAGE
*)hashp
->hdr
, 0)) != 0 && ret
== 0)
249 if (hashp
->hlock
&& (t_ret
= lock_put(hashp
->dbp
->dbenv
->lk_info
,
250 hashp
->hlock
)) != 0 && ret
== 0)
253 FREE(hashp
, sizeof(HTAB
));
254 dbp
->internal
= NULL
;
258 /************************** LOCAL CREATION ROUTINES **********************/
260 * Returns 0 on No Error
263 __ham_init_htab(hashp
, nelem
, ffactor
)
265 u_int32_t nelem
, ffactor
;
267 int32_t l2
, nbuckets
;
269 memset(hashp
->hdr
, 0, sizeof(HASHHDR
));
270 hashp
->hdr
->ffactor
= ffactor
;
271 hashp
->hdr
->pagesize
= hashp
->dbp
->pgsize
;
272 ZERO_LSN(hashp
->hdr
->lsn
);
273 hashp
->hdr
->magic
= DB_HASHMAGIC
;
274 hashp
->hdr
->version
= DB_HASHVERSION
;
275 if (hashp
->hash
== NULL
)
277 hashp
->hdr
->version
< 5 ? __ham_func4
: __ham_func5
;
278 hashp
->hdr
->h_charkey
= hashp
->hash(CHARKEY
, sizeof(CHARKEY
));
279 if (nelem
!= 0 && hashp
->hdr
->ffactor
!= 0) {
280 nelem
= (nelem
- 1) / hashp
->hdr
->ffactor
+ 1;
281 l2
= __db_log2(nelem
> 2 ? nelem
: 2);
287 hashp
->hdr
->ovfl_point
= l2
;
288 hashp
->hdr
->last_freed
= PGNO_INVALID
;
290 hashp
->hdr
->max_bucket
= hashp
->hdr
->high_mask
= nbuckets
- 1;
291 hashp
->hdr
->low_mask
= (nbuckets
>> 1) - 1;
292 memcpy(hashp
->hdr
->uid
, hashp
->dbp
->lock
.fileid
, DB_FILE_ID_LEN
);
295 /********************** DESTROY/CLOSE ROUTINES ************************/
299 * Write modified pages to disk
306 __ham_sync(dbp
, flags
)
312 DEBUG_LWRITE(dbp
, NULL
, "ham_sync", NULL
, NULL
, flags
);
313 if ((ret
= __db_syncchk(dbp
, flags
)) != 0)
315 if (F_ISSET(dbp
, DB_AM_RDONLY
))
318 if ((ret
= memp_fsync(dbp
->mpf
)) == DB_INCOMPLETE
)
324 /*******************************SEARCH ROUTINES *****************************/
326 * All the access routines return
330 * 1 to indicate an external ERROR (i.e. key not found, etc)
331 * -1 to indicate an internal ERROR (i.e. out of memory, etc)
335 __ham_get(dbp
, txn
, key
, data
, flags
)
347 DEBUG_LREAD(dbp
, txn
, "ham_get", key
, NULL
, flags
);
348 if ((ret
= __db_getchk(dbp
, key
, data
, flags
)) != 0)
352 if (F_ISSET(dbp
, DB_AM_THREAD
) &&
353 (ret
= __db_gethandle(dbp
, __ham_hdup
, &ldbp
)) != 0)
356 hashp
= (HTAB
*)ldbp
->internal
;
357 SET_LOCKER(ldbp
, txn
);
358 GET_META(ldbp
, hashp
);
360 hashp
->hash_accesses
++;
361 hcp
= (HASH_CURSOR
*)TAILQ_FIRST(&ldbp
->curs_queue
)->internal
;
362 if ((ret
= __ham_lookup(hashp
, hcp
, key
, 0, DB_LOCK_READ
)) == 0) {
363 if (F_ISSET(hcp
, H_OK
))
364 ret
= __ham_dup_return(hashp
, hcp
, data
, DB_FIRST
);
365 else /* Key was not found */
369 if ((t_ret
= __ham_item_done(hashp
, hcp
, 0)) != 0 && ret
== 0)
371 RELEASE_META(ldbp
, hashp
);
372 if (F_ISSET(dbp
, DB_AM_THREAD
))
373 __db_puthandle(ldbp
);
378 __ham_put(dbp
, txn
, key
, data
, flags
)
392 DEBUG_LWRITE(dbp
, txn
, "ham_put", key
, data
, flags
);
393 if ((ret
= __db_putchk(dbp
, key
, data
,
394 flags
, F_ISSET(dbp
, DB_AM_RDONLY
), F_ISSET(dbp
, DB_AM_DUP
))) != 0)
398 if (F_ISSET(dbp
, DB_AM_THREAD
) &&
399 (ret
= __db_gethandle(dbp
, __ham_hdup
, &ldbp
)) != 0)
402 hashp
= (HTAB
*)ldbp
->internal
;
403 SET_LOCKER(ldbp
, txn
);
404 GET_META(ldbp
, hashp
);
405 hcp
= TAILQ_FIRST(&ldbp
->curs_queue
)->internal
;
407 nbytes
= (ISBIG(hashp
, key
->size
) ? HOFFPAGE_PSIZE
:
408 HKEYDATA_PSIZE(key
->size
)) +
409 (ISBIG(hashp
, data
->size
) ? HOFFPAGE_PSIZE
:
410 HKEYDATA_PSIZE(data
->size
));
412 hashp
->hash_accesses
++;
413 ret
= __ham_lookup(hashp
, hcp
, key
, nbytes
, DB_LOCK_WRITE
);
415 if (ret
== DB_NOTFOUND
) {
417 if (hcp
->seek_found_page
!= PGNO_INVALID
&&
418 hcp
->seek_found_page
!= hcp
->pgno
) {
419 if ((ret
= __ham_item_done(hashp
, hcp
, 0)) != 0)
421 hcp
->pgno
= hcp
->seek_found_page
;
422 hcp
->bndx
= NDX_INVALID
;
425 if (F_ISSET(data
, DB_DBT_PARTIAL
) && data
->doff
!= 0) {
427 * Doing a partial put, but the key does not exist
428 * and we are not beginning the write at 0. We
429 * must create a data item padded up to doff and
430 * then write the new bytes represented by val.
432 ret
= __ham_init_dbt(&tmp_val
, data
->size
+ data
->doff
,
433 &hcp
->big_data
, &hcp
->big_datalen
);
435 memset(tmp_val
.data
, 0, data
->doff
);
436 memcpy((u_int8_t
*)tmp_val
.data
+ data
->doff
,
437 data
->data
, data
->size
);
444 ret
= __ham_add_el(hashp
, hcp
, key
, myval
, H_KEYDATA
);
445 } else if (ret
== 0 && F_ISSET(hcp
, H_OK
)) {
446 if (flags
== DB_NOOVERWRITE
)
448 else if (F_ISSET(ldbp
, DB_AM_DUP
))
449 ret
= __ham_add_dup(hashp
, hcp
, data
, DB_KEYLAST
);
451 ret
= __ham_overwrite(hashp
, hcp
, data
);
454 /* Free up all the cursor pages. */
455 if ((t_ret
= __ham_item_done(hashp
, hcp
, ret
== 0)) != 0 && ret
== 0)
457 /* Now check if we have to grow. */
458 out
: if (ret
== 0 && F_ISSET(hcp
, H_EXPAND
)) {
459 ret
= __ham_expand_table(hashp
);
460 F_CLR(hcp
, H_EXPAND
);
463 if ((t_ret
= __ham_item_done(hashp
, hcp
, ret
== 0)) != 0 && ret
== 0)
465 RELEASE_META(ldbp
, hashp
);
466 if (F_ISSET(dbp
, DB_AM_THREAD
))
467 __db_puthandle(ldbp
);
472 __ham_cursor(dbp
, txnid
, dbcp
)
479 DEBUG_LWRITE(dbp
, txnid
, "ham_cursor", NULL
, NULL
, 0);
480 if ((ret
= __ham_c_init(dbp
, txnid
, dbcp
)) != 0)
484 TAILQ_INSERT_TAIL(&dbp
->curs_queue
, *dbcp
, links
);
485 DB_THREAD_UNLOCK(dbp
);
490 __ham_c_init(dbp
, txnid
, dbcp
)
496 HASH_CURSOR
*new_curs
;
498 if ((db_curs
= (DBC
*)__db_calloc(sizeof(DBC
), 1)) == NULL
)
502 (HASH_CURSOR
*)__db_calloc(sizeof(struct cursor_t
), 1)) == NULL
) {
503 FREE(db_curs
, sizeof(DBC
));
507 db_curs
->internal
= new_curs
;
508 db_curs
->c_close
= __ham_c_close
;
509 db_curs
->c_del
= __ham_c_del
;
510 db_curs
->c_get
= __ham_c_get
;
511 db_curs
->c_put
= __ham_c_put
;
512 db_curs
->txn
= txnid
;
515 new_curs
->db_cursor
= db_curs
;
516 __ham_item_init(new_curs
);
524 __ham_delete(dbp
, txn
, key
, flags
)
535 DEBUG_LWRITE(dbp
, txn
, "ham_delete", key
, NULL
, flags
);
537 __db_delchk(dbp
, key
, flags
, F_ISSET(dbp
, DB_AM_RDONLY
))) != 0)
541 if (F_ISSET(dbp
, DB_AM_THREAD
) &&
542 (ret
= __db_gethandle(dbp
, __ham_hdup
, &ldbp
)) != 0)
544 hashp
= (HTAB
*)ldbp
->internal
;
545 SET_LOCKER(ldbp
, txn
);
546 GET_META(ldbp
, hashp
);
547 hcp
= TAILQ_FIRST(&ldbp
->curs_queue
)->internal
;
549 hashp
->hash_accesses
++;
550 if ((ret
= __ham_lookup(hashp
, hcp
, key
, 0, DB_LOCK_WRITE
)) == 0) {
551 if (F_ISSET(hcp
, H_OK
))
552 ret
= __ham_del_pair(hashp
, hcp
, 1);
557 if ((t_ret
= __ham_item_done(hashp
, hcp
, ret
== 0)) != 0 && ret
== 0)
559 RELEASE_META(ldbp
, hashp
);
560 if (F_ISSET(dbp
, DB_AM_THREAD
))
561 __db_puthandle(ldbp
);
565 /* ****************** CURSORS ********************************** */
567 __ham_c_close(cursor
)
573 DEBUG_LWRITE(cursor
->dbp
, cursor
->txn
, "ham_c_close", NULL
, NULL
, 0);
575 * If the pagep, dpagep, and lock fields of the cursor are all NULL,
576 * then there really isn't a need to get a handle here. However,
577 * the normal case is that at least one of those fields is non-NULL,
578 * and putting those checks in here would couple the ham_item_done
579 * functionality with cursor close which would be pretty disgusting.
580 * Instead, we pay the overhead here of always getting the handle.
583 if (F_ISSET(cursor
->dbp
, DB_AM_THREAD
) &&
584 (ret
= __db_gethandle(cursor
->dbp
, __ham_hdup
, &ldbp
)) != 0)
587 ret
= __ham_c_iclose(ldbp
, cursor
);
589 if (F_ISSET(ldbp
, DB_AM_THREAD
))
590 __db_puthandle(ldbp
);
596 * Internal cursor close routine; assumes it is being passed the correct
597 * handle, rather than getting and putting a handle.
599 * PUBLIC: int __ham_c_iclose __P((DB *, DBC *));
602 __ham_c_iclose(dbp
, dbc
)
610 hashp
= (HTAB
*)dbp
->internal
;
611 hcp
= (HASH_CURSOR
*)dbc
->internal
;
612 ret
= __ham_item_done(hashp
, hcp
, 0);
615 FREE(hcp
->big_key
, hcp
->big_keylen
);
617 FREE(hcp
->big_data
, hcp
->big_datalen
);
620 * All cursors (except the default ones) are linked off the master.
621 * Therefore, when we close the cursor, we have to remove it from
622 * the master, not the local one.
623 * XXX I am always removing from the master; what about local cursors?
625 DB_THREAD_LOCK(dbc
->dbp
);
626 TAILQ_REMOVE(&dbc
->dbp
->curs_queue
, dbc
, links
);
627 DB_THREAD_UNLOCK(dbc
->dbp
);
629 FREE(hcp
, sizeof(HASH_CURSOR
));
630 FREE(dbc
, sizeof(DBC
));
636 __ham_c_del(cursor
, flags
)
642 HASH_CURSOR save_curs
;
644 db_pgno_t ppgno
, chg_pgno
;
647 DEBUG_LWRITE(cursor
->dbp
, cursor
->txn
, "ham_c_del", NULL
, NULL
, flags
);
649 if (F_ISSET(cursor
->dbp
, DB_AM_THREAD
) &&
650 (ret
= __db_gethandle(cursor
->dbp
, __ham_hdup
, &ldbp
)) != 0)
652 hashp
= (HTAB
*)ldbp
->internal
;
653 hcp
= (HASH_CURSOR
*)cursor
->internal
;
655 if ((ret
= __db_cdelchk(ldbp
, flags
,
656 F_ISSET(ldbp
, DB_AM_RDONLY
), IS_VALID(hcp
))) != 0)
658 if (F_ISSET(hcp
, H_DELETED
))
659 return (DB_NOTFOUND
);
661 SET_LOCKER(hashp
->dbp
, cursor
->txn
);
662 GET_META(hashp
->dbp
, hashp
);
663 hashp
->hash_accesses
++;
664 if ((ret
= __ham_get_cpage(hashp
, hcp
, DB_LOCK_WRITE
)) != 0)
666 if (F_ISSET(hcp
, H_ISDUP
) && hcp
->dpgno
!= PGNO_INVALID
) {
668 * We are about to remove a duplicate from offpage.
671 * 1. We will remove an item on a page, but there are more
672 * items on that page.
673 * 2. We will remove the last item on a page, but there is a
674 * following page of duplicates.
675 * 3. We will remove the last item on a page, this page was the
676 * last page in a duplicate set, but there were dups before
678 * 4. We will remove the last item on a page, removing the last
680 * In case 1 hcp->dpagep is unchanged.
681 * In case 2 hcp->dpagep comes back pointing to the next dup
683 * In case 3 hcp->dpagep comes back NULL.
684 * In case 4 hcp->dpagep comes back NULL.
686 * Case 4 results in deleting the pair off the master page.
687 * The normal code for doing this knows how to delete the
688 * duplicates, so we will handle this case in the normal code.
690 ppgno
= PREV_PGNO(hcp
->dpagep
);
691 if (ppgno
== PGNO_INVALID
&&
692 NEXT_PGNO(hcp
->dpagep
) == PGNO_INVALID
&&
693 NUM_ENT(hcp
->dpagep
) == 1)
696 /* Remove item from duplicate page. */
697 chg_pgno
= hcp
->dpgno
;
698 if ((ret
= __db_drem(hashp
->dbp
,
699 &hcp
->dpagep
, hcp
->dndx
, __ham_del_page
)) != 0)
702 if (hcp
->dpagep
== NULL
) {
703 if (ppgno
!= PGNO_INVALID
) { /* Case 3 */
705 if ((ret
= __ham_get_cpage(hashp
, hcp
,
708 hcp
->dndx
= NUM_ENT(hcp
->dpagep
);
709 F_SET(hcp
, H_DELETED
);
710 } else { /* Case 4 */
711 ret
= __ham_del_pair(hashp
, hcp
, 1);
712 hcp
->dpgno
= PGNO_INVALID
;
714 * Delpair updated the cursor queue, so we
715 * don't have to do that here.
717 chg_pgno
= PGNO_INVALID
;
719 } else if (PGNO(hcp
->dpagep
) != hcp
->dpgno
) {
720 hcp
->dndx
= 0; /* Case 2 */
721 hcp
->dpgno
= PGNO(hcp
->dpagep
);
722 if (ppgno
== PGNO_INVALID
)
723 memcpy(HOFFDUP_PGNO(P_ENTRY(hcp
->pagep
,
724 H_DATAINDEX(hcp
->bndx
))),
725 &hcp
->dpgno
, sizeof(db_pgno_t
));
726 F_SET(hcp
, H_DELETED
);
728 F_SET(hcp
, H_DELETED
);
729 if (chg_pgno
!= PGNO_INVALID
)
730 __ham_c_update(hcp
, chg_pgno
, 0, 0, 1);
731 } else if (F_ISSET(hcp
, H_ISDUP
)) { /* on page */
732 if (hcp
->dup_off
== 0 && DUP_SIZE(hcp
->dup_len
) ==
733 LEN_HDATA(hcp
->pagep
, hashp
->hdr
->pagesize
, hcp
->bndx
))
734 ret
= __ham_del_pair(hashp
, hcp
, 1);
739 F_SET(&repldbt
, DB_DBT_PARTIAL
);
740 repldbt
.doff
= hcp
->dup_off
;
741 repldbt
.dlen
= DUP_SIZE(hcp
->dup_len
);
743 ret
= __ham_replpair(hashp
, hcp
, &repldbt
, 0);
744 hcp
->dup_tlen
-= DUP_SIZE(hcp
->dup_len
);
745 F_SET(hcp
, H_DELETED
);
746 __ham_c_update(hcp
, hcp
->pgno
,
747 DUP_SIZE(hcp
->dup_len
), 0, 1);
751 /* Not a duplicate */
752 normal
: ret
= __ham_del_pair(hashp
, hcp
, 1);
754 out
: if ((t_ret
= __ham_item_done(hashp
, hcp
, ret
== 0)) != 0 && ret
== 0)
758 RELEASE_META(hashp
->dbp
, hashp
);
759 if (F_ISSET(cursor
->dbp
, DB_AM_THREAD
))
760 __db_puthandle(ldbp
);
765 __ham_c_get(cursor
, key
, data
, flags
)
773 HASH_CURSOR
*hcp
, save_curs
;
774 int get_key
, ret
, t_ret
;
776 DEBUG_LREAD(cursor
->dbp
, cursor
->txn
, "ham_c_get",
777 flags
== DB_SET
|| flags
== DB_SET_RANGE
? key
: NULL
,
780 if (F_ISSET(cursor
->dbp
, DB_AM_THREAD
) &&
781 (ret
= __db_gethandle(cursor
->dbp
, __ham_hdup
, &ldbp
)) != 0)
783 hashp
= (HTAB
*)(ldbp
->internal
);
784 hcp
= (HASH_CURSOR
*)cursor
->internal
;
787 __db_cgetchk(hashp
->dbp
, key
, data
, flags
, IS_VALID(hcp
))) != 0)
790 SET_LOCKER(hashp
->dbp
, cursor
->txn
);
791 GET_META(hashp
->dbp
, hashp
);
792 hashp
->hash_accesses
++;
800 if (hcp
->bucket
!= BUCKET_INVALID
) {
801 ret
= __ham_item_prev(hashp
, hcp
, DB_LOCK_READ
);
806 ret
= __ham_item_last(hashp
, hcp
, DB_LOCK_READ
);
809 ret
= __ham_item_first(hashp
, hcp
, DB_LOCK_READ
);
812 if (hcp
->bucket
== BUCKET_INVALID
)
814 ret
= __ham_item_next(hashp
, hcp
, DB_LOCK_READ
);
818 ret
= __ham_lookup(hashp
, hcp
, key
, 0, DB_LOCK_READ
);
822 if (F_ISSET(hcp
, H_DELETED
)) {
827 ret
= __ham_item(hashp
, hcp
, DB_LOCK_READ
);
832 * Must always enter this loop to do error handling and
833 * check for big key/data pair.
836 if (ret
!= 0 && ret
!= DB_NOTFOUND
)
838 else if (F_ISSET(hcp
, H_OK
)) {
840 if (get_key
&& (ret
= __db_ret(hashp
->dbp
, hcp
->pagep
,
841 H_KEYINDEX(hcp
->bndx
), key
, &hcp
->big_key
,
842 &hcp
->big_keylen
)) != 0)
845 ret
= __ham_dup_return(hashp
, hcp
, data
, flags
);
847 } else if (!F_ISSET(hcp
, H_NOMORE
)) {
853 * Ran out of entries in a bucket; change buckets.
858 ret
= __ham_item_done(hashp
, hcp
, 0);
859 if (hcp
->bucket
== 0) {
864 hcp
->bndx
= NDX_INVALID
;
866 ret
= __ham_item_prev(hashp
,
871 ret
= __ham_item_done(hashp
, hcp
, 0);
872 hcp
->bndx
= NDX_INVALID
;
874 hcp
->pgno
= PGNO_INVALID
;
876 if (hcp
->bucket
> hashp
->hdr
->max_bucket
) {
881 ret
= __ham_item_next(hashp
,
891 out1
: if ((t_ret
= __ham_item_done(hashp
, hcp
, 0)) != 0 && ret
== 0)
895 RELEASE_META(hashp
->dbp
, hashp
);
896 if (F_ISSET(cursor
->dbp
, DB_AM_THREAD
))
897 __db_puthandle(ldbp
);
902 __ham_c_put(cursor
, key
, data
, flags
)
909 HASH_CURSOR
*hcp
, save_curs
;
914 DEBUG_LWRITE(cursor
->dbp
, cursor
->txn
, "ham_c_put",
915 flags
== DB_KEYFIRST
|| flags
== DB_KEYLAST
? key
: NULL
,
918 if (F_ISSET(cursor
->dbp
, DB_AM_THREAD
) &&
919 (ret
= __db_gethandle(cursor
->dbp
, __ham_hdup
, &ldbp
)) != 0)
921 hashp
= (HTAB
*)(ldbp
->internal
);
922 hcp
= (HASH_CURSOR
*)cursor
->internal
;
925 if ((ret
= __db_cputchk(hashp
->dbp
, key
, data
, flags
,
926 F_ISSET(ldbp
, DB_AM_RDONLY
), IS_VALID(hcp
))) != 0)
928 if (F_ISSET(hcp
, H_DELETED
))
929 return (DB_NOTFOUND
);
931 SET_LOCKER(hashp
->dbp
, cursor
->txn
);
932 GET_META(hashp
->dbp
, hashp
);
938 nbytes
= (ISBIG(hashp
, key
->size
) ? HOFFPAGE_PSIZE
:
939 HKEYDATA_PSIZE(key
->size
)) +
940 (ISBIG(hashp
, data
->size
) ? HOFFPAGE_PSIZE
:
941 HKEYDATA_PSIZE(data
->size
));
942 ret
= __ham_lookup(hashp
, hcp
, key
, nbytes
, DB_LOCK_WRITE
);
947 ret
= __ham_item(hashp
, hcp
, DB_LOCK_WRITE
);
952 if (flags
== DB_CURRENT
&& !F_ISSET(ldbp
, DB_AM_DUP
))
953 ret
= __ham_overwrite(hashp
, hcp
, data
);
955 ret
= __ham_add_dup(hashp
, hcp
, data
, flags
);
958 if (ret
== 0 && F_ISSET(hcp
, H_EXPAND
)) {
959 ret
= __ham_expand_table(hashp
);
960 F_CLR(hcp
, H_EXPAND
);
963 if ((t_ret
= __ham_item_done(hashp
, hcp
, ret
== 0)) != 0 && ret
== 0)
967 RELEASE_META(hashp
->dbp
, hashp
);
968 if (F_ISSET(cursor
->dbp
, DB_AM_THREAD
))
969 __db_puthandle(ldbp
);
973 /********************************* UTILITIES ************************/
976 * __ham_expand_table --
978 * PUBLIC: int __ham_expand_table __P((HTAB *));
981 __ham_expand_table(hashp
)
985 u_int32_t old_bucket
, new_bucket
, spare_ndx
;
989 DIRTY_META(hashp
, ret
);
994 * If the split point is about to increase, make sure that we
995 * have enough extra pages. The calculation here is weird.
996 * We'd like to do this after we've upped max_bucket, but it's
997 * too late then because we've logged the meta-data split. What
998 * we'll do between then and now is increment max bucket and then
999 * see what the log of one greater than that is; here we have to
1000 * look at the log of max + 2. VERY NASTY STUFF.
1002 if (__db_log2(hashp
->hdr
->max_bucket
+ 2) > hashp
->hdr
->ovfl_point
) {
1004 * We are about to shift the split point. Make sure that
1005 * if the next doubling is going to be big (more than 8
1006 * pages), we have some extra pages around.
1008 if (hashp
->hdr
->max_bucket
+ 1 >= 8 &&
1009 hashp
->hdr
->spares
[hashp
->hdr
->ovfl_point
] <
1010 hashp
->hdr
->spares
[hashp
->hdr
->ovfl_point
- 1] +
1011 hashp
->hdr
->ovfl_point
+ 1)
1012 __ham_init_ovflpages(hashp
);
1015 /* Now we can log the meta-data split. */
1016 if (DB_LOGGING(hashp
->dbp
)) {
1017 if ((ret
= __ham_splitmeta_log(hashp
->dbp
->dbenv
->lg_info
,
1018 (DB_TXN
*)hashp
->dbp
->txn
, &new_lsn
, 0,
1019 hashp
->dbp
->log_fileid
,
1020 hashp
->hdr
->max_bucket
, hashp
->hdr
->ovfl_point
,
1021 hashp
->hdr
->spares
[hashp
->hdr
->ovfl_point
],
1022 &hashp
->hdr
->lsn
)) != 0)
1025 hashp
->hdr
->lsn
= new_lsn
;
1028 hashp
->hash_expansions
++;
1029 new_bucket
= ++hashp
->hdr
->max_bucket
;
1030 old_bucket
= (hashp
->hdr
->max_bucket
& hashp
->hdr
->low_mask
);
1033 * If the split point is increasing, copy the current contents
1034 * of the spare split bucket to the next bucket.
1036 spare_ndx
= __db_log2(hashp
->hdr
->max_bucket
+ 1);
1037 if (spare_ndx
> hashp
->hdr
->ovfl_point
) {
1038 hashp
->hdr
->spares
[spare_ndx
] =
1039 hashp
->hdr
->spares
[hashp
->hdr
->ovfl_point
];
1040 hashp
->hdr
->ovfl_point
= spare_ndx
;
1043 if (new_bucket
> hashp
->hdr
->high_mask
) {
1044 /* Starting a new doubling */
1045 hashp
->hdr
->low_mask
= hashp
->hdr
->high_mask
;
1046 hashp
->hdr
->high_mask
= new_bucket
| hashp
->hdr
->low_mask
;
1049 if (BUCKET_TO_PAGE(hashp
, new_bucket
) > MAX_PAGES(hashp
)) {
1050 __db_err(hashp
->dbp
->dbenv
,
1051 "hash: Cannot allocate new bucket. Pages exhausted.");
1055 /* Relocate records to the new bucket */
1056 return (__ham_split_page(hashp
, old_bucket
, new_bucket
));
1060 * PUBLIC: u_int32_t __ham_call_hash __P((HTAB *, u_int8_t *, int32_t));
1063 __ham_call_hash(hashp
, k
, len
)
1068 u_int32_t n
, bucket
;
1070 n
= (u_int32_t
)hashp
->hash(k
, len
);
1071 bucket
= n
& hashp
->hdr
->high_mask
;
1072 if (bucket
> hashp
->hdr
->max_bucket
)
1073 bucket
= bucket
& hashp
->hdr
->low_mask
;
1078 * Check for duplicates, and call __db_ret appropriately. Release
1079 * everything held by the cursor.
1082 __ham_dup_return(hashp
, hcp
, val
, flags
)
1089 DBT
*myval
, tmp_val
;
1096 /* Check for duplicate and return the first one. */
1097 ndx
= H_DATAINDEX(hcp
->bndx
);
1098 type
= HPAGE_TYPE(hcp
->pagep
, ndx
);
1103 * There are 3 cases:
1104 * 1. We are not in duplicate, simply call db_ret.
1105 * 2. We are looking at keys and stumbled onto a duplicate.
1106 * 3. We are in the middle of a duplicate set. (ISDUP set)
1110 * Here we check for the case where we just stumbled onto a
1111 * duplicate. In this case, we do initialization and then
1112 * let the normal duplicate code handle it.
1114 if (!F_ISSET(hcp
, H_ISDUP
)) {
1115 if (type
== H_DUPLICATE
) {
1116 F_SET(hcp
, H_ISDUP
);
1117 hcp
->dup_tlen
= LEN_HDATA(hcp
->pagep
,
1118 hashp
->hdr
->pagesize
, hcp
->bndx
);
1119 hk
= H_PAIRDATA(hcp
->pagep
, hcp
->bndx
);
1120 if (flags
== DB_LAST
|| flags
== DB_PREV
) {
1125 HKEYDATA_DATA(hk
) + hcp
->dup_off
,
1127 hcp
->dup_off
+= DUP_SIZE(len
);
1129 } while (hcp
->dup_off
< hcp
->dup_tlen
);
1130 hcp
->dup_off
-= DUP_SIZE(len
);
1134 HKEYDATA_DATA(hk
), sizeof(db_indx_t
));
1139 } else if (type
== H_OFFDUP
) {
1140 F_SET(hcp
, H_ISDUP
);
1141 memcpy(&pgno
, HOFFDUP_PGNO(P_ENTRY(hcp
->pagep
, ndx
)),
1143 if (flags
== DB_LAST
|| flags
== DB_PREV
) {
1144 if ((ret
= __db_dend(hashp
->dbp
,
1145 pgno
, &hcp
->dpagep
)) != 0)
1147 hcp
->dpgno
= PGNO(hcp
->dpagep
);
1148 hcp
->dndx
= NUM_ENT(hcp
->dpagep
) - 1;
1149 } else if ((ret
= __ham_next_cpage(hashp
,
1150 hcp
, pgno
, 0, H_ISDUP
)) != 0)
1156 * Now, everything is initialized, grab a duplicate if
1159 if (F_ISSET(hcp
, H_ISDUP
)) {
1160 if (hcp
->dpgno
!= PGNO_INVALID
) {
1165 * Copy the DBT in case we are retrieving into
1166 * user memory and we need the parameters for
1169 memcpy(&tmp_val
, val
, sizeof(*val
));
1170 F_SET(&tmp_val
, DB_DBT_PARTIAL
);
1171 tmp_val
.dlen
= hcp
->dup_len
;
1172 tmp_val
.doff
= hcp
->dup_off
+ sizeof(db_indx_t
);
1178 * Finally, if we had a duplicate, pp, ndx, and myval should be
1179 * set appropriately.
1181 if ((ret
= __db_ret(hashp
->dbp
, pp
, ndx
, myval
, &hcp
->big_data
,
1182 &hcp
->big_datalen
)) != 0)
1186 * In case we sent a temporary off to db_ret, set the real
1189 val
->data
= myval
->data
;
1190 val
->size
= myval
->size
;
1196 __ham_overwrite(hashp
, hcp
, nval
)
1201 DBT
*myval
, tmp_val
;
1204 if (F_ISSET(hashp
->dbp
, DB_AM_DUP
))
1205 return (__ham_add_dup(hashp
, hcp
, nval
, DB_KEYLAST
));
1206 else if (!F_ISSET(nval
, DB_DBT_PARTIAL
)) {
1208 memcpy(&tmp_val
, nval
, sizeof(*nval
));
1209 F_SET(&tmp_val
, DB_DBT_PARTIAL
);
1211 hk
= H_PAIRDATA(hcp
->pagep
, hcp
->bndx
);
1212 if (HPAGE_PTYPE(hk
) == H_OFFPAGE
)
1213 memcpy(&tmp_val
.dlen
,
1214 HOFFPAGE_TLEN(hk
), sizeof(u_int32_t
));
1216 tmp_val
.dlen
= LEN_HDATA(hcp
->pagep
,
1217 hashp
->hdr
->pagesize
,hcp
->bndx
);
1219 } else /* Regular partial put */
1222 return (__ham_replpair(hashp
, hcp
, myval
, 0));
1226 * Given a key and a cursor, sets the cursor to the page/ndx on which
1227 * the key resides. If the key is found, the cursor H_OK flag is set
1228 * and the pagep, bndx, pgno (dpagep, dndx, dpgno) fields are set.
1229 * If the key is not found, the H_OK flag is not set. If the sought
1230 * field is non-0, the pagep, bndx, pgno (dpagep, dndx, dpgno) fields
1231 * are set indicating where an add might take place. If it is 0,
1232 * non of the cursor pointer field are valid.
1235 __ham_lookup(hashp
, hcp
, key
, sought
, mode
)
1244 int match
, ret
, t_ret
;
1248 * Set up cursor so that we're looking for space to add an item
1249 * as we cycle through the pages looking for the key.
1251 if ((ret
= __ham_item_reset(hashp
, hcp
)) != 0)
1253 hcp
->seek_size
= sought
;
1255 hcp
->bucket
= __ham_call_hash(hashp
, (u_int8_t
*)key
->data
, key
->size
);
1257 if ((ret
= __ham_item_next(hashp
, hcp
, mode
)) != 0)
1260 if (F_ISSET(hcp
, H_NOMORE
))
1263 hk
= H_PAIRKEY(hcp
->pagep
, hcp
->bndx
);
1264 switch (HPAGE_PTYPE(hk
)) {
1266 memcpy(&tlen
, HOFFPAGE_TLEN(hk
), sizeof(u_int32_t
));
1267 if (tlen
== key
->size
) {
1269 HOFFPAGE_PGNO(hk
), sizeof(db_pgno_t
));
1270 match
= __db_moff(hashp
->dbp
, key
, pgno
);
1278 if (key
->size
== LEN_HKEY(hcp
->pagep
,
1279 hashp
->hdr
->pagesize
, hcp
->bndx
) &&
1281 HKEYDATA_DATA(hk
), key
->size
) == 0) {
1289 * These are errors because keys are never
1290 * duplicated, only data items are.
1292 return (__db_pgfmt(hashp
->dbp
, PGNO(hcp
->pagep
)));
1294 hashp
->hash_collisions
++;
1298 * Item was not found, adjust cursor properly.
1304 if ((t_ret
= __ham_item_done(hashp
, hcp
, 0)) != 0 && ret
== 0)
1310 * Initialize a dbt using some possibly already allocated storage
1312 * PUBLIC: int __ham_init_dbt __P((DBT *, u_int32_t, void **, u_int32_t *));
1315 __ham_init_dbt(dbt
, size
, bufp
, sizep
)
1321 memset(dbt
, 0, sizeof(*dbt
));
1322 if (*sizep
< size
) {
1323 if ((*bufp
= (void *)(*bufp
== NULL
?
1324 __db_malloc(size
) : __db_realloc(*bufp
, size
))) == NULL
) {
1336 * Adjust the cursor after an insert or delete. The cursor passed is
1337 * the one that was operated upon; we just need to check any of the
1340 * len indicates the length of the item added/deleted
1341 * add indicates if the item indicated by the cursor has just been
1342 * added (add == 1) or deleted (add == 0).
1343 * dup indicates if the addition occurred into a duplicate set.
1345 * PUBLIC: void __ham_c_update
1346 * PUBLIC: __P((HASH_CURSOR *, db_pgno_t, u_int32_t, int, int));
1349 __ham_c_update(hcp
, chg_pgno
, len
, add
, is_dup
)
1361 * Regular adds are always at the end of a given page, so we never
1362 * have to adjust anyone's cursor after a regular add.
1368 * Determine if a page was deleted. If this is a regular update
1369 * (i.e., not is_dup) then the deleted page's number will be that in
1370 * chg_pgno, and the pgno in the cursor will be different. If this
1371 * was an onpage-duplicate, then the same conditions apply. If this
1372 * was an off-page duplicate, then we need to verify if hcp->dpgno
1373 * is the same (no delete) or different (delete) than chg_pgno.
1375 if (!is_dup
|| hcp
->dpgno
== PGNO_INVALID
)
1377 chg_pgno
!= PGNO_INVALID
&& chg_pgno
!= hcp
->pgno
;
1380 chg_pgno
!= PGNO_INVALID
&& chg_pgno
!= hcp
->dpgno
;
1382 hp
= hcp
->db_cursor
->dbp
->master
->internal
;
1383 DB_THREAD_LOCK(hp
->dbp
);
1385 for (cp
= TAILQ_FIRST(&hp
->dbp
->curs_queue
); cp
!= NULL
;
1386 cp
= TAILQ_NEXT(cp
, links
)) {
1387 if (cp
->internal
== hcp
)
1390 lcp
= (HASH_CURSOR
*)cp
->internal
;
1392 if (!is_dup
&& lcp
->pgno
!= chg_pgno
)
1396 if (F_ISSET(hcp
, H_DELETED
) && lcp
->pgno
!= chg_pgno
)
1398 if (!F_ISSET(hcp
, H_DELETED
) && lcp
->dpgno
!= chg_pgno
)
1404 lcp
->dpgno
= hcp
->dpgno
;
1405 lcp
->dndx
= hcp
->dndx
;
1407 lcp
->pgno
= hcp
->pgno
;
1408 lcp
->bndx
= hcp
->bndx
;
1409 lcp
->bucket
= hcp
->bucket
;
1411 F_CLR(lcp
, H_ISDUP
);
1415 if (!is_dup
&& lcp
->bndx
> hcp
->bndx
)
1417 else if (!is_dup
&& lcp
->bndx
== hcp
->bndx
)
1418 F_SET(lcp
, H_DELETED
);
1419 else if (is_dup
&& lcp
->bndx
== hcp
->bndx
) {
1420 /* Assign dpgno in case there was page conversion. */
1421 lcp
->dpgno
= hcp
->dpgno
;
1422 if (add
&& lcp
->dndx
>= hcp
->dndx
)
1424 else if (!add
&& lcp
->dndx
> hcp
->dndx
)
1426 else if (!add
&& lcp
->dndx
== hcp
->dndx
)
1427 F_SET(lcp
, H_DELETED
);
1429 /* Now adjust on-page information. */
1430 if (lcp
->dpgno
== PGNO_INVALID
) {
1432 lcp
->dup_tlen
+= len
;
1433 if (lcp
->dndx
> hcp
->dndx
)
1434 lcp
->dup_off
+= len
;
1436 lcp
->dup_tlen
-= len
;
1437 if (lcp
->dndx
> hcp
->dndx
)
1438 lcp
->dup_off
-= len
;
1443 DB_THREAD_UNLOCK(hp
->dbp
);
1448 * This function gets called when we create a duplicate handle for a
1449 * threaded DB. It should create the private part of the DB structure.
1451 * PUBLIC: int __ham_hdup __P((DB *, DB *));
1454 __ham_hdup(orig
, new)
1461 if ((hashp
= (HTAB
*)__db_malloc(sizeof(HTAB
))) == NULL
)
1464 new->internal
= hashp
;
1469 hashp
->hash
= ((HTAB
*)orig
->internal
)->hash
;
1470 if ((hashp
->split_buf
= (PAGE
*)__db_malloc(orig
->pgsize
)) == NULL
)
1472 hashp
->local_errno
= 0;
1473 hashp
->hash_accesses
= 0;
1474 hashp
->hash_collisions
= 0;
1475 hashp
->hash_expansions
= 0;
1476 hashp
->hash_overflows
= 0;
1477 hashp
->hash_bigpages
= 0;
1478 /* Initialize the cursor queue. */
1479 ret
= __ham_c_init(new, NULL
, &curs
);
1480 TAILQ_INSERT_TAIL(&new->curs_queue
, curs
, links
);