2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997, 1998
5 * Sleepycat Software. All rights reserved.
8 * Copyright (c) 1990, 1993, 1994
9 * Margo Seltzer. All rights reserved.
12 * Copyright (c) 1990, 1993, 1994
13 * The Regents of the University of California. All rights reserved.
15 * This code is derived from software contributed to Berkeley by
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
26 * 3. All advertising materials mentioning features or use of this software
27 * must display the following acknowledgement:
28 * This product includes software developed by the University of
29 * California, Berkeley and its contributors.
30 * 4. Neither the name of the University nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
34 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
/*
 * SCCS identification string embedded in the object file (readable with
 * what(1)).  Reconstructed into a single valid declaration; the original
 * text was split across lines by the extraction.
 */
static const char sccsid[] = "@(#)hash.c	10.45 (Sleepycat) 5/11/98";
53 #ifndef NO_SYSTEM_INCLUDES
54 #include <sys/types.h>
69 static int __ham_c_close
__P((DBC
*));
70 static int __ham_c_del
__P((DBC
*, u_int32_t
));
71 static int __ham_c_get
__P((DBC
*, DBT
*, DBT
*, u_int32_t
));
72 static int __ham_c_put
__P((DBC
*, DBT
*, DBT
*, u_int32_t
));
73 static int __ham_c_init
__P((DB
*, DB_TXN
*, DBC
**));
74 static int __ham_cursor
__P((DB
*, DB_TXN
*, DBC
**));
75 static int __ham_delete
__P((DB
*, DB_TXN
*, DBT
*, u_int32_t
));
76 static int __ham_dup_return
__P((HTAB
*, HASH_CURSOR
*, DBT
*, u_int32_t
));
77 static int __ham_get
__P((DB
*, DB_TXN
*, DBT
*, DBT
*, u_int32_t
));
78 static void __ham_init_htab
__P((HTAB
*, u_int32_t
, u_int32_t
));
79 static int __ham_lookup
__P((HTAB
*,
80 HASH_CURSOR
*, const DBT
*, u_int32_t
, db_lockmode_t
));
81 static int __ham_overwrite
__P((HTAB
*, HASH_CURSOR
*, DBT
*));
82 static int __ham_put
__P((DB
*, DB_TXN
*, DBT
*, DBT
*, u_int32_t
));
83 static int __ham_sync
__P((DB
*, u_int32_t
));
85 /************************** INTERFACE ROUTINES ***************************/
91 * PUBLIC: int __ham_open __P((DB *, DB_INFO *));
94 __ham_open(dbp
, dbinfo
)
101 int file_existed
, ret
;
105 if ((hashp
= (HTAB
*)__db_calloc(1, sizeof(HTAB
))) == NULL
)
109 /* Set the hash function if specified by the user. */
110 if (dbinfo
!= NULL
&& dbinfo
->h_hash
!= NULL
)
111 hashp
->hash
= dbinfo
->h_hash
;
114 * Initialize the remaining fields of the dbp. The type, close and
115 * fd functions are all set in db_open.
117 dbp
->internal
= hashp
;
118 dbp
->cursor
= __ham_cursor
;
119 dbp
->del
= __ham_delete
;
120 dbp
->get
= __ham_get
;
121 dbp
->put
= __ham_put
;
122 dbp
->sync
= __ham_sync
;
124 /* If locking is turned on, lock the meta data page. */
125 if (F_ISSET(dbp
, DB_AM_LOCKING
)) {
126 dbp
->lock
.pgno
= BUCKET_INVALID
;
127 if ((ret
= lock_get(dbenv
->lk_info
, dbp
->locker
,
128 0, &dbp
->lock_dbt
, DB_LOCK_READ
, &hashp
->hlock
)) != 0) {
136 * Now, we can try to read the meta-data page and figure out
137 * if we set up locking and get the meta-data page properly.
138 * If this is a new file, initialize it, and put it back dirty.
140 if ((ret
= __ham_get_page(hashp
->dbp
, 0, (PAGE
**)&hashp
->hdr
)) != 0)
143 /* Initialize the hashp structure */
144 if (hashp
->hdr
->magic
== DB_HASHMAGIC
) {
146 /* File exists, verify the data in the header. */
147 if (hashp
->hash
== NULL
)
149 hashp
->hdr
->version
< 5 ? __ham_func4
: __ham_func5
;
150 if (hashp
->hash(CHARKEY
, sizeof(CHARKEY
)) !=
151 hashp
->hdr
->h_charkey
) {
152 __db_err(hashp
->dbp
->dbenv
,
153 "hash: incompatible hash function");
157 if (F_ISSET(hashp
->hdr
, DB_HASH_DUP
))
158 F_SET(dbp
, DB_AM_DUP
);
161 * File does not exist, we must initialize the header. If
162 * locking is enabled that means getting a write lock first.
165 if (F_ISSET(dbp
, DB_AM_LOCKING
) &&
166 ((ret
= lock_put(dbenv
->lk_info
, hashp
->hlock
)) != 0 ||
167 (ret
= lock_get(dbenv
->lk_info
, dbp
->locker
, 0,
168 &dbp
->lock_dbt
, DB_LOCK_WRITE
, &hashp
->hlock
)) != 0)) {
174 __ham_init_htab(hashp
,
175 dbinfo
!= NULL
? dbinfo
->h_nelem
: 0,
176 dbinfo
!= NULL
? dbinfo
->h_ffactor
: 0);
177 if (F_ISSET(dbp
, DB_AM_DUP
))
178 F_SET(hashp
->hdr
, DB_HASH_DUP
);
179 if ((ret
= __ham_dirty_page(hashp
, (PAGE
*)hashp
->hdr
)) != 0)
183 /* Initialize the default cursor. */
184 __ham_c_init(dbp
, NULL
, &curs
);
185 TAILQ_INSERT_TAIL(&dbp
->curs_queue
, curs
, links
);
187 /* Allocate memory for our split buffer. */
188 if ((hashp
->split_buf
= (PAGE
*)__db_malloc(dbp
->pgsize
)) == NULL
) {
193 #ifdef NO_STATISTICS_FOR_DB_ERR
195 "%s%lx\n%s%ld\n%s%ld\n%s%ld\n%s%ld\n%s0x%lx\n%s0x%lx\n%s%ld\n%s%ld\n%s0x%lx",
196 "TABLE POINTER ", (long)hashp
,
197 "BUCKET SIZE ", (long)hashp
->hdr
->pagesize
,
198 "FILL FACTOR ", (long)hashp
->hdr
->ffactor
,
199 "MAX BUCKET ", (long)hashp
->hdr
->max_bucket
,
200 "OVFL POINT ", (long)hashp
->hdr
->ovfl_point
,
201 "LAST FREED ", (long)hashp
->hdr
->last_freed
,
202 "HIGH MASK ", (long)hashp
->hdr
->high_mask
,
203 "LOW MASK ", (long)hashp
->hdr
->low_mask
,
204 "NELEM ", (long)hashp
->hdr
->nelem
,
205 "FLAGS ", (long)hashp
->hdr
->flags
);
208 /* Release the meta data page */
209 (void)__ham_put_page(hashp
->dbp
, (PAGE
*)hashp
->hdr
, 0);
210 if (F_ISSET(dbp
, DB_AM_LOCKING
) &&
211 (ret
= lock_put(dbenv
->lk_info
, hashp
->hlock
)) != 0) {
219 /* Sync the file so that we know that the meta data goes to disk. */
220 if (!file_existed
&& (ret
= dbp
->sync(dbp
, 0)) != 0)
224 out
: (void)__ham_close(dbp
);
229 * PUBLIC: int __ham_close __P((DB *));
238 DEBUG_LWRITE(dbp
, NULL
, "ham_close", NULL
, NULL
, 0);
239 hashp
= (HTAB
*)dbp
->internal
;
242 /* Free the split page. */
243 if (hashp
->split_buf
)
244 FREE(hashp
->split_buf
, dbp
->pgsize
);
246 if (hashp
->hdr
&& (t_ret
= __ham_put_page(hashp
->dbp
,
247 (PAGE
*)hashp
->hdr
, 0)) != 0 && ret
== 0)
249 if (hashp
->hlock
&& (t_ret
= lock_put(hashp
->dbp
->dbenv
->lk_info
,
250 hashp
->hlock
)) != 0 && ret
== 0)
253 FREE(hashp
, sizeof(HTAB
));
254 dbp
->internal
= NULL
;
258 /************************** LOCAL CREATION ROUTINES **********************/
260 * Returns 0 on No Error
263 __ham_init_htab(hashp
, nelem
, ffactor
)
265 u_int32_t nelem
, ffactor
;
267 int32_t l2
, nbuckets
;
269 memset(hashp
->hdr
, 0, sizeof(HASHHDR
));
270 hashp
->hdr
->ffactor
= ffactor
;
271 hashp
->hdr
->pagesize
= hashp
->dbp
->pgsize
;
272 ZERO_LSN(hashp
->hdr
->lsn
);
273 hashp
->hdr
->magic
= DB_HASHMAGIC
;
274 hashp
->hdr
->version
= DB_HASHVERSION
;
275 if (hashp
->hash
== NULL
)
277 hashp
->hdr
->version
< 5 ? __ham_func4
: __ham_func5
;
278 hashp
->hdr
->h_charkey
= hashp
->hash(CHARKEY
, sizeof(CHARKEY
));
279 if (nelem
!= 0 && hashp
->hdr
->ffactor
!= 0) {
280 nelem
= (nelem
- 1) / hashp
->hdr
->ffactor
+ 1;
281 l2
= __db_log2(nelem
> 2 ? nelem
: 2);
287 hashp
->hdr
->ovfl_point
= l2
;
288 hashp
->hdr
->last_freed
= PGNO_INVALID
;
290 hashp
->hdr
->max_bucket
= hashp
->hdr
->high_mask
= nbuckets
- 1;
291 hashp
->hdr
->low_mask
= (nbuckets
>> 1) - 1;
292 memcpy(hashp
->hdr
->uid
, hashp
->dbp
->lock
.fileid
, DB_FILE_ID_LEN
);
295 /********************** DESTROY/CLOSE ROUTINES ************************/
299 * Write modified pages to disk
306 __ham_sync(dbp
, flags
)
312 DEBUG_LWRITE(dbp
, NULL
, "ham_sync", NULL
, NULL
, flags
);
313 if ((ret
= __db_syncchk(dbp
, flags
)) != 0)
315 if (F_ISSET(dbp
, DB_AM_RDONLY
))
318 if ((ret
= memp_fsync(dbp
->mpf
)) == DB_INCOMPLETE
)
324 /*******************************SEARCH ROUTINES *****************************/
326 * All the access routines return
330 * 1 to indicate an external ERROR (i.e. key not found, etc)
331 * -1 to indicate an internal ERROR (i.e. out of memory, etc)
335 __ham_get(dbp
, txn
, key
, data
, flags
)
347 DEBUG_LREAD(dbp
, txn
, "ham_get", key
, NULL
, flags
);
348 if ((ret
= __db_getchk(dbp
, key
, data
, flags
)) != 0)
352 if (F_ISSET(dbp
, DB_AM_THREAD
) &&
353 (ret
= __db_gethandle(dbp
, __ham_hdup
, &ldbp
)) != 0)
356 hashp
= (HTAB
*)ldbp
->internal
;
357 SET_LOCKER(ldbp
, txn
);
358 GET_META(ldbp
, hashp
);
360 hashp
->hash_accesses
++;
361 hcp
= (HASH_CURSOR
*)TAILQ_FIRST(&ldbp
->curs_queue
)->internal
;
362 if ((ret
= __ham_lookup(hashp
, hcp
, key
, 0, DB_LOCK_READ
)) == 0)
363 if (F_ISSET(hcp
, H_OK
))
364 ret
= __ham_dup_return(hashp
, hcp
, data
, DB_FIRST
);
365 else /* Key was not found */
368 if ((t_ret
= __ham_item_done(hashp
, hcp
, 0)) != 0 && ret
== 0)
370 RELEASE_META(ldbp
, hashp
);
371 if (F_ISSET(dbp
, DB_AM_THREAD
))
372 __db_puthandle(ldbp
);
377 __ham_put(dbp
, txn
, key
, data
, flags
)
391 DEBUG_LWRITE(dbp
, txn
, "ham_put", key
, data
, flags
);
392 if ((ret
= __db_putchk(dbp
, key
, data
,
393 flags
, F_ISSET(dbp
, DB_AM_RDONLY
), F_ISSET(dbp
, DB_AM_DUP
))) != 0)
397 if (F_ISSET(dbp
, DB_AM_THREAD
) &&
398 (ret
= __db_gethandle(dbp
, __ham_hdup
, &ldbp
)) != 0)
401 hashp
= (HTAB
*)ldbp
->internal
;
402 SET_LOCKER(ldbp
, txn
);
403 GET_META(ldbp
, hashp
);
404 hcp
= TAILQ_FIRST(&ldbp
->curs_queue
)->internal
;
406 nbytes
= (ISBIG(hashp
, key
->size
) ? HOFFPAGE_PSIZE
:
407 HKEYDATA_PSIZE(key
->size
)) +
408 (ISBIG(hashp
, data
->size
) ? HOFFPAGE_PSIZE
:
409 HKEYDATA_PSIZE(data
->size
));
411 hashp
->hash_accesses
++;
412 ret
= __ham_lookup(hashp
, hcp
, key
, nbytes
, DB_LOCK_WRITE
);
414 if (ret
== DB_NOTFOUND
) {
416 if (hcp
->seek_found_page
!= PGNO_INVALID
&&
417 hcp
->seek_found_page
!= hcp
->pgno
) {
418 if ((ret
= __ham_item_done(hashp
, hcp
, 0)) != 0)
420 hcp
->pgno
= hcp
->seek_found_page
;
421 hcp
->bndx
= NDX_INVALID
;
424 if (F_ISSET(data
, DB_DBT_PARTIAL
) && data
->doff
!= 0) {
426 * Doing a partial put, but the key does not exist
427 * and we are not beginning the write at 0. We
428 * must create a data item padded up to doff and
429 * then write the new bytes represented by val.
431 ret
= __ham_init_dbt(&tmp_val
, data
->size
+ data
->doff
,
432 &hcp
->big_data
, &hcp
->big_datalen
);
434 memset(tmp_val
.data
, 0, data
->doff
);
435 memcpy((u_int8_t
*)tmp_val
.data
+ data
->doff
,
436 data
->data
, data
->size
);
443 ret
= __ham_add_el(hashp
, hcp
, key
, myval
, H_KEYDATA
);
444 } else if (ret
== 0 && F_ISSET(hcp
, H_OK
)) {
445 if (flags
== DB_NOOVERWRITE
)
447 else if (F_ISSET(ldbp
, DB_AM_DUP
))
448 ret
= __ham_add_dup(hashp
, hcp
, data
, DB_KEYLAST
);
450 ret
= __ham_overwrite(hashp
, hcp
, data
);
453 /* Free up all the cursor pages. */
454 if ((t_ret
= __ham_item_done(hashp
, hcp
, ret
== 0)) != 0 && ret
== 0)
456 /* Now check if we have to grow. */
457 out
: if (ret
== 0 && F_ISSET(hcp
, H_EXPAND
)) {
458 ret
= __ham_expand_table(hashp
);
459 F_CLR(hcp
, H_EXPAND
);
462 if ((t_ret
= __ham_item_done(hashp
, hcp
, ret
== 0)) != 0 && ret
== 0)
464 RELEASE_META(ldbp
, hashp
);
465 if (F_ISSET(dbp
, DB_AM_THREAD
))
466 __db_puthandle(ldbp
);
471 __ham_cursor(dbp
, txnid
, dbcp
)
478 DEBUG_LWRITE(dbp
, txnid
, "ham_cursor", NULL
, NULL
, 0);
479 if ((ret
= __ham_c_init(dbp
, txnid
, dbcp
)) != 0)
483 TAILQ_INSERT_TAIL(&dbp
->curs_queue
, *dbcp
, links
);
484 DB_THREAD_UNLOCK(dbp
);
489 __ham_c_init(dbp
, txnid
, dbcp
)
495 HASH_CURSOR
*new_curs
;
497 if ((db_curs
= (DBC
*)__db_calloc(sizeof(DBC
), 1)) == NULL
)
501 (HASH_CURSOR
*)__db_calloc(sizeof(struct cursor_t
), 1)) == NULL
) {
502 FREE(db_curs
, sizeof(DBC
));
506 db_curs
->internal
= new_curs
;
507 db_curs
->c_close
= __ham_c_close
;
508 db_curs
->c_del
= __ham_c_del
;
509 db_curs
->c_get
= __ham_c_get
;
510 db_curs
->c_put
= __ham_c_put
;
511 db_curs
->txn
= txnid
;
514 new_curs
->db_cursor
= db_curs
;
515 __ham_item_init(new_curs
);
523 __ham_delete(dbp
, txn
, key
, flags
)
534 DEBUG_LWRITE(dbp
, txn
, "ham_delete", key
, NULL
, flags
);
536 __db_delchk(dbp
, key
, flags
, F_ISSET(dbp
, DB_AM_RDONLY
))) != 0)
540 if (F_ISSET(dbp
, DB_AM_THREAD
) &&
541 (ret
= __db_gethandle(dbp
, __ham_hdup
, &ldbp
)) != 0)
543 hashp
= (HTAB
*)ldbp
->internal
;
544 SET_LOCKER(ldbp
, txn
);
545 GET_META(ldbp
, hashp
);
546 hcp
= TAILQ_FIRST(&ldbp
->curs_queue
)->internal
;
548 hashp
->hash_accesses
++;
549 if ((ret
= __ham_lookup(hashp
, hcp
, key
, 0, DB_LOCK_WRITE
)) == 0)
550 if (F_ISSET(hcp
, H_OK
))
551 ret
= __ham_del_pair(hashp
, hcp
, 1);
555 if ((t_ret
= __ham_item_done(hashp
, hcp
, ret
== 0)) != 0 && ret
== 0)
557 RELEASE_META(ldbp
, hashp
);
558 if (F_ISSET(dbp
, DB_AM_THREAD
))
559 __db_puthandle(ldbp
);
563 /* ****************** CURSORS ********************************** */
565 __ham_c_close(cursor
)
571 DEBUG_LWRITE(cursor
->dbp
, cursor
->txn
, "ham_c_close", NULL
, NULL
, 0);
573 * If the pagep, dpagep, and lock fields of the cursor are all NULL,
574 * then there really isn't a need to get a handle here. However,
575 * the normal case is that at least one of those fields is non-NULL,
576 * and putting those checks in here would couple the ham_item_done
577 * functionality with cursor close which would be pretty disgusting.
578 * Instead, we pay the overhead here of always getting the handle.
581 if (F_ISSET(cursor
->dbp
, DB_AM_THREAD
) &&
582 (ret
= __db_gethandle(cursor
->dbp
, __ham_hdup
, &ldbp
)) != 0)
585 ret
= __ham_c_iclose(ldbp
, cursor
);
587 if (F_ISSET(ldbp
, DB_AM_THREAD
))
588 __db_puthandle(ldbp
);
594 * Internal cursor close routine; assumes it is being passed the correct
595 * handle, rather than getting and putting a handle.
597 * PUBLIC: int __ham_c_iclose __P((DB *, DBC *));
600 __ham_c_iclose(dbp
, dbc
)
608 hashp
= (HTAB
*)dbp
->internal
;
609 hcp
= (HASH_CURSOR
*)dbc
->internal
;
610 ret
= __ham_item_done(hashp
, hcp
, 0);
613 FREE(hcp
->big_key
, hcp
->big_keylen
);
615 FREE(hcp
->big_data
, hcp
->big_datalen
);
618 * All cursors (except the default ones) are linked off the master.
619 * Therefore, when we close the cursor, we have to remove it from
620 * the master, not the local one.
621 * XXX I am always removing from the master; what about local cursors?
623 DB_THREAD_LOCK(dbc
->dbp
);
624 TAILQ_REMOVE(&dbc
->dbp
->curs_queue
, dbc
, links
);
625 DB_THREAD_UNLOCK(dbc
->dbp
);
627 FREE(hcp
, sizeof(HASH_CURSOR
));
628 FREE(dbc
, sizeof(DBC
));
634 __ham_c_del(cursor
, flags
)
640 HASH_CURSOR save_curs
;
642 db_pgno_t ppgno
, chg_pgno
;
645 DEBUG_LWRITE(cursor
->dbp
, cursor
->txn
, "ham_c_del", NULL
, NULL
, flags
);
647 if (F_ISSET(cursor
->dbp
, DB_AM_THREAD
) &&
648 (ret
= __db_gethandle(cursor
->dbp
, __ham_hdup
, &ldbp
)) != 0)
650 hashp
= (HTAB
*)ldbp
->internal
;
651 hcp
= (HASH_CURSOR
*)cursor
->internal
;
653 if ((ret
= __db_cdelchk(ldbp
, flags
,
654 F_ISSET(ldbp
, DB_AM_RDONLY
), IS_VALID(hcp
))) != 0)
656 if (F_ISSET(hcp
, H_DELETED
))
657 return (DB_NOTFOUND
);
659 SET_LOCKER(hashp
->dbp
, cursor
->txn
);
660 GET_META(hashp
->dbp
, hashp
);
661 hashp
->hash_accesses
++;
662 if ((ret
= __ham_get_cpage(hashp
, hcp
, DB_LOCK_WRITE
)) != 0)
664 if (F_ISSET(hcp
, H_ISDUP
) && hcp
->dpgno
!= PGNO_INVALID
) {
666 * We are about to remove a duplicate from offpage.
669 * 1. We will remove an item on a page, but there are more
670 * items on that page.
671 * 2. We will remove the last item on a page, but there is a
672 * following page of duplicates.
673 * 3. We will remove the last item on a page, this page was the
674 * last page in a duplicate set, but there were dups before
676 * 4. We will remove the last item on a page, removing the last
678 * In case 1 hcp->dpagep is unchanged.
679 * In case 2 hcp->dpagep comes back pointing to the next dup
681 * In case 3 hcp->dpagep comes back NULL.
682 * In case 4 hcp->dpagep comes back NULL.
684 * Case 4 results in deleting the pair off the master page.
685 * The normal code for doing this knows how to delete the
686 * duplicates, so we will handle this case in the normal code.
688 ppgno
= PREV_PGNO(hcp
->dpagep
);
689 if (ppgno
== PGNO_INVALID
&&
690 NEXT_PGNO(hcp
->dpagep
) == PGNO_INVALID
&&
691 NUM_ENT(hcp
->dpagep
) == 1)
694 /* Remove item from duplicate page. */
695 chg_pgno
= hcp
->dpgno
;
696 if ((ret
= __db_drem(hashp
->dbp
,
697 &hcp
->dpagep
, hcp
->dndx
, __ham_del_page
)) != 0)
700 if (hcp
->dpagep
== NULL
) {
701 if (ppgno
!= PGNO_INVALID
) { /* Case 3 */
703 if ((ret
= __ham_get_cpage(hashp
, hcp
,
706 hcp
->dndx
= NUM_ENT(hcp
->dpagep
);
707 F_SET(hcp
, H_DELETED
);
708 } else { /* Case 4 */
709 ret
= __ham_del_pair(hashp
, hcp
, 1);
710 hcp
->dpgno
= PGNO_INVALID
;
712 * Delpair updated the cursor queue, so we
713 * don't have to do that here.
715 chg_pgno
= PGNO_INVALID
;
717 } else if (PGNO(hcp
->dpagep
) != hcp
->dpgno
) {
718 hcp
->dndx
= 0; /* Case 2 */
719 hcp
->dpgno
= PGNO(hcp
->dpagep
);
720 if (ppgno
== PGNO_INVALID
)
721 memcpy(HOFFDUP_PGNO(P_ENTRY(hcp
->pagep
,
722 H_DATAINDEX(hcp
->bndx
))),
723 &hcp
->dpgno
, sizeof(db_pgno_t
));
724 F_SET(hcp
, H_DELETED
);
726 F_SET(hcp
, H_DELETED
);
727 if (chg_pgno
!= PGNO_INVALID
)
728 __ham_c_update(hcp
, chg_pgno
, 0, 0, 1);
729 } else if (F_ISSET(hcp
, H_ISDUP
)) { /* on page */
730 if (hcp
->dup_off
== 0 && DUP_SIZE(hcp
->dup_len
) ==
731 LEN_HDATA(hcp
->pagep
, hashp
->hdr
->pagesize
, hcp
->bndx
))
732 ret
= __ham_del_pair(hashp
, hcp
, 1);
737 F_SET(&repldbt
, DB_DBT_PARTIAL
);
738 repldbt
.doff
= hcp
->dup_off
;
739 repldbt
.dlen
= DUP_SIZE(hcp
->dup_len
);
741 ret
= __ham_replpair(hashp
, hcp
, &repldbt
, 0);
742 hcp
->dup_tlen
-= DUP_SIZE(hcp
->dup_len
);
743 F_SET(hcp
, H_DELETED
);
744 __ham_c_update(hcp
, hcp
->pgno
,
745 DUP_SIZE(hcp
->dup_len
), 0, 1);
749 /* Not a duplicate */
750 normal
: ret
= __ham_del_pair(hashp
, hcp
, 1);
752 out
: if ((t_ret
= __ham_item_done(hashp
, hcp
, ret
== 0)) != 0 && ret
== 0)
756 RELEASE_META(hashp
->dbp
, hashp
);
757 if (F_ISSET(cursor
->dbp
, DB_AM_THREAD
))
758 __db_puthandle(ldbp
);
763 __ham_c_get(cursor
, key
, data
, flags
)
771 HASH_CURSOR
*hcp
, save_curs
;
772 int get_key
, ret
, t_ret
;
774 DEBUG_LREAD(cursor
->dbp
, cursor
->txn
, "ham_c_get",
775 flags
== DB_SET
|| flags
== DB_SET_RANGE
? key
: NULL
,
778 if (F_ISSET(cursor
->dbp
, DB_AM_THREAD
) &&
779 (ret
= __db_gethandle(cursor
->dbp
, __ham_hdup
, &ldbp
)) != 0)
781 hashp
= (HTAB
*)(ldbp
->internal
);
782 hcp
= (HASH_CURSOR
*)cursor
->internal
;
785 __db_cgetchk(hashp
->dbp
, key
, data
, flags
, IS_VALID(hcp
))) != 0)
788 SET_LOCKER(hashp
->dbp
, cursor
->txn
);
789 GET_META(hashp
->dbp
, hashp
);
790 hashp
->hash_accesses
++;
798 if (hcp
->bucket
!= BUCKET_INVALID
) {
799 ret
= __ham_item_prev(hashp
, hcp
, DB_LOCK_READ
);
804 ret
= __ham_item_last(hashp
, hcp
, DB_LOCK_READ
);
807 ret
= __ham_item_first(hashp
, hcp
, DB_LOCK_READ
);
810 if (hcp
->bucket
== BUCKET_INVALID
)
812 ret
= __ham_item_next(hashp
, hcp
, DB_LOCK_READ
);
816 ret
= __ham_lookup(hashp
, hcp
, key
, 0, DB_LOCK_READ
);
820 if (F_ISSET(hcp
, H_DELETED
)) {
825 ret
= __ham_item(hashp
, hcp
, DB_LOCK_READ
);
830 * Must always enter this loop to do error handling and
831 * check for big key/data pair.
834 if (ret
!= 0 && ret
!= DB_NOTFOUND
)
836 else if (F_ISSET(hcp
, H_OK
)) {
838 if (get_key
&& (ret
= __db_ret(hashp
->dbp
, hcp
->pagep
,
839 H_KEYINDEX(hcp
->bndx
), key
, &hcp
->big_key
,
840 &hcp
->big_keylen
)) != 0)
843 ret
= __ham_dup_return(hashp
, hcp
, data
, flags
);
845 } else if (!F_ISSET(hcp
, H_NOMORE
)) {
851 * Ran out of entries in a bucket; change buckets.
856 ret
= __ham_item_done(hashp
, hcp
, 0);
857 if (hcp
->bucket
== 0) {
862 hcp
->bndx
= NDX_INVALID
;
864 ret
= __ham_item_prev(hashp
,
869 ret
= __ham_item_done(hashp
, hcp
, 0);
870 hcp
->bndx
= NDX_INVALID
;
872 hcp
->pgno
= PGNO_INVALID
;
874 if (hcp
->bucket
> hashp
->hdr
->max_bucket
) {
879 ret
= __ham_item_next(hashp
,
889 out1
: if ((t_ret
= __ham_item_done(hashp
, hcp
, 0)) != 0 && ret
== 0)
893 RELEASE_META(hashp
->dbp
, hashp
);
894 if (F_ISSET(cursor
->dbp
, DB_AM_THREAD
))
895 __db_puthandle(ldbp
);
900 __ham_c_put(cursor
, key
, data
, flags
)
907 HASH_CURSOR
*hcp
, save_curs
;
912 DEBUG_LWRITE(cursor
->dbp
, cursor
->txn
, "ham_c_put",
913 flags
== DB_KEYFIRST
|| flags
== DB_KEYLAST
? key
: NULL
,
916 if (F_ISSET(cursor
->dbp
, DB_AM_THREAD
) &&
917 (ret
= __db_gethandle(cursor
->dbp
, __ham_hdup
, &ldbp
)) != 0)
919 hashp
= (HTAB
*)(ldbp
->internal
);
920 hcp
= (HASH_CURSOR
*)cursor
->internal
;
923 if ((ret
= __db_cputchk(hashp
->dbp
, key
, data
, flags
,
924 F_ISSET(ldbp
, DB_AM_RDONLY
), IS_VALID(hcp
))) != 0)
926 if (F_ISSET(hcp
, H_DELETED
))
927 return (DB_NOTFOUND
);
929 SET_LOCKER(hashp
->dbp
, cursor
->txn
);
930 GET_META(hashp
->dbp
, hashp
);
936 nbytes
= (ISBIG(hashp
, key
->size
) ? HOFFPAGE_PSIZE
:
937 HKEYDATA_PSIZE(key
->size
)) +
938 (ISBIG(hashp
, data
->size
) ? HOFFPAGE_PSIZE
:
939 HKEYDATA_PSIZE(data
->size
));
940 ret
= __ham_lookup(hashp
, hcp
, key
, nbytes
, DB_LOCK_WRITE
);
945 ret
= __ham_item(hashp
, hcp
, DB_LOCK_WRITE
);
950 if (flags
== DB_CURRENT
&& !F_ISSET(ldbp
, DB_AM_DUP
))
951 ret
= __ham_overwrite(hashp
, hcp
, data
);
953 ret
= __ham_add_dup(hashp
, hcp
, data
, flags
);
956 if (ret
== 0 && F_ISSET(hcp
, H_EXPAND
)) {
957 ret
= __ham_expand_table(hashp
);
958 F_CLR(hcp
, H_EXPAND
);
961 if ((t_ret
= __ham_item_done(hashp
, hcp
, ret
== 0)) != 0 && ret
== 0)
965 RELEASE_META(hashp
->dbp
, hashp
);
966 if (F_ISSET(cursor
->dbp
, DB_AM_THREAD
))
967 __db_puthandle(ldbp
);
971 /********************************* UTILITIES ************************/
974 * __ham_expand_table --
976 * PUBLIC: int __ham_expand_table __P((HTAB *));
979 __ham_expand_table(hashp
)
983 u_int32_t old_bucket
, new_bucket
, spare_ndx
;
987 DIRTY_META(hashp
, ret
);
992 * If the split point is about to increase, make sure that we
993 * have enough extra pages. The calculation here is weird.
994 * We'd like to do this after we've upped max_bucket, but it's
995 * too late then because we've logged the meta-data split. What
996 * we'll do between then and now is increment max bucket and then
997 * see what the log of one greater than that is; here we have to
998 * look at the log of max + 2. VERY NASTY STUFF.
1000 if (__db_log2(hashp
->hdr
->max_bucket
+ 2) > hashp
->hdr
->ovfl_point
) {
1002 * We are about to shift the split point. Make sure that
1003 * if the next doubling is going to be big (more than 8
1004 * pages), we have some extra pages around.
1006 if (hashp
->hdr
->max_bucket
+ 1 >= 8 &&
1007 hashp
->hdr
->spares
[hashp
->hdr
->ovfl_point
] <
1008 hashp
->hdr
->spares
[hashp
->hdr
->ovfl_point
- 1] +
1009 hashp
->hdr
->ovfl_point
+ 1)
1010 __ham_init_ovflpages(hashp
);
1013 /* Now we can log the meta-data split. */
1014 if (DB_LOGGING(hashp
->dbp
)) {
1015 if ((ret
= __ham_splitmeta_log(hashp
->dbp
->dbenv
->lg_info
,
1016 (DB_TXN
*)hashp
->dbp
->txn
, &new_lsn
, 0,
1017 hashp
->dbp
->log_fileid
,
1018 hashp
->hdr
->max_bucket
, hashp
->hdr
->ovfl_point
,
1019 hashp
->hdr
->spares
[hashp
->hdr
->ovfl_point
],
1020 &hashp
->hdr
->lsn
)) != 0)
1023 hashp
->hdr
->lsn
= new_lsn
;
1026 hashp
->hash_expansions
++;
1027 new_bucket
= ++hashp
->hdr
->max_bucket
;
1028 old_bucket
= (hashp
->hdr
->max_bucket
& hashp
->hdr
->low_mask
);
1031 * If the split point is increasing, copy the current contents
1032 * of the spare split bucket to the next bucket.
1034 spare_ndx
= __db_log2(hashp
->hdr
->max_bucket
+ 1);
1035 if (spare_ndx
> hashp
->hdr
->ovfl_point
) {
1036 hashp
->hdr
->spares
[spare_ndx
] =
1037 hashp
->hdr
->spares
[hashp
->hdr
->ovfl_point
];
1038 hashp
->hdr
->ovfl_point
= spare_ndx
;
1041 if (new_bucket
> hashp
->hdr
->high_mask
) {
1042 /* Starting a new doubling */
1043 hashp
->hdr
->low_mask
= hashp
->hdr
->high_mask
;
1044 hashp
->hdr
->high_mask
= new_bucket
| hashp
->hdr
->low_mask
;
1047 if (BUCKET_TO_PAGE(hashp
, new_bucket
) > MAX_PAGES(hashp
)) {
1048 __db_err(hashp
->dbp
->dbenv
,
1049 "hash: Cannot allocate new bucket. Pages exhausted.");
1053 /* Relocate records to the new bucket */
1054 return (__ham_split_page(hashp
, old_bucket
, new_bucket
));
1058 * PUBLIC: u_int32_t __ham_call_hash __P((HTAB *, u_int8_t *, int32_t));
1061 __ham_call_hash(hashp
, k
, len
)
1066 u_int32_t n
, bucket
;
1068 n
= (u_int32_t
)hashp
->hash(k
, len
);
1069 bucket
= n
& hashp
->hdr
->high_mask
;
1070 if (bucket
> hashp
->hdr
->max_bucket
)
1071 bucket
= bucket
& hashp
->hdr
->low_mask
;
1076 * Check for duplicates, and call __db_ret appropriately. Release
1077 * everything held by the cursor.
1080 __ham_dup_return(hashp
, hcp
, val
, flags
)
1087 DBT
*myval
, tmp_val
;
1094 /* Check for duplicate and return the first one. */
1095 ndx
= H_DATAINDEX(hcp
->bndx
);
1096 type
= HPAGE_TYPE(hcp
->pagep
, ndx
);
1101 * There are 3 cases:
1102 * 1. We are not in duplicate, simply call db_ret.
1103 * 2. We are looking at keys and stumbled onto a duplicate.
1104 * 3. We are in the middle of a duplicate set. (ISDUP set)
1108 * Here we check for the case where we just stumbled onto a
1109 * duplicate. In this case, we do initialization and then
1110 * let the normal duplicate code handle it.
1112 if (!F_ISSET(hcp
, H_ISDUP
))
1113 if (type
== H_DUPLICATE
) {
1114 F_SET(hcp
, H_ISDUP
);
1115 hcp
->dup_tlen
= LEN_HDATA(hcp
->pagep
,
1116 hashp
->hdr
->pagesize
, hcp
->bndx
);
1117 hk
= H_PAIRDATA(hcp
->pagep
, hcp
->bndx
);
1118 if (flags
== DB_LAST
|| flags
== DB_PREV
) {
1123 HKEYDATA_DATA(hk
) + hcp
->dup_off
,
1125 hcp
->dup_off
+= DUP_SIZE(len
);
1127 } while (hcp
->dup_off
< hcp
->dup_tlen
);
1128 hcp
->dup_off
-= DUP_SIZE(len
);
1132 HKEYDATA_DATA(hk
), sizeof(db_indx_t
));
1137 } else if (type
== H_OFFDUP
) {
1138 F_SET(hcp
, H_ISDUP
);
1139 memcpy(&pgno
, HOFFDUP_PGNO(P_ENTRY(hcp
->pagep
, ndx
)),
1141 if (flags
== DB_LAST
|| flags
== DB_PREV
) {
1142 if ((ret
= __db_dend(hashp
->dbp
,
1143 pgno
, &hcp
->dpagep
)) != 0)
1145 hcp
->dpgno
= PGNO(hcp
->dpagep
);
1146 hcp
->dndx
= NUM_ENT(hcp
->dpagep
) - 1;
1147 } else if ((ret
= __ham_next_cpage(hashp
,
1148 hcp
, pgno
, 0, H_ISDUP
)) != 0)
1154 * Now, everything is initialized, grab a duplicate if
1157 if (F_ISSET(hcp
, H_ISDUP
))
1158 if (hcp
->dpgno
!= PGNO_INVALID
) {
1163 * Copy the DBT in case we are retrieving into
1164 * user memory and we need the parameters for
1167 memcpy(&tmp_val
, val
, sizeof(*val
));
1168 F_SET(&tmp_val
, DB_DBT_PARTIAL
);
1169 tmp_val
.dlen
= hcp
->dup_len
;
1170 tmp_val
.doff
= hcp
->dup_off
+ sizeof(db_indx_t
);
1176 * Finally, if we had a duplicate, pp, ndx, and myval should be
1177 * set appropriately.
1179 if ((ret
= __db_ret(hashp
->dbp
, pp
, ndx
, myval
, &hcp
->big_data
,
1180 &hcp
->big_datalen
)) != 0)
1184 * In case we sent a temporary off to db_ret, set the real
1187 val
->data
= myval
->data
;
1188 val
->size
= myval
->size
;
1194 __ham_overwrite(hashp
, hcp
, nval
)
1199 DBT
*myval
, tmp_val
;
1202 if (F_ISSET(hashp
->dbp
, DB_AM_DUP
))
1203 return (__ham_add_dup(hashp
, hcp
, nval
, DB_KEYLAST
));
1204 else if (!F_ISSET(nval
, DB_DBT_PARTIAL
)) {
1206 memcpy(&tmp_val
, nval
, sizeof(*nval
));
1207 F_SET(&tmp_val
, DB_DBT_PARTIAL
);
1209 hk
= H_PAIRDATA(hcp
->pagep
, hcp
->bndx
);
1210 if (HPAGE_PTYPE(hk
) == H_OFFPAGE
)
1211 memcpy(&tmp_val
.dlen
,
1212 HOFFPAGE_TLEN(hk
), sizeof(u_int32_t
));
1214 tmp_val
.dlen
= LEN_HDATA(hcp
->pagep
,
1215 hashp
->hdr
->pagesize
,hcp
->bndx
);
1217 } else /* Regular partial put */
1220 return (__ham_replpair(hashp
, hcp
, myval
, 0));
1224 * Given a key and a cursor, sets the cursor to the page/ndx on which
1225 * the key resides. If the key is found, the cursor H_OK flag is set
1226 * and the pagep, bndx, pgno (dpagep, dndx, dpgno) fields are set.
1227 * If the key is not found, the H_OK flag is not set. If the sought
1228 * field is non-0, the pagep, bndx, pgno (dpagep, dndx, dpgno) fields
1229 * are set indicating where an add might take place. If it is 0,
1230 * non of the cursor pointer field are valid.
1233 __ham_lookup(hashp
, hcp
, key
, sought
, mode
)
1242 int match
, ret
, t_ret
;
1246 * Set up cursor so that we're looking for space to add an item
1247 * as we cycle through the pages looking for the key.
1249 if ((ret
= __ham_item_reset(hashp
, hcp
)) != 0)
1251 hcp
->seek_size
= sought
;
1253 hcp
->bucket
= __ham_call_hash(hashp
, (u_int8_t
*)key
->data
, key
->size
);
1255 if ((ret
= __ham_item_next(hashp
, hcp
, mode
)) != 0)
1258 if (F_ISSET(hcp
, H_NOMORE
))
1261 hk
= H_PAIRKEY(hcp
->pagep
, hcp
->bndx
);
1262 switch (HPAGE_PTYPE(hk
)) {
1264 memcpy(&tlen
, HOFFPAGE_TLEN(hk
), sizeof(u_int32_t
));
1265 if (tlen
== key
->size
) {
1267 HOFFPAGE_PGNO(hk
), sizeof(db_pgno_t
));
1268 match
= __db_moff(hashp
->dbp
, key
, pgno
);
1276 if (key
->size
== LEN_HKEY(hcp
->pagep
,
1277 hashp
->hdr
->pagesize
, hcp
->bndx
) &&
1279 HKEYDATA_DATA(hk
), key
->size
) == 0) {
1287 * These are errors because keys are never
1288 * duplicated, only data items are.
1290 return (__db_pgfmt(hashp
->dbp
, PGNO(hcp
->pagep
)));
1292 hashp
->hash_collisions
++;
1296 * Item was not found, adjust cursor properly.
1302 if ((t_ret
= __ham_item_done(hashp
, hcp
, 0)) != 0 && ret
== 0)
1308 * Initialize a dbt using some possibly already allocated storage
1310 * PUBLIC: int __ham_init_dbt __P((DBT *, u_int32_t, void **, u_int32_t *));
1313 __ham_init_dbt(dbt
, size
, bufp
, sizep
)
1319 memset(dbt
, 0, sizeof(*dbt
));
1320 if (*sizep
< size
) {
1321 if ((*bufp
= (void *)(*bufp
== NULL
?
1322 __db_malloc(size
) : __db_realloc(*bufp
, size
))) == NULL
) {
1334 * Adjust the cursor after an insert or delete. The cursor passed is
1335 * the one that was operated upon; we just need to check any of the
1338 * len indicates the length of the item added/deleted
1339 * add indicates if the item indicated by the cursor has just been
1340 * added (add == 1) or deleted (add == 0).
1341 * dup indicates if the addition occurred into a duplicate set.
1343 * PUBLIC: void __ham_c_update
1344 * PUBLIC: __P((HASH_CURSOR *, db_pgno_t, u_int32_t, int, int));
1347 __ham_c_update(hcp
, chg_pgno
, len
, add
, is_dup
)
1359 * Regular adds are always at the end of a given page, so we never
1360 * have to adjust anyone's cursor after a regular add.
1366 * Determine if a page was deleted. If this is a regular update
1367 * (i.e., not is_dup) then the deleted page's number will be that in
1368 * chg_pgno, and the pgno in the cursor will be different. If this
1369 * was an onpage-duplicate, then the same conditions apply. If this
1370 * was an off-page duplicate, then we need to verify if hcp->dpgno
1371 * is the same (no delete) or different (delete) than chg_pgno.
1373 if (!is_dup
|| hcp
->dpgno
== PGNO_INVALID
)
1375 chg_pgno
!= PGNO_INVALID
&& chg_pgno
!= hcp
->pgno
;
1378 chg_pgno
!= PGNO_INVALID
&& chg_pgno
!= hcp
->dpgno
;
1380 hp
= hcp
->db_cursor
->dbp
->master
->internal
;
1381 DB_THREAD_LOCK(hp
->dbp
);
1383 for (cp
= TAILQ_FIRST(&hp
->dbp
->curs_queue
); cp
!= NULL
;
1384 cp
= TAILQ_NEXT(cp
, links
)) {
1385 if (cp
->internal
== hcp
)
1388 lcp
= (HASH_CURSOR
*)cp
->internal
;
1390 if (!is_dup
&& lcp
->pgno
!= chg_pgno
)
1394 if (F_ISSET(hcp
, H_DELETED
) && lcp
->pgno
!= chg_pgno
)
1396 if (!F_ISSET(hcp
, H_DELETED
) && lcp
->dpgno
!= chg_pgno
)
1402 lcp
->dpgno
= hcp
->dpgno
;
1403 lcp
->dndx
= hcp
->dndx
;
1405 lcp
->pgno
= hcp
->pgno
;
1406 lcp
->bndx
= hcp
->bndx
;
1407 lcp
->bucket
= hcp
->bucket
;
1409 F_CLR(lcp
, H_ISDUP
);
1413 if (!is_dup
&& lcp
->bndx
> hcp
->bndx
)
1415 else if (!is_dup
&& lcp
->bndx
== hcp
->bndx
)
1416 F_SET(lcp
, H_DELETED
);
1417 else if (is_dup
&& lcp
->bndx
== hcp
->bndx
) {
1418 /* Assign dpgno in case there was page conversion. */
1419 lcp
->dpgno
= hcp
->dpgno
;
1420 if (add
&& lcp
->dndx
>= hcp
->dndx
)
1422 else if (!add
&& lcp
->dndx
> hcp
->dndx
)
1424 else if (!add
&& lcp
->dndx
== hcp
->dndx
)
1425 F_SET(lcp
, H_DELETED
);
1427 /* Now adjust on-page information. */
1428 if (lcp
->dpgno
== PGNO_INVALID
)
1430 lcp
->dup_tlen
+= len
;
1431 if (lcp
->dndx
> hcp
->dndx
)
1432 lcp
->dup_off
+= len
;
1434 lcp
->dup_tlen
-= len
;
1435 if (lcp
->dndx
> hcp
->dndx
)
1436 lcp
->dup_off
-= len
;
1440 DB_THREAD_UNLOCK(hp
->dbp
);
1445 * This function gets called when we create a duplicate handle for a
1446 * threaded DB. It should create the private part of the DB structure.
1448 * PUBLIC: int __ham_hdup __P((DB *, DB *));
1451 __ham_hdup(orig
, new)
1458 if ((hashp
= (HTAB
*)__db_malloc(sizeof(HTAB
))) == NULL
)
1461 new->internal
= hashp
;
1466 hashp
->hash
= ((HTAB
*)orig
->internal
)->hash
;
1467 if ((hashp
->split_buf
= (PAGE
*)__db_malloc(orig
->pgsize
)) == NULL
)
1469 hashp
->local_errno
= 0;
1470 hashp
->hash_accesses
= 0;
1471 hashp
->hash_collisions
= 0;
1472 hashp
->hash_expansions
= 0;
1473 hashp
->hash_overflows
= 0;
1474 hashp
->hash_bigpages
= 0;
1475 /* Initialize the cursor queue. */
1476 ret
= __ham_c_init(new, NULL
, &curs
);
1477 TAILQ_INSERT_TAIL(&new->curs_queue
, curs
, links
);