2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997, 1998
5 * Sleepycat Software. All rights reserved.
8 * Copyright (c) 1990, 1993, 1994
9 * Margo Seltzer. All rights reserved.
12 * Copyright (c) 1990, 1993, 1994
13 * The Regents of the University of California. All rights reserved.
15 * This code is derived from software contributed to Berkeley by
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
26 * 3. All advertising materials mentioning features or use of this software
27 * must display the following acknowledgement:
28 * This product includes software developed by the University of
29 * California, Berkeley and its contributors.
30 * 4. Neither the name of the University nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
34 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50 static const char sccsid
[] = "@(#)hash_page.c 10.40 (Sleepycat) 6/2/98";
57 * Page manipulation for hashing package.
69 #ifndef NO_SYSTEM_INCLUDES
70 #include <sys/types.h>
80 static int __ham_lock_bucket
__P((DB
*, HASH_CURSOR
*, db_lockmode_t
));
83 static void __account_page(HTAB
*, db_pgno_t
, int);
87 * PUBLIC: int __ham_item __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
90 __ham_item(hashp
, cursorp
, mode
)
98 if (F_ISSET(cursorp
, H_DELETED
))
100 F_CLR(cursorp
, H_OK
| H_NOMORE
);
102 /* Check if we need to get a page for this cursor. */
103 if ((ret
= __ham_get_cpage(hashp
, cursorp
, mode
)) != 0)
106 /* Check if we are looking for space in which to insert an item. */
107 if (cursorp
->seek_size
&& cursorp
->seek_found_page
== PGNO_INVALID
108 && cursorp
->seek_size
< P_FREESPACE(cursorp
->pagep
))
109 cursorp
->seek_found_page
= cursorp
->pgno
;
111 /* Check if we need to go on to the next page. */
112 if (F_ISSET(cursorp
, H_ISDUP
) && cursorp
->dpgno
== PGNO_INVALID
)
114 * ISDUP is set, and offset is at the beginning of the datum.
115 * We need to grab the length of the datum, then set the datum
116 * pointer to be the beginning of the datum.
118 memcpy(&cursorp
->dup_len
,
119 HKEYDATA_DATA(H_PAIRDATA(cursorp
->pagep
, cursorp
->bndx
)) +
120 cursorp
->dup_off
, sizeof(db_indx_t
));
121 else if (F_ISSET(cursorp
, H_ISDUP
)) {
122 /* Make sure we're not about to run off the page. */
123 if (cursorp
->dpagep
== NULL
&& (ret
= __ham_get_page(hashp
->dbp
,
124 cursorp
->dpgno
, &cursorp
->dpagep
)) != 0)
127 if (cursorp
->dndx
>= NUM_ENT(cursorp
->dpagep
)) {
128 if (NEXT_PGNO(cursorp
->dpagep
) == PGNO_INVALID
) {
129 if ((ret
= __ham_put_page(hashp
->dbp
,
130 cursorp
->dpagep
, 0)) != 0)
132 F_CLR(cursorp
, H_ISDUP
);
133 cursorp
->dpagep
= NULL
;
134 cursorp
->dpgno
= PGNO_INVALID
;
135 cursorp
->dndx
= NDX_INVALID
;
137 } else if ((ret
= __ham_next_cpage(hashp
, cursorp
,
138 NEXT_PGNO(cursorp
->dpagep
), 0, H_ISDUP
)) != 0)
143 if (cursorp
->bndx
>= (db_indx_t
)H_NUMPAIRS(cursorp
->pagep
)) {
144 /* Fetch next page. */
145 if (NEXT_PGNO(cursorp
->pagep
) == PGNO_INVALID
) {
146 F_SET(cursorp
, H_NOMORE
);
147 if (cursorp
->dpagep
!= NULL
&&
148 (ret
= __ham_put_page(hashp
->dbp
,
149 cursorp
->dpagep
, 0)) != 0)
151 cursorp
->dpgno
= PGNO_INVALID
;
152 return (DB_NOTFOUND
);
154 next_pgno
= NEXT_PGNO(cursorp
->pagep
);
156 if ((ret
= __ham_next_cpage(hashp
,
157 cursorp
, next_pgno
, 0, 0)) != 0)
161 F_SET(cursorp
, H_OK
);
166 * PUBLIC: int __ham_item_reset __P((HTAB *, HASH_CURSOR *));
169 __ham_item_reset(hashp
, cursorp
)
171 HASH_CURSOR
*cursorp
;
176 ret
= __ham_put_page(hashp
->dbp
, cursorp
->pagep
, 0);
180 __ham_item_init(cursorp
);
185 * PUBLIC: void __ham_item_init __P((HASH_CURSOR *));
188 __ham_item_init(cursorp
)
189 HASH_CURSOR
*cursorp
;
191 cursorp
->pagep
= NULL
;
192 cursorp
->bucket
= BUCKET_INVALID
;
194 cursorp
->bndx
= NDX_INVALID
;
195 cursorp
->pgno
= PGNO_INVALID
;
196 cursorp
->dpgno
= PGNO_INVALID
;
197 cursorp
->dndx
= NDX_INVALID
;
198 cursorp
->dpagep
= NULL
;
200 cursorp
->seek_size
= 0;
201 cursorp
->seek_found_page
= PGNO_INVALID
;
205 * PUBLIC: int __ham_item_done __P((HTAB *, HASH_CURSOR *, int));
208 __ham_item_done(hashp
, cursorp
, dirty
)
210 HASH_CURSOR
*cursorp
;
218 ret
= __ham_put_page(hashp
->dbp
, cursorp
->pagep
,
219 dirty
&& cursorp
->dpagep
== NULL
);
220 cursorp
->pagep
= NULL
;
223 t_ret
= __ham_put_page(hashp
->dbp
, cursorp
->dpagep
, dirty
);
224 cursorp
->dpagep
= NULL
;
226 if (ret
== 0 && t_ret
!= 0)
230 * If we are running with transactions, then we must
231 * not relinquish locks explicitly.
233 if (cursorp
->lock
&& hashp
->dbp
->txn
== NULL
)
234 t_ret
= lock_put(hashp
->dbp
->dbenv
->lk_info
, cursorp
->lock
);
239 * We don't throw out the page number since we might want to
240 * continue getting on this page.
242 return (ret
!= 0 ? ret
: t_ret
);
246 * Returns the last item in a bucket.
248 * PUBLIC: int __ham_item_last __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
251 __ham_item_last(hashp
, cursorp
, mode
)
253 HASH_CURSOR
*cursorp
;
258 if ((ret
= __ham_item_reset(hashp
, cursorp
)) != 0)
261 cursorp
->bucket
= hashp
->hdr
->max_bucket
;
262 F_SET(cursorp
, H_OK
);
263 return (__ham_item_prev(hashp
, cursorp
, mode
));
267 * PUBLIC: int __ham_item_first __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
270 __ham_item_first(hashp
, cursorp
, mode
)
272 HASH_CURSOR
*cursorp
;
277 if ((ret
= __ham_item_reset(hashp
, cursorp
)) != 0)
279 F_SET(cursorp
, H_OK
);
281 return (__ham_item_next(hashp
, cursorp
, mode
));
286 * Returns a pointer to key/data pair on a page. In the case of
287 * bigkeys, just returns the page number and index of the bigkey
290 * PUBLIC: int __ham_item_prev __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
293 __ham_item_prev(hashp
, cursorp
, mode
)
295 HASH_CURSOR
*cursorp
;
302 * There are N cases for backing up in a hash file.
303 * Case 1: In the middle of a page, no duplicates, just dec the index.
304 * Case 2: In the middle of a duplicate set, back up one.
305 * Case 3: At the beginning of a duplicate set, get out of set and
306 * back up to next key.
307 * Case 4: At the beginning of a page; go to previous page.
308 * Case 5: At the beginning of a bucket; go to prev bucket.
310 F_CLR(cursorp
, H_OK
| H_NOMORE
| H_DELETED
);
313 * First handle the duplicates. Either you'll get the key here
314 * or you'll exit the duplicate set and drop into the code below
315 * to handle backing up through keys.
317 if (F_ISSET(cursorp
, H_ISDUP
)) {
318 if (cursorp
->dpgno
== PGNO_INVALID
) {
319 /* Duplicates are on-page. */
320 if (cursorp
->dup_off
!= 0) {
321 if ((ret
= __ham_get_cpage(hashp
,
322 cursorp
, mode
)) != 0)
327 memcpy(&h
->dup_len
, HKEYDATA_DATA(
328 H_PAIRDATA(h
->pagep
, h
->bndx
))
329 + h
->dup_off
- sizeof(db_indx_t
),
332 DUP_SIZE(cursorp
->dup_len
);
334 return (__ham_item(hashp
,
338 } else if (cursorp
->dndx
> 0) { /* Duplicates are off-page. */
340 return (__ham_item(hashp
, cursorp
, mode
));
341 } else if ((ret
= __ham_get_cpage(hashp
, cursorp
, mode
)) != 0)
343 else if (PREV_PGNO(cursorp
->dpagep
) == PGNO_INVALID
) {
344 F_CLR(cursorp
, H_ISDUP
); /* End of dups */
345 cursorp
->dpgno
= PGNO_INVALID
;
346 if (cursorp
->dpagep
!= NULL
)
347 (void)__ham_put_page(hashp
->dbp
,
349 cursorp
->dpagep
= NULL
;
350 } else if ((ret
= __ham_next_cpage(hashp
, cursorp
,
351 PREV_PGNO(cursorp
->dpagep
), 0, H_ISDUP
)) != 0)
354 cursorp
->dndx
= NUM_ENT(cursorp
->pagep
) - 1;
355 return (__ham_item(hashp
, cursorp
, mode
));
360 * If we get here, we are not in a duplicate set, and just need
361 * to back up the cursor. There are still three cases:
362 * midpage, beginning of page, beginning of bucket.
365 if (cursorp
->bndx
== 0) { /* Beginning of page. */
366 if ((ret
= __ham_get_cpage(hashp
, cursorp
, mode
)) != 0)
368 cursorp
->pgno
= PREV_PGNO(cursorp
->pagep
);
369 if (cursorp
->pgno
== PGNO_INVALID
) {
370 /* Beginning of bucket. */
371 F_SET(cursorp
, H_NOMORE
);
372 return (DB_NOTFOUND
);
373 } else if ((ret
= __ham_next_cpage(hashp
,
374 cursorp
, cursorp
->pgno
, 0, 0)) != 0)
377 cursorp
->bndx
= H_NUMPAIRS(cursorp
->pagep
);
381 * Either we've got the cursor set up to be decremented, or we
382 * have to find the end of a bucket.
384 if (cursorp
->bndx
== NDX_INVALID
) {
385 if (cursorp
->pagep
== NULL
)
386 next_pgno
= BUCKET_TO_PAGE(hashp
, cursorp
->bucket
);
391 if ((ret
= __ham_next_cpage(hashp
,
392 cursorp
, next_pgno
, 0, 0)) != 0)
394 got_page
: next_pgno
= NEXT_PGNO(cursorp
->pagep
);
395 cursorp
->bndx
= H_NUMPAIRS(cursorp
->pagep
);
396 } while (next_pgno
!= PGNO_INVALID
);
398 if (cursorp
->bndx
== 0) {
399 /* Bucket was empty. */
400 F_SET(cursorp
, H_NOMORE
);
401 return (DB_NOTFOUND
);
407 return (__ham_item(hashp
, cursorp
, mode
));
411 * Sets the cursor to the next key/data pair on a page.
413 * PUBLIC: int __ham_item_next __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
416 __ham_item_next(hashp
, cursorp
, mode
)
418 HASH_CURSOR
*cursorp
;
422 * Deleted on-page duplicates are a weird case. If we delete the last
423 * one, then our cursor is at the very end of a duplicate set and
424 * we actually need to go on to the next key.
426 if (F_ISSET(cursorp
, H_DELETED
)) {
427 if (cursorp
->bndx
!= NDX_INVALID
&&
428 F_ISSET(cursorp
, H_ISDUP
) &&
429 cursorp
->dpgno
== PGNO_INVALID
&&
430 cursorp
->dup_tlen
== cursorp
->dup_off
) {
431 F_CLR(cursorp
, H_ISDUP
);
432 cursorp
->dpgno
= PGNO_INVALID
;
435 F_CLR(cursorp
, H_DELETED
);
436 } else if (cursorp
->bndx
== NDX_INVALID
) {
438 cursorp
->dpgno
= PGNO_INVALID
;
439 F_CLR(cursorp
, H_ISDUP
);
440 } else if (F_ISSET(cursorp
, H_ISDUP
) && cursorp
->dpgno
!= PGNO_INVALID
)
442 else if (F_ISSET(cursorp
, H_ISDUP
)) {
444 cursorp
->dup_off
+= DUP_SIZE(cursorp
->dup_len
);
445 if (cursorp
->dup_off
>= cursorp
->dup_tlen
) {
446 F_CLR(cursorp
, H_ISDUP
);
447 cursorp
->dpgno
= PGNO_INVALID
;
453 return (__ham_item(hashp
, cursorp
, mode
));
457 * PUBLIC: void __ham_putitem __P((PAGE *p, const DBT *, int));
459 * This is a little bit sleazy in that we're overloading the meaning
460 * of the H_OFFPAGE type here. When we recover deletes, we have the
461 * entire entry instead of having only the DBT, so we'll pass type
462 * H_OFFPAGE to mean, "copy the whole entry" as opposed to constructing
463 * an H_KEYDATA around it.
466 __ham_putitem(p
, dbt
, type
)
475 /* Put the item element on the page. */
476 if (type
== H_OFFPAGE
) {
477 off
= HOFFSET(p
) - dbt
->size
;
478 HOFFSET(p
) = p
->inp
[n
] = off
;
479 memcpy(P_ENTRY(p
, n
), dbt
->data
, dbt
->size
);
481 off
= HOFFSET(p
) - HKEYDATA_SIZE(dbt
->size
);
482 HOFFSET(p
) = p
->inp
[n
] = off
;
483 PUT_HKEYDATA(P_ENTRY(p
, n
), dbt
->data
, dbt
->size
, type
);
486 /* Adjust page info. */
491 * PUBLIC: void __ham_reputpair
492 * PUBLIC: __P((PAGE *p, u_int32_t, u_int32_t, const DBT *, const DBT *));
494 * This is a special case to restore a key/data pair to its original
495 * location during recovery. We are guaranteed that the pair fits
496 * on the page and is not the last pair on the page (because if it's
497 * the last pair, the normal insert works).
500 __ham_reputpair(p
, psize
, ndx
, key
, data
)
502 u_int32_t psize
, ndx
;
503 const DBT
*key
, *data
;
505 db_indx_t i
, movebytes
, newbytes
;
508 /* First shuffle the existing items up on the page. */
510 (ndx
== 0 ? psize
: p
->inp
[H_DATAINDEX(ndx
- 1)]) - HOFFSET(p
);
511 newbytes
= key
->size
+ data
->size
;
512 from
= (u_int8_t
*)p
+ HOFFSET(p
);
513 memmove(from
- newbytes
, from
, movebytes
);
516 * Adjust the indices and move them up 2 spaces. Note that we
517 * have to check the exit condition inside the loop just in case
518 * we are dealing with index 0 (db_indx_t's are unsigned).
520 for (i
= NUM_ENT(p
) - 1; ; i
-- ) {
521 p
->inp
[i
+ 2] = p
->inp
[i
] - newbytes
;
522 if (i
== H_KEYINDEX(ndx
))
526 /* Put the key and data on the page. */
527 p
->inp
[H_KEYINDEX(ndx
)] =
528 (ndx
== 0 ? psize
: p
->inp
[H_DATAINDEX(ndx
- 1)]) - key
->size
;
529 p
->inp
[H_DATAINDEX(ndx
)] = p
->inp
[H_KEYINDEX(ndx
)] - data
->size
;
530 memcpy(P_ENTRY(p
, H_KEYINDEX(ndx
)), key
->data
, key
->size
);
531 memcpy(P_ENTRY(p
, H_DATAINDEX(ndx
)), data
->data
, data
->size
);
533 /* Adjust page info. */
534 HOFFSET(p
) -= newbytes
;
540 * PUBLIC: int __ham_del_pair __P((HTAB *, HASH_CURSOR *, int));
543 * TODO: if the item is an offdup, delete the other pages and then remove
544 * the pair. If the offpage page is 0, then you can just remove the pair.
547 __ham_del_pair(hashp
, cursorp
, reclaim_page
)
549 HASH_CURSOR
*cursorp
;
552 DBT data_dbt
, key_dbt
;
554 DB_LSN new_lsn
, *n_lsn
, tmp_lsn
;
557 db_pgno_t chg_pgno
, pgno
;
560 dbenv
= hashp
->dbp
->dbenv
;
562 if (cursorp
->pagep
== NULL
&& (ret
=
563 __ham_get_page(hashp
->dbp
, cursorp
->pgno
, &cursorp
->pagep
)) != 0)
569 * We optimize for the normal case which is when neither the key nor
570 * the data are large. In this case, we write a single log record
571 * and do the delete. If either is large, we'll call __big_delete
572 * to remove the big item and then update the page to remove the
573 * entry referring to the big item.
576 if (HPAGE_PTYPE(H_PAIRKEY(p
, ndx
)) == H_OFFPAGE
) {
577 memcpy(&pgno
, HOFFPAGE_PGNO(P_ENTRY(p
, H_KEYINDEX(ndx
))),
579 ret
= __db_doff(hashp
->dbp
, pgno
, __ham_del_page
);
583 switch (HPAGE_PTYPE(H_PAIRDATA(p
, ndx
))) {
586 HOFFPAGE_PGNO(P_ENTRY(p
, H_DATAINDEX(ndx
))),
588 ret
= __db_doff(hashp
->dbp
, pgno
, __ham_del_page
);
592 HOFFDUP_PGNO(P_ENTRY(p
, H_DATAINDEX(ndx
))),
594 ret
= __db_ddup(hashp
->dbp
, pgno
, __ham_del_page
);
595 F_CLR(cursorp
, H_ISDUP
);
599 * If we delete a pair that is/was a duplicate, then
600 * we had better clear the flag so that we update the
601 * cursor appropriately.
603 F_CLR(cursorp
, H_ISDUP
);
610 /* Now log the delete off this page. */
611 if (DB_LOGGING(hashp
->dbp
)) {
612 key_dbt
.data
= P_ENTRY(p
, H_KEYINDEX(ndx
));
614 LEN_HITEM(p
, hashp
->hdr
->pagesize
, H_KEYINDEX(ndx
));
615 data_dbt
.data
= P_ENTRY(p
, H_DATAINDEX(ndx
));
617 LEN_HITEM(p
, hashp
->hdr
->pagesize
, H_DATAINDEX(ndx
));
619 if ((ret
= __ham_insdel_log(dbenv
->lg_info
,
620 (DB_TXN
*)hashp
->dbp
->txn
, &new_lsn
, 0, DELPAIR
,
621 hashp
->dbp
->log_fileid
, PGNO(p
), (u_int32_t
)ndx
,
622 &LSN(p
), &key_dbt
, &data_dbt
)) != 0)
625 /* Move lsn onto page. */
629 __ham_dpair(hashp
->dbp
, p
, ndx
);
632 * If we are locking, we will not maintain this.
633 * XXXX perhaps we can retain incremental numbers and apply them
636 if (!F_ISSET(hashp
->dbp
, DB_AM_LOCKING
))
640 * If we need to reclaim the page, then check if the page is empty.
641 * There are two cases. If it's empty and it's not the first page
642 * in the bucket (i.e., the bucket page) then we can simply remove
643 * it. If it is the first chain in the bucket, then we need to copy
644 * the second page into it and remove the second page.
646 if (reclaim_page
&& NUM_ENT(p
) == 0 && PREV_PGNO(p
) == PGNO_INVALID
&&
647 NEXT_PGNO(p
) != PGNO_INVALID
) {
648 PAGE
*n_pagep
, *nn_pagep
;
652 * First page in chain is empty and we know that there
653 * are more pages in the chain.
656 __ham_get_page(hashp
->dbp
, NEXT_PGNO(p
), &n_pagep
)) != 0)
659 if (NEXT_PGNO(n_pagep
) != PGNO_INVALID
) {
661 __ham_get_page(hashp
->dbp
, NEXT_PGNO(n_pagep
),
663 (void) __ham_put_page(hashp
->dbp
, n_pagep
, 0);
668 if (DB_LOGGING(hashp
->dbp
)) {
669 key_dbt
.data
= n_pagep
;
670 key_dbt
.size
= hashp
->hdr
->pagesize
;
671 if ((ret
= __ham_copypage_log(dbenv
->lg_info
,
672 (DB_TXN
*)hashp
->dbp
->txn
, &new_lsn
, 0,
673 hashp
->dbp
->log_fileid
, PGNO(p
), &LSN(p
),
674 PGNO(n_pagep
), &LSN(n_pagep
), NEXT_PGNO(n_pagep
),
675 NEXT_PGNO(n_pagep
) == PGNO_INVALID
? NULL
:
676 &LSN(nn_pagep
), &key_dbt
)) != 0)
679 /* Move lsn onto page. */
680 LSN(p
) = new_lsn
; /* Structure assignment. */
681 LSN(n_pagep
) = new_lsn
;
682 if (NEXT_PGNO(n_pagep
) != PGNO_INVALID
)
683 LSN(nn_pagep
) = new_lsn
;
685 if (NEXT_PGNO(n_pagep
) != PGNO_INVALID
) {
686 PREV_PGNO(nn_pagep
) = PGNO(p
);
687 (void)__ham_put_page(hashp
->dbp
, nn_pagep
, 1);
692 memcpy(p
, n_pagep
, hashp
->hdr
->pagesize
);
695 PREV_PGNO(p
) = PGNO_INVALID
;
698 * Cursor is advanced to the beginning of the next page.
701 cursorp
->pgno
= PGNO(p
);
702 F_SET(cursorp
, H_DELETED
);
704 if ((ret
= __ham_dirty_page(hashp
, p
)) != 0 ||
705 (ret
= __ham_del_page(hashp
->dbp
, n_pagep
)) != 0)
707 } else if (reclaim_page
&&
708 NUM_ENT(p
) == 0 && PREV_PGNO(p
) != PGNO_INVALID
) {
709 PAGE
*n_pagep
, *p_pagep
;
712 __ham_get_page(hashp
->dbp
, PREV_PGNO(p
), &p_pagep
)) != 0)
715 if (NEXT_PGNO(p
) != PGNO_INVALID
) {
716 if ((ret
= __ham_get_page(hashp
->dbp
,
717 NEXT_PGNO(p
), &n_pagep
)) != 0) {
718 (void)__ham_put_page(hashp
->dbp
, p_pagep
, 0);
721 n_lsn
= &LSN(n_pagep
);
727 NEXT_PGNO(p_pagep
) = NEXT_PGNO(p
);
729 PREV_PGNO(n_pagep
) = PGNO(p_pagep
);
731 if (DB_LOGGING(hashp
->dbp
)) {
732 if ((ret
= __ham_newpage_log(dbenv
->lg_info
,
733 (DB_TXN
*)hashp
->dbp
->txn
, &new_lsn
, 0, DELOVFL
,
734 hashp
->dbp
->log_fileid
, PREV_PGNO(p
), &LSN(p_pagep
),
735 PGNO(p
), &LSN(p
), NEXT_PGNO(p
), n_lsn
)) != 0)
738 /* Move lsn onto page. */
739 LSN(p_pagep
) = new_lsn
; /* Structure assignment. */
741 LSN(n_pagep
) = new_lsn
;
744 cursorp
->pgno
= NEXT_PGNO(p
);
747 * Since we are about to delete the cursor page and we have
748 * just moved the cursor, we need to make sure that the
749 * old page pointer isn't left hanging around in the cursor.
751 cursorp
->pagep
= NULL
;
753 ret
= __ham_del_page(hashp
->dbp
, p
);
754 if ((tret
= __ham_put_page(hashp
->dbp
, p_pagep
, 1)) != 0 &&
757 if (n_pagep
!= NULL
&&
758 (tret
= __ham_put_page(hashp
->dbp
, n_pagep
, 1)) != 0 &&
765 * Mark item deleted so that we don't try to return it, and
766 * so that we update the cursor correctly on the next call
769 F_SET(cursorp
, H_DELETED
);
770 chg_pgno
= cursorp
->pgno
;
771 ret
= __ham_dirty_page(hashp
, p
);
773 __ham_c_update(cursorp
, chg_pgno
, 0, 0, 0);
776 * Since we just deleted a pair from the master page, anything
777 * in cursorp->dpgno should be cleared.
779 cursorp
->dpgno
= PGNO_INVALID
;
781 F_CLR(cursorp
, H_OK
);
787 * Given the key data indicated by the cursor, replace part/all of it
788 * according to the fields in the dbt.
790 * PUBLIC: int __ham_replpair __P((HTAB *, HASH_CURSOR *, DBT *, u_int32_t));
793 __ham_replpair(hashp
, hcp
, dbt
, make_dup
)
799 DBT old_dbt
, tdata
, tmp
;
801 int32_t change
; /* XXX: Possible overflow. */
803 int is_big
, ret
, type
;
804 u_int8_t
*beg
, *dest
, *end
, *hk
, *src
;
807 * Big item replacements are handled in generic code.
808 * Items that fit on the current page fall into 4 classes.
809 * 1. On-page element, same size
810 * 2. On-page element, new is bigger (fits)
811 * 3. On-page element, new is bigger (does not fit)
812 * 4. On-page element, old is bigger
813 * Numbers 1, 2, and 4 are essentially the same (and should
814 * be the common case). We handle case 3 as a delete and
819 * We need to compute the number of bytes that we are adding or
820 * removing from the entry. Normally, we can simply substract
821 * the number of bytes we are replacing (dbt->dlen) from the
822 * number of bytes we are inserting (dbt->size). However, if
823 * we are doing a partial put off the end of a record, then this
824 * formula doesn't work, because we are essentially adding
827 change
= dbt
->size
- dbt
->dlen
;
829 hk
= H_PAIRDATA(hcp
->pagep
, hcp
->bndx
);
830 is_big
= HPAGE_PTYPE(hk
) == H_OFFPAGE
;
833 memcpy(&len
, HOFFPAGE_TLEN(hk
), sizeof(u_int32_t
));
835 len
= LEN_HKEYDATA(hcp
->pagep
,
836 hashp
->dbp
->pgsize
, H_DATAINDEX(hcp
->bndx
));
838 if (dbt
->doff
+ dbt
->dlen
> len
)
839 change
+= dbt
->doff
+ dbt
->dlen
- len
;
842 if (change
> (int32_t)P_FREESPACE(hcp
->pagep
) || is_big
) {
844 * Case 3 -- two subcases.
845 * A. This is not really a partial operation, but an overwrite.
846 * Simple del and add works.
847 * B. This is a partial and we need to construct the data that
848 * we are really inserting (yuck).
849 * In both cases, we need to grab the key off the page (in
850 * some cases we could do this outside of this routine; for
851 * cleanliness we do it here. If you happen to be on a big
852 * key, this could be a performance hit).
855 F_SET(&tmp
, DB_DBT_MALLOC
| DB_DBT_INTERNAL
);
857 __db_ret(hashp
->dbp
, hcp
->pagep
, H_KEYINDEX(hcp
->bndx
),
858 &tmp
, &hcp
->big_key
, &hcp
->big_keylen
)) != 0)
861 if (dbt
->doff
== 0 && dbt
->dlen
== len
) {
862 ret
= __ham_del_pair(hashp
, hcp
, 0);
864 ret
= __ham_add_el(hashp
,
865 hcp
, &tmp
, dbt
, H_KEYDATA
);
866 } else { /* Case B */
867 type
= HPAGE_PTYPE(hk
) != H_OFFPAGE
?
868 HPAGE_PTYPE(hk
) : H_KEYDATA
;
870 F_SET(&tdata
, DB_DBT_MALLOC
| DB_DBT_INTERNAL
);
872 if ((ret
= __db_ret(hashp
->dbp
, hcp
->pagep
,
873 H_DATAINDEX(hcp
->bndx
), &tdata
, &hcp
->big_data
,
874 &hcp
->big_datalen
)) != 0)
877 /* Now we can delete the item. */
878 if ((ret
= __ham_del_pair(hashp
, hcp
, 0)) != 0) {
879 __db_free(tdata
.data
);
883 /* Now shift old data around to make room for new. */
885 tdata
.data
= (void *)__db_realloc(tdata
.data
,
886 tdata
.size
+ change
);
887 memset((u_int8_t
*)tdata
.data
+ tdata
.size
,
890 if (tdata
.data
== NULL
)
892 end
= (u_int8_t
*)tdata
.data
+ tdata
.size
;
894 src
= (u_int8_t
*)tdata
.data
+ dbt
->doff
+ dbt
->dlen
;
895 if (src
< end
&& tdata
.size
> dbt
->doff
+ dbt
->dlen
) {
896 len
= tdata
.size
- dbt
->doff
- dbt
->dlen
;
898 memmove(dest
, src
, len
);
900 memcpy((u_int8_t
*)tdata
.data
+ dbt
->doff
,
901 dbt
->data
, dbt
->size
);
902 tdata
.size
+= change
;
904 /* Now add the pair. */
905 ret
= __ham_add_el(hashp
, hcp
, &tmp
, &tdata
, type
);
906 __db_free(tdata
.data
);
908 err
: __db_free(tmp
.data
);
913 * Set up pointer into existing data. Do it before the log
914 * message so we can use it inside of the log setup.
916 beg
= HKEYDATA_DATA(H_PAIRDATA(hcp
->pagep
, hcp
->bndx
));
920 * If we are going to have to move bytes at all, figure out
921 * all the parameters here. Then log the call before moving
924 if (DB_LOGGING(hashp
->dbp
)) {
926 old_dbt
.size
= dbt
->dlen
;
927 if ((ret
= __ham_replace_log(hashp
->dbp
->dbenv
->lg_info
,
928 (DB_TXN
*)hashp
->dbp
->txn
, &new_lsn
, 0,
929 hashp
->dbp
->log_fileid
, PGNO(hcp
->pagep
),
930 (u_int32_t
)H_DATAINDEX(hcp
->bndx
), &LSN(hcp
->pagep
),
931 (u_int32_t
)dbt
->doff
, &old_dbt
, dbt
, make_dup
)) != 0)
934 LSN(hcp
->pagep
) = new_lsn
; /* Structure assignment. */
937 __ham_onpage_replace(hcp
->pagep
, hashp
->dbp
->pgsize
,
938 (u_int32_t
)H_DATAINDEX(hcp
->bndx
), (int32_t)dbt
->doff
, change
, dbt
);
944 * Replace data on a page with new data, possibly growing or shrinking what's
945 * there. This is called on two different occasions. On one (from replpair)
946 * we are interested in changing only the data. On the other (from recovery)
947 * we are replacing the entire data (header and all) with a new element. In
948 * the latter case, the off argument is negative.
949 * pagep: the page that we're changing
950 * ndx: page index of the element that is growing/shrinking.
951 * off: Offset at which we are beginning the replacement.
952 * change: the number of bytes (+ or -) that the element is growing/shrinking.
953 * dbt: the new data that gets written at beg.
954 * PUBLIC: void __ham_onpage_replace __P((PAGE *, size_t, u_int32_t, int32_t,
955 * PUBLIC: int32_t, DBT *));
958 __ham_onpage_replace(pagep
, pgsize
, ndx
, off
, change
, dbt
)
968 u_int8_t
*src
, *dest
;
973 src
= (u_int8_t
*)(pagep
) + HOFFSET(pagep
);
975 len
= pagep
->inp
[ndx
] - HOFFSET(pagep
);
976 else if ((u_int32_t
)off
>= LEN_HKEYDATA(pagep
, pgsize
, ndx
)) {
977 len
= HKEYDATA_DATA(P_ENTRY(pagep
, ndx
)) +
978 LEN_HKEYDATA(pagep
, pgsize
, ndx
) - src
;
981 len
= (HKEYDATA_DATA(P_ENTRY(pagep
, ndx
)) + off
) - src
;
983 memmove(dest
, src
, len
);
985 memset(dest
+ len
, 0, change
);
987 /* Now update the indices. */
988 for (i
= ndx
; i
< NUM_ENT(pagep
); i
++)
989 pagep
->inp
[i
] -= change
;
990 HOFFSET(pagep
) -= change
;
993 memcpy(HKEYDATA_DATA(P_ENTRY(pagep
, ndx
)) + off
,
994 dbt
->data
, dbt
->size
);
996 memcpy(P_ENTRY(pagep
, ndx
), dbt
->data
, dbt
->size
);
1000 * PUBLIC: int __ham_split_page __P((HTAB *, u_int32_t, u_int32_t));
1003 __ham_split_page(hashp
, obucket
, nbucket
)
1005 u_int32_t obucket
, nbucket
;
1010 PAGE
**pp
, *old_pagep
, *temp_pagep
, *new_pagep
;
1012 db_pgno_t bucket_pgno
, next_pgno
;
1013 u_int32_t big_len
, len
;
1017 dbenv
= hashp
->dbp
->dbenv
;
1018 temp_pagep
= old_pagep
= new_pagep
= NULL
;
1020 bucket_pgno
= BUCKET_TO_PAGE(hashp
, obucket
);
1021 if ((ret
= __ham_get_page(hashp
->dbp
, bucket_pgno
, &old_pagep
)) != 0)
1023 if ((ret
= __ham_new_page(hashp
, BUCKET_TO_PAGE(hashp
, nbucket
), P_HASH
,
1027 temp_pagep
= hashp
->split_buf
;
1028 memcpy(temp_pagep
, old_pagep
, hashp
->hdr
->pagesize
);
1030 if (DB_LOGGING(hashp
->dbp
)) {
1031 page_dbt
.size
= hashp
->hdr
->pagesize
;
1032 page_dbt
.data
= old_pagep
;
1033 if ((ret
= __ham_splitdata_log(dbenv
->lg_info
,
1034 (DB_TXN
*)hashp
->dbp
->txn
, &new_lsn
, 0,
1035 hashp
->dbp
->log_fileid
, SPLITOLD
, PGNO(old_pagep
),
1036 &page_dbt
, &LSN(old_pagep
))) != 0)
1040 P_INIT(old_pagep
, hashp
->hdr
->pagesize
, PGNO(old_pagep
), PGNO_INVALID
,
1041 PGNO_INVALID
, 0, P_HASH
);
1043 if (DB_LOGGING(hashp
->dbp
))
1044 LSN(old_pagep
) = new_lsn
; /* Structure assignment. */
1049 while (temp_pagep
!= NULL
) {
1050 for (n
= 0; n
< (db_indx_t
)H_NUMPAIRS(temp_pagep
); n
++) {
1052 __db_ret(hashp
->dbp
, temp_pagep
, H_KEYINDEX(n
),
1053 &key
, &big_buf
, &big_len
)) != 0)
1056 if (__ham_call_hash(hashp
, key
.data
, key
.size
)
1063 * Figure out how many bytes we need on the new
1064 * page to store the key/data pair.
1067 len
= LEN_HITEM(temp_pagep
, hashp
->hdr
->pagesize
,
1069 LEN_HITEM(temp_pagep
, hashp
->hdr
->pagesize
,
1071 2 * sizeof(db_indx_t
);
1073 if (P_FREESPACE(*pp
) < len
) {
1074 if (DB_LOGGING(hashp
->dbp
)) {
1075 page_dbt
.size
= hashp
->hdr
->pagesize
;
1076 page_dbt
.data
= *pp
;
1077 if ((ret
= __ham_splitdata_log(
1079 (DB_TXN
*)hashp
->dbp
->txn
,
1081 hashp
->dbp
->log_fileid
, SPLITNEW
,
1082 PGNO(*pp
), &page_dbt
,
1087 if ((ret
= __ham_add_ovflpage(hashp
,
1091 __ham_copy_item(hashp
, temp_pagep
, H_KEYINDEX(n
), *pp
);
1092 __ham_copy_item(hashp
, temp_pagep
, H_DATAINDEX(n
), *pp
);
1094 next_pgno
= NEXT_PGNO(temp_pagep
);
1096 /* Clear temp_page; if it's a link overflow page, free it. */
1097 if (PGNO(temp_pagep
) != bucket_pgno
&& (ret
=
1098 __ham_del_page(hashp
->dbp
, temp_pagep
)) != 0)
1101 if (next_pgno
== PGNO_INVALID
)
1104 __ham_get_page(hashp
->dbp
, next_pgno
, &temp_pagep
)) != 0)
1107 if (temp_pagep
!= NULL
&& DB_LOGGING(hashp
->dbp
)) {
1108 page_dbt
.size
= hashp
->hdr
->pagesize
;
1109 page_dbt
.data
= temp_pagep
;
1110 if ((ret
= __ham_splitdata_log(dbenv
->lg_info
,
1111 (DB_TXN
*)hashp
->dbp
->txn
, &new_lsn
, 0,
1112 hashp
->dbp
->log_fileid
, SPLITOLD
, PGNO(temp_pagep
),
1113 &page_dbt
, &LSN(temp_pagep
))) != 0)
1115 LSN(temp_pagep
) = new_lsn
;
1118 if (big_buf
!= NULL
)
1122 * If the original bucket spanned multiple pages, then we've got
1123 * a pointer to a page that used to be on the bucket chain. It
1124 * should be deleted.
1126 if (temp_pagep
!= NULL
&& PGNO(temp_pagep
) != bucket_pgno
&&
1127 (ret
= __ham_del_page(hashp
->dbp
, temp_pagep
)) != 0)
1131 * Write new buckets out.
1133 if (DB_LOGGING(hashp
->dbp
)) {
1134 page_dbt
.size
= hashp
->hdr
->pagesize
;
1135 page_dbt
.data
= old_pagep
;
1136 if ((ret
= __ham_splitdata_log(dbenv
->lg_info
,
1137 (DB_TXN
*)hashp
->dbp
->txn
, &new_lsn
, 0,
1138 hashp
->dbp
->log_fileid
, SPLITNEW
, PGNO(old_pagep
),
1139 &page_dbt
, &LSN(old_pagep
))) != 0)
1141 LSN(old_pagep
) = new_lsn
;
1143 page_dbt
.data
= new_pagep
;
1144 if ((ret
= __ham_splitdata_log(dbenv
->lg_info
,
1145 (DB_TXN
*)hashp
->dbp
->txn
, &new_lsn
, 0,
1146 hashp
->dbp
->log_fileid
, SPLITNEW
, PGNO(new_pagep
),
1147 &page_dbt
, &LSN(new_pagep
))) != 0)
1149 LSN(new_pagep
) = new_lsn
;
1151 ret
= __ham_put_page(hashp
->dbp
, old_pagep
, 1);
1152 if ((tret
= __ham_put_page(hashp
->dbp
, new_pagep
, 1)) != 0 &&
1157 err
: if (old_pagep
!= NULL
)
1158 (void)__ham_put_page(hashp
->dbp
, old_pagep
, 1);
1159 if (new_pagep
!= NULL
)
1160 (void)__ham_put_page(hashp
->dbp
, new_pagep
, 1);
1161 if (temp_pagep
!= NULL
&& PGNO(temp_pagep
) != bucket_pgno
)
1162 (void)__ham_put_page(hashp
->dbp
, temp_pagep
, 1);
1168 * Add the given pair to the page. The page in question may already be
1169 * held (i.e. it was already gotten). If it is, then the page is passed
1170 * in via the pagep parameter. On return, pagep will contain the page
1171 * to which we just added something. This allows us to link overflow
1172 * pages and return the new page having correctly put the last page.
1174 * PUBLIC: int __ham_add_el
1175 * PUBLIC: __P((HTAB *, HASH_CURSOR *, const DBT *, const DBT *, int));
1178 __ham_add_el(hashp
, hcp
, key
, val
, type
)
1181 const DBT
*key
, *val
;
1184 const DBT
*pkey
, *pdata
;
1185 DBT key_dbt
, data_dbt
;
1187 HOFFPAGE doff
, koff
;
1188 db_pgno_t next_pgno
;
1189 u_int32_t data_size
, key_size
, pairsize
, rectype
;
1190 int do_expand
, is_keybig
, is_databig
, ret
;
1191 int key_type
, data_type
;
1195 if (hcp
->pagep
== NULL
&& (ret
= __ham_get_page(hashp
->dbp
,
1196 hcp
->seek_found_page
!= PGNO_INVALID
? hcp
->seek_found_page
:
1197 hcp
->pgno
, &hcp
->pagep
)) != 0)
1200 key_size
= HKEYDATA_PSIZE(key
->size
);
1201 data_size
= HKEYDATA_PSIZE(val
->size
);
1202 is_keybig
= ISBIG(hashp
, key
->size
);
1203 is_databig
= ISBIG(hashp
, val
->size
);
1205 key_size
= HOFFPAGE_PSIZE
;
1207 data_size
= HOFFPAGE_PSIZE
;
1209 pairsize
= key_size
+ data_size
;
1211 /* Advance to first page in chain with room for item. */
1212 while (H_NUMPAIRS(hcp
->pagep
) && NEXT_PGNO(hcp
->pagep
) !=
1215 * This may not be the end of the chain, but the pair may fit
1216 * anyway. Check if it's a bigpair that fits or a regular
1219 if (P_FREESPACE(hcp
->pagep
) >= pairsize
)
1221 next_pgno
= NEXT_PGNO(hcp
->pagep
);
1223 __ham_next_cpage(hashp
, hcp
, next_pgno
, 0, 0)) != 0)
1228 * Check if we need to allocate a new page.
1230 if (P_FREESPACE(hcp
->pagep
) < pairsize
) {
1232 if ((ret
= __ham_add_ovflpage(hashp
,
1233 hcp
->pagep
, 1, &hcp
->pagep
)) != 0)
1235 hcp
->pgno
= PGNO(hcp
->pagep
);
1241 hcp
->bndx
= H_NUMPAIRS(hcp
->pagep
);
1242 F_CLR(hcp
, H_DELETED
);
1244 if ((ret
= __db_poff(hashp
->dbp
,
1245 key
, &koff
.pgno
, __ham_overflow_page
)) != 0)
1247 koff
.type
= H_OFFPAGE
;
1248 koff
.tlen
= key
->size
;
1249 key_dbt
.data
= &koff
;
1250 key_dbt
.size
= sizeof(koff
);
1252 key_type
= H_OFFPAGE
;
1255 key_type
= H_KEYDATA
;
1259 if ((ret
= __db_poff(hashp
->dbp
,
1260 val
, &doff
.pgno
, __ham_overflow_page
)) != 0)
1262 doff
.type
= H_OFFPAGE
;
1263 doff
.tlen
= val
->size
;
1264 data_dbt
.data
= &doff
;
1265 data_dbt
.size
= sizeof(doff
);
1267 data_type
= H_OFFPAGE
;
1273 if (DB_LOGGING(hashp
->dbp
)) {
1276 rectype
|= PAIR_DATAMASK
;
1278 rectype
|= PAIR_KEYMASK
;
1280 if ((ret
= __ham_insdel_log(hashp
->dbp
->dbenv
->lg_info
,
1281 (DB_TXN
*)hashp
->dbp
->txn
, &new_lsn
, 0, rectype
,
1282 hashp
->dbp
->log_fileid
, PGNO(hcp
->pagep
),
1283 (u_int32_t
)H_NUMPAIRS(hcp
->pagep
),
1284 &LSN(hcp
->pagep
), pkey
, pdata
)) != 0)
1287 /* Move lsn onto page. */
1288 LSN(hcp
->pagep
) = new_lsn
; /* Structure assignment. */
1291 __ham_putitem(hcp
->pagep
, pkey
, key_type
);
1292 __ham_putitem(hcp
->pagep
, pdata
, data_type
);
1295 * For splits, we are going to update item_info's page number
1296 * field, so that we can easily return to the same page the
1297 * next time we come in here. For other operations, this shouldn't
1298 * matter, since odds are this is the last thing that happens before
1299 * we return to the user program.
1301 hcp
->pgno
= PGNO(hcp
->pagep
);
1304 * XXX Maybe keep incremental numbers here
1306 if (!F_ISSET(hashp
->dbp
, DB_AM_LOCKING
))
1307 hashp
->hdr
->nelem
++;
1309 if (do_expand
|| (hashp
->hdr
->ffactor
!= 0 &&
1310 (u_int32_t
)H_NUMPAIRS(hcp
->pagep
) > hashp
->hdr
->ffactor
))
1311 F_SET(hcp
, H_EXPAND
);
1317 * Special __putitem call used in splitting -- copies one entry to
1318 * another. Works for all types of hash entries (H_OFFPAGE, H_KEYDATA,
1319 * H_DUPLICATE, H_OFFDUP). Since we log splits at a high level, we
1320 * do not need to do any logging here.
1322 * PUBLIC: void __ham_copy_item __P((HTAB *, PAGE *, u_int32_t, PAGE *));
1325 __ham_copy_item(hashp
, src_page
, src_ndx
, dest_page
)
1335 * Copy the key and data entries onto this new page.
1337 src
= P_ENTRY(src_page
, src_ndx
);
1339 /* Set up space on dest. */
1340 len
= LEN_HITEM(src_page
, hashp
->hdr
->pagesize
, src_ndx
);
1341 HOFFSET(dest_page
) -= len
;
1342 dest_page
->inp
[NUM_ENT(dest_page
)] = HOFFSET(dest_page
);
1343 dest
= P_ENTRY(dest_page
, NUM_ENT(dest_page
));
1344 NUM_ENT(dest_page
)++;
1346 memcpy(dest
, src
, len
);
1352 * pointer on success
1355 * PUBLIC: int __ham_add_ovflpage __P((HTAB *, PAGE *, int, PAGE **));
1358 __ham_add_ovflpage(hashp
, pagep
, release
, pp
)
1369 dbenv
= hashp
->dbp
->dbenv
;
1371 if ((ret
= __ham_overflow_page(hashp
->dbp
, P_HASH
, &new_pagep
)) != 0)
1374 if (DB_LOGGING(hashp
->dbp
)) {
1375 if ((ret
= __ham_newpage_log(dbenv
->lg_info
,
1376 (DB_TXN
*)hashp
->dbp
->txn
, &new_lsn
, 0, PUTOVFL
,
1377 hashp
->dbp
->log_fileid
, PGNO(pagep
), &LSN(pagep
),
1378 PGNO(new_pagep
), &LSN(new_pagep
), PGNO_INVALID
, NULL
)) != 0)
1381 /* Move lsn onto page. */
1382 LSN(pagep
) = LSN(new_pagep
) = new_lsn
;
1384 NEXT_PGNO(pagep
) = PGNO(new_pagep
);
1385 PREV_PGNO(new_pagep
) = PGNO(pagep
);
1388 ret
= __ham_put_page(hashp
->dbp
, pagep
, 1);
1390 hashp
->hash_overflows
++;
1397 * PUBLIC: int __ham_new_page __P((HTAB *, u_int32_t, u_int32_t, PAGE **));
1400 __ham_new_page(hashp
, addr
, type
, pp
)
1402 u_int32_t addr
, type
;
1408 if ((ret
= memp_fget(hashp
->dbp
->mpf
,
1409 &addr
, DB_MPOOL_CREATE
, &pagep
)) != 0)
1413 __account_page(hashp
, addr
, 1);
1415 /* This should not be necessary because page-in should do it. */
1417 hashp
->hdr
->pagesize
, addr
, PGNO_INVALID
, PGNO_INVALID
, 0, type
);
1424 * PUBLIC: int __ham_del_page __P((DB *, PAGE *));
1427 __ham_del_page(dbp
, pagep
)
1435 hashp
= (HTAB
*)dbp
->internal
;
1437 DIRTY_META(hashp
, ret
);
1440 __db_err(hashp
->dbp
->dbenv
,
1441 "free_ovflpage: unable to lock meta data page %s\n",
1444 * If we are going to return an error, then we should free
1445 * the page, so it doesn't stay pinned forever.
1447 (void)__ham_put_page(hashp
->dbp
, pagep
, 0);
1451 if (DB_LOGGING(hashp
->dbp
)) {
1452 if ((ret
= __ham_newpgno_log(hashp
->dbp
->dbenv
->lg_info
,
1453 (DB_TXN
*)hashp
->dbp
->txn
, &new_lsn
, 0, DELPGNO
,
1454 hashp
->dbp
->log_fileid
, PGNO(pagep
), hashp
->hdr
->last_freed
,
1455 (u_int32_t
)TYPE(pagep
), NEXT_PGNO(pagep
), P_INVALID
,
1456 &LSN(pagep
), &hashp
->hdr
->lsn
)) != 0)
1459 hashp
->hdr
->lsn
= new_lsn
;
1460 LSN(pagep
) = new_lsn
;
1467 __pgno
= pagep
->pgno
;
1469 memset(pagep
, 0xff, dbp
->pgsize
);
1470 pagep
->pgno
= __pgno
;
1474 TYPE(pagep
) = P_INVALID
;
1475 NEXT_PGNO(pagep
) = hashp
->hdr
->last_freed
;
1476 hashp
->hdr
->last_freed
= PGNO(pagep
);
1478 return (__ham_put_page(hashp
->dbp
, pagep
, 1));
1483 * PUBLIC: int __ham_put_page __P((DB *, PAGE *, int32_t));
1486 __ham_put_page(dbp
, pagep
, is_dirty
)
1492 __account_page((HTAB
*)dbp
->cookie
,
1493 ((BKT
*)((char *)pagep
- sizeof(BKT
)))->pgno
, -1);
1495 return (memp_fput(dbp
->mpf
, pagep
, (is_dirty
? DB_MPOOL_DIRTY
: 0)));
1499 * __ham_dirty_page --
1500 * Mark a page dirty.
1502 * PUBLIC: int __ham_dirty_page __P((HTAB *, PAGE *));
1505 __ham_dirty_page(hashp
, pagep
)
1509 return (memp_fset(hashp
->dbp
->mpf
, pagep
, DB_MPOOL_DIRTY
));
1513 * PUBLIC: int __ham_get_page __P((DB *, db_pgno_t, PAGE **));
1516 __ham_get_page(dbp
, addr
, pagep
)
1523 ret
= memp_fget(dbp
->mpf
, &addr
, DB_MPOOL_CREATE
, pagep
);
1526 __account_page((HTAB
*)dbp
->internal
, addr
, 1);
1532 * PUBLIC: int __ham_overflow_page __P((DB *, u_int32_t, PAGE **));
1535 __ham_overflow_page(dbp
, type
, pp
)
1540 DB_LSN
*lsnp
, new_lsn
;
1543 db_pgno_t new_addr
, next_free
, newalloc_flag
;
1544 u_int32_t offset
, splitnum
;
1547 hashp
= (HTAB
*)dbp
->internal
;
1550 DIRTY_META(hashp
, ret
);
1555 * This routine is split up into two parts. First we have
1556 * to figure out the address of the new page that we are
1557 * allocating. Then we have to log the allocation. Only
1558 * after the log do we get to complete allocation of the
1561 new_addr
= hashp
->hdr
->last_freed
;
1562 if (new_addr
!= PGNO_INVALID
) {
1563 if ((ret
= __ham_get_page(hashp
->dbp
, new_addr
, &p
)) != 0)
1565 next_free
= NEXT_PGNO(p
);
1569 splitnum
= hashp
->hdr
->ovfl_point
;
1570 hashp
->hdr
->spares
[splitnum
]++;
1571 offset
= hashp
->hdr
->spares
[splitnum
] -
1572 (splitnum
? hashp
->hdr
->spares
[splitnum
- 1] : 0);
1573 new_addr
= PGNO_OF(hashp
, hashp
->hdr
->ovfl_point
, offset
);
1574 if (new_addr
> MAX_PAGES(hashp
)) {
1575 __db_err(hashp
->dbp
->dbenv
, "hash: out of file pages");
1576 hashp
->hdr
->spares
[splitnum
]--;
1579 next_free
= PGNO_INVALID
;
1585 if (DB_LOGGING(hashp
->dbp
)) {
1586 if ((ret
= __ham_newpgno_log(hashp
->dbp
->dbenv
->lg_info
,
1587 (DB_TXN
*)hashp
->dbp
->txn
, &new_lsn
, 0, ALLOCPGNO
,
1588 hashp
->dbp
->log_fileid
, new_addr
, next_free
,
1589 0, newalloc_flag
, type
, lsnp
, &hashp
->hdr
->lsn
)) != 0)
1592 hashp
->hdr
->lsn
= new_lsn
;
1598 /* We just took something off the free list, initialize it. */
1599 hashp
->hdr
->last_freed
= next_free
;
1600 P_INIT(p
, hashp
->hdr
->pagesize
, PGNO(p
), PGNO_INVALID
,
1601 PGNO_INVALID
, 0, (u_int8_t
)type
);
1603 /* Get the new page. */
1604 if ((ret
= __ham_new_page(hashp
, new_addr
, type
, &p
)) != 0)
1607 if (DB_LOGGING(hashp
->dbp
))
1616 * PUBLIC: #ifdef DEBUG
1617 * PUBLIC: db_pgno_t __bucket_to_page __P((HTAB *, db_pgno_t));
1621 __bucket_to_page(hashp
, n
)
1629 ret_val
+= hashp
->hdr
->spares
[__db_log2(n
+ 1) - 1];
1635 * Create a bunch of overflow pages at the current split point.
1636 * PUBLIC: void __ham_init_ovflpages __P((HTAB *));
1639 __ham_init_ovflpages(hp
)
1644 db_pgno_t last_pgno
, new_pgno
;
1645 u_int32_t i
, curpages
, numpages
;
1647 curpages
= hp
->hdr
->spares
[hp
->hdr
->ovfl_point
] -
1648 hp
->hdr
->spares
[hp
->hdr
->ovfl_point
- 1];
1649 numpages
= hp
->hdr
->ovfl_point
+ 1 - curpages
;
1651 last_pgno
= hp
->hdr
->last_freed
;
1652 new_pgno
= PGNO_OF(hp
, hp
->hdr
->ovfl_point
, curpages
+ 1);
1653 if (DB_LOGGING(hp
->dbp
)) {
1654 (void)__ham_ovfl_log(hp
->dbp
->dbenv
->lg_info
,
1655 (DB_TXN
*)hp
->dbp
->txn
, &new_lsn
, 0,
1656 hp
->dbp
->log_fileid
, new_pgno
,
1657 numpages
, last_pgno
, hp
->hdr
->ovfl_point
, &hp
->hdr
->lsn
);
1658 hp
->hdr
->lsn
= new_lsn
;
1662 hp
->hdr
->spares
[hp
->hdr
->ovfl_point
] += numpages
;
1663 for (i
= numpages
; i
> 0; i
--) {
1664 if (__ham_new_page(hp
,
1665 PGNO_OF(hp
, hp
->hdr
->ovfl_point
, curpages
+ i
),
1666 P_INVALID
, &p
) != 0)
1669 NEXT_PGNO(p
) = last_pgno
;
1670 last_pgno
= PGNO(p
);
1671 (void)__ham_put_page(hp
->dbp
, p
, 1);
1673 hp
->hdr
->last_freed
= last_pgno
;
1677 * PUBLIC: int __ham_get_cpage __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
1680 __ham_get_cpage(hashp
, hcp
, mode
)
1687 if (hcp
->lock
== 0 && F_ISSET(hashp
->dbp
, DB_AM_LOCKING
) &&
1688 (ret
= __ham_lock_bucket(hashp
->dbp
, hcp
, mode
)) != 0)
1691 if (hcp
->pagep
== NULL
) {
1692 if (hcp
->pgno
== PGNO_INVALID
) {
1693 hcp
->pgno
= BUCKET_TO_PAGE(hashp
, hcp
->bucket
);
1698 __ham_get_page(hashp
->dbp
, hcp
->pgno
, &hcp
->pagep
)) != 0)
1702 if (hcp
->dpgno
!= PGNO_INVALID
&& hcp
->dpagep
== NULL
)
1704 __ham_get_page(hashp
->dbp
, hcp
->dpgno
, &hcp
->dpagep
)) != 0)
1710 * Get a new page at the cursor, putting the last page if necessary.
1711 * If the flag is set to H_ISDUP, then we are talking about the
1712 * duplicate page, not the main page.
1714 * PUBLIC: int __ham_next_cpage
1715 * PUBLIC: __P((HTAB *, HASH_CURSOR *, db_pgno_t, int, u_int32_t));
1718 __ham_next_cpage(hashp
, hcp
, pgno
, dirty
, flags
)
1728 if (LF_ISSET(H_ISDUP
) && hcp
->dpagep
!= NULL
&&
1729 (ret
= __ham_put_page(hashp
->dbp
, hcp
->dpagep
, dirty
)) != 0)
1731 else if (!LF_ISSET(H_ISDUP
) && hcp
->pagep
!= NULL
&&
1732 (ret
= __ham_put_page(hashp
->dbp
, hcp
->pagep
, dirty
)) != 0)
1735 if ((ret
= __ham_get_page(hashp
->dbp
, pgno
, &p
)) != 0)
1738 if (LF_ISSET(H_ISDUP
)) {
1752 * __ham_lock_bucket --
1753 * Get the lock on a particular bucket.
1756 __ham_lock_bucket(dbp
, hcp
, mode
)
1764 * What a way to trounce on the memory system. It might be
1765 * worth copying the lk_info into the hashp.
1768 dbp
->lock
.pgno
= (db_pgno_t
)(hcp
->bucket
);
1769 ret
= lock_get(dbp
->dbenv
->lk_info
,
1770 dbp
->txn
== NULL
? dbp
->locker
: dbp
->txn
->txnid
, 0,
1771 &dbp
->lock_dbt
, mode
, &hcp
->lock
);
1773 return (ret
< 0 ? EAGAIN
: ret
);
1778 * Delete a pair on a page, paying no attention to what the pair
1779 * represents. The caller is responsible for freeing up duplicates
1780 * or offpage entries that might be referenced by this pair.
1782 * PUBLIC: void __ham_dpair __P((DB *, PAGE *, u_int32_t));
1785 __ham_dpair(dbp
, p
, pndx
)
1791 u_int8_t
*dest
, *src
;
1794 * Compute "delta", the amount we have to shift all of the
1795 * offsets. To find the delta, we just need to calculate
1796 * the size of the pair of elements we are removing.
1798 delta
= H_PAIRSIZE(p
, dbp
->pgsize
, pndx
);
1801 * The hard case: we want to remove something other than
1802 * the last item on the page. We need to shift data and
1805 if ((db_indx_t
)pndx
!= H_NUMPAIRS(p
) - 1) {
1807 * Move the data: src is the first occupied byte on
1808 * the page. (Length is delta.)
1810 src
= (u_int8_t
*)p
+ HOFFSET(p
);
1813 * Destination is delta bytes beyond src. This might
1814 * be an overlapping copy, so we have to use memmove.
1817 memmove(dest
, src
, p
->inp
[H_DATAINDEX(pndx
)] - HOFFSET(p
));
1820 /* Adjust the offsets. */
1821 for (n
= (db_indx_t
)pndx
; n
< (db_indx_t
)(H_NUMPAIRS(p
) - 1); n
++) {
1822 p
->inp
[H_KEYINDEX(n
)] = p
->inp
[H_KEYINDEX(n
+1)] + delta
;
1823 p
->inp
[H_DATAINDEX(n
)] = p
->inp
[H_DATAINDEX(n
+1)] + delta
;
1826 /* Adjust page metadata. */
1827 HOFFSET(p
) = HOFFSET(p
) + delta
;
1828 NUM_ENT(p
) = NUM_ENT(p
) - 2;
1833 __account_page(hashp
, pgno
, inout
)
1845 if (inout
== -1) /* XXX: Kluge */
1848 /* Find page in list. */
1849 for (i
= 0; i
< last
; i
++)
1850 if (list
[i
].pgno
== pgno
)
1854 list
[last
].times
= inout
;
1855 list
[last
].pgno
= pgno
;
1858 list
[i
].times
= inout
;
1859 if (list
[i
].times
== 0) {
1860 for (j
= i
; j
< last
; j
++)
1861 list
[j
] = list
[j
+ 1];
1864 for (i
= 0; i
< last
; i
++, list
[i
].times
++)
1865 if (list
[i
].times
> 20 &&
1866 !__is_bitmap_pgno(hashp
, list
[i
].pgno
))
1867 (void)fprintf(stderr
,
1868 "Warning: pg %lu has been out for %d times\n",
1869 (u_long
)list
[i
].pgno
, list
[i
].times
);
1871 #endif /* DEBUG_SLOW */