2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997, 1998
5 * Sleepycat Software. All rights reserved.
8 * Copyright (c) 1990, 1993, 1994
9 * The Regents of the University of California. All rights reserved.
11 * This code is derived from software contributed to Berkeley by
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 3. All advertising materials mentioning features or use of this software
23 * must display the following acknowledgement:
24 * This product includes software developed by the University of
25 * California, Berkeley and its contributors.
26 * 4. Neither the name of the University nor the names of its contributors
27 * may be used to endorse or promote products derived from this software
28 * without specific prior written permission.
30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
/* SCCS identification string, retrievable via what(1). */
static const char sccsid[] = "@(#)hash_dup.c 10.14 (Sleepycat) 5/7/98";
52 * Manipulation of duplicates for the hash package.
61 #ifndef NO_SYSTEM_INCLUDES
62 #include <sys/types.h>
71 static int __ham_check_move
__P((HTAB
*, HASH_CURSOR
*, int32_t));
72 static int __ham_dup_convert
__P((HTAB
*, HASH_CURSOR
*));
73 static int __ham_make_dup
__P((const DBT
*, DBT
*d
, void **, u_int32_t
*));
76 * Called from hash_access to add a duplicate key. nval is the new
77 * value that we want to add. The flags correspond to the flag values
78 * to cursor_put indicating where to add the new element.
80 * Case 1: The existing duplicate set already resides on a separate page.
81 * We can use common code for this.
82 * Case 2: The element is small enough to just be added to the existing set.
83 * Case 3: The element is large enough to be a big item, so we're going to
84 * have to push the set onto a new page.
85 * Case 4: The element is large enough to push the duplicate set onto a
88 * PUBLIC: int __ham_add_dup __P((HTAB *, HASH_CURSOR *, DBT *, u_int32_t));
/*
 * __ham_add_dup --
 *	Add a duplicate data item for the key/data pair the cursor (hcp)
 *	currently references.  nval is the new value; flags is the
 *	cursor_put flag (DB_CURRENT, DB_BEFORE, ...) saying where to add it.
 *
 * NOTE(review): this chunk is extraction-mangled -- statements are split
 * mid-token across lines and many original source lines are missing (see
 * the gaps in the embedded line numbers: declarations, case labels, and
 * returns are absent).  Code fragments are kept byte-for-byte below;
 * only comments have been added.
 */
91 __ham_add_dup(hashp
, hcp
, nval
, flags
)
/* del_len: bytes being replaced when DB_CURRENT targets an on-page set. */
98 u_int32_t del_len
, new_size
;
/* Replacing the current on-page duplicate: remember its stored length. */
102 if (flags
== DB_CURRENT
&& hcp
->dpgno
== PGNO_INVALID
)
103 del_len
= hcp
->dup_len
;
/*
 * Make sure the net growth (new duplicate size minus what we delete)
 * fits on this page; __ham_check_move may relocate the pair.
 */
107 if ((ret
= __ham_check_move(hashp
, hcp
,
108 (int32_t)DUP_SIZE(nval
->size
) - (int32_t)del_len
)) != 0)
112 * Check if resulting duplicate set is going to need to go
113 * onto a separate duplicate page. If so, convert the
114 * duplicate set and add the new one. After conversion,
115 * hcp->dndx is the first free ndx or the index of the
116 * current pointer into the duplicate set.
118 hk
= H_PAIRDATA(hcp
->pagep
, hcp
->bndx
);
/* Projected on-page size of the data item after this addition. */
119 new_size
= DUP_SIZE(nval
->size
) - del_len
+ LEN_HKEYDATA(hcp
->pagep
,
120 hashp
->hdr
->pagesize
, H_DATAINDEX(hcp
->bndx
));
123 * We convert to off-page duplicates if the item is a big item,
124 * the addition of the new item will make the set large, or
125 * if there isn't enough room on this page to add the next item.
127 if (HPAGE_PTYPE(hk
) != H_OFFDUP
&&
128 (HPAGE_PTYPE(hk
) == H_OFFPAGE
|| ISBIG(hashp
, new_size
) ||
129 DUP_SIZE(nval
->size
) - del_len
> P_FREESPACE(hcp
->pagep
))) {
131 if ((ret
= __ham_dup_convert(hashp
, hcp
)) != 0)
/* Re-fetch hk: the conversion rewrote the data item in place. */
134 hk
= H_PAIRDATA(hcp
->pagep
, hcp
->bndx
);
137 /* There are two separate cases here: on page and off page. */
138 if (HPAGE_PTYPE(hk
) != H_OFFDUP
) {
/* First duplicate for this key: turn the single item into a dup set. */
139 if (HPAGE_PTYPE(hk
) != H_DUPLICATE
) {
140 HPAGE_PTYPE(hk
) = H_DUPLICATE
;
142 pval
.data
= HKEYDATA_DATA(hk
);
143 pval
.size
= LEN_HDATA(hcp
->pagep
, hashp
->hdr
->pagesize
,
/* Rewrap the existing value in duplicate format and write it back. */
146 __ham_make_dup(&pval
, &tmp_val
, &hcp
->big_data
,
147 &hcp
->big_datalen
)) != 0 || (ret
=
148 __ham_replpair(hashp
, hcp
, &tmp_val
, 1)) != 0)
152 /* Now make the new entry a duplicate. */
153 if ((ret
= __ham_make_dup(nval
,
154 &tmp_val
, &hcp
->big_data
, &hcp
->big_datalen
)) != 0)
/*
 * Position the partial put (tmp_val.doff/dlen) according to the put
 * flag.  NOTE(review): the case labels themselves are missing from
 * this chunk.
 */
158 switch (flags
) { /* On page. */
163 tmp_val
.doff
= LEN_HDATA(hcp
->pagep
,
164 hashp
->hdr
->pagesize
, hcp
->bndx
);
/* Replace the current duplicate in place (doff at it, dlen covers it). */
167 tmp_val
.doff
= hcp
->dup_off
;
168 tmp_val
.dlen
= DUP_SIZE(hcp
->dup_len
);
171 tmp_val
.doff
= hcp
->dup_off
;
/* Insert just past the current duplicate. */
174 tmp_val
.doff
= hcp
->dup_off
+ DUP_SIZE(hcp
->dup_len
);
177 /* Add the duplicate. */
178 ret
= __ham_replpair(hashp
, hcp
, &tmp_val
, 0);
180 ret
= __ham_dirty_page(hashp
, hcp
->pagep
);
181 __ham_c_update(hcp
, hcp
->pgno
, tmp_val
.size
, 1, 1);
185 /* If we get here, then we're on duplicate pages. */
186 if (hcp
->dpgno
== PGNO_INVALID
) {
/* Pick up the duplicate-chain page number from the H_OFFDUP item. */
187 memcpy(&hcp
->dpgno
, HOFFDUP_PGNO(hk
), sizeof(db_pgno_t
));
194 * The only way that we are already on a dup page is
195 * if we just converted the on-page representation.
196 * In that case, we've only got one page of duplicates.
198 if (hcp
->dpagep
== NULL
&& (ret
=
199 __db_dend(hashp
->dbp
, hcp
->dpgno
, &hcp
->dpagep
)) != 0)
/*
 * NOTE(review): this guard duplicates the one above; the branch lines
 * originally between them (orig. 200-203) are missing from this chunk.
 */
204 if (hcp
->dpagep
== NULL
&& (ret
=
205 __db_dend(hashp
->dbp
, hcp
->dpgno
, &hcp
->dpagep
)) != 0)
207 hcp
->dpgno
= PGNO(hcp
->dpagep
);
208 hcp
->dndx
= NUM_ENT(hcp
->dpagep
);
/* DB_CURRENT off-page: delete the item being replaced first. */
211 if ((ret
= __db_ditem(hashp
->dbp
, hcp
->dpagep
, hcp
->dndx
,
212 BKEYDATA_SIZE(GET_BKEYDATA(hcp
->dpagep
, hcp
->dndx
)->len
)))
216 case DB_BEFORE
: /* The default behavior is correct. */
/* Insert the new value into the off-page duplicate set. */
223 ret
= __db_dput(hashp
->dbp
,
224 nval
, &hcp
->dpagep
, &hcp
->dndx
, __ham_overflow_page
);
225 hcp
->pgno
= PGNO(hcp
->pagep
);
226 __ham_c_update(hcp
, hcp
->pgno
, nval
->size
, 1, 1);
231 * Convert an on-page set of duplicates to an offpage set of duplicates.
/*
 * __ham_dup_convert --
 *	Convert an on-page set of duplicates to an off-page set: allocate
 *	a P_DUPLICATE page, move the existing item(s) onto it, and replace
 *	the on-page data item with an H_OFFDUP reference to that page.
 *
 * NOTE(review): extraction-mangled chunk -- fragments kept byte-for-byte;
 * several original lines (declarations, case labels, error labels) are
 * missing.  Only comments were added.
 */
234 __ham_dup_convert(hashp
, hcp
)
246 * Create a new page for the duplicates.
249 __ham_overflow_page(hashp
->dbp
, P_DUPLICATE
, &hcp
->dpagep
)) != 0)
251 hcp
->dpagep
->type
= P_DUPLICATE
;
252 hcp
->dpgno
= PGNO(hcp
->dpagep
);
255 * Now put the duplicates onto the new page.
/* Dispatch on the current on-page representation of the data item. */
258 switch (HPAGE_PTYPE(H_PAIRDATA(hcp
->pagep
, hcp
->bndx
))) {
260 /* Simple case, one key on page; move it to dup page. */
263 LEN_HDATA(hcp
->pagep
, hashp
->hdr
->pagesize
, hcp
->bndx
);
264 dbt
.data
= HKEYDATA_DATA(H_PAIRDATA(hcp
->pagep
, hcp
->bndx
));
265 ret
= __db_pitem(hashp
->dbp
, hcp
->dpagep
,
266 (u_int32_t
)dndx
, BKEYDATA_SIZE(dbt
.size
), NULL
, &dbt
);
268 __ham_dirty_page(hashp
, hcp
->dpagep
);
271 /* Simple case, one key on page; move it to dup page. */
/* H_OFFPAGE case: wrap the offpage reference in a BOVERFLOW item. */
274 P_ENTRY(hcp
->pagep
, H_DATAINDEX(hcp
->bndx
)), HOFFPAGE_SIZE
);
275 B_TSET(bo
.type
, ho
.type
, 0);
278 dbt
.size
= BOVERFLOW_SIZE
;
281 ret
= __db_pitem(hashp
->dbp
, hcp
->dpagep
,
282 (u_int32_t
)dndx
, dbt
.size
, &dbt
, NULL
);
284 __ham_dirty_page(hashp
, hcp
->dpagep
);
/* H_DUPLICATE case: walk the packed set and move each element. */
287 p
= HKEYDATA_DATA(H_PAIRDATA(hcp
->pagep
, hcp
->bndx
));
289 LEN_HDATA(hcp
->pagep
, hashp
->hdr
->pagesize
, hcp
->bndx
);
291 for (dndx
= 0; p
< pend
; dndx
++) {
/* Each element is length-prefixed and length-suffixed (db_indx_t). */
292 memcpy(&len
, p
, sizeof(db_indx_t
));
294 p
+= sizeof(db_indx_t
);
296 p
+= len
+ sizeof(db_indx_t
);
297 ret
= __db_dput(hashp
->dbp
, &dbt
,
298 &hcp
->dpagep
, &dndx
, __ham_overflow_page
);
/* Unrecognized page type: report a page-format error. */
304 ret
= __db_pgfmt(hashp
->dbp
, (u_long
)hcp
->pgno
);
308 * Now attach this to the source page in place of
309 * the old duplicate item.
311 __ham_move_offpage(hashp
, hcp
->pagep
,
312 (u_int32_t
)H_DATAINDEX(hcp
->bndx
), hcp
->dpgno
);
314 /* Can probably just do a "put" here. */
315 ret
= __ham_dirty_page(hashp
, hcp
->pagep
);
/*
 * NOTE(review): appears to be error-path cleanup releasing the
 * partially-built duplicate page -- the label lines are missing here.
 */
317 (void)__ham_del_page(hashp
->dbp
, hcp
->dpagep
);
/*
 * __ham_make_dup --
 *	Build the on-page duplicate representation of notdup in *duplicate,
 *	using the caller's reusable buffer (bufp/sizep): the item's length
 *	is stored both before and after the data bytes, and the result is
 *	marked DB_DBT_PARTIAL so it can be spliced into an existing set.
 *
 * NOTE(review): extraction-mangled chunk -- declarations and returns are
 * missing; fragments kept byte-for-byte, only comments added.
 */
324 __ham_make_dup(notdup
, duplicate
, bufp
, sizep
)
330 db_indx_t tsize
, item_size
;
334 item_size
= (db_indx_t
)notdup
->size
;
/* Total size includes the leading and trailing length words. */
335 tsize
= DUP_SIZE(item_size
);
336 if ((ret
= __ham_init_dbt(duplicate
, tsize
, bufp
, sizep
)) != 0)
340 duplicate
->flags
= notdup
->flags
;
341 F_SET(duplicate
, DB_DBT_PARTIAL
);
/* Layout: [item_size][data bytes][item_size]. */
344 memcpy(p
, &item_size
, sizeof(db_indx_t
));
345 p
+= sizeof(db_indx_t
);
346 memcpy(p
, notdup
->data
, notdup
->size
);
/* NOTE(review): the advance of p past the data (orig. 347) is missing. */
348 memcpy(p
, &item_size
, sizeof(db_indx_t
));
351 duplicate
->dlen
= notdup
->size
;
/*
 * __ham_check_move --
 *	Check that the pair referenced by the cursor can grow by add_len
 *	bytes on its current page; if not, move the pair to a page in the
 *	bucket's overflow chain (adding a page when necessary), writing a
 *	log record for the move when logging is enabled, and repoint the
 *	cursor at the pair's new location.
 *
 * NOTE(review): extraction-mangled chunk -- fragments kept byte-for-byte;
 * many original lines (declarations, returns, else branches) are missing.
 */
357 __ham_check_move(hashp
, hcp
, add_len
)
366 u_int32_t new_datalen
, old_len
, rectype
;
371 * Check if we can do whatever we need to on this page. If not,
372 * then we'll have to move the current element to a new page.
374 hk
= H_PAIRDATA(hcp
->pagep
, hcp
->bndx
);
377 * If the item is already off page duplicates or an offpage item,
378 * then we know we can do whatever we need to do in-place
380 if (HPAGE_PTYPE(hk
) == H_OFFDUP
|| HPAGE_PTYPE(hk
) == H_OFFPAGE
)
/* old_len: current on-page size of the data item. */
384 LEN_HITEM(hcp
->pagep
, hashp
->hdr
->pagesize
, H_DATAINDEX(hcp
->bndx
));
385 new_datalen
= old_len
- HKEYDATA_SIZE(0) + add_len
;
388 * We need to add a new page under two conditions:
389 * 1. The addition makes the total data length cross the BIG
390 * threshold and the OFFDUP structure won't fit on this page.
391 * 2. The addition does not make the total data cross the
392 * threshold, but the new data won't fit on the page.
393 * If neither of these is true, then we can return.
395 if (ISBIG(hashp
, new_datalen
) && (old_len
> HOFFDUP_SIZE
||
396 HOFFDUP_SIZE
- old_len
<= P_FREESPACE(hcp
->pagep
)))
399 if (!ISBIG(hashp
, new_datalen
) &&
400 add_len
<= (int32_t)P_FREESPACE(hcp
->pagep
))
404 * If we get here, then we need to move the item to a new page.
405 * Check if there are more pages in the chain.
/* Space the moved item will need on the target page. */
408 new_datalen
= ISBIG(hashp
, new_datalen
) ?
409 HOFFDUP_SIZE
: HKEYDATA_SIZE(new_datalen
);
/* Walk the overflow chain looking for a page with enough free space. */
412 for (next_pgno
= NEXT_PGNO(hcp
->pagep
); next_pgno
!= PGNO_INVALID
;
413 next_pgno
= NEXT_PGNO(next_pagep
)) {
/* Release the previously fetched chain page before fetching the next. */
414 if (next_pagep
!= NULL
&&
415 (ret
= __ham_put_page(hashp
->dbp
, next_pagep
, 0)) != 0)
419 __ham_get_page(hashp
->dbp
, next_pgno
, &next_pagep
)) != 0)
422 if (P_FREESPACE(next_pagep
) >= new_datalen
)
426 /* No more pages, add one. */
427 if (next_pagep
== NULL
&&
428 (ret
= __ham_add_ovflpage(hashp
, hcp
->pagep
, 0, &next_pagep
)) != 0)
431 /* Add new page at the end of the chain. */
432 if (P_FREESPACE(next_pagep
) < new_datalen
&&
433 (ret
= __ham_add_ovflpage(hashp
, next_pagep
, 1, &next_pagep
)) != 0)
436 /* Copy the item to the new page. */
437 if (DB_LOGGING(hashp
->dbp
)) {
/* Describe the key for the log record: offpage vs. on-page form. */
442 H_PAIRKEY(hcp
->pagep
, hcp
->bndx
)) == H_OFFPAGE
) {
443 rectype
|= PAIR_KEYMASK
;
444 k
.data
= H_PAIRKEY(hcp
->pagep
, hcp
->bndx
);
445 k
.size
= HOFFPAGE_SIZE
;
448 HKEYDATA_DATA(H_PAIRKEY(hcp
->pagep
, hcp
->bndx
));
449 k
.size
= LEN_HKEY(hcp
->pagep
,
450 hashp
->hdr
->pagesize
, hcp
->bndx
);
/* Likewise for the data item. */
453 if (HPAGE_PTYPE(hk
) == H_OFFPAGE
) {
454 rectype
|= PAIR_DATAMASK
;
455 d
.data
= H_PAIRDATA(hcp
->pagep
, hcp
->bndx
);
456 d
.size
= HOFFPAGE_SIZE
;
459 HKEYDATA_DATA(H_PAIRDATA(hcp
->pagep
, hcp
->bndx
));
460 d
.size
= LEN_HDATA(hcp
->pagep
,
461 hashp
->hdr
->pagesize
, hcp
->bndx
);
/* Write the insert/delete log record describing the move. */
465 if ((ret
= __ham_insdel_log(hashp
->dbp
->dbenv
->lg_info
,
466 (DB_TXN
*)hashp
->dbp
->txn
, &new_lsn
, 0, rectype
,
467 hashp
->dbp
->log_fileid
, PGNO(next_pagep
),
468 (u_int32_t
)H_NUMPAIRS(next_pagep
), &LSN(next_pagep
),
472 /* Move lsn onto page. */
473 LSN(next_pagep
) = new_lsn
; /* Structure assignment. */
/* Physically copy the key and data items to the target page. */
476 __ham_copy_item(hashp
, hcp
->pagep
, H_KEYINDEX(hcp
->bndx
), next_pagep
);
477 __ham_copy_item(hashp
, hcp
->pagep
, H_DATAINDEX(hcp
->bndx
), next_pagep
);
479 /* Now delete the pair from the current page. */
480 ret
= __ham_del_pair(hashp
, hcp
, 0);
482 (void)__ham_put_page(hashp
->dbp
, hcp
->pagep
, 1);
/* Repoint the cursor at the moved pair (last pair on the new page). */
483 hcp
->pagep
= next_pagep
;
484 hcp
->pgno
= PGNO(hcp
->pagep
);
485 hcp
->bndx
= H_NUMPAIRS(hcp
->pagep
) - 1;
/* NOTE(review): presumably flags the table for expansion/split --
 * H_EXPAND semantics are not visible in this chunk; confirm. */
486 F_SET(hcp
, H_EXPAND
);
491 * Replace an onpage set of duplicates with the OFFDUP structure that
492 * references the duplicate page.
493 * XXX This is really just a special case of __onpage_replace; we should
494 * probably combine them.
495 * PUBLIC: void __ham_move_offpage __P((HTAB *, PAGE *, u_int32_t, db_pgno_t));
/*
 * __ham_move_offpage --
 *	Replace the on-page item at index ndx with an HOFFDUP entry
 *	referencing pgno: log the replacement, compact the page's data
 *	area by the size difference, fix up the offset table, and copy
 *	the new entry into place.
 *
 * NOTE(review): extraction-mangled chunk -- declarations and the
 * construction of 'od' (presumably the HOFFDUP record built from pgno)
 * are missing; fragments kept byte-for-byte, only comments added.
 */
498 __ham_move_offpage(hashp
, pagep
, ndx
, pgno
)
/* Log the replacement (old item vs. new HOFFDUP) when logging is on. */
514 if (DB_LOGGING(hashp
->dbp
)) {
516 new_dbt
.size
= HOFFDUP_SIZE
;
517 old_dbt
.data
= P_ENTRY(pagep
, ndx
);
518 old_dbt
.size
= LEN_HITEM(pagep
, hashp
->hdr
->pagesize
, ndx
);
519 (void)__ham_replace_log(hashp
->dbp
->dbenv
->lg_info
,
520 (DB_TXN
*)hashp
->dbp
->txn
, &LSN(pagep
), 0,
521 hashp
->dbp
->log_fileid
, PGNO(pagep
), (u_int32_t
)ndx
,
522 &LSN(pagep
), -1, &old_dbt
, &new_dbt
, 0);
/* Bytes reclaimed: old item size minus the smaller HOFFDUP entry. */
526 LEN_HITEM(pagep
, hashp
->hdr
->pagesize
, ndx
) - HOFFDUP_SIZE
;
/* Slide the data area up by 'shrink'; regions overlap, hence memmove. */
530 src
= (u_int8_t
*)(pagep
) + HOFFSET(pagep
);
531 memmove(src
+ shrink
, src
, pagep
->inp
[ndx
] - HOFFSET(pagep
));
532 HOFFSET(pagep
) += shrink
;
534 /* Update index table. */
535 for (i
= ndx
; i
< NUM_ENT(pagep
); i
++)
536 pagep
->inp
[i
] += shrink
;
539 /* Now copy the offdup entry onto the page. */
540 memcpy(P_ENTRY(pagep
, ndx
), &od
, HOFFDUP_SIZE
);