/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 1997, 1998
 *	Sleepycat Software.  All rights reserved.
 */
/*
 * Copyright (c) 1990, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Margo Seltzer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "config.h"

#ifndef lint
static const char sccsid[] = "@(#)hash_dup.c	10.14 (Sleepycat) 5/7/98";
#endif /* not lint */

/*
 * PACKAGE:  hashing
 *
 * DESCRIPTION:
 *	Manipulation of duplicates for the hash package.
 *
 * ROUTINES:
 *
 * External
 *	__ham_add_dup
 * Internal
 */

#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>

#include <string.h>
#endif

#include "db_int.h"
#include "db_page.h"
#include "hash.h"

static int __ham_check_move __P((HTAB *, HASH_CURSOR *, int32_t));
static int __ham_dup_convert __P((HTAB *, HASH_CURSOR *));
static int __ham_make_dup __P((const DBT *, DBT *d, void **, u_int32_t *));

/*
 * Called from hash_access to add a duplicate key.  nval is the new
 * value that we want to add.  The flags correspond to the flag values
 * to cursor_put indicating where to add the new element.
 * There are 4 cases.
 * Case 1: The existing duplicate set already resides on a separate page.
 *	   We can use common code for this.
 * Case 2: The element is small enough to just be added to the existing set.
 * Case 3: The element is large enough to be a big item, so we're going to
 *	   have to push the set onto a new page.
 * Case 4: The element is large enough to push the duplicate set onto a
 *	   separate page.
 *
 * PUBLIC: int __ham_add_dup __P((HTAB *, HASH_CURSOR *, DBT *, u_int32_t));
 */
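/*
 * In outline: case 1 falls straight through to the off-page code below
 * (HPAGE_PTYPE(hk) == H_OFFDUP); case 2 is handled by the on-page
 * partial put; cases 3 and 4 call __ham_dup_convert first, which moves
 * the set off-page, and then continue through the off-page code too.
 */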
int
__ham_add_dup(hashp, hcp, nval, flags)
	HTAB *hashp;
	HASH_CURSOR *hcp;
	DBT *nval;
	u_int32_t flags;
{
	DBT pval, tmp_val;
	u_int32_t del_len, new_size;
	int ret;
	u_int8_t *hk;

	if (flags == DB_CURRENT && hcp->dpgno == PGNO_INVALID)
		del_len = hcp->dup_len;
	else
		del_len = 0;

	if ((ret = __ham_check_move(hashp, hcp,
	    (int32_t)DUP_SIZE(nval->size) - (int32_t)del_len)) != 0)
		return (ret);

	/*
	 * Check if resulting duplicate set is going to need to go
	 * onto a separate duplicate page.  If so, convert the
	 * duplicate set and add the new one.  After conversion,
	 * hcp->dndx is the first free ndx or the index of the
	 * current pointer into the duplicate set.
	 */
	hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
	new_size = DUP_SIZE(nval->size) - del_len + LEN_HKEYDATA(hcp->pagep,
	    hashp->hdr->pagesize, H_DATAINDEX(hcp->bndx));

	/*
	 * We convert to off-page duplicates if the item is a big item,
	 * the addition of the new item will make the set large, or
	 * if there isn't enough room on this page to add the next item.
	 */
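	/*
	 * The three disjuncts below are the triggers for cases 3 and 4
	 * above: the current data item is already a big (H_OFFPAGE) item,
	 * the grown set would cross the big-item threshold (ISBIG), or
	 * this page simply lacks the room for the new element.
	 */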
	if (HPAGE_PTYPE(hk) != H_OFFDUP &&
	    (HPAGE_PTYPE(hk) == H_OFFPAGE || ISBIG(hashp, new_size) ||
	    DUP_SIZE(nval->size) - del_len > P_FREESPACE(hcp->pagep))) {

		if ((ret = __ham_dup_convert(hashp, hcp)) != 0)
			return (ret);
		else
			hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
	}

	/* There are two separate cases here: on page and off page. */
	if (HPAGE_PTYPE(hk) != H_OFFDUP) {
		if (HPAGE_PTYPE(hk) != H_DUPLICATE) {
			HPAGE_PTYPE(hk) = H_DUPLICATE;
			pval.flags = 0;
			pval.data = HKEYDATA_DATA(hk);
			pval.size = LEN_HDATA(hcp->pagep, hashp->hdr->pagesize,
			    hcp->bndx);
			if ((ret =
			    __ham_make_dup(&pval, &tmp_val, &hcp->big_data,
			    &hcp->big_datalen)) != 0 || (ret =
			    __ham_replpair(hashp, hcp, &tmp_val, 1)) != 0)
				return (ret);
		}

		/* Now make the new entry a duplicate. */
		if ((ret = __ham_make_dup(nval,
		    &tmp_val, &hcp->big_data, &hcp->big_datalen)) != 0)
			return (ret);

		tmp_val.dlen = 0;
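		/*
		 * tmp_val was built by __ham_make_dup as a partial DBT
		 * (DB_DBT_PARTIAL), so doff/dlen below select where in the
		 * existing duplicate string the new element lands: dlen
		 * stays 0 for a pure insertion and is set to the current
		 * element's full size only for DB_CURRENT, which
		 * overwrites that element.
		 */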
		switch (flags) {		/* On page. */
		case DB_KEYFIRST:
			tmp_val.doff = 0;
			break;
		case DB_KEYLAST:
			tmp_val.doff = LEN_HDATA(hcp->pagep,
			    hashp->hdr->pagesize, hcp->bndx);
			break;
		case DB_CURRENT:
			tmp_val.doff = hcp->dup_off;
			tmp_val.dlen = DUP_SIZE(hcp->dup_len);
			break;
		case DB_BEFORE:
			tmp_val.doff = hcp->dup_off;
			break;
		case DB_AFTER:
			tmp_val.doff = hcp->dup_off + DUP_SIZE(hcp->dup_len);
			break;
		}
		/* Add the duplicate. */
		ret = __ham_replpair(hashp, hcp, &tmp_val, 0);
		if (ret == 0)
			ret = __ham_dirty_page(hashp, hcp->pagep);
		__ham_c_update(hcp, hcp->pgno, tmp_val.size, 1, 1);
		return (ret);
	}

	/* If we get here, then we're on duplicate pages. */
	if (hcp->dpgno == PGNO_INVALID) {
		memcpy(&hcp->dpgno, HOFFDUP_PGNO(hk), sizeof(db_pgno_t));
		hcp->dndx = 0;
	}

	switch (flags) {
	case DB_KEYFIRST:
		/*
		 * The only way that we are already on a dup page is
		 * if we just converted the on-page representation.
		 * In that case, we've only got one page of duplicates.
		 */
		if (hcp->dpagep == NULL && (ret =
		    __db_dend(hashp->dbp, hcp->dpgno, &hcp->dpagep)) != 0)
			return (ret);
		hcp->dndx = 0;
		break;
	case DB_KEYLAST:
		if (hcp->dpagep == NULL && (ret =
		    __db_dend(hashp->dbp, hcp->dpgno, &hcp->dpagep)) != 0)
			return (ret);
		hcp->dpgno = PGNO(hcp->dpagep);
		hcp->dndx = NUM_ENT(hcp->dpagep);
		break;
	case DB_CURRENT:
		if ((ret = __db_ditem(hashp->dbp, hcp->dpagep, hcp->dndx,
		    BKEYDATA_SIZE(GET_BKEYDATA(hcp->dpagep, hcp->dndx)->len)))
		    != 0)
			return (ret);
		break;
	case DB_BEFORE:	/* The default behavior is correct. */
		break;
	case DB_AFTER:
		hcp->dndx++;
		break;
	}

	ret = __db_dput(hashp->dbp,
	    nval, &hcp->dpagep, &hcp->dndx, __ham_overflow_page);
	hcp->pgno = PGNO(hcp->pagep);
	__ham_c_update(hcp, hcp->pgno, nval->size, 1, 1);
	return (ret);
}

/*
 * Convert an on-page set of duplicates to an offpage set of duplicates.
 */
static int
__ham_dup_convert(hashp, hcp)
	HTAB *hashp;
	HASH_CURSOR *hcp;
{
	BOVERFLOW bo;
	DBT dbt;
	HOFFPAGE ho;
	db_indx_t dndx, len;
	int ret;
	u_int8_t *p, *pend;

	/*
	 * Create a new page for the duplicates.
	 */
	if ((ret =
	    __ham_overflow_page(hashp->dbp, P_DUPLICATE, &hcp->dpagep)) != 0)
		return (ret);
	hcp->dpagep->type = P_DUPLICATE;
	hcp->dpgno = PGNO(hcp->dpagep);

	/*
	 * Now put the duplicates onto the new page.
	 */
	dbt.flags = 0;
	switch (HPAGE_PTYPE(H_PAIRDATA(hcp->pagep, hcp->bndx))) {
	case H_KEYDATA:
		/* Simple case, one key on page; move it to dup page. */
		dndx = 0;
		dbt.size =
		    LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx);
		dbt.data = HKEYDATA_DATA(H_PAIRDATA(hcp->pagep, hcp->bndx));
		ret = __db_pitem(hashp->dbp, hcp->dpagep,
		    (u_int32_t)dndx, BKEYDATA_SIZE(dbt.size), NULL, &dbt);
		if (ret == 0)
			__ham_dirty_page(hashp, hcp->dpagep);
		break;
	case H_OFFPAGE:
		/* Simple case, one key on page; move it to dup page. */
		dndx = 0;
		memcpy(&ho,
		    P_ENTRY(hcp->pagep, H_DATAINDEX(hcp->bndx)), HOFFPAGE_SIZE);
		B_TSET(bo.type, ho.type, 0);
		bo.pgno = ho.pgno;
		bo.tlen = ho.tlen;
		dbt.size = BOVERFLOW_SIZE;
		dbt.data = &bo;

		ret = __db_pitem(hashp->dbp, hcp->dpagep,
		    (u_int32_t)dndx, dbt.size, &dbt, NULL);
		if (ret == 0)
			__ham_dirty_page(hashp, hcp->dpagep);
		break;
	case H_DUPLICATE:
		p = HKEYDATA_DATA(H_PAIRDATA(hcp->pagep, hcp->bndx));
		pend = p +
		    LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx);
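
		/*
		 * An on-page duplicate set is a byte string of
		 * length/data/length triples (see __ham_make_dup), so
		 * each pass consumes one leading db_indx_t, len data
		 * bytes, and one trailing db_indx_t.
		 */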
		for (dndx = 0; p < pend; dndx++) {
			memcpy(&len, p, sizeof(db_indx_t));
			dbt.size = len;
			p += sizeof(db_indx_t);
			dbt.data = p;
			p += len + sizeof(db_indx_t);
			ret = __db_dput(hashp->dbp, &dbt,
			    &hcp->dpagep, &dndx, __ham_overflow_page);
			if (ret != 0)
				break;
		}
		break;
	default:
		ret = __db_pgfmt(hashp->dbp, (u_long)hcp->pgno);
	}
	if (ret == 0) {
		/*
		 * Now attach this to the source page in place of
		 * the old duplicate item.
		 */
		__ham_move_offpage(hashp, hcp->pagep,
		    (u_int32_t)H_DATAINDEX(hcp->bndx), hcp->dpgno);

		/* Can probably just do a "put" here. */
		ret = __ham_dirty_page(hashp, hcp->pagep);
	} else {
		(void)__ham_del_page(hashp->dbp, hcp->dpagep);
		hcp->dpagep = NULL;
	}
	return (ret);
}
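
/*
 * Build the on-page form of a single duplicate element from a plain
 * DBT: the data is bracketed by its length on both sides,
 *
 *	+-----------+--------------+-----------+
 *	|    len    |     data     |    len    |
 *	| db_indx_t | notdup->size | db_indx_t |
 *	+-----------+--------------+-----------+
 *
 * which is the layout DUP_SIZE accounts for and the three memcpy calls
 * below produce.  The result is marked DB_DBT_PARTIAL so the caller can
 * splice it into an existing duplicate string via doff/dlen.
 */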
static int
__ham_make_dup(notdup, duplicate, bufp, sizep)
	const DBT *notdup;
	DBT *duplicate;
	void **bufp;
	u_int32_t *sizep;
{
	db_indx_t tsize, item_size;
	int ret;
	u_int8_t *p;

	item_size = (db_indx_t)notdup->size;
	tsize = DUP_SIZE(item_size);
	if ((ret = __ham_init_dbt(duplicate, tsize, bufp, sizep)) != 0)
		return (ret);

	duplicate->dlen = 0;
	duplicate->flags = notdup->flags;
	F_SET(duplicate, DB_DBT_PARTIAL);

	p = duplicate->data;
	memcpy(p, &item_size, sizeof(db_indx_t));
	p += sizeof(db_indx_t);
	memcpy(p, notdup->data, notdup->size);
	p += notdup->size;
	memcpy(p, &item_size, sizeof(db_indx_t));

	duplicate->doff = 0;
	duplicate->dlen = notdup->size;

	return (0);
}

static int
__ham_check_move(hashp, hcp, add_len)
	HTAB *hashp;
	HASH_CURSOR *hcp;
	int32_t add_len;
{
	DBT k, d;
	DB_LSN new_lsn;
	PAGE *next_pagep;
	db_pgno_t next_pgno;
	u_int32_t new_datalen, old_len, rectype;
	u_int8_t *hk;
	int ret;

	/*
	 * Check if we can do whatever we need to on this page.  If not,
	 * then we'll have to move the current element to a new page.
	 */
	hk = H_PAIRDATA(hcp->pagep, hcp->bndx);

	/*
	 * If the item is already off page duplicates or an offpage item,
	 * then we know we can do whatever we need to do in-place.
	 */
	if (HPAGE_PTYPE(hk) == H_OFFDUP || HPAGE_PTYPE(hk) == H_OFFPAGE)
		return (0);

	old_len =
	    LEN_HITEM(hcp->pagep, hashp->hdr->pagesize, H_DATAINDEX(hcp->bndx));
	new_datalen = old_len - HKEYDATA_SIZE(0) + add_len;

	/*
	 * We need to add a new page under two conditions:
	 * 1. The addition makes the total data length cross the BIG
	 *    threshold and the OFFDUP structure won't fit on this page.
	 * 2. The addition does not make the total data cross the
	 *    threshold, but the new data won't fit on the page.
	 * If neither of these is true, then we can return.
	 */
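	/*
	 * Each early return below negates one of those conditions: the
	 * first fires when the set goes big but the HOFFDUP stub does fit
	 * (the old item is larger than the stub, or the difference fits
	 * in the page's free space); the second when the set stays small
	 * and add_len fits directly.
	 */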
	if (ISBIG(hashp, new_datalen) && (old_len > HOFFDUP_SIZE ||
	    HOFFDUP_SIZE - old_len <= P_FREESPACE(hcp->pagep)))
		return (0);

	if (!ISBIG(hashp, new_datalen) &&
	    add_len <= (int32_t)P_FREESPACE(hcp->pagep))
		return (0);

	/*
	 * If we get here, then we need to move the item to a new page.
	 * Check if there are more pages in the chain.
	 */
	new_datalen = ISBIG(hashp, new_datalen) ?
	    HOFFDUP_SIZE : HKEYDATA_SIZE(new_datalen);
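
	/*
	 * Walk the bucket's overflow chain for the first page with room
	 * for the item's new on-page footprint (an HOFFDUP stub if the
	 * set is going big, a full HKEYDATA item otherwise), releasing
	 * each page as we move past it.
	 */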
	next_pagep = NULL;
	for (next_pgno = NEXT_PGNO(hcp->pagep); next_pgno != PGNO_INVALID;
	    next_pgno = NEXT_PGNO(next_pagep)) {
		if (next_pagep != NULL &&
		    (ret = __ham_put_page(hashp->dbp, next_pagep, 0)) != 0)
			return (ret);

		if ((ret =
		    __ham_get_page(hashp->dbp, next_pgno, &next_pagep)) != 0)
			return (ret);

		if (P_FREESPACE(next_pagep) >= new_datalen)
			break;
	}

	/* No more pages, add one. */
	if (next_pagep == NULL &&
	    (ret = __ham_add_ovflpage(hashp, hcp->pagep, 0, &next_pagep)) != 0)
		return (ret);

	/* Add new page at the end of the chain. */
	if (P_FREESPACE(next_pagep) < new_datalen &&
	    (ret = __ham_add_ovflpage(hashp, next_pagep, 1, &next_pagep)) != 0)
		return (ret);

	/* Copy the item to the new page. */
	if (DB_LOGGING(hashp->dbp)) {
		rectype = PUTPAIR;
		k.flags = 0;
		d.flags = 0;
		if (HPAGE_PTYPE(
		    H_PAIRKEY(hcp->pagep, hcp->bndx)) == H_OFFPAGE) {
			rectype |= PAIR_KEYMASK;
			k.data = H_PAIRKEY(hcp->pagep, hcp->bndx);
			k.size = HOFFPAGE_SIZE;
		} else {
			k.data =
			    HKEYDATA_DATA(H_PAIRKEY(hcp->pagep, hcp->bndx));
			k.size = LEN_HKEY(hcp->pagep,
			    hashp->hdr->pagesize, hcp->bndx);
		}

		if (HPAGE_PTYPE(hk) == H_OFFPAGE) {
			rectype |= PAIR_DATAMASK;
			d.data = H_PAIRDATA(hcp->pagep, hcp->bndx);
			d.size = HOFFPAGE_SIZE;
		} else {
			d.data =
			    HKEYDATA_DATA(H_PAIRDATA(hcp->pagep, hcp->bndx));
			d.size = LEN_HDATA(hcp->pagep,
			    hashp->hdr->pagesize, hcp->bndx);
		}

		if ((ret = __ham_insdel_log(hashp->dbp->dbenv->lg_info,
		    (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, rectype,
		    hashp->dbp->log_fileid, PGNO(next_pagep),
		    (u_int32_t)H_NUMPAIRS(next_pagep), &LSN(next_pagep),
		    &k, &d)) != 0)
			return (ret);

		/* Move lsn onto page. */
		LSN(next_pagep) = new_lsn;	/* Structure assignment. */
	}

	__ham_copy_item(hashp, hcp->pagep, H_KEYINDEX(hcp->bndx), next_pagep);
	__ham_copy_item(hashp, hcp->pagep, H_DATAINDEX(hcp->bndx), next_pagep);

	/* Now delete the pair from the current page. */
	ret = __ham_del_pair(hashp, hcp, 0);

	(void)__ham_put_page(hashp->dbp, hcp->pagep, 1);
	hcp->pagep = next_pagep;
	hcp->pgno = PGNO(hcp->pagep);
	hcp->bndx = H_NUMPAIRS(hcp->pagep) - 1;
	F_SET(hcp, H_EXPAND);
	return (ret);
}

/*
 * Replace an onpage set of duplicates with the OFFDUP structure that
 * references the duplicate page.
 *
 * XXX This is really just a special case of __onpage_replace; we should
 * probably combine them.
 *
 * PUBLIC: void __ham_move_offpage __P((HTAB *, PAGE *, u_int32_t, db_pgno_t));
 */
void
__ham_move_offpage(hashp, pagep, ndx, pgno)
	HTAB *hashp;
	PAGE *pagep;
	u_int32_t ndx;
	db_pgno_t pgno;
{
	DBT new_dbt;
	DBT old_dbt;
	HOFFDUP od;
	db_indx_t i;
	int32_t shrink;
	u_int8_t *src;

	od.type = H_OFFDUP;
	od.pgno = pgno;

	if (DB_LOGGING(hashp->dbp)) {
		new_dbt.data = &od;
		new_dbt.size = HOFFDUP_SIZE;
		old_dbt.data = P_ENTRY(pagep, ndx);
		old_dbt.size = LEN_HITEM(pagep, hashp->hdr->pagesize, ndx);
		(void)__ham_replace_log(hashp->dbp->dbenv->lg_info,
		    (DB_TXN *)hashp->dbp->txn, &LSN(pagep), 0,
		    hashp->dbp->log_fileid, PGNO(pagep), (u_int32_t)ndx,
		    &LSN(pagep), -1, &old_dbt, &new_dbt, 0);
	}
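
	/*
	 * Item data lives at the high end of the page, between
	 * HOFFSET(pagep) and the offsets recorded in pagep->inp[].
	 * Replacing the entry at ndx with the smaller HOFFDUP stub means
	 * sliding everything between HOFFSET and that entry's offset up
	 * by shrink bytes, then bumping the affected inp[] offsets.
	 */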
	shrink =
	    LEN_HITEM(pagep, hashp->hdr->pagesize, ndx) - HOFFDUP_SIZE;

	if (shrink != 0) {
		/* Copy data. */
		src = (u_int8_t *)(pagep) + HOFFSET(pagep);
		memmove(src + shrink, src, pagep->inp[ndx] - HOFFSET(pagep));
		HOFFSET(pagep) += shrink;

		/* Update index table. */
		for (i = ndx; i < NUM_ENT(pagep); i++)
			pagep->inp[i] += shrink;
	}

	/* Now copy the offdup entry onto the page. */
	memcpy(P_ENTRY(pagep, ndx), &od, HOFFDUP_SIZE);
}