2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997, 1998
5 * Sleepycat Software. All rights reserved.
8 * Copyright (c) 1990, 1993, 1994, 1995, 1996
9 * Keith Bostic. All rights reserved.
12 * Copyright (c) 1990, 1993, 1994, 1995
13 * The Regents of the University of California. All rights reserved.
15 * This code is derived from software contributed to Berkeley by
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
26 * 3. All advertising materials mentioning features or use of this software
27 * must display the following acknowledgement:
28 * This product includes software developed by the University of
29 * California, Berkeley and its contributors.
30 * 4. Neither the name of the University nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
34 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50 static const char sccsid
[] = "@(#)db_overflow.c 10.11 (Sleepycat) 5/7/98";
53 #ifndef NO_SYSTEM_INCLUDES
54 #include <sys/types.h>
67 * Big key and data entries are stored on linked lists of pages. The initial
68 * reference is a structure with the total length of the item and the page
69 * number where it begins. Each entry in the linked list contains a pointer
70 * to the next page of data, and so on.
75 * Get an offpage item.
77 * PUBLIC: int __db_goff __P((DB *, DBT *,
78 * PUBLIC: u_int32_t, db_pgno_t, void **, u_int32_t *));
81 __db_goff(dbp
, dbt
, tlen
, pgno
, bpp
, bpsz
)
91 u_int32_t curoff
, needed
, start
;
96 * Check if the buffer is big enough; if it is not and we are
97 * allowed to malloc space, then we'll malloc it. If we are
98 * not (DB_DBT_USERMEM), then we'll set the dbt and return
101 if (F_ISSET(dbt
, DB_DBT_PARTIAL
)) {
110 * Allocate any necessary memory.
112 * XXX: Never allocate 0 bytes;
114 if (F_ISSET(dbt
, DB_DBT_USERMEM
)) {
115 if (needed
> dbt
->ulen
) {
119 } else if (F_ISSET(dbt
, DB_DBT_MALLOC
)) {
120 dbt
->data
= dbp
->db_malloc
== NULL
?
121 (void *)__db_malloc(needed
+ 1) :
122 (void *)dbp
->db_malloc(needed
+ 1);
123 if (dbt
->data
== NULL
)
125 } else if (*bpsz
== 0 || *bpsz
< needed
) {
126 *bpp
= (*bpp
== NULL
?
127 (void *)__db_malloc(needed
+ 1) :
128 (void *)__db_realloc(*bpp
, needed
+ 1));
137 * Step through the linked list of pages, copying the data on each
138 * one into the buffer. Never copy more than the total data length.
141 for (curoff
= 0, p
= dbt
->data
; pgno
!= P_INVALID
&& needed
> 0;) {
142 if ((ret
= memp_fget(dbp
->mpf
, &pgno
, 0, &h
)) != 0) {
143 (void)__db_pgerr(dbp
, pgno
);
146 /* Check if we need any bytes from this page. */
147 if (curoff
+ OV_LEN(h
) >= start
) {
148 src
= (u_int8_t
*)h
+ P_OVERHEAD
;
150 if (start
> curoff
) {
151 src
+= start
- curoff
;
152 bytes
-= start
- curoff
;
156 memcpy(p
, src
, bytes
);
162 memp_fput(dbp
->mpf
, h
, 0);
169 * Put an offpage item.
171 * PUBLIC: int __db_poff __P((DB *, const DBT *, db_pgno_t *,
172 * PUBLIC: int (*)(DB *, u_int32_t, PAGE **)));
175 __db_poff(dbp
, dbt
, pgnop
, newfunc
)
179 int (*newfunc
) __P((DB
*, u_int32_t
, PAGE
**));
182 DB_LSN new_lsn
, null_lsn
;
190 * Allocate pages and copy the key/data item into them. Calculate the
191 * number of bytes we get for pages we fill completely with a single
194 pagespace
= P_MAXSPACE(dbp
->pgsize
);
198 sz
= dbt
->size
; sz
> 0; p
+= pagespace
, sz
-= pagespace
) {
200 * Reduce pagespace so we terminate the loop correctly and
201 * don't copy too much data.
207 * Allocate and initialize a new page and copy all or part of
208 * the item onto the page. If sz is less than pagespace, we
209 * have a partial record.
211 if ((ret
= newfunc(dbp
, P_OVERFLOW
, &pagep
)) != 0)
213 if (DB_LOGGING(dbp
)) {
215 tmp_dbt
.size
= pagespace
;
217 if ((ret
= __db_big_log(dbp
->dbenv
->lg_info
, dbp
->txn
,
218 &new_lsn
, 0, DB_ADD_BIG
, dbp
->log_fileid
,
219 PGNO(pagep
), lastp
? PGNO(lastp
) : PGNO_INVALID
,
220 PGNO_INVALID
, &tmp_dbt
, &LSN(pagep
),
221 lastp
== NULL
? &null_lsn
: &LSN(lastp
),
225 /* Move lsn onto page. */
227 LSN(lastp
) = new_lsn
;
228 LSN(pagep
) = new_lsn
;
231 P_INIT(pagep
, dbp
->pgsize
,
232 PGNO(pagep
), PGNO_INVALID
, PGNO_INVALID
, 0, P_OVERFLOW
);
233 OV_LEN(pagep
) = pagespace
;
235 memcpy((u_int8_t
*)pagep
+ P_OVERHEAD
, p
, pagespace
);
238 * If this is the first entry, update the user's info.
239 * Otherwise, update the entry on the last page filled
240 * in and release that page.
243 *pgnop
= PGNO(pagep
);
245 lastp
->next_pgno
= PGNO(pagep
);
246 pagep
->prev_pgno
= PGNO(lastp
);
247 (void)memp_fput(dbp
->mpf
, lastp
, DB_MPOOL_DIRTY
);
251 (void)memp_fput(dbp
->mpf
, lastp
, DB_MPOOL_DIRTY
);
257 * Increment/decrement the reference count on an overflow page.
259 * PUBLIC: int __db_ovref __P((DB *, db_pgno_t, int32_t));
262 __db_ovref(dbp
, pgno
, adjust
)
270 if ((ret
= memp_fget(dbp
->mpf
, &pgno
, 0, &h
)) != 0) {
271 (void)__db_pgerr(dbp
, pgno
);
276 if ((ret
= __db_ovref_log(dbp
->dbenv
->lg_info
, dbp
->txn
,
277 &LSN(h
), 0, dbp
->log_fileid
, h
->pgno
, adjust
,
282 (void)memp_fput(dbp
->mpf
, h
, DB_MPOOL_DIRTY
);
288 * Delete an offpage chain of overflow pages.
290 * PUBLIC: int __db_doff __P((DB *, db_pgno_t, int (*)(DB *, PAGE *)));
293 __db_doff(dbp
, pgno
, freefunc
)
296 int (*freefunc
) __P((DB
*, PAGE
*));
304 if ((ret
= memp_fget(dbp
->mpf
, &pgno
, 0, &pagep
)) != 0) {
305 (void)__db_pgerr(dbp
, pgno
);
310 * If it's an overflow page and it's referenced by more than
311 * one key/data item, decrement the reference count and return.
313 if (TYPE(pagep
) == P_OVERFLOW
&& OV_REF(pagep
) > 1) {
314 (void)memp_fput(dbp
->mpf
, pagep
, 0);
315 return (__db_ovref(dbp
, pgno
, -1));
318 if (DB_LOGGING(dbp
)) {
319 tmp_dbt
.data
= (u_int8_t
*)pagep
+ P_OVERHEAD
;
320 tmp_dbt
.size
= OV_LEN(pagep
);
322 if ((ret
= __db_big_log(dbp
->dbenv
->lg_info
, dbp
->txn
,
323 &LSN(pagep
), 0, DB_REM_BIG
, dbp
->log_fileid
,
324 PGNO(pagep
), PREV_PGNO(pagep
), NEXT_PGNO(pagep
),
325 &tmp_dbt
, &LSN(pagep
), &null_lsn
, &null_lsn
)) != 0)
328 pgno
= pagep
->next_pgno
;
329 if ((ret
= freefunc(dbp
, pagep
)) != 0)
331 } while (pgno
!= PGNO_INVALID
);
338 * Match on overflow pages.
340 * Given a starting page number and a key, return <0, 0, >0 to indicate if the
341 * key on the page is less than, equal to or greater than the key specified.
343 * PUBLIC: int __db_moff __P((DB *, const DBT *, db_pgno_t));
346 __db_moff(dbp
, dbt
, pgno
)
352 u_int32_t cmp_bytes
, key_left
;
356 /* While there are both keys to compare. */
357 for (ret
= 0, p1
= dbt
->data
,
358 key_left
= dbt
->size
; key_left
> 0 && pgno
!= PGNO_INVALID
;) {
359 if (memp_fget(dbp
->mpf
, &pgno
, 0, &pagep
) != 0) {
360 (void)__db_pgerr(dbp
, pgno
);
361 return (0); /* No system error return. */
364 cmp_bytes
= OV_LEN(pagep
) < key_left
? OV_LEN(pagep
) : key_left
;
365 key_left
-= cmp_bytes
;
367 (u_int8_t
*)pagep
+ P_OVERHEAD
; cmp_bytes
-- > 0; ++p1
, ++p2
)
369 ret
= (long)*p1
- (long)*p2
;
372 pgno
= NEXT_PGNO(pagep
);
373 (void)memp_fput(dbp
->mpf
, pagep
, 0);
377 if (key_left
> 0) /* DBT is longer than page key. */
379 if (pgno
!= PGNO_INVALID
) /* DBT is shorter than page key. */