Update.
[glibc.git] / db2 / db / db_overflow.c
blobd28740dcbeaf8bb921dba7e2e1f91ea6c53c7eef
1 /*-
2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997, 1998
5 * Sleepycat Software. All rights reserved.
6 */
7 /*
8 * Copyright (c) 1990, 1993, 1994, 1995, 1996
9 * Keith Bostic. All rights reserved.
12 * Copyright (c) 1990, 1993, 1994, 1995
13 * The Regents of the University of California. All rights reserved.
15 * This code is derived from software contributed to Berkeley by
16 * Mike Olson.
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
20 * are met:
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
26 * 3. All advertising materials mentioning features or use of this software
27 * must display the following acknowledgement:
28 * This product includes software developed by the University of
29 * California, Berkeley and its contributors.
30 * 4. Neither the name of the University nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
34 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44 * SUCH DAMAGE.
47 #include "config.h"
49 #ifndef lint
50 static const char sccsid[] = "@(#)db_overflow.c 10.11 (Sleepycat) 5/7/98";
51 #endif /* not lint */
53 #ifndef NO_SYSTEM_INCLUDES
54 #include <sys/types.h>
56 #include <errno.h>
57 #include <string.h>
58 #endif
60 #include "db_int.h"
61 #include "db_page.h"
62 #include "db_am.h"
65 * Big key/data code.
67 * Big key and data entries are stored on linked lists of pages. The initial
68 * reference is a structure with the total length of the item and the page
69 * number where it begins. Each entry in the linked list contains a pointer
70 * to the next page of data, and so on.
74 * __db_goff --
75 * Get an offpage item.
77 * PUBLIC: int __db_goff __P((DB *, DBT *,
78 * PUBLIC: u_int32_t, db_pgno_t, void **, u_int32_t *));
80 int
81 __db_goff(dbp, dbt, tlen, pgno, bpp, bpsz)
82 DB *dbp;
83 DBT *dbt;
84 u_int32_t tlen;
85 db_pgno_t pgno;
86 void **bpp;
87 u_int32_t *bpsz;
89 PAGE *h;
90 db_indx_t bytes;
91 u_int32_t curoff, needed, start;
92 u_int8_t *p, *src;
93 int ret;
96 * Check if the buffer is big enough; if it is not and we are
97 * allowed to malloc space, then we'll malloc it. If we are
98 * not (DB_DBT_USERMEM), then we'll set the dbt and return
99 * appropriately.
101 if (F_ISSET(dbt, DB_DBT_PARTIAL)) {
102 start = dbt->doff;
103 needed = dbt->dlen;
104 } else {
105 start = 0;
106 needed = tlen;
110 * Allocate any necessary memory.
112 * XXX: Never allocate 0 bytes;
114 if (F_ISSET(dbt, DB_DBT_USERMEM)) {
115 if (needed > dbt->ulen) {
116 dbt->size = needed;
117 return (ENOMEM);
119 } else if (F_ISSET(dbt, DB_DBT_MALLOC)) {
120 dbt->data = dbp->db_malloc == NULL ?
121 (void *)__db_malloc(needed + 1) :
122 (void *)dbp->db_malloc(needed + 1);
123 if (dbt->data == NULL)
124 return (ENOMEM);
125 } else if (*bpsz == 0 || *bpsz < needed) {
126 *bpp = (*bpp == NULL ?
127 (void *)__db_malloc(needed + 1) :
128 (void *)__db_realloc(*bpp, needed + 1));
129 if (*bpp == NULL)
130 return (ENOMEM);
131 *bpsz = needed + 1;
132 dbt->data = *bpp;
133 } else
134 dbt->data = *bpp;
137 * Step through the linked list of pages, copying the data on each
138 * one into the buffer. Never copy more than the total data length.
140 dbt->size = needed;
141 for (curoff = 0, p = dbt->data; pgno != P_INVALID && needed > 0;) {
142 if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) {
143 (void)__db_pgerr(dbp, pgno);
144 return (ret);
146 /* Check if we need any bytes from this page. */
147 if (curoff + OV_LEN(h) >= start) {
148 src = (u_int8_t *)h + P_OVERHEAD;
149 bytes = OV_LEN(h);
150 if (start > curoff) {
151 src += start - curoff;
152 bytes -= start - curoff;
154 if (bytes > needed)
155 bytes = needed;
156 memcpy(p, src, bytes);
157 p += bytes;
158 needed -= bytes;
160 curoff += OV_LEN(h);
161 pgno = h->next_pgno;
162 memp_fput(dbp->mpf, h, 0);
164 return (0);
168 * __db_poff --
169 * Put an offpage item.
171 * PUBLIC: int __db_poff __P((DB *, const DBT *, db_pgno_t *,
172 * PUBLIC: int (*)(DB *, u_int32_t, PAGE **)));
175 __db_poff(dbp, dbt, pgnop, newfunc)
176 DB *dbp;
177 const DBT *dbt;
178 db_pgno_t *pgnop;
179 int (*newfunc) __P((DB *, u_int32_t, PAGE **));
181 PAGE *pagep, *lastp;
182 DB_LSN new_lsn, null_lsn;
183 DBT tmp_dbt;
184 db_indx_t pagespace;
185 u_int32_t sz;
186 u_int8_t *p;
187 int ret;
190 * Allocate pages and copy the key/data item into them. Calculate the
191 * number of bytes we get for pages we fill completely with a single
192 * item.
194 pagespace = P_MAXSPACE(dbp->pgsize);
196 lastp = NULL;
197 for (p = dbt->data,
198 sz = dbt->size; sz > 0; p += pagespace, sz -= pagespace) {
200 * Reduce pagespace so we terminate the loop correctly and
201 * don't copy too much data.
203 if (sz < pagespace)
204 pagespace = sz;
207 * Allocate and initialize a new page and copy all or part of
208 * the item onto the page. If sz is less than pagespace, we
209 * have a partial record.
211 if ((ret = newfunc(dbp, P_OVERFLOW, &pagep)) != 0)
212 return (ret);
213 if (DB_LOGGING(dbp)) {
214 tmp_dbt.data = p;
215 tmp_dbt.size = pagespace;
216 ZERO_LSN(null_lsn);
217 if ((ret = __db_big_log(dbp->dbenv->lg_info, dbp->txn,
218 &new_lsn, 0, DB_ADD_BIG, dbp->log_fileid,
219 PGNO(pagep), lastp ? PGNO(lastp) : PGNO_INVALID,
220 PGNO_INVALID, &tmp_dbt, &LSN(pagep),
221 lastp == NULL ? &null_lsn : &LSN(lastp),
222 &null_lsn)) != 0)
223 return (ret);
225 /* Move lsn onto page. */
226 if (lastp)
227 LSN(lastp) = new_lsn;
228 LSN(pagep) = new_lsn;
231 P_INIT(pagep, dbp->pgsize,
232 PGNO(pagep), PGNO_INVALID, PGNO_INVALID, 0, P_OVERFLOW);
233 OV_LEN(pagep) = pagespace;
234 OV_REF(pagep) = 1;
235 memcpy((u_int8_t *)pagep + P_OVERHEAD, p, pagespace);
238 * If this is the first entry, update the user's info.
239 * Otherwise, update the entry on the last page filled
240 * in and release that page.
242 if (lastp == NULL)
243 *pgnop = PGNO(pagep);
244 else {
245 lastp->next_pgno = PGNO(pagep);
246 pagep->prev_pgno = PGNO(lastp);
247 (void)memp_fput(dbp->mpf, lastp, DB_MPOOL_DIRTY);
249 lastp = pagep;
251 (void)memp_fput(dbp->mpf, lastp, DB_MPOOL_DIRTY);
252 return (0);
256 * __db_ovref --
257 * Increment/decrement the reference count on an overflow page.
259 * PUBLIC: int __db_ovref __P((DB *, db_pgno_t, int32_t));
262 __db_ovref(dbp, pgno, adjust)
263 DB *dbp;
264 db_pgno_t pgno;
265 int32_t adjust;
267 PAGE *h;
268 int ret;
270 if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) {
271 (void)__db_pgerr(dbp, pgno);
272 return (ret);
275 if (DB_LOGGING(dbp))
276 if ((ret = __db_ovref_log(dbp->dbenv->lg_info, dbp->txn,
277 &LSN(h), 0, dbp->log_fileid, h->pgno, adjust,
278 &LSN(h))) != 0)
279 return (ret);
280 OV_REF(h) += adjust;
282 (void)memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY);
283 return (0);
287 * __db_doff --
288 * Delete an offpage chain of overflow pages.
290 * PUBLIC: int __db_doff __P((DB *, db_pgno_t, int (*)(DB *, PAGE *)));
293 __db_doff(dbp, pgno, freefunc)
294 DB *dbp;
295 db_pgno_t pgno;
296 int (*freefunc) __P((DB *, PAGE *));
298 PAGE *pagep;
299 DB_LSN null_lsn;
300 DBT tmp_dbt;
301 int ret;
303 do {
304 if ((ret = memp_fget(dbp->mpf, &pgno, 0, &pagep)) != 0) {
305 (void)__db_pgerr(dbp, pgno);
306 return (ret);
310 * If it's an overflow page and it's referenced by more than
311 * one key/data item, decrement the reference count and return.
313 if (TYPE(pagep) == P_OVERFLOW && OV_REF(pagep) > 1) {
314 (void)memp_fput(dbp->mpf, pagep, 0);
315 return (__db_ovref(dbp, pgno, -1));
318 if (DB_LOGGING(dbp)) {
319 tmp_dbt.data = (u_int8_t *)pagep + P_OVERHEAD;
320 tmp_dbt.size = OV_LEN(pagep);
321 ZERO_LSN(null_lsn);
322 if ((ret = __db_big_log(dbp->dbenv->lg_info, dbp->txn,
323 &LSN(pagep), 0, DB_REM_BIG, dbp->log_fileid,
324 PGNO(pagep), PREV_PGNO(pagep), NEXT_PGNO(pagep),
325 &tmp_dbt, &LSN(pagep), &null_lsn, &null_lsn)) != 0)
326 return (ret);
328 pgno = pagep->next_pgno;
329 if ((ret = freefunc(dbp, pagep)) != 0)
330 return (ret);
331 } while (pgno != PGNO_INVALID);
333 return (0);
337 * __db_moff --
338 * Match on overflow pages.
340 * Given a starting page number and a key, return <0, 0, >0 to indicate if the
341 * key on the page is less than, equal to or greater than the key specified.
343 * PUBLIC: int __db_moff __P((DB *, const DBT *, db_pgno_t));
346 __db_moff(dbp, dbt, pgno)
347 DB *dbp;
348 const DBT *dbt;
349 db_pgno_t pgno;
351 PAGE *pagep;
352 u_int32_t cmp_bytes, key_left;
353 u_int8_t *p1, *p2;
354 int ret;
356 /* While there are both keys to compare. */
357 for (ret = 0, p1 = dbt->data,
358 key_left = dbt->size; key_left > 0 && pgno != PGNO_INVALID;) {
359 if (memp_fget(dbp->mpf, &pgno, 0, &pagep) != 0) {
360 (void)__db_pgerr(dbp, pgno);
361 return (0); /* No system error return. */
364 cmp_bytes = OV_LEN(pagep) < key_left ? OV_LEN(pagep) : key_left;
365 key_left -= cmp_bytes;
366 for (p2 =
367 (u_int8_t *)pagep + P_OVERHEAD; cmp_bytes-- > 0; ++p1, ++p2)
368 if (*p1 != *p2) {
369 ret = (long)*p1 - (long)*p2;
370 break;
372 pgno = NEXT_PGNO(pagep);
373 (void)memp_fput(dbp->mpf, pagep, 0);
374 if (ret != 0)
375 return (ret);
377 if (key_left > 0) /* DBT is longer than page key. */
378 return (-1);
379 if (pgno != PGNO_INVALID) /* DBT is shorter than page key. */
380 return (1);
381 return (0);