Update.
[glibc.git] / db2 / btree / bt_open.c
blobf5974ec61ee70abb6ad6286832639d66430eee22
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 1997, 1998
 *	Sleepycat Software. All rights reserved.
 */
/*
 * Copyright (c) 1990, 1993, 1994, 1995, 1996
 *	Keith Bostic. All rights reserved.
 */
/*
 * Copyright (c) 1990, 1993, 1994, 1995
 *	The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Mike Olson.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
47 #include "config.h"
49 #ifndef lint
50 static const char sccsid[] = "@(#)bt_open.c 10.27 (Sleepycat) 5/6/98";
51 #endif /* not lint */
54 * Implementation of btree access method for 4.4BSD.
56 * The design here was originally based on that of the btree access method
57 * used in the Postgres database system at UC Berkeley. This implementation
58 * is wholly independent of the Postgres code.
61 #ifndef NO_SYSTEM_INCLUDES
62 #include <sys/types.h>
64 #include <errno.h>
65 #include <limits.h>
66 #include <string.h>
67 #endif
69 #include "db_int.h"
70 #include "db_page.h"
71 #include "btree.h"
73 static int __bam_keyalloc __P((BTREE *));
74 static int __bam_setmeta __P((DB *, BTREE *));
77 * __bam_open --
78 * Open a btree.
80 * PUBLIC: int __bam_open __P((DB *, DBTYPE, DB_INFO *));
82 int
83 __bam_open(dbp, type, dbinfo)
84 DB *dbp;
85 DBTYPE type;
86 DB_INFO *dbinfo;
88 BTREE *t;
89 int ret;
91 /* Allocate the btree internal structure. */
92 if ((t = (BTREE *)__db_calloc(1, sizeof(BTREE))) == NULL)
93 return (ENOMEM);
95 t->bt_sp = t->bt_csp = t->bt_stack;
96 t->bt_esp = t->bt_stack + sizeof(t->bt_stack) / sizeof(t->bt_stack[0]);
98 if ((type == DB_RECNO || F_ISSET(dbp, DB_BT_RECNUM)) &&
99 (ret = __bam_keyalloc(t)) != 0)
100 goto err;
103 * Intention is to make sure all of the user's selections are okay
104 * here and then use them without checking.
106 if (dbinfo != NULL) {
107 /* Minimum number of keys per page. */
108 if (dbinfo->bt_minkey == 0)
109 t->bt_minkey = DEFMINKEYPAGE;
110 else {
111 if (dbinfo->bt_minkey < 2)
112 goto einval;
113 t->bt_minkey = dbinfo->bt_minkey;
116 /* Maximum number of keys per page. */
117 if (dbinfo->bt_maxkey == 0)
118 t->bt_maxkey = 0;
119 else {
120 if (dbinfo->bt_maxkey < 1)
121 goto einval;
122 t->bt_maxkey = dbinfo->bt_maxkey;
126 * If no comparison, use default comparison. If no comparison
127 * and no prefix, use default prefix. (We can't default the
128 * prefix if the user supplies a comparison routine; shortening
129 * the keys may break their comparison algorithm.)
131 t->bt_compare = dbinfo->bt_compare == NULL ?
132 __bam_defcmp : dbinfo->bt_compare;
133 t->bt_prefix = dbinfo->bt_prefix == NULL ?
134 (dbinfo->bt_compare == NULL ?
135 __bam_defpfx : NULL) : dbinfo->bt_prefix;
136 } else {
137 t->bt_minkey = DEFMINKEYPAGE;
138 t->bt_compare = __bam_defcmp;
139 t->bt_prefix = __bam_defpfx;
142 /* Initialize the remaining fields of the DB. */
143 dbp->type = type;
144 dbp->internal = t;
145 dbp->cursor = __bam_cursor;
146 dbp->del = __bam_delete;
147 dbp->get = __bam_get;
148 dbp->put = __bam_put;
149 dbp->stat = __bam_stat;
150 dbp->sync = __bam_sync;
153 * The btree data structure requires that at least two key/data pairs
154 * can fit on a page, but other than that there's no fixed requirement.
155 * Translate the minimum number of items into the bytes a key/data pair
156 * can use before being placed on an overflow page. We calculate for
157 * the worst possible alignment by assuming every item requires the
158 * maximum alignment for padding.
160 * Recno uses the btree bt_ovflsize value -- it's close enough.
162 t->bt_ovflsize = (dbp->pgsize - P_OVERHEAD) / (t->bt_minkey * P_INDX)
163 - (BKEYDATA_PSIZE(0) + ALIGN(1, 4));
165 /* Create a root page if new tree. */
166 if ((ret = __bam_setmeta(dbp, t)) != 0)
167 goto err;
169 return (0);
171 einval: ret = EINVAL;
173 err: if (t != NULL) {
174 /* If we allocated room for key/data return, discard it. */
175 if (t->bt_rkey.data != NULL)
176 __db_free(t->bt_rkey.data);
178 FREE(t, sizeof(BTREE));
180 return (ret);
184 * __bam_bdup --
185 * Create a BTREE handle for a threaded DB handle.
187 * PUBLIC: int __bam_bdup __P((DB *, DB *));
190 __bam_bdup(orig, new)
191 DB *orig, *new;
193 BTREE *t, *ot;
194 int ret;
196 ot = orig->internal;
198 if ((t = (BTREE *)__db_calloc(1, sizeof(*t))) == NULL)
199 return (ENOMEM);
202 * !!!
203 * Ignore the cursor queue, only the first DB has attached cursors.
206 t->bt_sp = t->bt_csp = t->bt_stack;
207 t->bt_esp = t->bt_stack + sizeof(t->bt_stack) / sizeof(t->bt_stack[0]);
209 if ((orig->type == DB_RECNO || F_ISSET(orig, DB_BT_RECNUM)) &&
210 (ret = __bam_keyalloc(t)) != 0) {
211 FREE(t, sizeof(*t));
212 return (ret);
215 t->bt_maxkey = ot->bt_maxkey;
216 t->bt_minkey = ot->bt_minkey;
217 t->bt_compare = ot->bt_compare;
218 t->bt_prefix = ot->bt_prefix;
219 t->bt_ovflsize = ot->bt_ovflsize;
222 * !!!
223 * The entire RECNO structure is shared. If it breaks, the application
224 * was misusing it to start with.
226 t->bt_recno = ot->bt_recno;
228 new->internal = t;
230 return (0);
234 * __bam_keyalloc --
235 * Allocate return memory for recno keys.
237 static int
238 __bam_keyalloc(t)
239 BTREE *t;
242 * Recno keys are always the same size, and we don't want to have
243 * to check for space on each return. Allocate it now.
245 if ((t->bt_rkey.data = (void *)__db_malloc(sizeof(db_recno_t))) == NULL)
246 return (ENOMEM);
247 t->bt_rkey.ulen = sizeof(db_recno_t);
248 return (0);
252 * __bam_setmeta --
253 * Check (and optionally create) a tree.
255 static int
256 __bam_setmeta(dbp, t)
257 DB *dbp;
258 BTREE *t;
260 BTMETA *meta;
261 PAGE *root;
262 DB_LOCK metalock, rootlock;
263 db_pgno_t pgno;
264 int ret;
266 /* Get, and optionally create the metadata page. */
267 pgno = PGNO_METADATA;
268 if ((ret =
269 __bam_lget(dbp, 0, PGNO_METADATA, DB_LOCK_WRITE, &metalock)) != 0)
270 return (ret);
271 if ((ret =
272 __bam_pget(dbp, (PAGE **)&meta, &pgno, DB_MPOOL_CREATE)) != 0) {
273 (void)__BT_LPUT(dbp, metalock);
274 return (ret);
278 * If the magic number is correct, we're not creating the tree.
279 * Correct any fields that may not be right. Note, all of the
280 * local flags were set by db_open(3).
282 if (meta->magic != 0) {
283 t->bt_maxkey = meta->maxkey;
284 t->bt_minkey = meta->minkey;
286 (void)memp_fput(dbp->mpf, (PAGE *)meta, 0);
287 (void)__BT_LPUT(dbp, metalock);
288 return (0);
291 /* Initialize the tree structure metadata information. */
292 memset(meta, 0, sizeof(BTMETA));
293 ZERO_LSN(meta->lsn);
294 meta->pgno = PGNO_METADATA;
295 meta->magic = DB_BTREEMAGIC;
296 meta->version = DB_BTREEVERSION;
297 meta->pagesize = dbp->pgsize;
298 meta->maxkey = t->bt_maxkey;
299 meta->minkey = t->bt_minkey;
300 meta->free = PGNO_INVALID;
301 if (dbp->type == DB_RECNO)
302 F_SET(meta, BTM_RECNO);
303 if (F_ISSET(dbp, DB_AM_DUP))
304 F_SET(meta, BTM_DUP);
305 if (F_ISSET(dbp, DB_RE_FIXEDLEN))
306 F_SET(meta, BTM_FIXEDLEN);
307 if (F_ISSET(dbp, DB_BT_RECNUM))
308 F_SET(meta, BTM_RECNUM);
309 if (F_ISSET(dbp, DB_RE_RENUMBER))
310 F_SET(meta, BTM_RENUMBER);
311 memcpy(meta->uid, dbp->lock.fileid, DB_FILE_ID_LEN);
313 /* Create and initialize a root page. */
314 pgno = PGNO_ROOT;
315 if ((ret =
316 __bam_lget(dbp, 0, PGNO_ROOT, DB_LOCK_WRITE, &rootlock)) != 0)
317 return (ret);
318 if ((ret = __bam_pget(dbp, &root, &pgno, DB_MPOOL_CREATE)) != 0) {
319 (void)__BT_LPUT(dbp, rootlock);
320 return (ret);
322 P_INIT(root, dbp->pgsize, PGNO_ROOT, PGNO_INVALID,
323 PGNO_INVALID, 1, dbp->type == DB_RECNO ? P_LRECNO : P_LBTREE);
324 ZERO_LSN(root->lsn);
326 /* Release the metadata and root pages. */
327 if ((ret = memp_fput(dbp->mpf, (PAGE *)meta, DB_MPOOL_DIRTY)) != 0)
328 return (ret);
329 if ((ret = memp_fput(dbp->mpf, root, DB_MPOOL_DIRTY)) != 0)
330 return (ret);
333 * Flush the metadata and root pages to disk -- since the user can't
334 * transaction protect open, the pages have to exist during recovery.
336 * XXX
337 * It's not useful to return not-yet-flushed here -- convert it to
338 * an error.
340 if ((ret = memp_fsync(dbp->mpf)) == DB_INCOMPLETE)
341 ret = EINVAL;
343 /* Release the locks. */
344 (void)__BT_LPUT(dbp, metalock);
345 (void)__BT_LPUT(dbp, rootlock);
347 return (ret);