Update.
[glibc.git] / db2 / include / btree.h
blob1660d331e70bcca68d774805b8a45625926d506f
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 1997, 1998
 *	Sleepycat Software. All rights reserved.
 */
/*
 * Copyright (c) 1990, 1993, 1994, 1995, 1996
 *	Keith Bostic. All rights reserved.
 */
/*
 * Copyright (c) 1990, 1993, 1994, 1995
 *	The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Mike Olson.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)btree.h	10.21 (Sleepycat) 5/23/98
 */
/*
 * Forward structure declarations.
 *
 * Each tag is declared and given a typedef before any structure is defined,
 * so the definitions later in this file can refer to one another by pointer.
 */
struct __btree;		typedef struct __btree BTREE;
struct __cursor;	typedef struct __cursor CURSOR;
struct __epg;		typedef struct __epg EPG;
struct __rcursor;	typedef struct __rcursor RCURSOR;
struct __recno;		typedef struct __recno RECNO;
#undef	DEFMINKEYPAGE			/* Minimum keys per page */
#define	DEFMINKEYPAGE	(2)

/*
 * Page classification.  Both macros expand TYPE(p) twice, so the argument
 * must not have side effects.
 */
#undef	ISINTERNAL			/* If an internal page. */
#define	ISINTERNAL(p)	(TYPE(p) == P_IBTREE || TYPE(p) == P_IRECNO)
#undef	ISLEAF				/* If a leaf page. */
#define	ISLEAF(p)	(TYPE(p) == P_LBTREE || TYPE(p) == P_LRECNO)
/*
 * Allocate and discard thread structures.
 *
 * GETHANDLE(dbp, set_txn, dbpp, ret)
 *	If DB_AM_THREAD is set on dbp, call __db_gethandle() with __bam_bdup
 *	to fill in *dbpp; a non-zero result is stored in ret and RETURNED
 *	FROM THE ENCLOSING FUNCTION.  Otherwise *dbpp is set to dbp itself.
 *	In both cases the txn field of the resulting handle is set to
 *	set_txn.
 *
 * PUTHANDLE(dbp)
 *	Clear the handle's txn field and, if DB_AM_THREAD is set, pass the
 *	handle to __db_puthandle().
 *
 * Every argument is parenthesized in the expansion.  In particular the
 * handle is dereferenced as (*(dbpp))->txn; an unparenthesized *dbpp->txn
 * binds as *(dbpp->txn) and only works when the caller literally writes
 * "&handle" as the argument.
 *
 * The bodies are plain brace blocks rather than do { } while (0), so that
 * existing call sites keep expanding the same way; do not use these macros
 * as the unbraced body of an if that has an else.
 */
#define	GETHANDLE(dbp, set_txn, dbpp, ret) {				\
	if (F_ISSET((dbp), DB_AM_THREAD)) {				\
		if (((ret) =						\
		    __db_gethandle((dbp), __bam_bdup, (dbpp))) != 0)	\
			return (ret);					\
	} else								\
		*(dbpp) = (dbp);					\
	(*(dbpp))->txn = (set_txn);					\
}
#define	PUTHANDLE(dbp) {						\
	(dbp)->txn = NULL;						\
	if (F_ISSET((dbp), DB_AM_THREAD))				\
		__db_puthandle(dbp);					\
}
/*
 * If doing transactions we have to hold the locks associated with a data item
 * from a page for the entire transaction.  However, we don't have to hold the
 * locks associated with walking the tree.  Distinguish between the two so that
 * we don't tie up the internal pages of the tree longer than necessary.
 *
 * __BT_LPUT(dbp, lock)
 *	Release the lock via lock_put() whenever DB_AM_LOCKING is set.
 * __BT_TLPUT(dbp, lock)
 *	Release the lock only if DB_AM_LOCKING is set AND the handle's txn
 *	field is NULL.
 *
 * Both evaluate to lock_put()'s result when the lock is released and to 0
 * otherwise.
 */
#define	__BT_LPUT(dbp, lock)						\
	(F_ISSET((dbp), DB_AM_LOCKING) ?				\
	    lock_put((dbp)->dbenv->lk_info, (lock)) : 0)
#define	__BT_TLPUT(dbp, lock)						\
	(F_ISSET((dbp), DB_AM_LOCKING) && (dbp)->txn == NULL ?		\
	    lock_put((dbp)->dbenv->lk_info, (lock)) : 0)
/*
 * Flags to __bt_search() and __rec_search().
 *
 * Note, internal page searches must find the largest record less than key in
 * the tree so that descents work.  Leaf page searches must find the smallest
 * record greater than key so that the returned index is the record's correct
 * position for insertion.
 *
 * The flags parameter to the search routines describes three aspects of the
 * search: the type of locking required (including if we're locking a pair of
 * pages), the item to return in the presence of duplicates and whether or not
 * to return deleted entries.  To simplify both the mnemonic representation
 * and the code that checks for various cases, we construct a set of bitmasks.
 */
#define	S_READ		0x00001		/* Read locks. */
#define	S_WRITE		0x00002		/* Write locks. */

#define	S_APPEND	0x00040		/* Append to the tree. */
#define	S_DELNO		0x00080		/* Don't return deleted items. */
#define	S_DUPFIRST	0x00100		/* Return first duplicate. */
#define	S_DUPLAST	0x00200		/* Return last duplicate. */
#define	S_EXACT		0x00400		/* Exact items only. */
#define	S_PARENT	0x00800		/* Lock page pair. */
#define	S_STACK		0x01000		/* Need a complete stack. */

/*
 * Composite masks, one per kind of search.  Note that S_INSERT and S_KEYLAST
 * are the same bit pattern.
 */
#define	S_DELETE	(S_WRITE | S_DUPFIRST | S_DELNO | S_EXACT | S_STACK)
#define	S_FIND		(S_READ | S_DUPFIRST | S_DELNO)
#define	S_INSERT	(S_WRITE | S_DUPLAST | S_STACK)
#define	S_KEYFIRST	(S_WRITE | S_DUPFIRST | S_STACK)
#define	S_KEYLAST	(S_WRITE | S_DUPLAST | S_STACK)
#define	S_WRPAIR	(S_WRITE | S_DUPLAST | S_PARENT)
/*
 * If doing insert search (including keyfirst or keylast operations) or a
 * split search on behalf of an insert, it's okay to return the entry one
 * past the end of the page.
 *
 * The comparison is against the complete composite masks, not individual
 * bits: a flags value with any additional bit set does not match.
 */
#define	PAST_END_OK(f)							\
	((f) == S_INSERT ||						\
	    (f) == S_KEYFIRST || (f) == S_KEYLAST || (f) == S_WRPAIR)
/*
 * Flags to __bam_iitem().
 */
#define	BI_DELETED	0x01	/* Key/data pair only placeholder. */
#define	BI_DOINCR	0x02	/* Increment the record count. */
#define	BI_NEWKEY	0x04	/* New key. */
141 * Various routines pass around page references. A page reference can be a
142 * pointer to the page or a page number; for either, an indx can designate
143 * an item on the page.
145 struct __epg {
146 PAGE *page; /* The page. */
147 db_indx_t indx; /* The index on the page. */
148 DB_LOCK lock; /* The page's lock. */
/*
 * All cursors are queued from the master DB structure.  Convert the user's
 * DB reference to the master DB reference.  We lock the master DB mutex
 * so that we can walk the cursor queue.  There's no race in accessing the
 * cursors, because if we're modifying a page, we have a write lock on it,
 * and therefore no other thread than the current one can have a cursor that
 * references the page.
 *
 * CURSOR_SETUP assigns to its argument, so dbp must be a modifiable lvalue;
 * after the macro it refers to the master handle, which is also what
 * CURSOR_TEARDOWN must be given.
 *
 * CURSOR_TEARDOWN's expansion already ends in a semicolon; that is kept as
 * is so existing call sites expand unchanged.
 */
#define	CURSOR_SETUP(dbp) {						\
	(dbp) = (dbp)->master;						\
	DB_THREAD_LOCK(dbp);						\
}
#define	CURSOR_TEARDOWN(dbp)						\
	DB_THREAD_UNLOCK(dbp);
167 * Btree cursor.
169 * Arguments passed to __bam_ca_replace().
171 typedef enum {
172 REPLACE_SETUP,
173 REPLACE_SUCCESS,
174 REPLACE_FAILED
175 } ca_replace_arg;
176 struct __cursor {
177 DBC *dbc; /* Enclosing DBC. */
179 PAGE *page; /* Cursor page. */
181 db_pgno_t pgno; /* Page. */
182 db_indx_t indx; /* Page item ref'd by the cursor. */
184 db_pgno_t dpgno; /* Duplicate page. */
185 db_indx_t dindx; /* Page item ref'd by the cursor. */
187 DB_LOCK lock; /* Cursor read lock. */
188 db_lockmode_t mode; /* Lock mode. */
191 * If a cursor record is deleted, the key/data pair has to remain on
192 * the page so that subsequent inserts/deletes don't interrupt the
193 * cursor progression through the file. This results in interesting
194 * cases when "standard" operations, e.g., dbp->put() are done in the
195 * context of "deleted" cursors.
197 * C_DELETED -- The item referenced by the cursor has been "deleted"
198 * but not physically removed from the page.
199 * C_REPLACE -- The "deleted" item referenced by a cursor has been
200 * replaced by a dbp->put(), so the cursor is no longer
201 * responsible for physical removal from the page.
202 * C_REPLACE_SETUP --
203 * We are about to overwrite a "deleted" item, flag any
204 * cursors referencing it for transition to C_REPLACE
205 * state.
207 #define C_DELETED 0x0001
208 #define C_REPLACE 0x0002
209 #define C_REPLACE_SETUP 0x0004
212 * Internal cursor held for DB->get; don't hold locks unless involved
213 * in a TXN.
215 #define C_INTERNAL 0x0008
216 u_int32_t flags;
220 * Recno cursor.
222 * Arguments passed to __ram_ca().
224 typedef enum {
225 CA_DELETE,
226 CA_IAFTER,
227 CA_IBEFORE
228 } ca_recno_arg;
229 struct __rcursor {
230 DBC *dbc; /* Enclosing DBC. */
232 db_recno_t recno; /* Current record number. */
235 * Cursors referencing "deleted" records are positioned between
236 * two records, and so must be specially adjusted until they are
237 * moved.
239 #define CR_DELETED 0x0001 /* Record deleted. */
240 u_int32_t flags;
/*
 * We maintain a stack of the pages that we're locking in the tree.  Btree's
 * (currently) only save two levels of the tree at a time, so the default
 * stack is always large enough.  Recno trees have to lock the entire tree to
 * do inserts/deletes, however.  Grow the stack as necessary.
 *
 * The stack is described by three BTREE fields: bt_sp (base), bt_csp
 * (current entry) and bt_esp (end).
 *
 * BT_STK_CLR(t)
 *	Reset the current entry to the base of the stack.
 * BT_STK_ENTER(t, pagep, page_indx, lck, ret)
 *	If the current entry is at the end of the stack, call
 *	__bam_stkgrow(t).  ret receives that call's result, or 0 if no
 *	growth was needed; only when ret is 0 are the page, index and lock
 *	stored in the current entry.  The current entry is not advanced.
 * BT_STK_PUSH(t, pagep, page_indx, lck, ret)
 *	BT_STK_ENTER followed by an unconditional advance of the current
 *	entry; callers must check ret.
 * BT_STK_POP(t)
 *	Evaluates to NULL if the stack is empty, otherwise steps the current
 *	entry back by one and evaluates to it.
 *
 * The lock parameter is deliberately not spelled "lock": a macro parameter
 * of that name would also replace the member name in "->lock", so the macro
 * would only compile when the caller's variable happened to be called lock.
 *
 * BT_STK_POP tests for emptiness against bt_sp, the same base BT_STK_CLR
 * resets to, rather than against the embedded bt_stack array, so that the
 * test still works if the stack has been moved by growing it (presumably
 * bt_sp == bt_stack until then, in which case the two tests are identical).
 */
#undef	BT_STK_CLR
#define	BT_STK_CLR(t)							\
	((t)->bt_csp = (t)->bt_sp)

#undef	BT_STK_ENTER
#define	BT_STK_ENTER(t, pagep, page_indx, lck, ret) do {		\
	if (((ret) =							\
	    (t)->bt_csp == (t)->bt_esp ? __bam_stkgrow(t) : 0) == 0) {	\
		(t)->bt_csp->page = (pagep);				\
		(t)->bt_csp->indx = (page_indx);			\
		(t)->bt_csp->lock = (lck);				\
	}								\
} while (0)

#undef	BT_STK_PUSH
#define	BT_STK_PUSH(t, pagep, page_indx, lck, ret) do {			\
	BT_STK_ENTER(t, pagep, page_indx, lck, ret);			\
	++(t)->bt_csp;							\
} while (0)

#undef	BT_STK_POP
#define	BT_STK_POP(t)							\
	((t)->bt_csp == (t)->bt_sp ? NULL : --(t)->bt_csp)
274 * The in-memory recno data structure.
276 * !!!
277 * These fields are ignored as far as multi-threading is concerned. There
278 * are no transaction semantics associated with backing files, nor is there
279 * any thread protection.
281 #undef RECNO_OOB
282 #define RECNO_OOB 0 /* Illegal record number. */
284 struct __recno {
285 int re_delim; /* Variable-length delimiting byte. */
286 int re_pad; /* Fixed-length padding byte. */
287 u_int32_t re_len; /* Length for fixed-length records. */
289 char *re_source; /* Source file name. */
290 int re_fd; /* Source file descriptor */
291 db_recno_t re_last; /* Last record number read. */
292 void *re_cmap; /* Current point in mapped space. */
293 void *re_smap; /* Start of mapped space. */
294 void *re_emap; /* End of mapped space. */
295 size_t re_msize; /* Size of mapped region. */
296 /* Recno input function. */
297 int (*re_irec) __P((DB *, db_recno_t));
299 #define RECNO_EOF 0x0001 /* EOF on backing source file. */
300 #define RECNO_MODIFIED 0x0002 /* Tree was modified. */
301 u_int32_t flags;
305 * The in-memory btree data structure.
307 struct __btree {
309 * These fields are per-thread and are initialized when the BTREE structure
310 * is created.
312 db_pgno_t bt_lpgno; /* Last insert location. */
314 DBT bt_rkey; /* Returned key. */
315 DBT bt_rdata; /* Returned data. */
317 EPG *bt_sp; /* Stack pointer. */
318 EPG *bt_csp; /* Current stack entry. */
319 EPG *bt_esp; /* End stack pointer. */
320 EPG bt_stack[5];
322 RECNO *bt_recno; /* Private recno structure. */
324 DB_BTREE_LSTAT lstat; /* Btree local statistics. */
327 * These fields are copied from the original BTREE structure and never
328 * change.
330 db_indx_t bt_maxkey; /* Maximum keys per page. */
331 db_indx_t bt_minkey; /* Minimum keys per page. */
333 int (*bt_compare) /* Comparison function. */
334 __P((const DBT *, const DBT *));
335 size_t(*bt_prefix) /* Prefix function. */
336 __P((const DBT *, const DBT *));
338 db_indx_t bt_ovflsize; /* Maximum key/data on-page size. */
341 #include "btree_auto.h"
342 #include "btree_ext.h"
343 #include "db_am.h"
344 #include "common_ext.h"