Update.
[glibc.git] / db2 / btree / bt_rsearch.c
blobcaa6b3515e1dcc10a991309775d0fc59a0c64888
1 /*-
2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997, 1998
5 * Sleepycat Software. All rights reserved.
6 */
7 /*
8 * Copyright (c) 1990, 1993, 1994, 1995, 1996
9 * Keith Bostic. All rights reserved.
12 * Copyright (c) 1990, 1993
13 * The Regents of the University of California. All rights reserved.
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in the
22 * documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 * must display the following acknowledgement:
25 * This product includes software developed by the University of
26 * California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 * may be used to endorse or promote products derived from this software
29 * without specific prior written permission.
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
44 #include "config.h"
46 #ifndef lint
47 static const char sccsid[] = "@(#)bt_rsearch.c 10.15 (Sleepycat) 5/6/98";
48 #endif /* not lint */
50 #ifndef NO_SYSTEM_INCLUDES
51 #include <sys/types.h>
52 #endif
54 #include "db_int.h"
55 #include "db_page.h"
56 #include "btree.h"
59 * __bam_rsearch --
60 * Search a btree for a record number.
62 * PUBLIC: int __bam_rsearch __P((DB *, db_recno_t *, u_int32_t, int, int *));
64 int
65 __bam_rsearch(dbp, recnop, flags, stop, exactp)
66 DB *dbp;
67 db_recno_t *recnop;
68 u_int32_t flags;
69 int stop, *exactp;
71 BINTERNAL *bi;
72 BTREE *t;
73 DB_LOCK lock;
74 PAGE *h;
75 RINTERNAL *ri;
76 db_indx_t indx, top;
77 db_pgno_t pg;
78 db_recno_t i, recno, total;
79 int isappend, ret, stack;
81 t = dbp->internal;
84 * We test for groups of flags, S_APPEND is the only one that can be
85 * OR'd into the set. Clear it now so that the tests for equality
86 * will work.
88 if ((isappend = LF_ISSET(S_APPEND)) != 0)
89 LF_CLR(S_APPEND);
92 * There are several ways we search a btree tree. The flags argument
93 * specifies if we're acquiring read or write locks and if we are
94 * locking pairs of pages. See btree.h for more details.
96 * If write-locking pages, we need to know whether or not to acquire a
97 * write lock on a page before getting it. This depends on how deep it
98 * is in tree, which we don't know until we acquire the root page. So,
99 * if we need to lock the root page we may have to upgrade it later,
100 * because we won't get the correct lock initially.
102 * Retrieve the root page.
104 pg = PGNO_ROOT;
105 if ((ret = __bam_lget(dbp, 0, PGNO_ROOT,
106 flags == S_INSERT || flags == S_DELETE ?
107 DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0)
108 return (ret);
109 if ((ret = __bam_pget(dbp, &h, &pg, 0)) != 0) {
110 (void)__BT_LPUT(dbp, lock);
111 return (ret);
113 total = RE_NREC(h);
116 * If appending to the tree, set the record number now -- we have the
117 * root page locked.
119 * Delete only deletes exact matches, read only returns exact matches.
120 * Note, this is different from __bam_search(), which returns non-exact
121 * matches for read.
123 * The record may not exist. We can only return the correct location
124 * for the record immediately after the last record in the tree, so do
125 * a fast check now.
127 if (isappend) {
128 *exactp = 0;
129 *recnop = recno = total + 1;
130 } else {
131 recno = *recnop;
132 if (recno <= total)
133 *exactp = 1;
134 else {
135 *exactp = 0;
136 if (!PAST_END_OK(flags) || recno > total + 1) {
137 (void)memp_fput(dbp->mpf, h, 0);
138 (void)__BT_LPUT(dbp, lock);
139 return (DB_NOTFOUND);
144 /* Decide if we're building a stack based on the operation. */
145 BT_STK_CLR(t);
146 stack = flags == S_DELETE || flags == S_INSERT;
149 * Decide if we need to save this page; if we do, write lock it, and
150 * start to build a stack.
152 if (LF_ISSET(S_PARENT) && (u_int8_t)(stop + 1) >= h->level) {
153 (void)memp_fput(dbp->mpf, h, 0);
154 if ((ret = __bam_lget(dbp, 1, pg, DB_LOCK_WRITE, &lock)) != 0)
155 return (ret);
156 if ((ret = __bam_pget(dbp, &h, &pg, 0)) != 0) {
157 (void)__BT_LPUT(dbp, lock);
158 return (ret);
160 stack = 1;
164 * !!!
165 * Record numbers in the tree are 0-based, but the recno is
166 * 1-based. All of the calculations below have to take this
167 * into account.
169 for (total = 0;;) {
170 switch (TYPE(h)) {
171 case P_LBTREE:
172 recno -= total;
175 * There may be logically deleted records on the page,
176 * walk the page correcting for them. The record may
177 * not exist if there are enough deleted records in the
178 * page.
180 if (recno <= NUM_ENT(h))
181 for (i = recno - 1;; --i) {
182 if (B_DISSET(GET_BKEYDATA(h,
183 i * P_INDX + O_INDX)->type))
184 ++recno;
185 if (i == 0)
186 break;
188 if (recno > NUM_ENT(h)) {
189 *exactp = 0;
190 if (!PAST_END_OK(flags) ||
191 recno > (db_recno_t)(NUM_ENT(h) + 1)) {
192 ret = DB_NOTFOUND;
193 goto err;
198 /* Correct from 1-based to 0-based for a page offset. */
199 --recno;
200 BT_STK_ENTER(t, h, recno * P_INDX, lock, ret);
201 return (ret);
202 case P_IBTREE:
203 for (indx = 0, top = NUM_ENT(h);;) {
204 bi = GET_BINTERNAL(h, indx);
205 if (++indx == top || total + bi->nrecs >= recno)
206 break;
207 total += bi->nrecs;
209 pg = bi->pgno;
210 break;
211 case P_LRECNO:
212 recno -= total;
214 /* Correct from 1-based to 0-based for a page offset. */
215 --recno;
216 BT_STK_ENTER(t, h, recno, lock, ret);
217 return (ret);
218 case P_IRECNO:
219 for (indx = 0, top = NUM_ENT(h);;) {
220 ri = GET_RINTERNAL(h, indx);
221 if (++indx == top || total + ri->nrecs >= recno)
222 break;
223 total += ri->nrecs;
225 pg = ri->pgno;
226 break;
227 default:
228 return (__db_pgfmt(dbp, h->pgno));
230 --indx;
232 if (stack) {
233 /* Return if this is the lowest page wanted. */
234 if (LF_ISSET(S_PARENT) && stop == h->level) {
235 BT_STK_ENTER(t, h, indx, lock, ret);
236 return (ret);
238 BT_STK_PUSH(t, h, indx, lock, ret);
239 if (ret)
240 goto err;
242 if ((ret = __bam_lget(dbp, 0, pg,
243 LF_ISSET(S_WRITE) ? DB_LOCK_WRITE : DB_LOCK_READ,
244 &lock)) != 0)
245 goto err;
246 } else {
247 (void)memp_fput(dbp->mpf, h, 0);
250 * Decide if we want to return a pointer to the next
251 * page in the stack. If we do, write lock it and
252 * never unlock it.
254 if (LF_ISSET(S_PARENT) &&
255 (u_int8_t)(stop + 1) >= (u_int8_t)(h->level - 1))
256 stack = 1;
258 if ((ret = __bam_lget(dbp, 1, pg,
259 LF_ISSET(S_WRITE) ? DB_LOCK_WRITE : DB_LOCK_READ,
260 &lock)) != 0)
261 goto err;
264 if ((ret = __bam_pget(dbp, &h, &pg, 0)) != 0)
265 goto err;
267 /* NOTREACHED */
269 err: BT_STK_POP(t);
270 __bam_stkrel(dbp);
271 return (ret);
275 * __bam_adjust --
276 * Adjust the tree after adding or deleting a record.
278 * PUBLIC: int __bam_adjust __P((DB *, BTREE *, int32_t));
281 __bam_adjust(dbp, t, adjust)
282 DB *dbp;
283 BTREE *t;
284 int32_t adjust;
286 EPG *epg;
287 PAGE *h;
288 int ret;
290 /* Update the record counts for the tree. */
291 for (epg = t->bt_sp; epg <= t->bt_csp; ++epg) {
292 h = epg->page;
293 if (TYPE(h) == P_IBTREE || TYPE(h) == P_IRECNO) {
294 if (DB_LOGGING(dbp) &&
295 (ret = __bam_cadjust_log(dbp->dbenv->lg_info,
296 dbp->txn, &LSN(h), 0, dbp->log_fileid,
297 PGNO(h), &LSN(h), (u_int32_t)epg->indx,
298 adjust, 1)) != 0)
299 return (ret);
301 if (TYPE(h) == P_IBTREE)
302 GET_BINTERNAL(h, epg->indx)->nrecs += adjust;
303 else
304 GET_RINTERNAL(h, epg->indx)->nrecs += adjust;
306 if (PGNO(h) == PGNO_ROOT)
307 RE_NREC_ADJ(h, adjust);
309 if ((ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0)
310 return (ret);
313 return (0);
317 * __bam_nrecs --
318 * Return the number of records in the tree.
320 * PUBLIC: int __bam_nrecs __P((DB *, db_recno_t *));
323 __bam_nrecs(dbp, rep)
324 DB *dbp;
325 db_recno_t *rep;
327 DB_LOCK lock;
328 PAGE *h;
329 db_pgno_t pgno;
330 int ret;
332 pgno = PGNO_ROOT;
333 if ((ret = __bam_lget(dbp, 0, pgno, DB_LOCK_READ, &lock)) != 0)
334 return (ret);
335 if ((ret = __bam_pget(dbp, &h, &pgno, 0)) != 0)
336 return (ret);
338 *rep = RE_NREC(h);
340 (void)memp_fput(dbp->mpf, h, 0);
341 (void)__BT_TLPUT(dbp, lock);
343 return (0);
347 * __bam_total --
348 * Return the number of records below a page.
350 * PUBLIC: db_recno_t __bam_total __P((PAGE *));
352 db_recno_t
353 __bam_total(h)
354 PAGE *h;
356 db_recno_t nrecs;
357 db_indx_t indx, top;
359 nrecs = 0;
360 top = NUM_ENT(h);
362 switch (TYPE(h)) {
363 case P_LBTREE:
364 /* Check for logically deleted records. */
365 for (indx = 0; indx < top; indx += P_INDX)
366 if (!B_DISSET(GET_BKEYDATA(h, indx + O_INDX)->type))
367 ++nrecs;
368 break;
369 case P_IBTREE:
370 for (indx = 0; indx < top; indx += O_INDX)
371 nrecs += GET_BINTERNAL(h, indx)->nrecs;
372 break;
373 case P_LRECNO:
374 nrecs = NUM_ENT(h);
375 break;
376 case P_IRECNO:
377 for (indx = 0; indx < top; indx += O_INDX)
378 nrecs += GET_RINTERNAL(h, indx)->nrecs;
379 break;
382 return (nrecs);