2 * Copyright (c) 1990, 1993, 1994
3 * The Regents of the University of California. All rights reserved.
5 * This code is derived from software contributed to Berkeley by
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * @(#)bt_seq.c 8.7 (Berkeley) 7/20/94
33 * $DragonFly: src/lib/libc/db/btree/bt_seq.c,v 1.7 2005/11/12 23:01:54 swildner Exp $
36 #include <sys/types.h>
46 static int __bt_first (BTREE
*, const DBT
*, EPG
*, int *);
47 static int __bt_seqadv (BTREE
*, EPG
*, int);
48 static int __bt_seqset (BTREE
*, EPG
*, DBT
*, int);
51 * Sequential scan support.
53 * The tree can be scanned sequentially, starting from either end of the
54 * tree or from any specific key. A scan request before any scanning is
55 * done is initialized as starting from the least node.
60 * Btree sequential scan interface.
63 * dbp: pointer to access method
64 * key: key for positioning and return value
65 * data: data return value
66 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV.
69 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
72 __bt_seq(const DB
*dbp
, DBT
*key
, DBT
*data
, u_int flags
)
80 /* Toss any page pinned across calls. */
81 if (t
->bt_pinned
!= NULL
) {
82 mpool_put(t
->bt_mp
, t
->bt_pinned
, 0);
87 * If scan unitialized as yet, or starting at a specific record, set
88 * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin
89 * the page the cursor references if they're successful.
94 if (F_ISSET(&t
->bt_cursor
, CURS_INIT
)) {
95 status
= __bt_seqadv(t
, &e
, flags
);
102 status
= __bt_seqset(t
, &e
, key
, flags
);
109 if (status
== RET_SUCCESS
) {
110 __bt_setcur(t
, e
.page
->pgno
, e
.index
);
113 __bt_ret(t
, &e
, key
, &t
->bt_rkey
, data
, &t
->bt_rdata
, 0);
116 * If the user is doing concurrent access, we copied the
117 * key/data, toss the page.
119 if (F_ISSET(t
, B_DB_LOCK
))
120 mpool_put(t
->bt_mp
, e
.page
, 0);
122 t
->bt_pinned
= e
.page
;
129 * Set the sequential scan to a specific key.
133 * ep: storage for returned key
134 * key: key for initial scan position
135 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV
138 * Pins the page the cursor references.
141 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
144 __bt_seqset(BTREE
*t
, EPG
*ep
, DBT
*key
, int flags
)
151 * Find the first, last or specific key in the tree and point the
152 * cursor at it. The cursor may not be moved until a new key has
156 case R_CURSOR
: /* Keyed scan. */
158 * Find the first instance of the key or the smallest key
159 * which is greater than or equal to the specified key.
161 if (key
->data
== NULL
|| key
->size
== 0) {
165 return (__bt_first(t
, key
, ep
, &exact
));
166 case R_FIRST
: /* First record. */
168 /* Walk down the left-hand side of the tree. */
169 for (pg
= P_ROOT
;;) {
170 if ((h
= mpool_get(t
->bt_mp
, pg
, 0)) == NULL
)
173 /* Check for an empty tree. */
174 if (NEXTINDEX(h
) == 0) {
175 mpool_put(t
->bt_mp
, h
, 0);
176 return (RET_SPECIAL
);
179 if (h
->flags
& (P_BLEAF
| P_RLEAF
))
181 pg
= GETBINTERNAL(h
, 0)->pgno
;
182 mpool_put(t
->bt_mp
, h
, 0);
187 case R_LAST
: /* Last record. */
189 /* Walk down the right-hand side of the tree. */
190 for (pg
= P_ROOT
;;) {
191 if ((h
= mpool_get(t
->bt_mp
, pg
, 0)) == NULL
)
194 /* Check for an empty tree. */
195 if (NEXTINDEX(h
) == 0) {
196 mpool_put(t
->bt_mp
, h
, 0);
197 return (RET_SPECIAL
);
200 if (h
->flags
& (P_BLEAF
| P_RLEAF
))
202 pg
= GETBINTERNAL(h
, NEXTINDEX(h
) - 1)->pgno
;
203 mpool_put(t
->bt_mp
, h
, 0);
207 ep
->index
= NEXTINDEX(h
) - 1;
210 return (RET_SUCCESS
);
215 * Advance the sequential scan.
219 * flags: R_NEXT, R_PREV
222 * Pins the page the new key/data record is on.
225 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
228 __bt_seqadv(BTREE
*t
, EPG
*ep
, int flags
)
237 * There are a couple of states that we can be in. The cursor has
238 * been initialized by the time we get here, but that's all we know.
243 * The cursor was deleted where there weren't any duplicate records,
244 * so the key was saved. Find out where that key would go in the
245 * current tree. It doesn't matter if the returned key is an exact
246 * match or not -- if it's an exact match, the record was added after
247 * the delete so we can just return it. If not, as long as there's
248 * a record there, return it.
250 if (F_ISSET(c
, CURS_ACQUIRE
))
251 return (__bt_first(t
, &c
->key
, ep
, &exact
));
253 /* Get the page referenced by the cursor. */
254 if ((h
= mpool_get(t
->bt_mp
, c
->pg
.pgno
, 0)) == NULL
)
258 * Find the next/previous record in the tree and point the cursor at
259 * it. The cursor may not be moved until a new key has been found.
262 case R_NEXT
: /* Next record. */
264 * The cursor was deleted in duplicate records, and moved
265 * forward to a record that has yet to be returned. Clear
266 * that flag, and return the record.
268 if (F_ISSET(c
, CURS_AFTER
))
271 if (++idx
== NEXTINDEX(h
)) {
273 mpool_put(t
->bt_mp
, h
, 0);
275 return (RET_SPECIAL
);
276 if ((h
= mpool_get(t
->bt_mp
, pg
, 0)) == NULL
)
281 case R_PREV
: /* Previous record. */
283 * The cursor was deleted in duplicate records, and moved
284 * backward to a record that has yet to be returned. Clear
285 * that flag, and return the record.
287 if (F_ISSET(c
, CURS_BEFORE
)) {
288 usecurrent
: F_CLR(c
, CURS_AFTER
| CURS_BEFORE
);
290 ep
->index
= c
->pg
.index
;
291 return (RET_SUCCESS
);
296 mpool_put(t
->bt_mp
, h
, 0);
298 return (RET_SPECIAL
);
299 if ((h
= mpool_get(t
->bt_mp
, pg
, 0)) == NULL
)
301 idx
= NEXTINDEX(h
) - 1;
311 return (RET_SUCCESS
);
316 * Find the first entry.
322 * exactp: pointer to exact match flag
325 * The first entry in the tree greater than or equal to key,
326 * or RET_SPECIAL if no such key exists.
329 __bt_first(BTREE
*t
, const DBT
*key
, EPG
*erval
, int *exactp
)
336 * Find any matching record; __bt_search pins the page.
338 * If it's an exact match and duplicates are possible, walk backwards
339 * in the tree until we find the first one. Otherwise, make sure it's
340 * a valid key (__bt_search may return an index just past the end of a
341 * page) and return it.
343 if ((ep
= __bt_search(t
, key
, exactp
)) == NULL
)
346 if (F_ISSET(t
, B_NODUPS
)) {
348 return (RET_SUCCESS
);
352 * Walk backwards, as long as the entry matches and there are
353 * keys left in the tree. Save a copy of each match in case
359 if (save
.page
->pgno
!= ep
->page
->pgno
) {
360 mpool_put(t
->bt_mp
, save
.page
, 0);
363 save
.index
= ep
->index
;
366 * Don't unpin the page the last (or original) match
367 * was on, but make sure it's unpinned if an error
370 if (ep
->index
== 0) {
371 if (h
->prevpg
== P_INVALID
)
373 if (h
->pgno
!= save
.page
->pgno
)
374 mpool_put(t
->bt_mp
, h
, 0);
375 if ((h
= mpool_get(t
->bt_mp
,
376 h
->prevpg
, 0)) == NULL
) {
377 if (h
->pgno
== save
.page
->pgno
)
383 ep
->index
= NEXTINDEX(h
);
386 } while (__bt_cmp(t
, key
, ep
) == 0);
389 * Reach here with the last page that was looked at pinned,
390 * which may or may not be the same as the last (or original)
391 * match page. If it's not useful, release it.
393 if (h
->pgno
!= save
.page
->pgno
)
394 mpool_put(t
->bt_mp
, h
, 0);
397 return (RET_SUCCESS
);
400 /* If at the end of a page, find the next entry. */
401 if (ep
->index
== NEXTINDEX(ep
->page
)) {
404 mpool_put(t
->bt_mp
, h
, 0);
406 return (RET_SPECIAL
);
407 if ((h
= mpool_get(t
->bt_mp
, pg
, 0)) == NULL
)
413 return (RET_SUCCESS
);
418 * Set the cursor to an entry in the tree.
426 __bt_setcur(BTREE
*t
, pgno_t pgno
, u_int index
)
428 /* Lose any already deleted key. */
429 if (t
->bt_cursor
.key
.data
!= NULL
) {
430 free(t
->bt_cursor
.key
.data
);
431 t
->bt_cursor
.key
.size
= 0;
432 t
->bt_cursor
.key
.data
= NULL
;
434 F_CLR(&t
->bt_cursor
, CURS_ACQUIRE
| CURS_AFTER
| CURS_BEFORE
);
436 /* Update the cursor. */
437 t
->bt_cursor
.pg
.pgno
= pgno
;
438 t
->bt_cursor
.pg
.index
= index
;
439 F_SET(&t
->bt_cursor
, CURS_INIT
);