Update.
[glibc.git] / db2 / btree / bt_recno.c
blob38dbbd1c55b6872cc5342784a98bb3ac064782d2
1 /*-
2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1997, 1998
5 * Sleepycat Software. All rights reserved.
6 */
8 #include "config.h"
10 #ifndef lint
11 static const char sccsid[] = "@(#)bt_recno.c 10.37 (Sleepycat) 5/23/98";
12 #endif /* not lint */
14 #ifndef NO_SYSTEM_INCLUDES
15 #include <sys/types.h>
17 #include <errno.h>
18 #include <limits.h>
19 #include <string.h>
20 #endif
22 #include "db_int.h"
23 #include "db_page.h"
24 #include "btree.h"
26 static int __ram_add __P((DB *, db_recno_t *, DBT *, u_int32_t, u_int32_t));
27 static int __ram_c_close __P((DBC *));
28 static int __ram_c_del __P((DBC *, u_int32_t));
29 static int __ram_c_get __P((DBC *, DBT *, DBT *, u_int32_t));
30 static int __ram_c_put __P((DBC *, DBT *, DBT *, u_int32_t));
31 static int __ram_fmap __P((DB *, db_recno_t));
32 static int __ram_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
33 static int __ram_iget __P((DB *, DBT *, DBT *));
34 static int __ram_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
35 static int __ram_source __P((DB *, RECNO *, const char *));
36 static int __ram_sync __P((DB *, u_int32_t));
37 static int __ram_update __P((DB *, db_recno_t, int));
38 static int __ram_vmap __P((DB *, db_recno_t));
39 static int __ram_writeback __P((DB *));
/*
 * If we're renumbering records, then we have to detect in the cursor that a
 * record was deleted, and adjust the cursor as necessary.  If not renumbering
 * records, then we can detect this by looking at the actual record, so we
 * ignore the cursor delete flag.
 *
 * CD_SET and CD_CLR are statement macros: they are wrapped in
 * do { ... } while (0) so that "CD_SET(dbp, cp);" behaves as exactly one
 * statement, including when it is the body of an if that has an else.
 * CD_ISSET is an expression that is non-zero only when the database is
 * renumbering and the cursor's delete flag is set.
 */
#define	CD_SET(dbp, cp) do {						\
	if (F_ISSET(dbp, DB_RE_RENUMBER))				\
		F_SET(cp, CR_DELETED);					\
} while (0)
#define	CD_CLR(dbp, cp) do {						\
	if (F_ISSET(dbp, DB_RE_RENUMBER))				\
		F_CLR(cp, CR_DELETED);					\
} while (0)
#define	CD_ISSET(dbp, cp)						\
	(F_ISSET(dbp, DB_RE_RENUMBER) && F_ISSET(cp, CR_DELETED))
59 * __ram_open --
60 * Recno open function.
62 * PUBLIC: int __ram_open __P((DB *, DBTYPE, DB_INFO *));
64 int
65 __ram_open(dbp, type, dbinfo)
66 DB *dbp;
67 DBTYPE type;
68 DB_INFO *dbinfo;
70 BTREE *t;
71 RECNO *rp;
72 int ret;
74 COMPQUIET(type, DB_RECNO);
76 ret = 0;
78 /* Allocate and initialize the private RECNO structure. */
79 if ((rp = (RECNO *)__db_calloc(1, sizeof(*rp))) == NULL)
80 return (ENOMEM);
82 if (dbinfo != NULL) {
84 * If the user specified a source tree, open it and map it in.
86 * !!!
87 * We don't complain if the user specified transactions or
88 * threads. It's possible to make it work, but you'd better
89 * know what you're doing!
91 if (dbinfo->re_source == NULL) {
92 rp->re_fd = -1;
93 F_SET(rp, RECNO_EOF);
94 } else {
95 if ((ret =
96 __ram_source(dbp, rp, dbinfo->re_source)) != 0)
97 goto err;
100 /* Copy delimiter, length and padding values. */
101 rp->re_delim =
102 F_ISSET(dbp, DB_RE_DELIMITER) ? dbinfo->re_delim : '\n';
103 rp->re_pad = F_ISSET(dbp, DB_RE_PAD) ? dbinfo->re_pad : ' ';
105 if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
106 if ((rp->re_len = dbinfo->re_len) == 0) {
107 __db_err(dbp->dbenv,
108 "record length must be greater than 0");
109 ret = EINVAL;
110 goto err;
112 } else
113 rp->re_len = 0;
114 } else {
115 rp->re_delim = '\n';
116 rp->re_pad = ' ';
117 rp->re_fd = -1;
118 F_SET(rp, RECNO_EOF);
121 /* Open the underlying btree. */
122 if ((ret = __bam_open(dbp, DB_RECNO, dbinfo)) != 0)
123 goto err;
125 /* Set the routines necessary to make it look like a recno tree. */
126 dbp->cursor = __ram_cursor;
127 dbp->del = __ram_delete;
128 dbp->get = __ram_get;
129 dbp->put = __ram_put;
130 dbp->sync = __ram_sync;
132 /* Link in the private recno structure. */
133 ((BTREE *)dbp->internal)->bt_recno = rp;
135 /* If we're snapshotting an underlying source file, do it now. */
136 if (dbinfo != NULL && F_ISSET(dbinfo, DB_SNAPSHOT))
137 if ((ret = __ram_snapshot(dbp)) != 0 && ret != DB_NOTFOUND)
138 goto err;
140 return (0);
142 err: /* If we mmap'd a source file, discard it. */
143 if (rp->re_smap != NULL)
144 (void)__db_unmapfile(rp->re_smap, rp->re_msize);
146 /* If we opened a source file, discard it. */
147 if (rp->re_fd != -1)
148 (void)__db_close(rp->re_fd);
149 if (rp->re_source != NULL)
150 FREES(rp->re_source);
152 /* If we allocated room for key/data return, discard it. */
153 t = dbp->internal;
154 if (t != NULL && t->bt_rkey.data != NULL)
155 __db_free(t->bt_rkey.data);
157 FREE(rp, sizeof(*rp));
159 return (ret);
163 * __ram_cursor --
164 * Recno db->cursor function.
166 * PUBLIC: int __ram_cursor __P((DB *, DB_TXN *, DBC **));
169 __ram_cursor(dbp, txn, dbcp)
170 DB *dbp;
171 DB_TXN *txn;
172 DBC **dbcp;
174 RCURSOR *cp;
175 DBC *dbc;
177 DEBUG_LWRITE(dbp, txn, "ram_cursor", NULL, NULL, 0);
179 if ((dbc = (DBC *)__db_calloc(1, sizeof(DBC))) == NULL)
180 return (ENOMEM);
181 if ((cp = (RCURSOR *)__db_calloc(1, sizeof(RCURSOR))) == NULL) {
182 __db_free(dbc);
183 return (ENOMEM);
186 cp->dbc = dbc;
187 cp->recno = RECNO_OOB;
189 dbc->dbp = dbp;
190 dbc->txn = txn;
191 dbc->internal = cp;
192 dbc->c_close = __ram_c_close;
193 dbc->c_del = __ram_c_del;
194 dbc->c_get = __ram_c_get;
195 dbc->c_put = __ram_c_put;
198 * All cursors are queued from the master DB structure. Add the
199 * cursor to that queue.
201 CURSOR_SETUP(dbp);
202 TAILQ_INSERT_HEAD(&dbp->curs_queue, dbc, links);
203 CURSOR_TEARDOWN(dbp);
205 *dbcp = dbc;
206 return (0);
210 * __ram_get --
211 * Recno db->get function.
213 static int
214 __ram_get(argdbp, txn, key, data, flags)
215 DB *argdbp;
216 DB_TXN *txn;
217 DBT *key, *data;
218 u_int32_t flags;
220 DB *dbp;
221 int ret;
223 DEBUG_LWRITE(argdbp, txn, "ram_get", key, NULL, flags);
225 /* Check for invalid flags. */
226 if ((ret = __db_getchk(argdbp, key, data, flags)) != 0)
227 return (ret);
229 GETHANDLE(argdbp, txn, &dbp, ret);
231 ret = __ram_iget(dbp, key, data);
233 PUTHANDLE(dbp);
234 return (ret);
238 * __ram_iget --
239 * Internal ram get function, called for both standard and cursor
240 * get after the flags have been checked.
242 static int
243 __ram_iget(dbp, key, data)
244 DB *dbp;
245 DBT *key, *data;
247 BTREE *t;
248 PAGE *h;
249 db_indx_t indx;
250 db_recno_t recno;
251 int exact, ret, stack;
253 stack = 0;
254 t = dbp->internal;
256 /* Check the user's record number and fill in as necessary. */
257 if ((ret = __ram_getno(dbp, key, &recno, 0)) != 0)
258 goto done;
260 /* Search the tree for the record. */
261 if ((ret = __bam_rsearch(dbp, &recno, S_FIND, 1, &exact)) != 0)
262 goto done;
263 if (!exact)
264 return (DB_NOTFOUND);
265 stack = 1;
267 h = t->bt_csp->page;
268 indx = t->bt_csp->indx;
270 /* If the record has already been deleted, we couldn't have found it. */
271 if (B_DISSET(GET_BKEYDATA(h, indx)->type)) {
272 ret = DB_KEYEMPTY;
273 goto done;
276 /* Return the data item. */
277 ret = __db_ret(dbp,
278 h, indx, data, &t->bt_rdata.data, &t->bt_rdata.ulen);
279 ++t->lstat.bt_get;
281 done: /* Discard the stack. */
282 if (stack)
283 __bam_stkrel(dbp);
285 return (ret);
289 * __ram_put --
290 * Recno db->put function.
292 static int
293 __ram_put(argdbp, txn, key, data, flags)
294 DB *argdbp;
295 DB_TXN *txn;
296 DBT *key, *data;
297 u_int32_t flags;
299 BTREE *t;
300 DB *dbp;
301 db_recno_t recno;
302 int ret;
304 DEBUG_LWRITE(argdbp, txn, "ram_put", key, data, flags);
306 /* Check for invalid flags. */
307 if ((ret = __db_putchk(argdbp,
308 key, data, flags, F_ISSET(argdbp, DB_AM_RDONLY), 0)) != 0)
309 return (ret);
311 GETHANDLE(argdbp, txn, &dbp, ret);
314 * If we're appending to the tree, make sure we've read in all of
315 * the backing source file. Otherwise, check the user's record
316 * number and fill in as necessary.
318 ret = LF_ISSET(DB_APPEND) ?
319 __ram_snapshot(dbp) : __ram_getno(dbp, key, &recno, 1);
321 /* Add the record. */
322 if (ret == 0)
323 ret = __ram_add(dbp, &recno, data, flags, 0);
325 /* If we're appending to the tree, we have to return the record. */
326 if (ret == 0 && LF_ISSET(DB_APPEND)) {
327 t = dbp->internal;
328 ret = __db_retcopy(key, &recno, sizeof(recno),
329 &t->bt_rkey.data, &t->bt_rkey.ulen, dbp->db_malloc);
332 PUTHANDLE(dbp);
333 return (ret);
337 * __ram_sync --
338 * Recno db->sync function.
340 static int
341 __ram_sync(argdbp, flags)
342 DB *argdbp;
343 u_int32_t flags;
345 DB *dbp;
346 int ret;
348 DEBUG_LWRITE(argdbp, NULL, "ram_sync", NULL, NULL, flags);
350 /* Sync the underlying btree. */
351 if ((ret = __bam_sync(argdbp, flags)) != 0)
352 return (ret);
354 /* Copy back the backing source file. */
355 GETHANDLE(argdbp, NULL, &dbp, ret);
356 ret = __ram_writeback(dbp);
357 PUTHANDLE(dbp);
359 return (ret);
363 * __ram_close --
364 * Recno db->close function.
366 * PUBLIC: int __ram_close __P((DB *));
369 __ram_close(argdbp)
370 DB *argdbp;
372 RECNO *rp;
374 DEBUG_LWRITE(argdbp, NULL, "ram_close", NULL, NULL, 0);
376 rp = ((BTREE *)argdbp->internal)->bt_recno;
378 /* Close any underlying mmap region. */
379 if (rp->re_smap != NULL)
380 (void)__db_unmapfile(rp->re_smap, rp->re_msize);
382 /* Close any backing source file descriptor. */
383 if (rp->re_fd != -1)
384 (void)__db_close(rp->re_fd);
386 /* Free any backing source file name. */
387 if (rp->re_source != NULL)
388 FREES(rp->re_source);
390 /* Free allocated memory. */
391 FREE(rp, sizeof(RECNO));
392 ((BTREE *)argdbp->internal)->bt_recno = NULL;
394 /* Close the underlying btree. */
395 return (__bam_close(argdbp));
399 * __ram_c_close --
400 * Recno cursor->close function.
402 static int
403 __ram_c_close(dbc)
404 DBC *dbc;
406 DEBUG_LWRITE(dbc->dbp, dbc->txn, "ram_c_close", NULL, NULL, 0);
408 return (__ram_c_iclose(dbc->dbp, dbc));
412 * __ram_c_iclose --
413 * Close a single cursor -- internal version.
415 * PUBLIC: int __ram_c_iclose __P((DB *, DBC *));
418 __ram_c_iclose(dbp, dbc)
419 DB *dbp;
420 DBC *dbc;
422 /* Remove the cursor from the queue. */
423 CURSOR_SETUP(dbp);
424 TAILQ_REMOVE(&dbp->curs_queue, dbc, links);
425 CURSOR_TEARDOWN(dbp);
427 /* Discard the structures. */
428 FREE(dbc->internal, sizeof(RCURSOR));
429 FREE(dbc, sizeof(DBC));
431 return (0);
435 * __ram_c_del --
436 * Recno cursor->c_del function.
438 static int
439 __ram_c_del(dbc, flags)
440 DBC *dbc;
441 u_int32_t flags;
443 DBT key;
444 RCURSOR *cp;
445 int ret;
447 DEBUG_LWRITE(dbc->dbp, dbc->txn, "ram_c_del", NULL, NULL, flags);
449 cp = dbc->internal;
451 /* Check for invalid flags. */
452 if ((ret = __db_cdelchk(dbc->dbp, flags,
453 F_ISSET(dbc->dbp, DB_AM_RDONLY), cp->recno != RECNO_OOB)) != 0)
454 return (ret);
456 /* If already deleted, return failure. */
457 if (CD_ISSET(dbc->dbp, cp))
458 return (DB_KEYEMPTY);
460 /* Build a normal delete request. */
461 memset(&key, 0, sizeof(key));
462 key.data = &cp->recno;
463 key.size = sizeof(db_recno_t);
464 if ((ret = __ram_delete(dbc->dbp, dbc->txn, &key, 0)) == 0)
465 CD_SET(dbc->dbp, cp);
467 return (ret);
471 * __ram_c_get --
472 * Recno cursor->c_get function.
474 static int
475 __ram_c_get(dbc, key, data, flags)
476 DBC *dbc;
477 DBT *key, *data;
478 u_int32_t flags;
480 BTREE *t;
481 DB *dbp;
482 RCURSOR *cp, copy;
483 int ret;
485 DEBUG_LREAD(dbc->dbp, dbc->txn, "ram_c_get",
486 flags == DB_SET || flags == DB_SET_RANGE ? key : NULL,
487 NULL, flags);
489 cp = dbc->internal;
490 dbp = dbc->dbp;
492 /* Check for invalid flags. */
493 if ((ret = __db_cgetchk(dbc->dbp,
494 key, data, flags, cp->recno != RECNO_OOB)) != 0)
495 return (ret);
497 GETHANDLE(dbc->dbp, dbc->txn, &dbp, ret);
498 t = dbp->internal;
500 /* Initialize the cursor for a new retrieval. */
501 copy = *cp;
503 retry: /* Update the record number. */
504 switch (flags) {
505 case DB_CURRENT:
506 if (CD_ISSET(dbp, cp)) {
507 PUTHANDLE(dbp);
508 return (DB_KEYEMPTY);
510 break;
511 case DB_NEXT:
512 if (CD_ISSET(dbp, cp))
513 break;
514 if (cp->recno != RECNO_OOB) {
515 ++cp->recno;
516 break;
518 /* FALLTHROUGH */
519 case DB_FIRST:
520 flags = DB_NEXT;
521 cp->recno = 1;
522 break;
523 case DB_PREV:
524 if (cp->recno != RECNO_OOB) {
525 if (cp->recno == 1)
526 return (DB_NOTFOUND);
527 --cp->recno;
528 break;
530 /* FALLTHROUGH */
531 case DB_LAST:
532 flags = DB_PREV;
533 if (((ret = __ram_snapshot(dbp)) != 0) && ret != DB_NOTFOUND)
534 goto err;
535 if ((ret = __bam_nrecs(dbp, &cp->recno)) != 0)
536 goto err;
537 if (cp->recno == 0)
538 return (DB_NOTFOUND);
539 break;
540 case DB_SET:
541 case DB_SET_RANGE:
542 if ((ret = __ram_getno(dbp, key, &cp->recno, 0)) != 0)
543 goto err;
544 break;
548 * Return the key if the user didn't give us one, and then pass it
549 * into __ram_iget().
551 if (flags != DB_SET && flags != DB_SET_RANGE &&
552 (ret = __db_retcopy(key, &cp->recno, sizeof(cp->recno),
553 &t->bt_rkey.data, &t->bt_rkey.ulen, dbp->db_malloc)) != 0)
554 return (ret);
557 * The cursor was reset, so the delete adjustment is no
558 * longer necessary.
560 CD_CLR(dbp, cp);
563 * Retrieve the record.
565 * Skip any keys that don't really exist.
567 if ((ret = __ram_iget(dbp, key, data)) != 0)
568 if (ret == DB_KEYEMPTY &&
569 (flags == DB_NEXT || flags == DB_PREV))
570 goto retry;
572 err: if (ret != 0)
573 *cp = copy;
575 PUTHANDLE(dbp);
576 return (ret);
580 * __ram_c_put --
581 * Recno cursor->c_put function.
583 static int
584 __ram_c_put(dbc, key, data, flags)
585 DBC *dbc;
586 DBT *key, *data;
587 u_int32_t flags;
589 BTREE *t;
590 RCURSOR *cp, copy;
591 DB *dbp;
592 int exact, ret;
593 void *arg;
595 DEBUG_LWRITE(dbc->dbp, dbc->txn, "ram_c_put", NULL, data, flags);
597 cp = dbc->internal;
599 if ((ret = __db_cputchk(dbc->dbp, key, data, flags,
600 F_ISSET(dbc->dbp, DB_AM_RDONLY), cp->recno != RECNO_OOB)) != 0)
601 return (ret);
603 GETHANDLE(dbc->dbp, dbc->txn, &dbp, ret);
604 t = dbp->internal;
606 /* Initialize the cursor for a new retrieval. */
607 copy = *cp;
610 * To split, we need a valid key for the page. Since it's a cursor,
611 * we have to build one.
613 * The split code discards all short-term locks and stack pages.
615 if (0) {
616 split: arg = &cp->recno;
617 if ((ret = __bam_split(dbp, arg)) != 0)
618 goto err;
621 if ((ret = __bam_rsearch(dbp, &cp->recno, S_INSERT, 1, &exact)) != 0)
622 goto err;
623 if (!exact) {
624 ret = DB_NOTFOUND;
625 goto err;
627 if ((ret = __bam_iitem(dbp, &t->bt_csp->page,
628 &t->bt_csp->indx, key, data, flags, 0)) == DB_NEEDSPLIT) {
629 if ((ret = __bam_stkrel(dbp)) != 0)
630 goto err;
631 goto split;
633 if ((ret = __bam_stkrel(dbp)) != 0)
634 goto err;
636 switch (flags) {
637 case DB_AFTER:
638 /* Adjust the cursors. */
639 __ram_ca(dbp, cp->recno, CA_IAFTER);
641 /* Set this cursor to reference the new record. */
642 cp->recno = copy.recno + 1;
643 break;
644 case DB_BEFORE:
645 /* Adjust the cursors. */
646 __ram_ca(dbp, cp->recno, CA_IBEFORE);
648 /* Set this cursor to reference the new record. */
649 cp->recno = copy.recno;
650 break;
654 * The cursor was reset, so the delete adjustment is no
655 * longer necessary.
657 CD_CLR(dbp, cp);
659 err: if (ret != 0)
660 *cp = copy;
662 PUTHANDLE(dbp);
663 return (ret);
667 * __ram_ca --
668 * Adjust cursors.
670 * PUBLIC: void __ram_ca __P((DB *, db_recno_t, ca_recno_arg));
672 void
673 __ram_ca(dbp, recno, op)
674 DB *dbp;
675 db_recno_t recno;
676 ca_recno_arg op;
678 DBC *dbc;
679 RCURSOR *cp;
682 * Adjust the cursors. See the comment in __bam_ca_delete().
684 CURSOR_SETUP(dbp);
685 for (dbc = TAILQ_FIRST(&dbp->curs_queue);
686 dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
687 cp = (RCURSOR *)dbc->internal;
688 switch (op) {
689 case CA_DELETE:
690 if (recno > cp->recno)
691 --cp->recno;
692 break;
693 case CA_IAFTER:
694 if (recno > cp->recno)
695 ++cp->recno;
696 break;
697 case CA_IBEFORE:
698 if (recno >= cp->recno)
699 ++cp->recno;
700 break;
703 CURSOR_TEARDOWN(dbp);
#ifdef DEBUG
/*
 * __ram_cprint --
 *	Display the current recno cursor list.
 *
 *	Writes one line per queued cursor to stderr, giving the address of
 *	its private RCURSOR and the record number it references.  The
 *	address is printed with %p so that it is not truncated where
 *	pointers are wider than u_int.  Always returns 0.
 *
 * PUBLIC: int __ram_cprint __P((DB *));
 */
int
__ram_cprint(dbp)
	DB *dbp;
{
	DBC *dbc;
	RCURSOR *cp;

	CURSOR_SETUP(dbp);
	for (dbc = TAILQ_FIRST(&dbp->curs_queue);
	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
		cp = (RCURSOR *)dbc->internal;
		fprintf(stderr,
		    "%p: recno: %lu\n", (void *)cp, (u_long)cp->recno);
	}
	CURSOR_TEARDOWN(dbp);

	return (0);
}
#endif /* DEBUG */
734 * __ram_getno --
735 * Check the user's record number, and make sure we've seen it.
737 * PUBLIC: int __ram_getno __P((DB *, const DBT *, db_recno_t *, int));
740 __ram_getno(dbp, key, rep, can_create)
741 DB *dbp;
742 const DBT *key;
743 db_recno_t *rep;
744 int can_create;
746 db_recno_t recno;
748 /* Check the user's record number. */
749 if ((recno = *(db_recno_t *)key->data) == 0) {
750 __db_err(dbp->dbenv, "illegal record number of 0");
751 return (EINVAL);
753 if (rep != NULL)
754 *rep = recno;
757 * Btree can neither create records or read them in. Recno can
758 * do both, see if we can find the record.
760 return (dbp->type == DB_RECNO ?
761 __ram_update(dbp, recno, can_create) : 0);
765 * __ram_snapshot --
766 * Read in any remaining records from the backing input file.
768 * PUBLIC: int __ram_snapshot __P((DB *));
771 __ram_snapshot(dbp)
772 DB *dbp;
774 return (__ram_update(dbp, DB_MAX_RECORDS, 0));
778 * __ram_update --
779 * Ensure the tree has records up to and including the specified one.
781 static int
782 __ram_update(dbp, recno, can_create)
783 DB *dbp;
784 db_recno_t recno;
785 int can_create;
787 BTREE *t;
788 RECNO *rp;
789 db_recno_t nrecs;
790 int ret;
792 t = dbp->internal;
793 rp = t->bt_recno;
796 * If we can't create records and we've read the entire backing input
797 * file, we're done.
799 if (!can_create && F_ISSET(rp, RECNO_EOF))
800 return (0);
803 * If we haven't seen this record yet, try to get it from the original
804 * file.
806 if ((ret = __bam_nrecs(dbp, &nrecs)) != 0)
807 return (ret);
808 if (!F_ISSET(rp, RECNO_EOF) && recno > nrecs) {
809 if ((ret = rp->re_irec(dbp, recno)) != 0)
810 return (ret);
811 if ((ret = __bam_nrecs(dbp, &nrecs)) != 0)
812 return (ret);
816 * If we can create records, create empty ones up to the requested
817 * record.
819 if (!can_create || recno <= nrecs + 1)
820 return (0);
822 t->bt_rdata.dlen = 0;
823 t->bt_rdata.doff = 0;
824 t->bt_rdata.flags = 0;
825 if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
826 if (t->bt_rdata.ulen < rp->re_len) {
827 t->bt_rdata.data = t->bt_rdata.data == NULL ?
828 (void *)__db_malloc(rp->re_len) :
829 (void *)__db_realloc(t->bt_rdata.data, rp->re_len);
830 if (t->bt_rdata.data == NULL) {
831 t->bt_rdata.ulen = 0;
832 return (ENOMEM);
834 t->bt_rdata.ulen = rp->re_len;
836 t->bt_rdata.size = rp->re_len;
837 memset(t->bt_rdata.data, rp->re_pad, rp->re_len);
838 } else
839 t->bt_rdata.size = 0;
841 while (recno > ++nrecs)
842 if ((ret = __ram_add(dbp,
843 &nrecs, &t->bt_rdata, 0, BI_DELETED)) != 0)
844 return (ret);
845 return (0);
849 * __ram_source --
850 * Load information about the backing file.
852 static int
853 __ram_source(dbp, rp, fname)
854 DB *dbp;
855 RECNO *rp;
856 const char *fname;
858 size_t size;
859 u_int32_t bytes, mbytes, oflags;
860 int ret;
862 if ((ret = __db_appname(dbp->dbenv,
863 DB_APP_DATA, NULL, fname, 0, NULL, &rp->re_source)) != 0)
864 return (ret);
866 oflags = F_ISSET(dbp, DB_AM_RDONLY) ? DB_RDONLY : 0;
867 if ((ret =
868 __db_open(rp->re_source, oflags, oflags, 0, &rp->re_fd)) != 0) {
869 __db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret));
870 goto err;
874 * XXX
875 * We'd like to test to see if the file is too big to mmap. Since we
876 * don't know what size or type off_t's or size_t's are, or the largest
877 * unsigned integral type is, or what random insanity the local C
878 * compiler will perpetrate, doing the comparison in a portable way is
879 * flatly impossible. Hope that mmap fails if the file is too large.
881 if ((ret = __db_ioinfo(rp->re_source,
882 rp->re_fd, &mbytes, &bytes, NULL)) != 0) {
883 __db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret));
884 goto err;
886 if (mbytes == 0 && bytes == 0) {
887 F_SET(rp, RECNO_EOF);
888 return (0);
891 size = mbytes * MEGABYTE + bytes;
892 if ((ret = __db_mapfile(rp->re_source,
893 rp->re_fd, (size_t)size, 1, &rp->re_smap)) != 0)
894 goto err;
895 rp->re_cmap = rp->re_smap;
896 rp->re_emap = (u_int8_t *)rp->re_smap + (rp->re_msize = size);
897 rp->re_irec = F_ISSET(dbp, DB_RE_FIXEDLEN) ? __ram_fmap : __ram_vmap;
898 return (0);
900 err: FREES(rp->re_source)
901 return (ret);
905 * __ram_writeback --
906 * Rewrite the backing file.
908 static int
909 __ram_writeback(dbp)
910 DB *dbp;
912 RECNO *rp;
913 DBT key, data;
914 db_recno_t keyno;
915 ssize_t nw;
916 int fd, ret, t_ret;
917 u_int8_t delim, *pad;
919 rp = ((BTREE *)dbp->internal)->bt_recno;
921 /* If the file wasn't modified, we're done. */
922 if (!F_ISSET(rp, RECNO_MODIFIED))
923 return (0);
925 /* If there's no backing source file, we're done. */
926 if (rp->re_source == NULL) {
927 F_CLR(rp, RECNO_MODIFIED);
928 return (0);
932 * Read any remaining records into the tree.
934 * XXX
935 * This is why we can't support transactions when applications specify
936 * backing (re_source) files. At this point we have to read in the
937 * rest of the records from the file so that we can write all of the
938 * records back out again, which could modify a page for which we'd
939 * have to log changes and which we don't have locked. This could be
940 * partially fixed by taking a snapshot of the entire file during the
941 * db_open(), or, since db_open() isn't transaction protected, as part
942 * of the first DB operation. But, if a checkpoint occurs then, the
943 * part of the log holding the copy of the file could be discarded, and
944 * that would make it impossible to recover in the face of disaster.
945 * This could all probably be fixed, but it would require transaction
946 * protecting the backing source file, i.e. mpool would have to know
947 * about it, and we don't want to go there.
949 if ((ret = __ram_snapshot(dbp)) != 0 && ret != DB_NOTFOUND)
950 return (ret);
953 * !!!
954 * Close any underlying mmap region. This is required for Windows NT
955 * (4.0, Service Pack 2) -- if the file is still mapped, the following
956 * open will fail.
958 if (rp->re_smap != NULL) {
959 (void)__db_unmapfile(rp->re_smap, rp->re_msize);
960 rp->re_smap = NULL;
963 /* Get rid of any backing file descriptor, just on GP's. */
964 if (rp->re_fd != -1) {
965 (void)__db_close(rp->re_fd);
966 rp->re_fd = -1;
969 /* Open the file, truncating it. */
970 if ((ret = __db_open(rp->re_source,
971 DB_SEQUENTIAL | DB_TRUNCATE,
972 DB_SEQUENTIAL | DB_TRUNCATE, 0, &fd)) != 0) {
973 __db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret));
974 return (ret);
978 * We step through the records, writing each one out. Use the record
979 * number and the dbp->get() function, instead of a cursor, so we find
980 * and write out "deleted" or non-existent records.
982 memset(&key, 0, sizeof(key));
983 memset(&data, 0, sizeof(data));
984 key.size = sizeof(db_recno_t);
985 key.data = &keyno;
988 * We'll need the delimiter if we're doing variable-length records,
989 * and the pad character if we're doing fixed-length records.
991 delim = rp->re_delim;
992 if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
993 if ((pad = (u_int8_t *)__db_malloc(rp->re_len)) == NULL) {
994 ret = ENOMEM;
995 goto err;
997 memset(pad, rp->re_pad, rp->re_len);
998 } else
999 COMPQUIET(pad, NULL);
1000 for (keyno = 1;; ++keyno) {
1001 switch (ret = dbp->get(dbp, NULL, &key, &data, 0)) {
1002 case 0:
1003 if ((ret =
1004 __db_write(fd, data.data, data.size, &nw)) != 0)
1005 goto err;
1006 if (nw != (ssize_t)data.size) {
1007 ret = EIO;
1008 goto err;
1010 break;
1011 case DB_KEYEMPTY:
1012 if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
1013 if ((ret =
1014 __db_write(fd, pad, rp->re_len, &nw)) != 0)
1015 goto err;
1016 if (nw != (ssize_t)rp->re_len) {
1017 ret = EIO;
1018 goto err;
1021 break;
1022 case DB_NOTFOUND:
1023 ret = 0;
1024 goto done;
1026 if (!F_ISSET(dbp, DB_RE_FIXEDLEN)) {
1027 if ((ret = __db_write(fd, &delim, 1, &nw)) != 0)
1028 goto err;
1029 if (nw != 1) {
1030 ret = EIO;
1031 goto err;
1036 err:
1037 done: /* Close the file descriptor. */
1038 if ((t_ret = __db_close(fd)) != 0 || ret == 0)
1039 ret = t_ret;
1041 if (ret == 0)
1042 F_CLR(rp, RECNO_MODIFIED);
1043 return (ret);
1047 * __ram_fmap --
1048 * Get fixed length records from a file.
1050 static int
1051 __ram_fmap(dbp, top)
1052 DB *dbp;
1053 db_recno_t top;
1055 BTREE *t;
1056 DBT data;
1057 RECNO *rp;
1058 db_recno_t recno;
1059 u_int32_t len;
1060 u_int8_t *sp, *ep, *p;
1061 int ret;
1063 if ((ret = __bam_nrecs(dbp, &recno)) != 0)
1064 return (ret);
1066 t = dbp->internal;
1067 rp = t->bt_recno;
1068 if (t->bt_rdata.ulen < rp->re_len) {
1069 t->bt_rdata.data = t->bt_rdata.data == NULL ?
1070 (void *)__db_malloc(rp->re_len) :
1071 (void *)__db_realloc(t->bt_rdata.data, rp->re_len);
1072 if (t->bt_rdata.data == NULL) {
1073 t->bt_rdata.ulen = 0;
1074 return (ENOMEM);
1076 t->bt_rdata.ulen = rp->re_len;
1079 memset(&data, 0, sizeof(data));
1080 data.data = t->bt_rdata.data;
1081 data.size = rp->re_len;
1083 sp = (u_int8_t *)rp->re_cmap;
1084 ep = (u_int8_t *)rp->re_emap;
1085 while (recno < top) {
1086 if (sp >= ep) {
1087 F_SET(rp, RECNO_EOF);
1088 return (DB_NOTFOUND);
1090 len = rp->re_len;
1091 for (p = t->bt_rdata.data;
1092 sp < ep && len > 0; *p++ = *sp++, --len)
1096 * Another process may have read this record from the input
1097 * file and stored it into the database already, in which
1098 * case we don't need to repeat that operation. We detect
1099 * this by checking if the last record we've read is greater
1100 * or equal to the number of records in the database.
1102 * XXX
1103 * We should just do a seek, since the records are fixed
1104 * length.
1106 if (rp->re_last >= recno) {
1107 if (len != 0)
1108 memset(p, rp->re_pad, len);
1110 ++recno;
1111 if ((ret = __ram_add(dbp, &recno, &data, 0, 0)) != 0)
1112 return (ret);
1114 ++rp->re_last;
1116 rp->re_cmap = sp;
1117 return (0);
1121 * __ram_vmap --
1122 * Get variable length records from a file.
1124 static int
1125 __ram_vmap(dbp, top)
1126 DB *dbp;
1127 db_recno_t top;
1129 BTREE *t;
1130 DBT data;
1131 RECNO *rp;
1132 db_recno_t recno;
1133 u_int8_t *sp, *ep;
1134 int delim, ret;
1136 t = dbp->internal;
1137 rp = t->bt_recno;
1139 if ((ret = __bam_nrecs(dbp, &recno)) != 0)
1140 return (ret);
1142 memset(&data, 0, sizeof(data));
1144 delim = rp->re_delim;
1146 sp = (u_int8_t *)rp->re_cmap;
1147 ep = (u_int8_t *)rp->re_emap;
1148 while (recno < top) {
1149 if (sp >= ep) {
1150 F_SET(rp, RECNO_EOF);
1151 return (DB_NOTFOUND);
1153 for (data.data = sp; sp < ep && *sp != delim; ++sp)
1157 * Another process may have read this record from the input
1158 * file and stored it into the database already, in which
1159 * case we don't need to repeat that operation. We detect
1160 * this by checking if the last record we've read is greater
1161 * or equal to the number of records in the database.
1163 if (rp->re_last >= recno) {
1164 data.size = sp - (u_int8_t *)data.data;
1165 ++recno;
1166 if ((ret = __ram_add(dbp, &recno, &data, 0, 0)) != 0)
1167 return (ret);
1169 ++rp->re_last;
1170 ++sp;
1172 rp->re_cmap = sp;
1173 return (0);
1177 * __ram_add --
1178 * Add records into the tree.
1180 static int
1181 __ram_add(dbp, recnop, data, flags, bi_flags)
1182 DB *dbp;
1183 db_recno_t *recnop;
1184 DBT *data;
1185 u_int32_t flags, bi_flags;
1187 BKEYDATA *bk;
1188 BTREE *t;
1189 PAGE *h;
1190 db_indx_t indx;
1191 int exact, isdeleted, ret, stack;
1193 t = dbp->internal;
1195 retry: /* Find the slot for insertion. */
1196 if ((ret = __bam_rsearch(dbp, recnop,
1197 S_INSERT | (LF_ISSET(DB_APPEND) ? S_APPEND : 0), 1, &exact)) != 0)
1198 return (ret);
1199 h = t->bt_csp->page;
1200 indx = t->bt_csp->indx;
1201 stack = 1;
1204 * If DB_NOOVERWRITE is set and the item already exists in the tree,
1205 * return an error unless the item has been marked for deletion.
1207 isdeleted = 0;
1208 if (exact) {
1209 bk = GET_BKEYDATA(h, indx);
1210 if (B_DISSET(bk->type)) {
1211 isdeleted = 1;
1212 __bam_ca_replace(dbp, h->pgno, indx, REPLACE_SETUP);
1213 } else
1214 if (LF_ISSET(DB_NOOVERWRITE)) {
1215 ret = DB_KEYEXIST;
1216 goto err;
1221 * Select the arguments for __bam_iitem() and do the insert. If the
1222 * key is an exact match, or we're replacing the data item with a
1223 * new data item, replace the current item. If the key isn't an exact
1224 * match, we're inserting a new key/data pair, before the search
1225 * location.
1227 switch (ret = __bam_iitem(dbp,
1228 &h, &indx, NULL, data, exact ? DB_CURRENT : DB_BEFORE, bi_flags)) {
1229 case 0:
1231 * Done. Clean up the cursor and adjust the internal page
1232 * counts.
1234 if (isdeleted)
1235 __bam_ca_replace(dbp, h->pgno, indx, REPLACE_SUCCESS);
1236 break;
1237 case DB_NEEDSPLIT:
1239 * We have to split the page. Back out the cursor setup,
1240 * discard the stack of pages, and do the split.
1242 if (isdeleted)
1243 __bam_ca_replace(dbp, h->pgno, indx, REPLACE_FAILED);
1245 (void)__bam_stkrel(dbp);
1246 stack = 0;
1248 if ((ret = __bam_split(dbp, recnop)) != 0)
1249 break;
1251 goto retry;
1252 /* NOTREACHED */
1253 default:
1254 if (isdeleted)
1255 __bam_ca_replace(dbp, h->pgno, indx, REPLACE_FAILED);
1256 break;
1259 err: if (stack)
1260 __bam_stkrel(dbp);
1262 return (ret);