2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997, 1998
5 * Sleepycat Software. All rights reserved.
10 static const char sccsid
[] = "@(#)mp_region.c 10.30 (Sleepycat) 5/31/98";
13 #ifndef NO_SYSTEM_INCLUDES
14 #include <sys/types.h>
24 #include "common_ext.h"
28 * Allocate some space in the mpool region.
30 * PUBLIC: int __memp_ralloc __P((DB_MPOOL *, size_t, size_t *, void *));
33 __memp_ralloc(dbmp
, len
, offsetp
, retp
)
42 int nomore
, restart
, ret
, wrote
;
48 alloc
: if ((ret
= __db_shalloc(dbmp
->addr
, len
, MUTEX_ALIGNMENT
, &p
)) == 0) {
50 *offsetp
= R_OFFSET(dbmp
, p
);
55 __db_err(dbmp
->dbenv
, "%s", strerror(ret
));
59 /* Look for a buffer on the free list that's the right size. */
61 SH_TAILQ_FIRST(&mp
->bhfq
, __bh
); bhp
!= NULL
; bhp
= nbhp
) {
62 nbhp
= SH_TAILQ_NEXT(bhp
, q
, __bh
);
64 if (__db_shsizeof(bhp
) == len
) {
65 SH_TAILQ_REMOVE(&mp
->bhfq
, bhp
, q
, __bh
);
67 *offsetp
= R_OFFSET(dbmp
, bhp
);
73 /* Discard from the free list until we've freed enough memory. */
76 SH_TAILQ_FIRST(&mp
->bhfq
, __bh
); bhp
!= NULL
; bhp
= nbhp
) {
77 nbhp
= SH_TAILQ_NEXT(bhp
, q
, __bh
);
79 SH_TAILQ_REMOVE(&mp
->bhfq
, bhp
, q
, __bh
);
80 __db_shalloc_free(dbmp
->addr
, bhp
);
81 --mp
->stat
.st_page_clean
;
84 * Retry as soon as we've freed up sufficient space. If we
85 * will have to coalesce memory to satisfy the request, don't
86 * try until it's likely (possible?) that we'll succeed.
88 total
+= fsize
= __db_shsizeof(bhp
);
89 if (fsize
>= len
|| total
>= 3 * len
)
93 retry
: /* Find a buffer we can flush; pure LRU. */
96 SH_TAILQ_FIRST(&mp
->bhq
, __bh
); bhp
!= NULL
; bhp
= nbhp
) {
97 nbhp
= SH_TAILQ_NEXT(bhp
, q
, __bh
);
99 /* Ignore pinned or locked (I/O in progress) buffers. */
100 if (bhp
->ref
!= 0 || F_ISSET(bhp
, BH_LOCKED
))
103 /* Find the associated MPOOLFILE. */
104 mfp
= R_ADDR(dbmp
, bhp
->mf_offset
);
107 * Write the page if it's dirty.
109 * If we wrote the page, fall through and free the buffer. We
110 * don't have to rewalk the list to acquire the buffer because
111 * it was never available for any other process to modify it.
112 * If we didn't write the page, but we discarded and reacquired
113 * the region lock, restart the buffer list walk. If we neither
114 * wrote the buffer nor discarded the region lock, continue down
117 if (F_ISSET(bhp
, BH_DIRTY
)) {
118 if ((ret
= __memp_bhwrite(dbmp
,
119 mfp
, bhp
, &restart
, &wrote
)) != 0)
123 * It's possible that another process wants this buffer
124 * and incremented the ref count while we were writing
131 ++mp
->stat
.st_rw_evict
;
138 ++mp
->stat
.st_ro_evict
;
141 * Check to see if the buffer is the size we're looking for.
142 * If it is, simply reuse it.
144 total
+= fsize
= __db_shsizeof(bhp
);
146 __memp_bhfree(dbmp
, mfp
, bhp
, 0);
149 *offsetp
= R_OFFSET(dbmp
, bhp
);
150 *(void **)retp
= bhp
;
154 /* Free the buffer. */
155 __memp_bhfree(dbmp
, mfp
, bhp
, 1);
158 * Retry as soon as we've freed up sufficient space. If we
159 * have to coalesce memory to satisfy the request, don't
160 * try until it's likely (possible?) that we'll succeed.
162 if (fsize
>= len
|| total
>= 3 * len
)
165 /* Restart the walk if we discarded the region lock. */
175 * Attach to, and optionally create, the mpool region.
177 * PUBLIC: int __memp_ropen
178 * PUBLIC: __P((DB_MPOOL *, const char *, size_t, int, int, u_int32_t));
181 __memp_ropen(dbmp
, path
, cachesize
, mode
, is_private
, flags
)
185 int mode
, is_private
;
193 * Unlike other DB subsystems, mpool can't simply grow the region
194 * because it returns pointers into the region to its clients. To
195 * "grow" the region, we'd have to allocate a new region and then
196 * store a region number in the structures that reference regional
197 * objects. It's reasonable that we fail regardless, as clients
198 * shouldn't have every page in the region pinned, so the only
199 * "failure" mode should be a performance penalty because we don't
200 * find a page in the cache that we'd like to have found.
202 * Up the user's cachesize by 25% to account for our overhead.
205 if (cachesize
< DB_CACHESIZE_MIN
) {
206 if (cachesize
== 0) {
208 cachesize
= DB_CACHESIZE_DEF
;
210 cachesize
= DB_CACHESIZE_MIN
;
212 rlen
= cachesize
+ cachesize
/ 4;
217 * If it's a private mpool, use malloc, it's a lot faster than
218 * instantiating a region.
220 dbmp
->reginfo
.dbenv
= dbmp
->dbenv
;
221 dbmp
->reginfo
.appname
= DB_APP_NONE
;
223 dbmp
->reginfo
.path
= NULL
;
225 if ((dbmp
->reginfo
.path
= __db_strdup(path
)) == NULL
)
227 dbmp
->reginfo
.file
= DB_DEFAULT_MPOOL_FILE
;
228 dbmp
->reginfo
.mode
= mode
;
229 dbmp
->reginfo
.size
= rlen
;
230 dbmp
->reginfo
.dbflags
= flags
;
231 dbmp
->reginfo
.flags
= 0;
233 F_SET(&dbmp
->reginfo
, REGION_SIZEDEF
);
236 * If we're creating a temporary region, don't use any standard
240 dbmp
->reginfo
.appname
= DB_APP_TMP
;
241 dbmp
->reginfo
.file
= NULL
;
242 F_SET(&dbmp
->reginfo
, REGION_PRIVATE
);
245 if ((ret
= __db_rattach(&dbmp
->reginfo
)) != 0) {
246 if (dbmp
->reginfo
.path
!= NULL
)
247 FREES(dbmp
->reginfo
.path
);
252 * The MPOOL structure is first in the region, the rest of the region
255 dbmp
->mp
= dbmp
->reginfo
.addr
;
256 dbmp
->addr
= (u_int8_t
*)dbmp
->mp
+ sizeof(MPOOL
);
258 /* Initialize a created region. */
259 if (F_ISSET(&dbmp
->reginfo
, REGION_CREATED
)) {
261 SH_TAILQ_INIT(&mp
->bhq
);
262 SH_TAILQ_INIT(&mp
->bhfq
);
263 SH_TAILQ_INIT(&mp
->mpfq
);
265 __db_shalloc_init(dbmp
->addr
, rlen
- sizeof(MPOOL
));
268 * Assume we want to keep the hash chains with under 10 pages
269 * on each chain. We don't know the pagesize in advance, and
270 * it may differ for different files. Use a pagesize of 1K for
271 * the calculation -- we walk these chains a lot, they should
275 __db_tablesize((cachesize
/ (1 * 1024)) / 10);
277 /* Allocate hash table space and initialize it. */
278 if ((ret
= __db_shalloc(dbmp
->addr
,
279 mp
->htab_buckets
* sizeof(DB_HASHTAB
),
280 0, &dbmp
->htab
)) != 0)
282 __db_hashinit(dbmp
->htab
, mp
->htab_buckets
);
283 mp
->htab
= R_OFFSET(dbmp
, dbmp
->htab
);
288 memset(&mp
->stat
, 0, sizeof(mp
->stat
));
289 mp
->stat
.st_cachesize
= cachesize
;
294 /* Get the local hash table address. */
295 dbmp
->htab
= R_ADDR(dbmp
, dbmp
->mp
->htab
);
300 err
: UNLOCKREGION(dbmp
);
301 (void)__db_rdetach(&dbmp
->reginfo
);
302 if (F_ISSET(&dbmp
->reginfo
, REGION_CREATED
))
303 (void)memp_unlink(path
, 1, dbmp
->dbenv
);
305 if (dbmp
->reginfo
.path
!= NULL
)
306 FREES(dbmp
->reginfo
.path
);