Update.
[glibc.git] / db2 / mp / mp_region.c
blob b8a72286cd818f41ac9a80b72ad937cecc496e28
1 /*-
2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997, 1998
5 * Sleepycat Software. All rights reserved.
6 */
7 #include "config.h"
9 #ifndef lint
10 static const char sccsid[] = "@(#)mp_region.c 10.30 (Sleepycat) 5/31/98";
11 #endif /* not lint */
13 #ifndef NO_SYSTEM_INCLUDES
14 #include <sys/types.h>
16 #include <errno.h>
17 #include <string.h>
18 #endif
20 #include "db_int.h"
21 #include "shqueue.h"
22 #include "db_shash.h"
23 #include "mp.h"
24 #include "common_ext.h"
27 * __memp_ralloc --
28 * Allocate some space in the mpool region.
30 * PUBLIC: int __memp_ralloc __P((DB_MPOOL *, size_t, size_t *, void *));
32 int
33 __memp_ralloc(dbmp, len, offsetp, retp)
34 DB_MPOOL *dbmp;
35 size_t len, *offsetp;
36 void *retp;
38 BH *bhp, *nbhp;
39 MPOOL *mp;
40 MPOOLFILE *mfp;
41 size_t fsize, total;
42 int nomore, restart, ret, wrote;
43 void *p;
45 mp = dbmp->mp;
47 nomore = 0;
48 alloc: if ((ret = __db_shalloc(dbmp->addr, len, MUTEX_ALIGNMENT, &p)) == 0) {
49 if (offsetp != NULL)
50 *offsetp = R_OFFSET(dbmp, p);
51 *(void **)retp = p;
52 return (0);
54 if (nomore) {
55 __db_err(dbmp->dbenv, "%s", strerror(ret));
56 return (ret);
59 /* Look for a buffer on the free list that's the right size. */
60 for (bhp =
61 SH_TAILQ_FIRST(&mp->bhfq, __bh); bhp != NULL; bhp = nbhp) {
62 nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
64 if (__db_shsizeof(bhp) == len) {
65 SH_TAILQ_REMOVE(&mp->bhfq, bhp, q, __bh);
66 if (offsetp != NULL)
67 *offsetp = R_OFFSET(dbmp, bhp);
68 *(void **)retp = bhp;
69 return (0);
73 /* Discard from the free list until we've freed enough memory. */
74 total = 0;
75 for (bhp =
76 SH_TAILQ_FIRST(&mp->bhfq, __bh); bhp != NULL; bhp = nbhp) {
77 nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
79 SH_TAILQ_REMOVE(&mp->bhfq, bhp, q, __bh);
80 __db_shalloc_free(dbmp->addr, bhp);
81 --mp->stat.st_page_clean;
84 * Retry as soon as we've freed up sufficient space. If we
85 * will have to coalesce memory to satisfy the request, don't
86 * try until it's likely (possible?) that we'll succeed.
88 total += fsize = __db_shsizeof(bhp);
89 if (fsize >= len || total >= 3 * len)
90 goto alloc;
93 retry: /* Find a buffer we can flush; pure LRU. */
94 total = 0;
95 for (bhp =
96 SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = nbhp) {
97 nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
99 /* Ignore pinned or locked (I/O in progress) buffers. */
100 if (bhp->ref != 0 || F_ISSET(bhp, BH_LOCKED))
101 continue;
103 /* Find the associated MPOOLFILE. */
104 mfp = R_ADDR(dbmp, bhp->mf_offset);
107 * Write the page if it's dirty.
109 * If we wrote the page, fall through and free the buffer. We
110 * don't have to rewalk the list to acquire the buffer because
111 * it was never available for any other process to modify it.
112 * If we didn't write the page, but we discarded and reacquired
113 * the region lock, restart the buffer list walk. If we neither
114 * wrote the buffer nor discarded the region lock, continue down
115 * the buffer list.
117 if (F_ISSET(bhp, BH_DIRTY)) {
118 if ((ret = __memp_bhwrite(dbmp,
119 mfp, bhp, &restart, &wrote)) != 0)
120 return (ret);
123 * It's possible that another process wants this buffer
124 * and incremented the ref count while we were writing
125 * it.
127 if (bhp->ref != 0)
128 goto retry;
130 if (wrote)
131 ++mp->stat.st_rw_evict;
132 else {
133 if (restart)
134 goto retry;
135 continue;
137 } else
138 ++mp->stat.st_ro_evict;
141 * Check to see if the buffer is the size we're looking for.
142 * If it is, simply reuse it.
144 total += fsize = __db_shsizeof(bhp);
145 if (fsize == len) {
146 __memp_bhfree(dbmp, mfp, bhp, 0);
148 if (offsetp != NULL)
149 *offsetp = R_OFFSET(dbmp, bhp);
150 *(void **)retp = bhp;
151 return (0);
154 /* Free the buffer. */
155 __memp_bhfree(dbmp, mfp, bhp, 1);
158 * Retry as soon as we've freed up sufficient space. If we
159 * have to coalesce of memory to satisfy the request, don't
160 * try until it's likely (possible?) that we'll succeed.
162 if (fsize >= len || total >= 3 * len)
163 goto alloc;
165 /* Restart the walk if we discarded the region lock. */
166 if (restart)
167 goto retry;
169 nomore = 1;
170 goto alloc;
174 * __memp_ropen --
175 * Attach to, and optionally create, the mpool region.
177 * PUBLIC: int __memp_ropen
178 * PUBLIC: __P((DB_MPOOL *, const char *, size_t, int, int, u_int32_t));
181 __memp_ropen(dbmp, path, cachesize, mode, is_private, flags)
182 DB_MPOOL *dbmp;
183 const char *path;
184 size_t cachesize;
185 int mode, is_private;
186 u_int32_t flags;
188 MPOOL *mp;
189 size_t rlen;
190 int defcache, ret;
193 * Unlike other DB subsystems, mpool can't simply grow the region
194 * because it returns pointers into the region to its clients. To
195 * "grow" the region, we'd have to allocate a new region and then
196 * store a region number in the structures that reference regional
197 * objects. It's reasonable that we fail regardless, as clients
198 * shouldn't have every page in the region pinned, so the only
199 * "failure" mode should be a performance penalty because we don't
200 * find a page in the cache that we'd like to have found.
202 * Up the user's cachesize by 25% to account for our overhead.
204 defcache = 0;
205 if (cachesize < DB_CACHESIZE_MIN) {
206 if (cachesize == 0) {
207 defcache = 1;
208 cachesize = DB_CACHESIZE_DEF;
209 } else
210 cachesize = DB_CACHESIZE_MIN;
212 rlen = cachesize + cachesize / 4;
215 * Map in the region.
217 * If it's a private mpool, use malloc, it's a lot faster than
218 * instantiating a region.
220 dbmp->reginfo.dbenv = dbmp->dbenv;
221 dbmp->reginfo.appname = DB_APP_NONE;
222 if (path == NULL)
223 dbmp->reginfo.path = NULL;
224 else
225 if ((dbmp->reginfo.path = __db_strdup(path)) == NULL)
226 return (ENOMEM);
227 dbmp->reginfo.file = DB_DEFAULT_MPOOL_FILE;
228 dbmp->reginfo.mode = mode;
229 dbmp->reginfo.size = rlen;
230 dbmp->reginfo.dbflags = flags;
231 dbmp->reginfo.flags = 0;
232 if (defcache)
233 F_SET(&dbmp->reginfo, REGION_SIZEDEF);
236 * If we're creating a temporary region, don't use any standard
237 * naming.
239 if (is_private) {
240 dbmp->reginfo.appname = DB_APP_TMP;
241 dbmp->reginfo.file = NULL;
242 F_SET(&dbmp->reginfo, REGION_PRIVATE);
245 if ((ret = __db_rattach(&dbmp->reginfo)) != 0) {
246 if (dbmp->reginfo.path != NULL)
247 FREES(dbmp->reginfo.path);
248 return (ret);
252 * The MPOOL structure is first in the region, the rest of the region
253 * is free space.
255 dbmp->mp = dbmp->reginfo.addr;
256 dbmp->addr = (u_int8_t *)dbmp->mp + sizeof(MPOOL);
258 /* Initialize a created region. */
259 if (F_ISSET(&dbmp->reginfo, REGION_CREATED)) {
260 mp = dbmp->mp;
261 SH_TAILQ_INIT(&mp->bhq);
262 SH_TAILQ_INIT(&mp->bhfq);
263 SH_TAILQ_INIT(&mp->mpfq);
265 __db_shalloc_init(dbmp->addr, rlen - sizeof(MPOOL));
268 * Assume we want to keep the hash chains with under 10 pages
269 * on each chain. We don't know the pagesize in advance, and
270 * it may differ for different files. Use a pagesize of 1K for
271 * the calculation -- we walk these chains a lot, they should
272 * be short.
274 mp->htab_buckets =
275 __db_tablesize((cachesize / (1 * 1024)) / 10);
277 /* Allocate hash table space and initialize it. */
278 if ((ret = __db_shalloc(dbmp->addr,
279 mp->htab_buckets * sizeof(DB_HASHTAB),
280 0, &dbmp->htab)) != 0)
281 goto err;
282 __db_hashinit(dbmp->htab, mp->htab_buckets);
283 mp->htab = R_OFFSET(dbmp, dbmp->htab);
285 ZERO_LSN(mp->lsn);
286 mp->lsn_cnt = 0;
288 memset(&mp->stat, 0, sizeof(mp->stat));
289 mp->stat.st_cachesize = cachesize;
291 mp->flags = 0;
294 /* Get the local hash table address. */
295 dbmp->htab = R_ADDR(dbmp, dbmp->mp->htab);
297 UNLOCKREGION(dbmp);
298 return (0);
300 err: UNLOCKREGION(dbmp);
301 (void)__db_rdetach(&dbmp->reginfo);
302 if (F_ISSET(&dbmp->reginfo, REGION_CREATED))
303 (void)memp_unlink(path, 1, dbmp->dbenv);
305 if (dbmp->reginfo.path != NULL)
306 FREES(dbmp->reginfo.path);
307 return (ret);