/*
 * Source: glibc.git, db2/include/mp.h
 * blob 8635efa722c476fd5b6b45af9faa305aa01e7e8a
 */
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 1997, 1998
 *	Sleepycat Software.  All rights reserved.
 *
 *	@(#)mp.h	10.33 (Sleepycat) 5/4/98
 */
10 struct __bh; typedef struct __bh BH;
11 struct __db_mpreg; typedef struct __db_mpreg DB_MPREG;
12 struct __mpool; typedef struct __mpool MPOOL;
13 struct __mpoolfile; typedef struct __mpoolfile MPOOLFILE;
15 /* Default mpool name. */
16 #define DB_DEFAULT_MPOOL_FILE "__db_mpool.share"
19 * We default to 128K (16 8K pages) if the user doesn't specify, and
20 * require a minimum of 20K.
22 #ifndef DB_CACHESIZE_DEF
23 #define DB_CACHESIZE_DEF (128 * 1024)
24 #endif
25 #define DB_CACHESIZE_MIN ( 20 * 1024)
27 #define INVALID 0 /* Invalid shared memory offset. */
/*
 * There are three ways we do locking in the mpool code:
 *
 * Locking a handle mutex to provide concurrency for DB_THREAD operations.
 * Locking the region mutex to provide mutual exclusion while reading and
 *    writing structures in the shared region.
 * Locking buffer header mutexes during I/O.
 *
 * The first will not be further described here.  We use the shared mpool
 * region lock to provide mutual exclusion while reading/modifying all of
 * the data structures, including the buffer headers.  We use a per-buffer
 * header lock to wait on buffer I/O.  The order of locking is as follows:
 *
 * Searching for a buffer:
 *	Acquire the region lock.
 *	Find the buffer header.
 *	Increment the reference count (guarantee the buffer stays).
 *	While the BH_LOCKED flag is set (I/O is going on) {
 *	    Release the region lock.
 *		Explicitly yield the processor if it's not the first pass
 *		through this loop, otherwise, we can simply spin because
 *		we'll be simply switching between the two locks.
 *	    Request the buffer lock.
 *	    The I/O will complete...
 *	    Acquire the buffer lock.
 *	    Release the buffer lock.
 *	    Acquire the region lock.
 *	}
 *	Return the buffer.
 *
 * Reading/writing a buffer:
 *	Acquire the region lock.
 *	Find/create the buffer header.
 *	If reading, increment the reference count (guarantee the buffer stays).
 *	Set the BH_LOCKED flag.
 *	Acquire the buffer lock (guaranteed not to block).
 *	Release the region lock.
 *	Do the I/O and/or initialize the buffer contents.
 *	Release the buffer lock.
 *	    At this point, the buffer lock is available, but the logical
 *	    operation (flagged by BH_LOCKED) is not yet completed.  For
 *	    this reason, among others, threads checking the BH_LOCKED flag
 *	    must loop around their test.
 *	Acquire the region lock.
 *	Clear the BH_LOCKED flag.
 *	Release the region lock.
 *	Return/discard the buffer.
 *
 * Pointers to DB_MPOOL, MPOOL, DB_MPOOLFILE and MPOOLFILE structures are not
 * reacquired when a region lock is reacquired because they couldn't have been
 * closed/discarded and because they never move in memory.
 */
81 #define LOCKINIT(dbmp, mutexp) \
82 if (F_ISSET(dbmp, MP_LOCKHANDLE | MP_LOCKREGION)) \
83 (void)__db_mutex_init(mutexp, \
84 MUTEX_LOCK_OFFSET((dbmp)->reginfo.addr, mutexp))
86 #define LOCKHANDLE(dbmp, mutexp) \
87 if (F_ISSET(dbmp, MP_LOCKHANDLE)) \
88 (void)__db_mutex_lock(mutexp, (dbmp)->reginfo.fd)
89 #define UNLOCKHANDLE(dbmp, mutexp) \
90 if (F_ISSET(dbmp, MP_LOCKHANDLE)) \
91 (void)__db_mutex_unlock(mutexp, (dbmp)->reginfo.fd)
93 #define LOCKREGION(dbmp) \
94 if (F_ISSET(dbmp, MP_LOCKREGION)) \
95 (void)__db_mutex_lock(&((RLAYOUT *)(dbmp)->mp)->lock, \
96 (dbmp)->reginfo.fd)
97 #define UNLOCKREGION(dbmp) \
98 if (F_ISSET(dbmp, MP_LOCKREGION)) \
99 (void)__db_mutex_unlock(&((RLAYOUT *)(dbmp)->mp)->lock, \
100 (dbmp)->reginfo.fd)
102 #define LOCKBUFFER(dbmp, bhp) \
103 if (F_ISSET(dbmp, MP_LOCKREGION)) \
104 (void)__db_mutex_lock(&(bhp)->mutex, (dbmp)->reginfo.fd)
105 #define UNLOCKBUFFER(dbmp, bhp) \
106 if (F_ISSET(dbmp, MP_LOCKREGION)) \
107 (void)__db_mutex_unlock(&(bhp)->mutex, (dbmp)->reginfo.fd)
110 * DB_MPOOL --
111 * Per-process memory pool structure.
113 struct __db_mpool {
114 /* These fields need to be protected for multi-threaded support. */
115 db_mutex_t *mutexp; /* Structure lock. */
117 /* List of pgin/pgout routines. */
118 LIST_HEAD(__db_mpregh, __db_mpreg) dbregq;
120 /* List of DB_MPOOLFILE's. */
121 TAILQ_HEAD(__db_mpoolfileh, __db_mpoolfile) dbmfq;
123 /* These fields are not protected. */
124 DB_ENV *dbenv; /* Reference to error information. */
125 REGINFO reginfo; /* Region information. */
127 MPOOL *mp; /* Address of the shared MPOOL. */
129 void *addr; /* Address of shalloc() region. */
131 DB_HASHTAB *htab; /* Hash table of bucket headers. */
133 #define MP_LOCKHANDLE 0x01 /* Threaded, lock handles and region. */
134 #define MP_LOCKREGION 0x02 /* Concurrent access, lock region. */
135 u_int32_t flags;
139 * DB_MPREG --
140 * DB_MPOOL registry of pgin/pgout functions.
142 struct __db_mpreg {
143 LIST_ENTRY(__db_mpreg) q; /* Linked list. */
145 int ftype; /* File type. */
146 /* Pgin, pgout routines. */
147 int (DB_CALLBACK *pgin) __P((db_pgno_t, void *, DBT *));
148 int (DB_CALLBACK *pgout) __P((db_pgno_t, void *, DBT *));
152 * DB_MPOOLFILE --
153 * Per-process DB_MPOOLFILE information.
155 struct __db_mpoolfile {
156 /* These fields need to be protected for multi-threaded support. */
157 db_mutex_t *mutexp; /* Structure lock. */
159 int fd; /* Underlying file descriptor. */
161 u_int32_t pinref; /* Pinned block reference count. */
163 /* These fields are not protected. */
164 TAILQ_ENTRY(__db_mpoolfile) q; /* Linked list of DB_MPOOLFILE's. */
166 DB_MPOOL *dbmp; /* Overlying DB_MPOOL. */
167 MPOOLFILE *mfp; /* Underlying MPOOLFILE. */
169 void *addr; /* Address of mmap'd region. */
170 size_t len; /* Length of mmap'd region. */
172 /* These fields need to be protected for multi-threaded support. */
173 #define MP_READONLY 0x01 /* File is readonly. */
174 #define MP_UPGRADE 0x02 /* File descriptor is readwrite. */
175 #define MP_UPGRADE_FAIL 0x04 /* Upgrade wasn't possible. */
176 u_int32_t flags;
180 * MPOOL --
181 * Shared memory pool region. One of these is allocated in shared
182 * memory, and describes the pool.
184 struct __mpool {
185 RLAYOUT rlayout; /* General region information. */
187 SH_TAILQ_HEAD(__bhq) bhq; /* LRU list of buckets. */
188 SH_TAILQ_HEAD(__bhfq) bhfq; /* Free buckets. */
189 SH_TAILQ_HEAD(__mpfq) mpfq; /* List of MPOOLFILEs. */
192 * We make the assumption that the early pages of the file are far
193 * more likely to be retrieved than the later pages, which means
194 * that the top bits are more interesting for hashing since they're
195 * less likely to collide. On the other hand, since 512 4K pages
196 * represents a 2MB file, only the bottom 9 bits of the page number
197 * are likely to be set. We XOR in the offset in the MPOOL of the
198 * MPOOLFILE that backs this particular page, since that should also
199 * be unique for the page.
201 #define BUCKET(mp, mf_offset, pgno) \
202 (((pgno) ^ ((mf_offset) << 9)) % (mp)->htab_buckets)
204 size_t htab; /* Hash table offset. */
205 size_t htab_buckets; /* Number of hash table entries. */
207 DB_LSN lsn; /* Maximum checkpoint LSN. */
208 u_int32_t lsn_cnt; /* Checkpoint buffers left to write. */
210 DB_MPOOL_STAT stat; /* Global mpool statistics. */
212 #define MP_LSN_RETRY 0x01 /* Retry all BH_WRITE buffers. */
213 u_int32_t flags;
217 * MPOOLFILE --
218 * Shared DB_MPOOLFILE information.
220 struct __mpoolfile {
221 SH_TAILQ_ENTRY q; /* List of MPOOLFILEs */
223 u_int32_t ref; /* Reference count. */
225 int ftype; /* File type. */
227 int32_t lsn_off; /* Page's LSN offset. */
228 u_int32_t clear_len; /* Bytes to clear on page create. */
230 size_t path_off; /* File name location. */
231 size_t fileid_off; /* File identification location. */
233 size_t pgcookie_len; /* Pgin/pgout cookie length. */
234 size_t pgcookie_off; /* Pgin/pgout cookie location. */
236 u_int32_t lsn_cnt; /* Checkpoint buffers left to write. */
238 db_pgno_t last_pgno; /* Last page in the file. */
239 db_pgno_t orig_last_pgno; /* Original last page in the file. */
241 #define MP_CAN_MMAP 0x01 /* If the file can be mmap'd. */
242 #define MP_TEMP 0x02 /* Backing file is a temporary. */
243 u_int32_t flags;
245 DB_MPOOL_FSTAT stat; /* Per-file mpool statistics. */
249 * BH --
250 * Buffer header.
252 struct __bh {
253 db_mutex_t mutex; /* Structure lock. */
255 u_int16_t ref; /* Reference count. */
257 #define BH_CALLPGIN 0x001 /* Page needs to be reworked... */
258 #define BH_DIRTY 0x002 /* Page was modified. */
259 #define BH_DISCARD 0x004 /* Page is useless. */
260 #define BH_LOCKED 0x008 /* Page is locked (I/O in progress). */
261 #define BH_TRASH 0x010 /* Page is garbage. */
262 #define BH_WRITE 0x020 /* Page scheduled for writing. */
263 u_int16_t flags;
265 SH_TAILQ_ENTRY q; /* LRU queue. */
266 SH_TAILQ_ENTRY hq; /* MPOOL hash bucket queue. */
268 db_pgno_t pgno; /* Underlying MPOOLFILE page number. */
269 size_t mf_offset; /* Associated MPOOLFILE offset. */
272 * !!!
273 * This array must be size_t aligned -- the DB access methods put PAGE
274 * and other structures into it, and expect to be able to access them
275 * directly. (We guarantee size_t alignment in the db_mpool(3) manual
276 * page as well.)
278 u_int8_t buf[1]; /* Variable length data. */
281 #include "mp_ext.h"