2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997, 1998
5 * Sleepycat Software. All rights reserved.
7 * @(#)db_int.h.src 10.62 (Sleepycat) 5/23/98
10 #ifndef _DB_INTERNAL_H_
11 #define _DB_INTERNAL_H_
#include <stddef.h>			/* offsetof(), used by SSZ/SSZA. */

#include <db.h>				/* Standard DB include file. */
16 /*******************************************************
17 * General purpose constants and macros.
18 *******************************************************/
/* Largest values representable in 16 and 32 unsigned bits. */
#define	UINT16_T_MAX	0xffff
#define	UINT32_T_MAX	0xffffffff

/* Smallest and largest page sizes, in bytes. */
#define	DB_MIN_PGSIZE	0x000200
#define	DB_MAX_PGSIZE	0x010000

/* Minimum number of cached pages. */
#define	DB_MINCACHE	10

/* Number of bytes in a megabyte. */
#define	MEGABYTE	1048576
30 * If we are unable to determine the underlying filesystem block size, use
31 * 8K on the grounds that most OS's use less than 8K as their VM page size.
33 #define DB_DEF_IOSIZE (8 * 1024)
36 * Aligning items to particular sizes or in pages or memory. ALIGNP is a
37 * separate macro, as we've had to cast the pointer to different integral
38 * types on different architectures.
40 * We cast pointers into unsigned longs when manipulating them because C89
41 * guarantees that u_long is the largest available integral type and, further,
42 * that it never generates overflows. However, neither C89 nor C9X requires that
43 * any integer type be large enough to hold a pointer, although C9X created
44 * the intptr_t type, which is guaranteed to hold a pointer but may or may
45 * not exist. At some point in the future, we should test for intptr_t and
46 * use it where available.
/*
 * ALIGNTYPE --
 *	Integral type that pointers are cast to for alignment arithmetic.
 */
#define	ALIGNTYPE		u_long

/*
 * ALIGNP --
 *	Round the pointer `value' up to a multiple of `bound' (a power of
 *	two) and yield the result as an ALIGNTYPE.  Both arguments are
 *	converted to ALIGNTYPE as complete expressions: the cast covers the
 *	whole pointer expression, so ALIGNP(p + 1, n) rounds the address of
 *	p + 1, and a bound narrower than ALIGNTYPE cannot produce a mask that
 *	clears the high-order bits of the address.
 */
#define	ALIGNP(value, bound)	ALIGN((ALIGNTYPE)(value), (ALIGNTYPE)(bound))

/*
 * ALIGN --
 *	Round the integral `value' up to the next multiple of `bound', which
 *	must be a power of two.  `bound' is evaluated twice.
 */
#define	ALIGN(value, bound)	(((value) + (bound) - 1) & ~((bound) - 1))
56 * There are several on-page structures that are declared to have a number of
57 * fields followed by a variable length array of items. The structure size
58 * without including the variable length array or the address of the first of
59 * those elements can be found using SSZ.
61 * This macro can also be used to find the offset of a structure element in a
62 * structure. This is used in various places to copy structure elements from
63 * unaligned memory references, e.g., pointers into a packed page.
65 * There are two versions because compilers object if you take the address of
/*
 * SSZ --
 *	Byte offset, as an int, of member `field' within structure type
 *	`name'.
 * SSZA --
 *	The same, for a member that is an array: the offset of its first
 *	element.
 *
 * Both are built on offsetof() from <stddef.h> rather than on taking the
 * address of a member through a null pointer and casting that pointer to an
 * int, which is undefined and loses bits where pointers are wider than int.
 */
#define	SSZ(name, field)	((int)offsetof(name, field))
#define	SSZA(name, field)	((int)offsetof(name, field[0]))
/*
 * Macros to return per-process addresses and offsets based on shared
 * regions.  `base' points to a structure with an `addr' member.
 *
 * R_ADDR --
 *	Yield, as a void *, the address `offset' bytes past (base)->addr.
 * R_OFFSET --
 *	Yield the distance in bytes from (base)->addr to the pointer `p'.
 *
 * Every argument is parenthesized so that compound expressions (for
 * example a conditional offset) are used as a whole.
 */
#define	R_ADDR(base, offset)	((void *)((u_int8_t *)((base)->addr) + (offset)))
#define	R_OFFSET(base, p)	((u_int8_t *)(p) - (u_int8_t *)((base)->addr))
78 /* Free and free-string macros that overwrite memory. */
81 #define FREE(p, len) { \
82 memset(p, 0xff, len); \
91 #define FREE(p, len) { \
100 /* Structure used to print flag values. */
101 typedef struct __fn
{
102 u_int32_t mask
; /* Flag value. */
103 const char *name
; /* Flag name. */
/*
 * Set, clear and test flags.
 *
 * F_SET, F_CLR and F_ISSET operate on the `flags' member of the structure
 * that `p' points to; LF_SET, LF_CLR and LF_ISSET operate on a variable
 * named `flags' in the scope where the macro is used.  Every expansion is
 * fully parenthesized so that it can be embedded in a larger expression.
 */
#define	F_SET(p, f)	((p)->flags |= (f))
#define	F_CLR(p, f)	((p)->flags &= ~(f))
#define	F_ISSET(p, f)	((p)->flags & (f))
#define	LF_SET(f)	(flags |= (f))
#define	LF_CLR(f)	(flags &= ~(f))
#define	LF_ISSET(f)	(flags & (f))
114 /* Display separator string. */
116 #define DB_LINE "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
118 /* Global variables. */
119 typedef struct __db_globals
{
120 int db_mutexlocks
; /* DB_MUTEXLOCKS */
121 int db_region_anon
; /* DB_REGION_ANON, DB_REGION_NAME */
122 int db_region_init
; /* DB_REGION_INIT */
123 int db_tsl_spins
; /* DB_TSL_SPINS */
124 int db_pageyield
; /* DB_PAGEYIELD */
126 extern DB_GLOBALS __db_global_values
;
127 #define DB_GLOBAL(v) __db_global_values.v
/*
 * COMPQUIET --
 *	Unused, or not-used-yet variable: assign `v' to `n'.  "Shut that
 *	bloody compiler up!"  The assignment is parenthesized so the macro
 *	always expands to one self-contained expression.
 */
#define	COMPQUIET(n, v)	((n) = (v))
133 * Win16 needs specific syntax on callback functions. Nobody else cares.
136 #define DB_CALLBACK /* Nothing. */
139 /*******************************************************
141 *******************************************************/
143 * We use 1024 as the maximum path length. It's too hard to figure out what
144 * the real path length is, as it was traditionally stored in <sys/param.h>,
145 * and that file isn't always available.
/*
 * Discard any MAXPATHLEN that a system header has already supplied, so that
 * the definition below is never a conflicting redefinition.
 */
#undef	MAXPATHLEN
#define	MAXPATHLEN	1024

#define	PATH_DOT	"."	/* Current working directory. */
#define	PATH_SEPARATOR	"/"	/* Path separator character, as a string. */
153 /*******************************************************
155 *******************************************************/
/* Test-and-set resource type: an unsigned char. */
typedef unsigned char tsl_t;
162 * Various systems require different alignments for mutexes (the worst we've
163 * seen so far is 16-bytes on some HP architectures). The mutex (tsl_t) must
164 * be first in the db_mutex_t structure, which must itself be first in the
165 * region. This ensures the alignment is as returned by mmap(2), which should
166 * be sufficient. All other mutex users must ensure proper alignment locally.
168 #define MUTEX_ALIGNMENT 1
171 * The offset of a mutex in memory.
174 * Not an off_t, so backing file offsets MUST be less than 4Gb. See the
175 * off field of the db_mutex_t as well.
/*
 * MUTEX_LOCK_OFFSET --
 *	Yield, as a u_int32_t, the distance in bytes from address `a' to
 *	address `b'.  Both arguments are parenthesized before being cast to
 *	byte pointers, so an expression such as `base + n' is converted as a
 *	whole instead of having `n' added after the cast.
 */
#define	MUTEX_LOCK_OFFSET(a, b)	((u_int32_t)((u_int8_t *)(b) - (u_int8_t *)(a)))
179 typedef struct _db_mutex_t
{
180 #ifdef HAVE_SPINLOCKS
181 tsl_t tsl_resource
; /* Resource test and set. */
183 u_int32_t pid
; /* Lock holder: 0 or process pid. */
186 u_int32_t off
; /* Backing file offset. */
187 u_int32_t pid
; /* Lock holder: 0 or process pid. */
189 u_int32_t spins
; /* Spins before block. */
190 u_int32_t mutex_set_wait
; /* Granted after wait. */
191 u_int32_t mutex_set_nowait
; /* Granted without waiting. */
194 #include "mutex_ext.h"
196 /*******************************************************
198 *******************************************************/
/*
 * Lock/unlock a DB thread.
 *
 * If DB_AM_THREAD is set in dbp's flags, call __db_mutex_lock (respectively
 * __db_mutex_unlock) on dbp->mutexp with -1 as the second argument and
 * discard the return value; otherwise do nothing.
 *
 * Each macro expands to a single do { } while (0) statement: write a
 * semicolon after it, and it is then safe as the body of an unbraced
 * if/else (the previous bare `if ...;' form captured a following `else' or
 * produced a syntax error there).
 */
#define	DB_THREAD_LOCK(dbp)						\
	do {								\
		if (F_ISSET(dbp, DB_AM_THREAD))				\
			(void)__db_mutex_lock(				\
			    (db_mutex_t *)(dbp)->mutexp, -1);		\
	} while (0)
#define	DB_THREAD_UNLOCK(dbp)						\
	do {								\
		if (F_ISSET(dbp, DB_AM_THREAD))				\
			(void)__db_mutex_unlock(			\
			    (db_mutex_t *)(dbp)->mutexp, -1);		\
	} while (0)
/* Btree/recno local statistics structure: DB_BTREE_LSTAT names it. */
typedef struct __db_bt_lstat DB_BTREE_LSTAT;
209 struct __db_bt_lstat
{
210 u_int32_t bt_freed
; /* Pages freed for reuse. */
211 u_int32_t bt_pfxsaved
; /* Bytes saved by prefix compression. */
212 u_int32_t bt_split
; /* Total number of splits. */
213 u_int32_t bt_rootsplit
; /* Root page splits. */
214 u_int32_t bt_fastsplit
; /* Fast splits. */
215 u_int32_t bt_added
; /* Items added. */
216 u_int32_t bt_deleted
; /* Items deleted. */
217 u_int32_t bt_get
; /* Items retrieved. */
218 u_int32_t bt_cache_hit
; /* Hits in fast-insert code. */
219 u_int32_t bt_cache_miss
; /* Misses in fast-insert code. */
222 /*******************************************************
224 *******************************************************/
225 /* Type passed to __db_appname(). */
227 DB_APP_NONE
=0, /* No type (region). */
228 DB_APP_DATA
, /* Data file. */
229 DB_APP_LOG
, /* Log file. */
230 DB_APP_TMP
/* Temporary file. */
233 /*******************************************************
234 * Shared memory regions.
235 *******************************************************/
237 * The shared memory regions share an initial structure so that the general
238 * region code can handle races between the region being deleted and other
239 * processes waiting on the region mutex.
242 * Note, the mutex must be the first entry in the region; see comment above.
244 typedef struct _rlayout
{
245 db_mutex_t lock
; /* Region mutex. */
246 #define DB_REGIONMAGIC 0x120897
247 u_int32_t valid
; /* Valid magic number. */
248 u_int32_t refcnt
; /* Region reference count. */
249 size_t size
; /* Region length. */
250 int majver
; /* Major version number. */
251 int minver
; /* Minor version number. */
252 int patch
; /* Patch version number. */
253 #define INVALID_SEGID -1
254 int segid
; /* shmget(2) ID, or Win16 segment ID. */
256 #define REGION_ANONYMOUS 0x01 /* Region is/should be in anon mem. */
261 * DB creates all regions on 4K boundaries out of sheer paranoia, so that
262 * we don't make the underlying VM unhappy.
264 #define DB_VMPAGESIZE (4 * 1024)
265 #define DB_ROUNDOFF(i) { \
266 (i) += DB_VMPAGESIZE - 1; \
267 (i) -= (i) % DB_VMPAGESIZE; \
271 * The interface to region attach is nasty, there is a lot of complex stuff
272 * going on, which has to be retained between create/attach and detach. The
273 * REGINFO structure keeps track of it.
/* REGINFO is the type name for struct __db_reginfo. */
typedef struct __db_reginfo REGINFO;
276 struct __db_reginfo
{
278 DB_ENV
*dbenv
; /* Region naming info. */
279 APPNAME appname
; /* Region naming info. */
280 char *path
; /* Region naming info. */
281 const char *file
; /* Region naming info. */
282 int mode
; /* Region mode, if a file. */
283 size_t size
; /* Region size. */
284 u_int32_t dbflags
; /* Region file open flags, if a file. */
287 char *name
; /* Region name. */
288 void *addr
; /* Region address. */
289 int fd
; /* Fcntl(2) locking file descriptor.
290 NB: this is only valid if a regular
291 file is backing the shared region,
292 and mmap(2) is being used to map it
293 into our address space. */
294 int segid
; /* shmget(2) ID, or Win16 segment ID. */
297 /* 0x0001 COMMON MASK with RLAYOUT structure. */
298 #define REGION_CANGROW 0x0002 /* Can grow. */
299 #define REGION_CREATED 0x0004 /* Created. */
300 #define REGION_HOLDINGSYS 0x0008 /* Holding system resources. */
301 #define REGION_LASTDETACH 0x0010 /* Delete on last detach. */
302 #define REGION_MALLOC 0x0020 /* Created in malloc'd memory. */
303 #define REGION_PRIVATE 0x0040 /* Private to thread/process. */
304 #define REGION_REMOVED 0x0080 /* Already deleted. */
305 #define REGION_SIZEDEF 0x0100 /* Use default region size if exists. */
309 /*******************************************************
311 *******************************************************/
313 * File types for DB access methods. Negative numbers are reserved to DB.
/*
 * The negative values are parenthesized so that each macro expands to one
 * self-contained expression wherever it is used.
 */
#define	DB_FTYPE_BTREE		(-1)		/* Btree. */
#define	DB_FTYPE_HASH		(-2)		/* Hash. */
318 /* Structure used as the DB pgin/pgout pgcookie. */
319 typedef struct __dbpginfo
{
320 size_t db_pagesize
; /* Underlying page size. */
321 int needswap
; /* If swapping required. */
324 /*******************************************************
326 *******************************************************/
327 /* Initialize an LSN to 'zero'. */
328 #define ZERO_LSN(LSN) { \
/*
 * IS_ZERO_LSN --
 *	Yield 1 when the `file' member of LSN is 0 (a 'zero' lsn), and 0
 *	otherwise.
 */
#define	IS_ZERO_LSN(LSN)	(!(LSN).file)
/*
 * DB_LOGGING --
 *	Test if we need to log a change: yields 1 only when DB_AM_LOGGING is
 *	set on dbp and DB_AM_RECOVER is not, and 0 otherwise.
 */
#define	DB_LOGGING(dbp)							\
	(F_ISSET(dbp, DB_AM_LOGGING) ? !F_ISSET(dbp, DB_AM_RECOVER) : 0)
342 * Debugging macro to log operations.
343 * If DEBUG_WOP is defined, log operations that modify the database.
344 * If DEBUG_ROP is defined, log operations that read the database.
348 * O operation (string)
353 #define LOG_OP(D, T, O, K, A, F) { \
356 if (DB_LOGGING((D))) { \
357 memset(&_op, 0, sizeof(_op)); \
359 _op.size = strlen(O) + 1; \
360 (void)__db_debug_log((D)->dbenv->lg_info, \
361 T, &_lsn, 0, &_op, (D)->log_fileid, K, A, F); \
365 #define DEBUG_LREAD(D, T, O, K, A, F) LOG_OP(D, T, O, K, A, F)
367 #define DEBUG_LREAD(D, T, O, K, A, F)
370 #define DEBUG_LWRITE(D, T, O, K, A, F) LOG_OP(D, T, O, K, A, F)
372 #define DEBUG_LWRITE(D, T, O, K, A, F)
375 #define DEBUG_LREAD(D, T, O, K, A, F)
376 #define DEBUG_LWRITE(D, T, O, K, A, F)
377 #endif /* DIAGNOSTIC */
379 /*******************************************************
380 * Transactions and recovery.
381 *******************************************************/
383 * Out of band value for a lock. The locks are returned to callers as offsets
384 * into the lock regions. Since the RLAYOUT structure begins all regions, an
385 * offset of 0 is guaranteed not to be a valid lock.
387 #define LOCK_INVALID 0
389 /* The structure allocated for every transaction. */
391 DB_TXNMGR
*mgrp
; /* Pointer to transaction manager. */
392 DB_TXN
*parent
; /* Pointer to transaction's parent. */
393 DB_LSN last_lsn
; /* Lsn of last log write. */
394 u_int32_t txnid
; /* Unique transaction id. */
395 size_t off
; /* Detail structure within region. */
396 TAILQ_ENTRY(__db_txn
) links
;
402 #endif /* !_DB_INTERNAL_H_ */