tdb: Reduce freelist contention
[Samba.git] / lib / tdb / common / tdb_private.h
bloba672159578246efbdf7eeda959aa1388c65c68ad
1 #ifndef TDB_PRIVATE_H
2 #define TDB_PRIVATE_H
/*
   Unix SMB/CIFS implementation.

   trivial database library - private includes

   Copyright (C) Andrew Tridgell              2005

     ** NOTE! The following LGPL license applies to the tdb
     ** library. This does NOT imply that all of Samba is released
     ** under the LGPL

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
28 #include "replace.h"
29 #include "system/filesys.h"
30 #include "system/time.h"
31 #include "system/shmem.h"
32 #include "system/select.h"
33 #include "system/wait.h"
34 #include "tdb.h"
/* Uncomment to compile in the tdb_trace*() hooks declared further down. */
/* #define TDB_TRACE 1 */

/*
 * Fallback used when HAVE_GETPAGESIZE is not defined: report a fixed
 * page size of 0x2000 (8192) bytes.
 */
#ifndef HAVE_GETPAGESIZE
#define getpagesize() 0x2000
#endif
/*
 * Lengths and offsets used by this header's record and header structures
 * are both 32-bit unsigned values.
 */
typedef uint32_t tdb_len_t;
typedef uint32_t tdb_off_t;
/*
 * Fallback offsetof() for environments that have not already defined it.
 * The result is cast to size_t (not unsigned int) so the computed offset
 * is never truncated on platforms where pointers are wider than int.
 */
#ifndef offsetof
#define offsetof(t,f) ((size_t)&((t *)0)->f)
#endif
/* Magic numbers, format version and layout constants. */
#define TDB_MAGIC_FOOD "TDB file\n"
#define TDB_VERSION (0x26011967 + 6)
#define TDB_MAGIC (0x26011999U)
#define TDB_FREE_MAGIC (~TDB_MAGIC)
#define TDB_DEAD_MAGIC (0xFEE1DEADU)
#define TDB_RECOVERY_MAGIC (0xf53bc0e7U)
#define TDB_RECOVERY_INVALID_MAGIC (0x0)
#define TDB_HASH_RWLOCK_MAGIC (0xbad1a51U)
#define TDB_ALIGNMENT 4
#define DEFAULT_HASH_SIZE 131
/* The free list head is stored immediately after the file header. */
#define FREELIST_TOP (sizeof(struct tdb_header))
/* Round x up to a multiple of a; a must be a power of two. */
#define TDB_ALIGN(x,a) (((x) + (a)-1) & ~((a)-1))
/*
 * Reverse the byte order of a 32-bit value.  The argument is converted to
 * uint32_t first so that shifting a byte into bit 31 is always an unsigned
 * operation, even when the caller passes a plain int.
 */
#define TDB_BYTEREV(x) ((((uint32_t)(x) & 0xff) << 24) | \
			(((uint32_t)(x) & 0xFF00) << 8) | \
			(((uint32_t)(x) >> 8) & 0xFF00) | \
			((uint32_t)(x) >> 24))
#define TDB_DEAD(r) ((r)->magic == TDB_DEAD_MAGIC)
#define TDB_BAD_MAGIC(r) ((r)->magic != TDB_MAGIC && !TDB_DEAD(r))
/* File offset of the chain head for a hash; slot 0 is the free list. */
#define TDB_HASH_TOP(hash) (FREELIST_TOP + (BUCKET(hash)+1)*sizeof(tdb_off_t))
/* Size in bytes of the free list head plus all hash chain heads. */
#define TDB_HASHTABLE_SIZE(tdb) ((((tdb)->hash_size)+1)*sizeof(tdb_off_t))
#define TDB_DATA_START(hash_size) (TDB_HASH_TOP((hash_size)-1) + sizeof(tdb_off_t))
#define TDB_RECOVERY_HEAD offsetof(struct tdb_header, recovery_start)
#define TDB_SEQNUM_OFS    offsetof(struct tdb_header, sequence_number)
#define TDB_PAD_BYTE 0x42
#define TDB_PAD_U32  0x42424242
/* NB assumes there is a local variable called "tdb" that is the
 * current context, also takes doubly-parenthesized print-style
 * argument.
 *
 * TDB_LOG((a, b, c)) expands to: tdb->log.log_fn (a, b, c)
 */
#define TDB_LOG(x) tdb->log.log_fn x
/*
 * Tracing hooks.  With TDB_TRACE defined these are real functions; without
 * it every hook is a macro that expands to nothing, so its arguments are
 * not evaluated.
 */
#ifdef TDB_TRACE
void tdb_trace(struct tdb_context *tdb, const char *op);
void tdb_trace_seqnum(struct tdb_context *tdb, uint32_t seqnum, const char *op);
void tdb_trace_open(struct tdb_context *tdb, const char *op,
		    unsigned hash_size, unsigned tdb_flags, unsigned open_flags);
void tdb_trace_ret(struct tdb_context *tdb, const char *op, int ret);
void tdb_trace_retrec(struct tdb_context *tdb, const char *op, TDB_DATA ret);
void tdb_trace_1rec(struct tdb_context *tdb, const char *op,
		    TDB_DATA rec);
void tdb_trace_1rec_ret(struct tdb_context *tdb, const char *op,
			TDB_DATA rec, int ret);
void tdb_trace_1rec_retrec(struct tdb_context *tdb, const char *op,
			   TDB_DATA rec, TDB_DATA ret);
void tdb_trace_2rec_flag_ret(struct tdb_context *tdb, const char *op,
			     TDB_DATA rec1, TDB_DATA rec2, unsigned flag,
			     int ret);
void tdb_trace_2rec_retrec(struct tdb_context *tdb, const char *op,
			   TDB_DATA rec1, TDB_DATA rec2, TDB_DATA ret);
#else
#define tdb_trace(tdb, op)
#define tdb_trace_seqnum(tdb, seqnum, op)
#define tdb_trace_open(tdb, op, hash_size, tdb_flags, open_flags)
#define tdb_trace_ret(tdb, op, ret)
#define tdb_trace_retrec(tdb, op, ret)
#define tdb_trace_1rec(tdb, op, rec)
#define tdb_trace_1rec_ret(tdb, op, rec, ret)
#define tdb_trace_1rec_retrec(tdb, op, rec, ret)
#define tdb_trace_2rec_flag_ret(tdb, op, rec1, rec2, flag, ret)
#define tdb_trace_2rec_retrec(tdb, op, rec1, rec2, ret)
#endif /* !TDB_TRACE */
/* lock offsets */
#define OPEN_LOCK        0
#define ACTIVE_LOCK      4
#define TRANSACTION_LOCK 8
/*
 * Free the memory x points to and set x to NULL.  free(NULL) is a no-op,
 * so no separate NULL test is needed.  x is evaluated more than once and
 * must be an lvalue.
 */
#ifndef SAFE_FREE
#define SAFE_FREE(x) do { free(x); (x) = NULL; } while(0)
#endif
/*
 * Like TDB_LOG, these macros rely on a local variable called "tdb"
 * being in scope at the point of use.
 */

/* Reduce a hash value to an index below tdb->hash_size. */
#define BUCKET(hash) ((hash) % tdb->hash_size)

/* Non-zero when the TDB_CONVERT flag is set on this context. */
#define DOCONV() (tdb->flags & TDB_CONVERT)
/* Pass x through tdb_convert() when DOCONV() is set, otherwise yield &x. */
#define CONVERT(x) (DOCONV() ? tdb_convert(&(x), sizeof(x)) : &(x))
123 /* the body of the database is made of one tdb_record for the free space
124 plus a separate data list for each hash value */
125 struct tdb_record {
126 tdb_off_t next; /* offset of the next record in the list */
127 tdb_len_t rec_len; /* total byte length of record */
128 tdb_len_t key_len; /* byte length of key */
129 tdb_len_t data_len; /* byte length of data */
130 uint32_t full_hash; /* the full 32 bit hash of the key */
131 uint32_t magic; /* try to catch errors */
132 /* the following union is implied:
133 union {
134 char record[rec_len];
135 struct {
136 char key[key_len];
137 char data[data_len];
139 uint32_t totalsize; (tailer)
145 /* this is stored at the front of every database */
146 struct tdb_header {
147 char magic_food[32]; /* for /etc/magic */
148 uint32_t version; /* version of the code */
149 uint32_t hash_size; /* number of hash entries */
150 tdb_off_t rwlocks; /* obsolete - kept to detect old formats */
151 tdb_off_t recovery_start; /* offset of transaction recovery region */
152 tdb_off_t sequence_number; /* used when TDB_SEQNUM is set */
153 uint32_t magic1_hash; /* hash of TDB_MAGIC_FOOD. */
154 uint32_t magic2_hash; /* hash of TDB_MAGIC. */
155 tdb_off_t reserved[27];
/* One held lock, as tracked in struct tdb_context. */
struct tdb_lock_type {
	uint32_t off;   /* offset the lock applies to */
	uint32_t count; /* presumably the nesting count - confirm in the lock code */
	uint32_t ltype; /* lock type, the "ltype" passed to the lock functions */
};
/* Node in the singly linked list of current traversal locks. */
struct tdb_traverse_lock {
	struct tdb_traverse_lock *next; /* next entry in the list, if any */
	uint32_t off;
	uint32_t hash;
	int lock_rw;
};
/* Flag bits accepted by the locking functions; they may be OR-ed together. */
enum tdb_lock_flags {
	/* WAIT == F_SETLKW, NOWAIT == F_SETLK */
	TDB_LOCK_NOWAIT = 0,
	TDB_LOCK_WAIT = 1,
	/* If set, don't log an error on failure. */
	TDB_LOCK_PROBE = 2,
	/* If set, don't actually lock at all. */
	TDB_LOCK_MARK_ONLY = 4,
};
181 struct tdb_methods {
182 int (*tdb_read)(struct tdb_context *, tdb_off_t , void *, tdb_len_t , int );
183 int (*tdb_write)(struct tdb_context *, tdb_off_t, const void *, tdb_len_t);
184 void (*next_hash_chain)(struct tdb_context *, uint32_t *);
185 int (*tdb_oob)(struct tdb_context *, tdb_off_t , tdb_len_t, int );
186 int (*tdb_expand_file)(struct tdb_context *, tdb_off_t , tdb_off_t );
189 struct tdb_context {
190 char *name; /* the name of the database */
191 void *map_ptr; /* where it is currently mapped */
192 int fd; /* open file descriptor for the database */
193 tdb_len_t map_size; /* how much space has been mapped */
194 int read_only; /* opened read-only */
195 int traverse_read; /* read-only traversal */
196 int traverse_write; /* read-write traversal */
197 struct tdb_lock_type allrecord_lock; /* .offset == upgradable */
198 int num_lockrecs;
199 struct tdb_lock_type *lockrecs; /* only real locks, all with count>0 */
200 int lockrecs_array_length;
201 enum TDB_ERROR ecode; /* error code for last tdb error */
202 uint32_t hash_size;
203 uint32_t flags; /* the flags passed to tdb_open */
204 struct tdb_traverse_lock travlocks; /* current traversal locks */
205 struct tdb_context *next; /* all tdbs to avoid multiple opens */
206 dev_t device; /* uniquely identifies this tdb */
207 ino_t inode; /* uniquely identifies this tdb */
208 struct tdb_logging_context log;
209 unsigned int (*hash_fn)(TDB_DATA *key);
210 int open_flags; /* flags used in the open - needed by reopen */
211 const struct tdb_methods *methods;
212 struct tdb_transaction *transaction;
213 int page_size;
214 int max_dead_records;
215 #ifdef TDB_TRACE
216 int tracefd;
217 #endif
218 volatile sig_atomic_t *interrupt_sig_ptr;
223 internal prototypes
225 int tdb_munmap(struct tdb_context *tdb);
226 int tdb_mmap(struct tdb_context *tdb);
227 int tdb_lock(struct tdb_context *tdb, int list, int ltype);
228 int tdb_lock_nonblock(struct tdb_context *tdb, int list, int ltype);
229 int tdb_nest_lock(struct tdb_context *tdb, uint32_t offset, int ltype,
230 enum tdb_lock_flags flags);
231 int tdb_nest_unlock(struct tdb_context *tdb, uint32_t offset, int ltype,
232 bool mark_lock);
233 int tdb_unlock(struct tdb_context *tdb, int list, int ltype);
234 int tdb_brlock(struct tdb_context *tdb,
235 int rw_type, tdb_off_t offset, size_t len,
236 enum tdb_lock_flags flags);
237 int tdb_brunlock(struct tdb_context *tdb,
238 int rw_type, tdb_off_t offset, size_t len);
239 bool tdb_have_extra_locks(struct tdb_context *tdb);
240 void tdb_release_transaction_locks(struct tdb_context *tdb);
241 int tdb_transaction_lock(struct tdb_context *tdb, int ltype,
242 enum tdb_lock_flags lockflags);
243 int tdb_transaction_unlock(struct tdb_context *tdb, int ltype);
244 int tdb_recovery_area(struct tdb_context *tdb,
245 const struct tdb_methods *methods,
246 tdb_off_t *recovery_offset,
247 struct tdb_record *rec);
248 int tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
249 enum tdb_lock_flags flags, bool upgradable);
250 int tdb_allrecord_unlock(struct tdb_context *tdb, int ltype, bool mark_lock);
251 int tdb_allrecord_upgrade(struct tdb_context *tdb);
252 int tdb_write_lock_record(struct tdb_context *tdb, tdb_off_t off);
253 int tdb_write_unlock_record(struct tdb_context *tdb, tdb_off_t off);
254 int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d);
255 int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d);
256 void *tdb_convert(void *buf, uint32_t size);
257 int tdb_free(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec);
258 tdb_off_t tdb_allocate(struct tdb_context *tdb, int hash, tdb_len_t length,
259 struct tdb_record *rec);
260 int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d);
261 int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d);
262 int tdb_lock_record(struct tdb_context *tdb, tdb_off_t off);
263 int tdb_unlock_record(struct tdb_context *tdb, tdb_off_t off);
264 bool tdb_needs_recovery(struct tdb_context *tdb);
265 int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec);
266 int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec);
267 int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct tdb_record *rec);
268 unsigned char *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len);
269 int tdb_parse_data(struct tdb_context *tdb, TDB_DATA key,
270 tdb_off_t offset, tdb_len_t len,
271 int (*parser)(TDB_DATA key, TDB_DATA data,
272 void *private_data),
273 void *private_data);
274 tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype,
275 struct tdb_record *rec);
276 tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash,
277 struct tdb_record *r, tdb_len_t length,
278 tdb_off_t *p_last_ptr);
279 int tdb_purge_dead(struct tdb_context *tdb, uint32_t hash);
280 void tdb_io_init(struct tdb_context *tdb);
281 int tdb_expand(struct tdb_context *tdb, tdb_off_t size);
282 tdb_off_t tdb_expand_adjust(tdb_off_t map_size, tdb_off_t size, int page_size);
283 int tdb_rec_free_read(struct tdb_context *tdb, tdb_off_t off,
284 struct tdb_record *rec);
285 bool tdb_write_all(int fd, const void *buf, size_t count);
286 int tdb_transaction_recover(struct tdb_context *tdb);
287 void tdb_header_hash(struct tdb_context *tdb,
288 uint32_t *magic1_hash, uint32_t *magic2_hash);
289 unsigned int tdb_old_hash(TDB_DATA *key);
290 size_t tdb_dead_space(struct tdb_context *tdb, tdb_off_t off);
291 bool tdb_add_off_t(tdb_off_t a, tdb_off_t b, tdb_off_t *pret);
293 /* tdb_off_t and tdb_len_t right now are both uint32_t */
294 #define tdb_add_len_t tdb_add_off_t
295 #endif /* TDB_PRIVATE_H */