4 Unix SMB/CIFS implementation.
6 trivial database library - private includes
8 Copyright (C) Andrew Tridgell 2005
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 3 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, see <http://www.gnu.org/licenses/>.
29 #include "system/filesys.h"
30 #include "system/time.h"
31 #include "system/shmem.h"
32 #include "system/select.h"
33 #include "system/wait.h"
36 /* #define TDB_TRACE 1 */
37 #ifndef HAVE_GETPAGESIZE
38 #define getpagesize() 0x2000
/* Length and offset types used throughout the library; both are 32 bits wide. */
typedef uint32_t tdb_len_t;
typedef uint32_t tdb_off_t;
/*
 * Fallback offsetof() for environments whose headers do not supply one.
 * The guard avoids redefining a macro that a system header already
 * provides.  The result is cast to size_t rather than unsigned int so the
 * pointer-to-integer conversion is not truncated on platforms where
 * pointers are wider than int.
 */
#ifndef offsetof
#define offsetof(t,f) ((size_t)&((t *)0)->f)
#endif
/* Identification string; the header field that holds it is 32 bytes. */
#define TDB_MAGIC_FOOD             "TDB file\n"
/* Version of the code, stored in the header's version field. */
#define TDB_VERSION                (0x26011967 + 6)

/* Magic values.  TDB_FREE_MAGIC is the bitwise complement of TDB_MAGIC. */
#define TDB_MAGIC                  (0x26011999U)
#define TDB_FREE_MAGIC             (~TDB_MAGIC)
#define TDB_DEAD_MAGIC             (0xFEE1DEAD)
#define TDB_RECOVERY_MAGIC         (0xf53bc0e7U)
#define TDB_RECOVERY_INVALID_MAGIC (0x0)
#define TDB_HASH_RWLOCK_MAGIC      (0xbad1a51U)
#define TDB_FEATURE_FLAG_MAGIC     (0xbad1a52U)

/* Alignment unit and default number of hash entries. */
#define TDB_ALIGNMENT              4
#define DEFAULT_HASH_SIZE          131

/* Size of struct tdb_header, which is stored at the front of every database. */
#define FREELIST_TOP               (sizeof(struct tdb_header))
/*
 * Round x up to the next multiple of a.  The mask trick only gives a
 * multiple of a when a is a power of two.  Both arguments are evaluated
 * more than once, so avoid side effects in them.
 */
#define TDB_ALIGN(x,a) (((x) + (a) - 1) & ~((a) - 1))
/*
 * Reverse the byte order of a 32-bit value.
 *
 * The argument is converted to uint32_t before any shifting so that
 * signed inputs do not sign-extend on the right shift (or shift into the
 * sign bit on the left shift), and so that bits above bit 31 of a wider
 * argument cannot leak into the result.  The argument is evaluated more
 * than once; avoid side effects in it.
 */
#define TDB_BYTEREV(x) \
	((((uint32_t)(x) & 0x000000FFU) << 24) | \
	 (((uint32_t)(x) & 0x0000FF00U) <<  8) | \
	 (((uint32_t)(x) >>  8) & 0x0000FF00U) | \
	 ((uint32_t)(x) >> 24))
/* True when the record header r carries the dead-record magic. */
#define TDB_DEAD(r) (TDB_DEAD_MAGIC == (r)->magic)
/* True when r's magic is neither TDB_MAGIC nor the dead-record magic. */
#define TDB_BAD_MAGIC(r) (!((r)->magic == TDB_MAGIC || TDB_DEAD(r)))
/*
 * Offset arithmetic for the table of tdb_off_t slots that follows the
 * header.
 *
 * TDB_HASH_TOP(hash)        FREELIST_TOP plus (BUCKET(hash) + 1) slots.
 *                           BUCKET() reads the local variable "tdb".
 * TDB_HASHTABLE_SIZE(tdb)   byte size of hash_size + 1 slots.
 * TDB_DATA_START(hash_size) one slot past TDB_HASH_TOP(hash_size - 1).
 *                           Because TDB_HASH_TOP goes through BUCKET(),
 *                           the local "tdb" must be in scope here too.
 *
 * Macro arguments are parenthesized so that expressions such as
 * TDB_HASHTABLE_SIZE(&ctx) or TDB_DATA_START(a ? b : c) expand correctly.
 */
#define TDB_HASH_TOP(hash) (FREELIST_TOP + (BUCKET(hash)+1)*sizeof(tdb_off_t))
#define TDB_HASHTABLE_SIZE(tdb) (((tdb)->hash_size+1)*sizeof(tdb_off_t))
#define TDB_DATA_START(hash_size) (TDB_HASH_TOP((hash_size)-1) + sizeof(tdb_off_t))
/* Byte offsets of individual fields inside struct tdb_header. */
#define TDB_RECOVERY_HEAD      offsetof(struct tdb_header, recovery_start)
#define TDB_SEQNUM_OFS         offsetof(struct tdb_header, sequence_number)

/* Padding pattern, as a single byte and as a 32-bit word of that byte. */
#define TDB_PAD_BYTE           0x42
#define TDB_PAD_U32            0x42424242

/* Feature flag bit for mutex support. */
#define TDB_FEATURE_FLAG_MUTEX 0x00000001
74 #define TDB_SUPPORTED_FEATURE_FLAGS ( \
75 TDB_FEATURE_FLAG_MUTEX | \
/*
 * NB: assumes there is a local variable called "tdb" that is the current
 * context.  The argument must be a complete, parenthesized print-style
 * argument list, i.e. the call site uses double parentheses:
 * TDB_LOG((arg1, arg2, ...)).
 */
#define TDB_LOG(x) tdb->log.log_fn x
84 void tdb_trace(struct tdb_context
*tdb
, const char *op
);
85 void tdb_trace_seqnum(struct tdb_context
*tdb
, uint32_t seqnum
, const char *op
);
86 void tdb_trace_open(struct tdb_context
*tdb
, const char *op
,
87 unsigned hash_size
, unsigned tdb_flags
, unsigned open_flags
);
88 void tdb_trace_ret(struct tdb_context
*tdb
, const char *op
, int ret
);
89 void tdb_trace_retrec(struct tdb_context
*tdb
, const char *op
, TDB_DATA ret
);
90 void tdb_trace_1rec(struct tdb_context
*tdb
, const char *op
,
92 void tdb_trace_1rec_ret(struct tdb_context
*tdb
, const char *op
,
93 TDB_DATA rec
, int ret
);
94 void tdb_trace_1rec_retrec(struct tdb_context
*tdb
, const char *op
,
95 TDB_DATA rec
, TDB_DATA ret
);
96 void tdb_trace_2rec_flag_ret(struct tdb_context
*tdb
, const char *op
,
97 TDB_DATA rec1
, TDB_DATA rec2
, unsigned flag
,
99 void tdb_trace_1plusn_rec_flag_ret(struct tdb_context
*tdb
, const char *op
,
101 const TDB_DATA
*recs
, int num_recs
,
102 unsigned flag
, int ret
);
103 void tdb_trace_2rec_retrec(struct tdb_context
*tdb
, const char *op
,
104 TDB_DATA rec1
, TDB_DATA rec2
, TDB_DATA ret
);
/*
 * No-op versions of the trace hooks: every macro expands to nothing, so
 * its arguments are never evaluated and the call costs nothing.
 *
 * None of the definitions may end in a semicolon.  The call site supplies
 * its own, and a second one would turn
 *     if (cond) tdb_trace_xxx(...); else ...
 * into a syntax error.
 */
#define tdb_trace(tdb, op)
#define tdb_trace_seqnum(tdb, seqnum, op)
#define tdb_trace_open(tdb, op, hash_size, tdb_flags, open_flags)
#define tdb_trace_ret(tdb, op, ret)
#define tdb_trace_retrec(tdb, op, ret)
#define tdb_trace_1rec(tdb, op, rec)
#define tdb_trace_1rec_ret(tdb, op, rec, ret)
#define tdb_trace_1rec_retrec(tdb, op, rec, ret)
#define tdb_trace_2rec_flag_ret(tdb, op, rec1, rec2, flag, ret)
#define tdb_trace_1plusn_rec_flag_ret(tdb, op, rec, recs, num_recs, flag, ret)
#define tdb_trace_2rec_retrec(tdb, op, rec1, rec2, ret)
117 #endif /* !TDB_TRACE */
/* NOTE(review): presumably fixed lock positions used by the locking code; confirm against the callers. */
#define ACTIVE_LOCK      4
#define TRANSACTION_LOCK 8
/*
 * Free the memory x points to and reset x to NULL.  free(NULL) is defined
 * to do nothing, so no separate NULL test is needed.  x is evaluated more
 * than once and must be an assignable pointer.
 */
#define SAFE_FREE(x) do { free(x); (x) = NULL; } while (0)
/*
 * Note: the BUCKET macro is broken as it returns an unexpected result when
 * called as BUCKET(-1) for the freelist:
 *
 * -1 is converted to the unsigned value 4294967295 and only then is the
 * modulo by tdb->hash_size computed.  So with a hash size of 10 the
 * result is
 *
 *     4294967295 % 10 = 5
 *
 * where it should be -1 (C uses symmetric modulo).
 *
 * As all callers lock the same wrong list consistently, locking is still
 * consistent.  We can not change this without an incompatible on-disk
 * format change, otherwise different tdb versions would use incompatible
 * locking.
 *
 * Requires a local variable called "tdb" at the point of use.
 */
#define BUCKET(hash) ((hash) % tdb->hash_size)
/*
 * Both macros require a local variable called "tdb".
 *
 * DOCONV()   non-zero when TDB_CONVERT is set in tdb->flags.
 * CONVERT(x) yields tdb_convert(&x, sizeof(x)) when DOCONV() is non-zero,
 *            otherwise the address of x.  x must be an lvalue and is
 *            named more than once in the expansion.
 */
#define DOCONV() (tdb->flags & TDB_CONVERT)
#define CONVERT(x) (DOCONV() ? tdb_convert(&(x), sizeof(x)) : &(x))
151 /* the body of the database is made of one tdb_record for the free space
152 plus a separate data list for each hash value */
154 tdb_off_t next
; /* offset of the next record in the list */
155 tdb_len_t rec_len
; /* total byte length of record */
156 tdb_len_t key_len
; /* byte length of key */
157 tdb_len_t data_len
; /* byte length of data */
158 uint32_t full_hash
; /* the full 32 bit hash of the key */
159 uint32_t magic
; /* try to catch errors */
160 /* the following union is implied:
162 char record[rec_len];
167 uint32_t totalsize; (tailer)
173 /* this is stored at the front of every database */
175 char magic_food
[32]; /* for /etc/magic */
176 uint32_t version
; /* version of the code */
177 uint32_t hash_size
; /* number of hash entries */
178 tdb_off_t rwlocks
; /* obsolete - kept to detect old formats */
179 tdb_off_t recovery_start
; /* offset of transaction recovery region */
180 tdb_off_t sequence_number
; /* used when TDB_SEQNUM is set */
181 uint32_t magic1_hash
; /* hash of TDB_MAGIC_FOOD. */
182 uint32_t magic2_hash
; /* hash of TDB_MAGIC. */
183 uint32_t feature_flags
;
184 tdb_len_t mutex_size
; /* set if TDB_FEATURE_FLAG_MUTEX is set */
185 tdb_off_t reserved
[25];
188 struct tdb_lock_type
{
194 struct tdb_chainwalk_ctx
{
199 struct tdb_traverse_lock
{
200 struct tdb_traverse_lock
*next
;
206 void tdb_chainwalk_init(struct tdb_chainwalk_ctx
*ctx
, tdb_off_t ptr
);
207 bool tdb_chainwalk_check(struct tdb_context
*tdb
,
208 struct tdb_chainwalk_ctx
*ctx
,
211 enum tdb_lock_flags
{
212 /* WAIT == F_SETLKW, NOWAIT == F_SETLK */
215 /* If set, don't log an error on failure. */
217 /* If set, don't actually lock at all. */
218 TDB_LOCK_MARK_ONLY
= 4,
222 int (*tdb_read
)(struct tdb_context
*, tdb_off_t
, void *, tdb_len_t
, int );
223 int (*tdb_write
)(struct tdb_context
*, tdb_off_t
, const void *, tdb_len_t
);
224 void (*next_hash_chain
)(struct tdb_context
*, uint32_t *);
225 int (*tdb_oob
)(struct tdb_context
*, tdb_off_t
, tdb_len_t
, int );
226 int (*tdb_expand_file
)(struct tdb_context
*, tdb_off_t
, tdb_off_t
);
232 char *name
; /* the name of the database */
233 void *map_ptr
; /* where it is currently mapped */
234 int fd
; /* open file descriptor for the database */
235 tdb_len_t map_size
; /* how much space has been mapped */
236 int read_only
; /* opened read-only */
237 int traverse_read
; /* read-only traversal */
238 int traverse_write
; /* read-write traversal */
239 struct tdb_lock_type allrecord_lock
; /* .offset == upgradable */
241 struct tdb_lock_type
*lockrecs
; /* only real locks, all with count>0 */
242 int lockrecs_array_length
;
244 tdb_off_t hdr_ofs
; /* this is 0 or header.mutex_size */
245 struct tdb_mutexes
*mutexes
; /* mmap of the mutex area */
247 enum TDB_ERROR ecode
; /* error code for last tdb error */
249 uint32_t feature_flags
;
250 uint32_t flags
; /* the flags passed to tdb_open */
251 struct tdb_traverse_lock travlocks
; /* current traversal locks */
252 struct tdb_context
*next
; /* all tdbs to avoid multiple opens */
253 dev_t device
; /* uniquely identifies this tdb */
254 ino_t inode
; /* uniquely identifies this tdb */
255 struct tdb_logging_context log
;
256 unsigned int (*hash_fn
)(TDB_DATA
*key
);
257 int open_flags
; /* flags used in the open - needed by reopen */
258 const struct tdb_methods
*methods
;
259 struct tdb_transaction
*transaction
;
261 int max_dead_records
;
265 volatile sig_atomic_t *interrupt_sig_ptr
;
/* Mapping helpers for a database context.  Declarations only. */
int tdb_munmap(struct tdb_context *tdb);
int tdb_mmap(struct tdb_context *tdb);

/* Lock entry points taking a list index or an offset, plus a lock type. */
int tdb_lock(struct tdb_context *tdb, int list, int ltype);
int tdb_lock_nonblock(struct tdb_context *tdb, int list, int ltype);
int tdb_nest_lock(struct tdb_context *tdb, uint32_t offset, int ltype,
		  enum tdb_lock_flags flags);
278 int tdb_nest_unlock(struct tdb_context
*tdb
, uint32_t offset
, int ltype
,
280 int tdb_unlock(struct tdb_context
*tdb
, int list
, int ltype
);
281 int tdb_brlock(struct tdb_context
*tdb
,
282 int rw_type
, tdb_off_t offset
, size_t len
,
283 enum tdb_lock_flags flags
);
284 int tdb_brunlock(struct tdb_context
*tdb
,
285 int rw_type
, tdb_off_t offset
, size_t len
);
286 bool tdb_have_extra_locks(struct tdb_context
*tdb
);
287 void tdb_release_transaction_locks(struct tdb_context
*tdb
);
288 int tdb_transaction_lock(struct tdb_context
*tdb
, int ltype
,
289 enum tdb_lock_flags lockflags
);
290 int tdb_transaction_unlock(struct tdb_context
*tdb
, int ltype
);
291 int tdb_recovery_area(struct tdb_context
*tdb
,
292 const struct tdb_methods
*methods
,
293 tdb_off_t
*recovery_offset
,
294 struct tdb_record
*rec
);
295 int tdb_allrecord_lock(struct tdb_context
*tdb
, int ltype
,
296 enum tdb_lock_flags flags
, bool upgradable
);
297 int tdb_allrecord_unlock(struct tdb_context
*tdb
, int ltype
, bool mark_lock
);
298 int tdb_allrecord_upgrade(struct tdb_context
*tdb
);
299 int tdb_write_lock_record(struct tdb_context
*tdb
, tdb_off_t off
);
300 int tdb_write_unlock_record(struct tdb_context
*tdb
, tdb_off_t off
);
301 int tdb_ofs_read(struct tdb_context
*tdb
, tdb_off_t offset
, tdb_off_t
*d
);
302 int tdb_ofs_write(struct tdb_context
*tdb
, tdb_off_t offset
, tdb_off_t
*d
);
303 void *tdb_convert(void *buf
, uint32_t size
);
304 int tdb_free(struct tdb_context
*tdb
, tdb_off_t offset
, struct tdb_record
*rec
);
305 tdb_off_t
tdb_allocate(struct tdb_context
*tdb
, int hash
, tdb_len_t length
,
306 struct tdb_record
*rec
);
307 int tdb_ofs_read(struct tdb_context
*tdb
, tdb_off_t offset
, tdb_off_t
*d
);
308 int tdb_ofs_write(struct tdb_context
*tdb
, tdb_off_t offset
, tdb_off_t
*d
);
309 int tdb_lock_record(struct tdb_context
*tdb
, tdb_off_t off
);
310 int tdb_unlock_record(struct tdb_context
*tdb
, tdb_off_t off
);
311 bool tdb_needs_recovery(struct tdb_context
*tdb
);
312 int tdb_rec_read(struct tdb_context
*tdb
, tdb_off_t offset
, struct tdb_record
*rec
);
313 int tdb_rec_write(struct tdb_context
*tdb
, tdb_off_t offset
, struct tdb_record
*rec
);
314 unsigned char *tdb_alloc_read(struct tdb_context
*tdb
, tdb_off_t offset
, tdb_len_t len
);
315 int tdb_parse_data(struct tdb_context
*tdb
, TDB_DATA key
,
316 tdb_off_t offset
, tdb_len_t len
,
317 int (*parser
)(TDB_DATA key
, TDB_DATA data
,
320 tdb_off_t
tdb_find_lock_hash(struct tdb_context
*tdb
, TDB_DATA key
, uint32_t hash
, int locktype
,
321 struct tdb_record
*rec
);
322 tdb_off_t
tdb_find_dead(struct tdb_context
*tdb
, uint32_t hash
,
323 struct tdb_record
*r
, tdb_len_t length
,
324 tdb_off_t
*p_last_ptr
);
325 int tdb_trim_dead(struct tdb_context
*tdb
, uint32_t hash
);
326 void tdb_io_init(struct tdb_context
*tdb
);
327 int tdb_expand(struct tdb_context
*tdb
, tdb_off_t size
);
328 tdb_off_t
tdb_expand_adjust(tdb_off_t map_size
, tdb_off_t size
, int page_size
);
329 int tdb_rec_free_read(struct tdb_context
*tdb
, tdb_off_t off
,
330 struct tdb_record
*rec
);
331 bool tdb_write_all(int fd
, const void *buf
, size_t count
);
332 int tdb_transaction_recover(struct tdb_context
*tdb
);
333 void tdb_header_hash(struct tdb_context
*tdb
,
334 uint32_t *magic1_hash
, uint32_t *magic2_hash
);
335 unsigned int tdb_old_hash(TDB_DATA
*key
);
336 size_t tdb_dead_space(struct tdb_context
*tdb
, tdb_off_t off
);
337 bool tdb_add_off_t(tdb_off_t a
, tdb_off_t b
, tdb_off_t
*pret
);
/*
 * tdb_off_t and tdb_len_t are currently both uint32_t, so adding lengths
 * simply reuses the offset helper.
 */
#define tdb_add_len_t tdb_add_off_t
/* Mutex support: query, set-up and mapping helpers.  Declarations only. */
size_t tdb_mutex_size(struct tdb_context *tdb);
bool tdb_have_mutexes(struct tdb_context *tdb);
int tdb_mutex_init(struct tdb_context *tdb);
int tdb_mutex_mmap(struct tdb_context *tdb);
int tdb_mutex_munmap(struct tdb_context *tdb);

/* Returns a bool and also reports an int through pret. */
bool tdb_mutex_lock(struct tdb_context *tdb, int rw, off_t off, off_t len,
		    bool waitflag, int *pret);
349 bool tdb_mutex_unlock(struct tdb_context
*tdb
, int rw
, off_t off
, off_t len
,
/* Mutex-based all-record lock operations.  Declarations only. */
int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype,
			     enum tdb_lock_flags flags);
int tdb_mutex_allrecord_unlock(struct tdb_context *tdb);
int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb);
void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb);
357 #endif /* TDB_PRIVATE_H */