2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 3 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, see <http://www.gnu.org/licenses/>.
28 #include "tdb_private.h"
/*
 * tdb_rec_free_read() - load a freelist record header and sanity-check it.
 *
 *   tdb: database context; all I/O goes through tdb->methods.
 *   off: offset handed to tdb_read() (and to tdb_write() when repairing).
 *   rec: caller-supplied header buffer; sizeof(*rec) bytes are read into it.
 *
 * Behaviour visible in this listing:
 *   - a header whose magic is TDB_MAGIC is logged as a warning, patched to
 *     TDB_FREE_MAGIC in `rec` and written back at `off`;
 *   - a header whose magic is then still not TDB_FREE_MAGIC sets
 *     tdb->ecode to TDB_ERR_CORRUPT, logs a warning and returns
 *     TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
 *   - rec->next + sizeof(*rec) is finally passed to tdb->methods->tdb_oob().
 *
 * NOTE(review): the embedded line numbers skip (33 -> 36, 42 -> 46,
 * 49 -> 51, 53 -> end), so the braces, the statements guarded by the
 * read/write/oob checks and the trailing arguments of both TDB_LOG calls
 * are not visible here. Restore them from the pristine file before
 * building from this text.
 */
30 /* read a freelist record and check for simple errors */
31 int tdb_rec_free_read(struct tdb_context
*tdb
, tdb_off_t off
, struct list_struct
*rec
)
33 if (tdb
->methods
->tdb_read(tdb
, off
, rec
, sizeof(*rec
),DOCONV()) == -1)
/* NOTE(review): the statement executed when the read fails is missing here. */
36 if (rec
->magic
== TDB_MAGIC
) {
37 /* this happens when an app is shut down while deleting a record - we should
38 not completely fail when this happens */
39 TDB_LOG((tdb
, TDB_DEBUG_WARNING
, "tdb_rec_free_read non-free magic 0x%x at offset=%d - fixing\n",
/* NOTE(review): the arguments matching 0x%x and %d (original line 40) are missing here. */
41 rec
->magic
= TDB_FREE_MAGIC
;
/* Persist the repaired header so the fix-up is stored at `off`, not only in `rec`. */
42 if (tdb
->methods
->tdb_write(tdb
, off
, rec
, sizeof(*rec
)) == -1)
/* NOTE(review): the write-failure statement and the closing brace are missing here. */
46 if (rec
->magic
!= TDB_FREE_MAGIC
) {
47 /* Ensure ecode is set for log fn. */
48 tdb
->ecode
= TDB_ERR_CORRUPT
;
49 TDB_LOG((tdb
, TDB_DEBUG_WARNING
, "tdb_rec_free_read bad magic 0x%x at offset=%d\n",
/* NOTE(review): the log arguments (original line 50) are missing here. */
51 return TDB_ERRCODE(TDB_ERR_CORRUPT
, -1);
/* Bounds probe on the offset derived from rec->next; result handling is not shown. */
53 if (tdb
->methods
->tdb_oob(tdb
, rec
->next
+sizeof(*rec
), 0) != 0)
/*
 * remove_from_freelist() - unlink one record from the free list.
 *
 *   tdb:  database context.
 *   off:  offset of the record to unlink (only visibly used in the log
 *         message in this listing).
 *   next: value written over the link that is being replaced.
 *
 * The walk starts at FREELIST_TOP. Each iteration reads the offset stored
 * at last_ptr into i and stops when the read returns -1 or i is 0. One
 * path returns the result of tdb_ofs_write(tdb, last_ptr, &next). If the
 * loop ends without returning, the function logs "not on list" and
 * returns TDB_ERRCODE(TDB_ERR_CORRUPT, -1).
 *
 * NOTE(review): original lines 68-69 and 73-74 are missing. Presumably
 * they compare i with off and advance last_ptr to i - confirm against
 * the pristine file.
 * NOTE(review): off is logged with %d here while other messages in this
 * file log offsets with %u - confirm which matches tdb_off_t.
 */
60 /* Remove an element from the freelist. Must have alloc lock. */
61 static int remove_from_freelist(struct tdb_context
*tdb
, tdb_off_t off
, tdb_off_t next
)
63 tdb_off_t last_ptr
, i
;
65 /* read in the freelist top */
66 last_ptr
= FREELIST_TOP
;
67 while (tdb_ofs_read(tdb
, last_ptr
, &i
) != -1 && i
!= 0) {
/* NOTE(review): the condition guarding this return is missing from the listing. */
70 return tdb_ofs_write(tdb
, last_ptr
, &next
);
72 /* Follow chain (next offset is at start of record) */
/* NOTE(review): the statement that advances last_ptr and the loop's closing brace are missing. */
75 TDB_LOG((tdb
, TDB_DEBUG_FATAL
,"remove_from_freelist: not on list at off=%d\n", off
));
76 return TDB_ERRCODE(TDB_ERR_CORRUPT
, -1);
/*
 * update_tailer() - rewrite the tailer word of the record at `offset`.
 *
 *   tdb:    database context.
 *   offset: offset of the record header.
 *   rec:    header of that record; only rec->rec_len is read here.
 *
 * totalsize is the header size plus rec->rec_len. The write goes to
 * offset + totalsize - sizeof(tdb_off_t), i.e. the last tdb_off_t-sized
 * slot inside the record. Returns whatever tdb_ofs_write() returns.
 *
 * NOTE(review): the declaration of totalsize (original lines 83-85) and
 * the final argument of tdb_ofs_write (original line 89) are missing
 * from this listing, so the value stored in the tailer is not shown.
 */
80 /* update a record tailer (must hold allocation lock) */
81 static int update_tailer(struct tdb_context
*tdb
, tdb_off_t offset
,
82 const struct list_struct
*rec
)
86 /* Offset of tailer from record header */
87 totalsize
= sizeof(*rec
) + rec
->rec_len
;
88 return tdb_ofs_write(tdb
, offset
+ totalsize
- sizeof(tdb_off_t
),
/*
 * tdb_free() - put the record at `offset` on the free list, merging it
 * with free neighbours first.
 *
 *   tdb:    database context.
 *   offset: offset of the record header being freed.
 *   rec:    in-memory header of that record; rec_len, magic and next are
 *           modified here.
 *
 * Steps visible in this listing:
 *   1. take the lock with tdb_lock(tdb, -1, F_WRLCK);
 *   2. write an initial tailer for the record;
 *   3. right neighbour: if a full header fits below tdb->map_size, read
 *      it; when its magic is TDB_FREE_MAGIC, unlink it and grow
 *      rec->rec_len by sizeof(r) + r.rec_len;
 *   4. left neighbour: read the tdb_off_t just before `offset`, treat 0
 *      and TDB_PAD_U32 as uninitialised, otherwise step back by that
 *      amount, read the header there and, when it is free, unlink it and
 *      grow rec->rec_len by leftsize;
 *   5. rewrite the tailer, set rec->magic to TDB_FREE_MAGIC and link the
 *      record in at FREELIST_TOP;
 *   6. release the lock.
 *
 * NOTE(review): the comment that opens this function is unterminated in
 * this listing (original line 93 is missing). The return statements,
 * jump targets and several closing braces are missing as well.
 */
92 /* Add an element into the freelist. Merge adjacent records if
94 int tdb_free(struct tdb_context
*tdb
, tdb_off_t offset
, struct list_struct
*rec
)
96 tdb_off_t right
, left
;
98 /* Allocation and tailer lock */
99 if (tdb_lock(tdb
, -1, F_WRLCK
) != 0)
/* NOTE(review): the statement taken when the lock cannot be obtained is missing here. */
102 /* set an initial tailer, so if we fail we don't leave a bogus record */
103 if (update_tailer(tdb
, offset
, rec
) != 0) {
104 TDB_LOG((tdb
, TDB_DEBUG_FATAL
, "tdb_free: update_tailer failed!\n"));
/* NOTE(review): the failure exit and closing brace (original lines 105-106) are missing here. */
108 /* Look right first (I'm an Australian, dammit) */
109 right
= offset
+ sizeof(*rec
) + rec
->rec_len
;
/* Only probe the right neighbour when a whole header fits inside the mapped size. */
110 if (right
+ sizeof(*rec
) <= tdb
->map_size
) {
111 struct list_struct r
;
113 if (tdb
->methods
->tdb_read(tdb
, right
, &r
, sizeof(r
), DOCONV()) == -1) {
114 TDB_LOG((tdb
, TDB_DEBUG_FATAL
, "tdb_free: right read failed at %u\n", right
));
118 /* If it's free, expand to include it. */
119 if (r
.magic
== TDB_FREE_MAGIC
) {
120 if (remove_from_freelist(tdb
, right
, r
.next
) == -1) {
121 TDB_LOG((tdb
, TDB_DEBUG_FATAL
, "tdb_free: right free failed at %u\n", right
));
/* Absorb the neighbour: its header plus its payload become part of this record. */
124 rec
->rec_len
+= sizeof(r
) + r
.rec_len
;
/* NOTE(review): original lines 125-129 (closing braces and whatever precedes the left-hand pass) are missing. */
/* left first addresses the tdb_off_t stored immediately before this record. */
130 left
= offset
- sizeof(tdb_off_t
);
131 if (left
> TDB_DATA_START(tdb
->header
.hash_size
)) {
132 struct list_struct l
;
/* NOTE(review): the declaration of leftsize (original line 133) is missing here. */
135 /* Read in tailer and jump back to header */
136 if (tdb_ofs_read(tdb
, left
, &leftsize
) == -1) {
137 TDB_LOG((tdb
, TDB_DEBUG_FATAL
, "tdb_free: left offset read failed at %u\n", left
));
141 /* it could be uninitialised data */
142 if (leftsize
== 0 || leftsize
== TDB_PAD_U32
) {
/* NOTE(review): the action taken for an uninitialised tailer is missing here. */
/* Step back by the size read from the tailer to reach the candidate header. */
146 left
= offset
- leftsize
;
148 /* Now read in record */
149 if (tdb
->methods
->tdb_read(tdb
, left
, &l
, sizeof(l
), DOCONV()) == -1) {
150 TDB_LOG((tdb
, TDB_DEBUG_FATAL
, "tdb_free: left read failed at %u (%u)\n", left
, leftsize
));
154 /* If it's free, expand to include it. */
155 if (l
.magic
== TDB_FREE_MAGIC
) {
156 if (remove_from_freelist(tdb
, left
, l
.next
) == -1) {
157 TDB_LOG((tdb
, TDB_DEBUG_FATAL
, "tdb_free: left free failed at %u\n", left
));
161 rec
->rec_len
+= leftsize
;
/*
 * NOTE(review): rec_len grows by leftsize above, but no visible line moves
 * `offset` back to `left` (original lines 158-160 and 162-166 are missing).
 * Confirm the pristine file does so before the tailer and header are
 * written at `offset` below.
 */
167 if (update_tailer(tdb
, offset
, rec
) == -1) {
168 TDB_LOG((tdb
, TDB_DEBUG_FATAL
, "tdb_free: update_tailer failed at %u\n", offset
));
172 /* Now, prepend to free list */
173 rec
->magic
= TDB_FREE_MAGIC
;
/*
 * Link-in order: copy the current list head into rec->next, write the
 * header at `offset`, then point FREELIST_TOP at `offset`. The || chain
 * stops at the first call that returns -1.
 */
175 if (tdb_ofs_read(tdb
, FREELIST_TOP
, &rec
->next
) == -1 ||
176 tdb_rec_write(tdb
, offset
, rec
) == -1 ||
177 tdb_ofs_write(tdb
, FREELIST_TOP
, &offset
) == -1) {
178 TDB_LOG((tdb
, TDB_DEBUG_FATAL
, "tdb_free record write failed at offset=%d\n", offset
));
182 /* And we're done. */
183 tdb_unlock(tdb
, -1, F_WRLCK
);
/* NOTE(review): a second unlock follows; presumably this is the failure exit, but its label and both return statements are missing. */
187 tdb_unlock(tdb
, -1, F_WRLCK
);
/*
 * tdb_allocate_ofs() - carve an allocation out of a chosen free record.
 *
 *   tdb:      database context.
 *   length:   bytes wanted; rounded with TDB_ALIGN when the record is split.
 *   rec_ptr:  offset of the chosen free record.
 *   rec:      in-memory header of that record; rec_len and magic are
 *             modified here.
 *   last_ptr: offset of the link that points at rec_ptr; it is overwritten
 *             with rec->next to unlink the record.
 *
 * Steps visible in this listing:
 *   - zero a scratch header `newrec`;
 *   - when rec->rec_len exceeds length + MIN_REC_SIZE, split: the right
 *     remainder starts at rec_ptr + sizeof(*rec) + length with
 *     rec_len = old rec_len - (sizeof(*rec) + length), and the left piece
 *     is shortened to `length`;
 *   - unlink the record, mark it TDB_MAGIC and write its header;
 *   - update the tailer of the allocated piece and hand the remainder to
 *     tdb_free().
 *
 * NOTE(review): the comment delimiters around the first two lines, every
 * failure statement, the test behind "Did we create new block?" and the
 * final return are missing from this listing.
 */
193 the core of tdb_allocate - called when we have decided which
194 free list entry to use
196 static tdb_off_t
tdb_allocate_ofs(struct tdb_context
*tdb
, tdb_len_t length
, tdb_off_t rec_ptr
,
197 struct list_struct
*rec
, tdb_off_t last_ptr
)
199 struct list_struct newrec
;
200 tdb_off_t newrec_ptr
;
/* Start from an all-zero header for the possible remainder record. */
202 memset(&newrec
, '\0', sizeof(newrec
));
204 /* found it - now possibly split it up */
205 if (rec
->rec_len
> length
+ MIN_REC_SIZE
) {
206 /* Length of left piece */
207 length
= TDB_ALIGN(length
, TDB_ALIGNMENT
);
209 /* Right piece to go on free list */
210 newrec
.rec_len
= rec
->rec_len
- (sizeof(*rec
) + length
);
211 newrec_ptr
= rec_ptr
+ sizeof(*rec
) + length
;
213 /* And left record is shortened */
214 rec
->rec_len
= length
;
/* NOTE(review): original lines 215-218 (closing brace and any else branch) are missing here. */
219 /* Remove allocated record from the free list */
220 if (tdb_ofs_write(tdb
, last_ptr
, &rec
->next
) == -1) {
/* NOTE(review): the failure statement (original lines 221-222) is missing here. */
224 /* Update header: do this before we drop alloc
225 lock, otherwise tdb_free() might try to
226 merge with us, thinking we're free.
227 (Thanks Jeremy Allison). */
228 rec
->magic
= TDB_MAGIC
;
229 if (tdb_rec_write(tdb
, rec_ptr
, rec
) == -1) {
/* NOTE(review): the failure statement and the condition for the next section are missing here. */
233 /* Did we create new block? */
235 /* Update allocated record tailer (we
237 if (update_tailer(tdb
, rec_ptr
, rec
) == -1) {
241 /* Free new record */
242 if (tdb_free(tdb
, newrec_ptr
, &newrec
) == -1) {
/* NOTE(review): the failure statement and closing braces are missing; the return after the next comment is not shown. */
247 /* all done - return the new record offset */
/*
 * tdb_allocate() - find space for `length` bytes on the free list.
 *
 *   tdb:    database context.
 *   length: payload bytes wanted; sizeof(tdb_off_t) is added for the tailer.
 *   rec:    caller-supplied header buffer, reused for every free record
 *           that is inspected and finally for the chosen one.
 *
 * Steps visible in this listing:
 *   - take the lock with tdb_lock(tdb, -1, F_WRLCK);
 *   - read the list head from FREELIST_TOP into rec_ptr;
 *   - for each free record read with tdb_rec_free_read(), remember it in
 *     `bestfit` when it is large enough and either the first candidate or
 *     smaller than the current one; a candidate with rec_len < 2*length
 *     is treated as good enough (the action taken is not shown);
 *   - if a candidate exists, re-read it and pass it to tdb_allocate_ofs(),
 *     then unlock;
 *   - otherwise call tdb_expand(tdb, length + sizeof(*rec)); per the
 *     comment beside it, a result of 0 leads to another attempt;
 *   - unlock on the remaining path.
 *
 * NOTE(review): the opening comment is unterminated in this listing, and
 * the two tdb_off_t declarations near the top look like one local
 * declaration plus the members of a `bestfit` struct whose wrapper lines
 * are missing. The loop header, labels, jumps, return statements and the
 * initialisation of bestfit.rec_ptr and bestfit.rec_len are not visible.
 */
251 /* allocate some space from the free list. The offset returned points
252 to a unconnected list_struct within the database with room for at
253 least length bytes of total data
255 0 is returned if the space could not be allocated
257 tdb_off_t
tdb_allocate(struct tdb_context
*tdb
, tdb_len_t length
, struct list_struct
*rec
)
259 tdb_off_t rec_ptr
, last_ptr
, newrec_ptr
;
261 tdb_off_t rec_ptr
, last_ptr
;
265 if (tdb_lock(tdb
, -1, F_WRLCK
) == -1)
268 /* Extra bytes required for tailer */
269 length
+= sizeof(tdb_off_t
);
/* last_ptr tracks the link that points at rec_ptr; it starts at the list head slot. */
272 last_ptr
= FREELIST_TOP
;
274 /* read in the freelist top */
275 if (tdb_ofs_read(tdb
, FREELIST_TOP
, &rec_ptr
) == -1)
/* NOTE(review): the read-failure statement and the other bestfit initialisers are missing here. */
279 bestfit
.last_ptr
= 0;
283 this is a best fit allocation strategy. Originally we used
284 a first fit strategy, but it suffered from massive fragmentation
285 issues when faced with a slowly increasing record size.
/* NOTE(review): the loop header that iterates over rec_ptr (original line 287) is missing here. */
288 if (tdb_rec_free_read(tdb
, rec_ptr
, rec
) == -1) {
/* A record qualifies only if it can hold the request including the tailer. */
292 if (rec
->rec_len
>= length
) {
293 if (bestfit
.rec_ptr
== 0 ||
294 rec
->rec_len
< bestfit
.rec_len
) {
295 bestfit
.rec_len
= rec
->rec_len
;
296 bestfit
.rec_ptr
= rec_ptr
;
297 bestfit
.last_ptr
= last_ptr
;
298 /* consider a fit to be good enough if
299 we aren't wasting more than half
301 if (bestfit
.rec_len
< 2*length
) {
307 /* move to the next record */
/* NOTE(review): the statements that advance last_ptr and rec_ptr (original lines 308-310) are missing here. */
312 if (bestfit
.rec_ptr
!= 0) {
/* Re-read the chosen record: `rec` may hold a later record from the scan. */
313 if (tdb_rec_free_read(tdb
, bestfit
.rec_ptr
, rec
) == -1) {
317 newrec_ptr
= tdb_allocate_ofs(tdb
, length
, bestfit
.rec_ptr
, rec
, bestfit
.last_ptr
);
318 tdb_unlock(tdb
, -1, F_WRLCK
);
/* NOTE(review): the return of newrec_ptr and the closing brace are missing here. */
322 /* we didn't find enough space. See if we can expand the
323 database and if we can then try again */
324 if (tdb_expand(tdb
, length
+ sizeof(*rec
)) == 0)
/* NOTE(review): the retry jump, the failure label and the final return are missing here. */
327 tdb_unlock(tdb
, -1, F_WRLCK
);