r23798: updated old Temple Place FSF addresses to new URL
[Samba.git] / source / lib / tdb / common / freelist.c
blobb109643f23561790f7616a963551c7bf64c13385
1 /*
2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released
12 ** under the LGPL
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 3 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, see <http://www.gnu.org/licenses/>.
28 #include "tdb_private.h"
30 /* read a freelist record and check for simple errors */
31 int tdb_rec_free_read(struct tdb_context *tdb, tdb_off_t off, struct list_struct *rec)
33 if (tdb->methods->tdb_read(tdb, off, rec, sizeof(*rec),DOCONV()) == -1)
34 return -1;
36 if (rec->magic == TDB_MAGIC) {
37 /* this happens when a app is showdown while deleting a record - we should
38 not completely fail when this happens */
39 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_rec_free_read non-free magic 0x%x at offset=%d - fixing\n",
40 rec->magic, off));
41 rec->magic = TDB_FREE_MAGIC;
42 if (tdb->methods->tdb_write(tdb, off, rec, sizeof(*rec)) == -1)
43 return -1;
46 if (rec->magic != TDB_FREE_MAGIC) {
47 /* Ensure ecode is set for log fn. */
48 tdb->ecode = TDB_ERR_CORRUPT;
49 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_rec_free_read bad magic 0x%x at offset=%d\n",
50 rec->magic, off));
51 return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
53 if (tdb->methods->tdb_oob(tdb, rec->next+sizeof(*rec), 0) != 0)
54 return -1;
55 return 0;
60 /* Remove an element from the freelist. Must have alloc lock. */
61 static int remove_from_freelist(struct tdb_context *tdb, tdb_off_t off, tdb_off_t next)
63 tdb_off_t last_ptr, i;
65 /* read in the freelist top */
66 last_ptr = FREELIST_TOP;
67 while (tdb_ofs_read(tdb, last_ptr, &i) != -1 && i != 0) {
68 if (i == off) {
69 /* We've found it! */
70 return tdb_ofs_write(tdb, last_ptr, &next);
72 /* Follow chain (next offset is at start of record) */
73 last_ptr = i;
75 TDB_LOG((tdb, TDB_DEBUG_FATAL,"remove_from_freelist: not on list at off=%d\n", off));
76 return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
80 /* update a record tailer (must hold allocation lock) */
81 static int update_tailer(struct tdb_context *tdb, tdb_off_t offset,
82 const struct list_struct *rec)
84 tdb_off_t totalsize;
86 /* Offset of tailer from record header */
87 totalsize = sizeof(*rec) + rec->rec_len;
88 return tdb_ofs_write(tdb, offset + totalsize - sizeof(tdb_off_t),
89 &totalsize);
92 /* Add an element into the freelist. Merge adjacent records if
93 neccessary. */
94 int tdb_free(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec)
96 tdb_off_t right, left;
98 /* Allocation and tailer lock */
99 if (tdb_lock(tdb, -1, F_WRLCK) != 0)
100 return -1;
102 /* set an initial tailer, so if we fail we don't leave a bogus record */
103 if (update_tailer(tdb, offset, rec) != 0) {
104 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: update_tailer failed!\n"));
105 goto fail;
108 /* Look right first (I'm an Australian, dammit) */
109 right = offset + sizeof(*rec) + rec->rec_len;
110 if (right + sizeof(*rec) <= tdb->map_size) {
111 struct list_struct r;
113 if (tdb->methods->tdb_read(tdb, right, &r, sizeof(r), DOCONV()) == -1) {
114 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: right read failed at %u\n", right));
115 goto left;
118 /* If it's free, expand to include it. */
119 if (r.magic == TDB_FREE_MAGIC) {
120 if (remove_from_freelist(tdb, right, r.next) == -1) {
121 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: right free failed at %u\n", right));
122 goto left;
124 rec->rec_len += sizeof(r) + r.rec_len;
128 left:
129 /* Look left */
130 left = offset - sizeof(tdb_off_t);
131 if (left > TDB_DATA_START(tdb->header.hash_size)) {
132 struct list_struct l;
133 tdb_off_t leftsize;
135 /* Read in tailer and jump back to header */
136 if (tdb_ofs_read(tdb, left, &leftsize) == -1) {
137 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: left offset read failed at %u\n", left));
138 goto update;
141 /* it could be uninitialised data */
142 if (leftsize == 0 || leftsize == TDB_PAD_U32) {
143 goto update;
146 left = offset - leftsize;
148 /* Now read in record */
149 if (tdb->methods->tdb_read(tdb, left, &l, sizeof(l), DOCONV()) == -1) {
150 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: left read failed at %u (%u)\n", left, leftsize));
151 goto update;
154 /* If it's free, expand to include it. */
155 if (l.magic == TDB_FREE_MAGIC) {
156 if (remove_from_freelist(tdb, left, l.next) == -1) {
157 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: left free failed at %u\n", left));
158 goto update;
159 } else {
160 offset = left;
161 rec->rec_len += leftsize;
166 update:
167 if (update_tailer(tdb, offset, rec) == -1) {
168 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: update_tailer failed at %u\n", offset));
169 goto fail;
172 /* Now, prepend to free list */
173 rec->magic = TDB_FREE_MAGIC;
175 if (tdb_ofs_read(tdb, FREELIST_TOP, &rec->next) == -1 ||
176 tdb_rec_write(tdb, offset, rec) == -1 ||
177 tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) {
178 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free record write failed at offset=%d\n", offset));
179 goto fail;
182 /* And we're done. */
183 tdb_unlock(tdb, -1, F_WRLCK);
184 return 0;
186 fail:
187 tdb_unlock(tdb, -1, F_WRLCK);
188 return -1;
193 the core of tdb_allocate - called when we have decided which
194 free list entry to use
196 static tdb_off_t tdb_allocate_ofs(struct tdb_context *tdb, tdb_len_t length, tdb_off_t rec_ptr,
197 struct list_struct *rec, tdb_off_t last_ptr)
199 struct list_struct newrec;
200 tdb_off_t newrec_ptr;
202 memset(&newrec, '\0', sizeof(newrec));
204 /* found it - now possibly split it up */
205 if (rec->rec_len > length + MIN_REC_SIZE) {
206 /* Length of left piece */
207 length = TDB_ALIGN(length, TDB_ALIGNMENT);
209 /* Right piece to go on free list */
210 newrec.rec_len = rec->rec_len - (sizeof(*rec) + length);
211 newrec_ptr = rec_ptr + sizeof(*rec) + length;
213 /* And left record is shortened */
214 rec->rec_len = length;
215 } else {
216 newrec_ptr = 0;
219 /* Remove allocated record from the free list */
220 if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1) {
221 return 0;
224 /* Update header: do this before we drop alloc
225 lock, otherwise tdb_free() might try to
226 merge with us, thinking we're free.
227 (Thanks Jeremy Allison). */
228 rec->magic = TDB_MAGIC;
229 if (tdb_rec_write(tdb, rec_ptr, rec) == -1) {
230 return 0;
233 /* Did we create new block? */
234 if (newrec_ptr) {
235 /* Update allocated record tailer (we
236 shortened it). */
237 if (update_tailer(tdb, rec_ptr, rec) == -1) {
238 return 0;
241 /* Free new record */
242 if (tdb_free(tdb, newrec_ptr, &newrec) == -1) {
243 return 0;
247 /* all done - return the new record offset */
248 return rec_ptr;
251 /* allocate some space from the free list. The offset returned points
252 to a unconnected list_struct within the database with room for at
253 least length bytes of total data
255 0 is returned if the space could not be allocated
257 tdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct list_struct *rec)
259 tdb_off_t rec_ptr, last_ptr, newrec_ptr;
260 struct {
261 tdb_off_t rec_ptr, last_ptr;
262 tdb_len_t rec_len;
263 } bestfit;
265 if (tdb_lock(tdb, -1, F_WRLCK) == -1)
266 return 0;
268 /* Extra bytes required for tailer */
269 length += sizeof(tdb_off_t);
271 again:
272 last_ptr = FREELIST_TOP;
274 /* read in the freelist top */
275 if (tdb_ofs_read(tdb, FREELIST_TOP, &rec_ptr) == -1)
276 goto fail;
278 bestfit.rec_ptr = 0;
279 bestfit.last_ptr = 0;
280 bestfit.rec_len = 0;
283 this is a best fit allocation strategy. Originally we used
284 a first fit strategy, but it suffered from massive fragmentation
285 issues when faced with a slowly increasing record size.
287 while (rec_ptr) {
288 if (tdb_rec_free_read(tdb, rec_ptr, rec) == -1) {
289 goto fail;
292 if (rec->rec_len >= length) {
293 if (bestfit.rec_ptr == 0 ||
294 rec->rec_len < bestfit.rec_len) {
295 bestfit.rec_len = rec->rec_len;
296 bestfit.rec_ptr = rec_ptr;
297 bestfit.last_ptr = last_ptr;
298 /* consider a fit to be good enough if
299 we aren't wasting more than half
300 the space */
301 if (bestfit.rec_len < 2*length) {
302 break;
307 /* move to the next record */
308 last_ptr = rec_ptr;
309 rec_ptr = rec->next;
312 if (bestfit.rec_ptr != 0) {
313 if (tdb_rec_free_read(tdb, bestfit.rec_ptr, rec) == -1) {
314 goto fail;
317 newrec_ptr = tdb_allocate_ofs(tdb, length, bestfit.rec_ptr, rec, bestfit.last_ptr);
318 tdb_unlock(tdb, -1, F_WRLCK);
319 return newrec_ptr;
322 /* we didn't find enough space. See if we can expand the
323 database and if we can then try again */
324 if (tdb_expand(tdb, length + sizeof(*rec)) == 0)
325 goto again;
326 fail:
327 tdb_unlock(tdb, -1, F_WRLCK);
328 return 0;