r25068: Older samba3 DCs will return DCERPC_FAULT_OP_RNG_ERROR for every opcode on the
[Samba.git] / source / lib / tdb / common / freelist.c
blob01b61aff86561cf456464b3131ebde780bee4c76
1 /*
2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released
12 ** under the LGPL
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 2 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 #include "tdb_private.h"
31 /* read a freelist record and check for simple errors */
32 int tdb_rec_free_read(struct tdb_context *tdb, tdb_off_t off, struct list_struct *rec)
34 if (tdb->methods->tdb_read(tdb, off, rec, sizeof(*rec),DOCONV()) == -1)
35 return -1;
37 if (rec->magic == TDB_MAGIC) {
38 /* this happens when a app is showdown while deleting a record - we should
39 not completely fail when this happens */
40 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_rec_free_read non-free magic 0x%x at offset=%d - fixing\n",
41 rec->magic, off));
42 rec->magic = TDB_FREE_MAGIC;
43 if (tdb->methods->tdb_write(tdb, off, rec, sizeof(*rec)) == -1)
44 return -1;
47 if (rec->magic != TDB_FREE_MAGIC) {
48 /* Ensure ecode is set for log fn. */
49 tdb->ecode = TDB_ERR_CORRUPT;
50 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_rec_free_read bad magic 0x%x at offset=%d\n",
51 rec->magic, off));
52 return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
54 if (tdb->methods->tdb_oob(tdb, rec->next+sizeof(*rec), 0) != 0)
55 return -1;
56 return 0;
61 /* Remove an element from the freelist. Must have alloc lock. */
62 static int remove_from_freelist(struct tdb_context *tdb, tdb_off_t off, tdb_off_t next)
64 tdb_off_t last_ptr, i;
66 /* read in the freelist top */
67 last_ptr = FREELIST_TOP;
68 while (tdb_ofs_read(tdb, last_ptr, &i) != -1 && i != 0) {
69 if (i == off) {
70 /* We've found it! */
71 return tdb_ofs_write(tdb, last_ptr, &next);
73 /* Follow chain (next offset is at start of record) */
74 last_ptr = i;
76 TDB_LOG((tdb, TDB_DEBUG_FATAL,"remove_from_freelist: not on list at off=%d\n", off));
77 return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
81 /* update a record tailer (must hold allocation lock) */
82 static int update_tailer(struct tdb_context *tdb, tdb_off_t offset,
83 const struct list_struct *rec)
85 tdb_off_t totalsize;
87 /* Offset of tailer from record header */
88 totalsize = sizeof(*rec) + rec->rec_len;
89 return tdb_ofs_write(tdb, offset + totalsize - sizeof(tdb_off_t),
90 &totalsize);
93 /* Add an element into the freelist. Merge adjacent records if
94 neccessary. */
95 int tdb_free(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec)
97 tdb_off_t right, left;
99 /* Allocation and tailer lock */
100 if (tdb_lock(tdb, -1, F_WRLCK) != 0)
101 return -1;
103 /* set an initial tailer, so if we fail we don't leave a bogus record */
104 if (update_tailer(tdb, offset, rec) != 0) {
105 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: update_tailer failed!\n"));
106 goto fail;
109 /* Look right first (I'm an Australian, dammit) */
110 right = offset + sizeof(*rec) + rec->rec_len;
111 if (right + sizeof(*rec) <= tdb->map_size) {
112 struct list_struct r;
114 if (tdb->methods->tdb_read(tdb, right, &r, sizeof(r), DOCONV()) == -1) {
115 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: right read failed at %u\n", right));
116 goto left;
119 /* If it's free, expand to include it. */
120 if (r.magic == TDB_FREE_MAGIC) {
121 if (remove_from_freelist(tdb, right, r.next) == -1) {
122 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: right free failed at %u\n", right));
123 goto left;
125 rec->rec_len += sizeof(r) + r.rec_len;
129 left:
130 /* Look left */
131 left = offset - sizeof(tdb_off_t);
132 if (left > TDB_DATA_START(tdb->header.hash_size)) {
133 struct list_struct l;
134 tdb_off_t leftsize;
136 /* Read in tailer and jump back to header */
137 if (tdb_ofs_read(tdb, left, &leftsize) == -1) {
138 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: left offset read failed at %u\n", left));
139 goto update;
142 /* it could be uninitialised data */
143 if (leftsize == 0 || leftsize == TDB_PAD_U32) {
144 goto update;
147 left = offset - leftsize;
149 /* Now read in record */
150 if (tdb->methods->tdb_read(tdb, left, &l, sizeof(l), DOCONV()) == -1) {
151 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: left read failed at %u (%u)\n", left, leftsize));
152 goto update;
155 /* If it's free, expand to include it. */
156 if (l.magic == TDB_FREE_MAGIC) {
157 if (remove_from_freelist(tdb, left, l.next) == -1) {
158 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: left free failed at %u\n", left));
159 goto update;
160 } else {
161 offset = left;
162 rec->rec_len += leftsize;
167 update:
168 if (update_tailer(tdb, offset, rec) == -1) {
169 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: update_tailer failed at %u\n", offset));
170 goto fail;
173 /* Now, prepend to free list */
174 rec->magic = TDB_FREE_MAGIC;
176 if (tdb_ofs_read(tdb, FREELIST_TOP, &rec->next) == -1 ||
177 tdb_rec_write(tdb, offset, rec) == -1 ||
178 tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) {
179 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free record write failed at offset=%d\n", offset));
180 goto fail;
183 /* And we're done. */
184 tdb_unlock(tdb, -1, F_WRLCK);
185 return 0;
187 fail:
188 tdb_unlock(tdb, -1, F_WRLCK);
189 return -1;
194 the core of tdb_allocate - called when we have decided which
195 free list entry to use
197 static tdb_off_t tdb_allocate_ofs(struct tdb_context *tdb, tdb_len_t length, tdb_off_t rec_ptr,
198 struct list_struct *rec, tdb_off_t last_ptr)
200 struct list_struct newrec;
201 tdb_off_t newrec_ptr;
203 memset(&newrec, '\0', sizeof(newrec));
205 /* found it - now possibly split it up */
206 if (rec->rec_len > length + MIN_REC_SIZE) {
207 /* Length of left piece */
208 length = TDB_ALIGN(length, TDB_ALIGNMENT);
210 /* Right piece to go on free list */
211 newrec.rec_len = rec->rec_len - (sizeof(*rec) + length);
212 newrec_ptr = rec_ptr + sizeof(*rec) + length;
214 /* And left record is shortened */
215 rec->rec_len = length;
216 } else {
217 newrec_ptr = 0;
220 /* Remove allocated record from the free list */
221 if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1) {
222 return 0;
225 /* Update header: do this before we drop alloc
226 lock, otherwise tdb_free() might try to
227 merge with us, thinking we're free.
228 (Thanks Jeremy Allison). */
229 rec->magic = TDB_MAGIC;
230 if (tdb_rec_write(tdb, rec_ptr, rec) == -1) {
231 return 0;
234 /* Did we create new block? */
235 if (newrec_ptr) {
236 /* Update allocated record tailer (we
237 shortened it). */
238 if (update_tailer(tdb, rec_ptr, rec) == -1) {
239 return 0;
242 /* Free new record */
243 if (tdb_free(tdb, newrec_ptr, &newrec) == -1) {
244 return 0;
248 /* all done - return the new record offset */
249 return rec_ptr;
252 /* allocate some space from the free list. The offset returned points
253 to a unconnected list_struct within the database with room for at
254 least length bytes of total data
256 0 is returned if the space could not be allocated
258 tdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct list_struct *rec)
260 tdb_off_t rec_ptr, last_ptr, newrec_ptr;
261 struct {
262 tdb_off_t rec_ptr, last_ptr;
263 tdb_len_t rec_len;
264 } bestfit;
266 if (tdb_lock(tdb, -1, F_WRLCK) == -1)
267 return 0;
269 /* Extra bytes required for tailer */
270 length += sizeof(tdb_off_t);
272 again:
273 last_ptr = FREELIST_TOP;
275 /* read in the freelist top */
276 if (tdb_ofs_read(tdb, FREELIST_TOP, &rec_ptr) == -1)
277 goto fail;
279 bestfit.rec_ptr = 0;
280 bestfit.last_ptr = 0;
281 bestfit.rec_len = 0;
284 this is a best fit allocation strategy. Originally we used
285 a first fit strategy, but it suffered from massive fragmentation
286 issues when faced with a slowly increasing record size.
288 while (rec_ptr) {
289 if (tdb_rec_free_read(tdb, rec_ptr, rec) == -1) {
290 goto fail;
293 if (rec->rec_len >= length) {
294 if (bestfit.rec_ptr == 0 ||
295 rec->rec_len < bestfit.rec_len) {
296 bestfit.rec_len = rec->rec_len;
297 bestfit.rec_ptr = rec_ptr;
298 bestfit.last_ptr = last_ptr;
299 /* consider a fit to be good enough if
300 we aren't wasting more than half
301 the space */
302 if (bestfit.rec_len < 2*length) {
303 break;
308 /* move to the next record */
309 last_ptr = rec_ptr;
310 rec_ptr = rec->next;
313 if (bestfit.rec_ptr != 0) {
314 if (tdb_rec_free_read(tdb, bestfit.rec_ptr, rec) == -1) {
315 goto fail;
318 newrec_ptr = tdb_allocate_ofs(tdb, length, bestfit.rec_ptr, rec, bestfit.last_ptr);
319 tdb_unlock(tdb, -1, F_WRLCK);
320 return newrec_ptr;
323 /* we didn't find enough space. See if we can expand the
324 database and if we can then try again */
325 if (tdb_expand(tdb, length + sizeof(*rec)) == 0)
326 goto again;
327 fail:
328 tdb_unlock(tdb, -1, F_WRLCK);
329 return 0;