Allow inclusion from <fcntl.h>.
[glibc.git] / nscd / mem.c
bloba41c0bdb074b8d97c491b7b106744167cb03604c
1 /* Cache memory handling.
2 Copyright (C) 2004, 2005 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@redhat.com>, 2004.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License version 2 as
8 published by the Free Software Foundation.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
19 #include <assert.h>
20 #include <errno.h>
21 #include <error.h>
22 #include <fcntl.h>
23 #include <inttypes.h>
24 #include <libintl.h>
25 #include <limits.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <unistd.h>
29 #include <sys/mman.h>
30 #include <sys/param.h>
32 #include "dbg_log.h"
33 #include "nscd.h"
36 static int
37 sort_he (const void *p1, const void *p2)
39 struct hashentry *h1 = *(struct hashentry **) p1;
40 struct hashentry *h2 = *(struct hashentry **) p2;
42 if (h1 < h2)
43 return -1;
44 if (h1 > h2)
45 return 1;
46 return 0;
50 static int
51 sort_he_data (const void *p1, const void *p2)
53 struct hashentry *h1 = *(struct hashentry **) p1;
54 struct hashentry *h2 = *(struct hashentry **) p2;
56 if (h1->packet < h2->packet)
57 return -1;
58 if (h1->packet > h2->packet)
59 return 1;
60 return 0;
64 /* Basic definitions for the bitmap implementation. Only BITMAP_T
65 needs to be changed to choose a different word size. */
66 #define BITMAP_T uint8_t
67 #define BITS (CHAR_BIT * sizeof (BITMAP_T))
68 #define ALLBITS ((((BITMAP_T) 1) << BITS) - 1)
69 #define HIGHBIT (((BITMAP_T) 1) << (BITS - 1))
72 static void
73 markrange (BITMAP_T *mark, ref_t start, size_t len)
75 /* Adjust parameters for block alignment. */
76 start /= BLOCK_ALIGN;
77 len = (len + BLOCK_ALIGN_M1) / BLOCK_ALIGN;
79 size_t elem = start / BITS;
81 if (start % BITS != 0)
83 if (start % BITS + len <= BITS)
85 /* All fits in the partial byte. */
86 mark[elem] |= (ALLBITS >> (BITS - len)) << (start % BITS);
87 return;
90 mark[elem++] |= 0xff << (start % BITS);
91 len -= BITS - (start % BITS);
94 while (len >= BITS)
96 mark[elem++] = ALLBITS;
97 len -= BITS;
100 if (len > 0)
101 mark[elem] |= ALLBITS >> (BITS - len);
105 void
106 gc (struct database_dyn *db)
108 /* We need write access. */
109 pthread_rwlock_wrlock (&db->lock);
111 /* And the memory handling lock. */
112 pthread_mutex_lock (&db->memlock);
114 /* We need an array representing the data area. All memory
115 allocation is BLOCK_ALIGN aligned so this is the level at which
116 we have to look at the memory. We use a mark and sweep algorithm
117 where the marks are placed in this array. */
118 assert (db->head->first_free % BLOCK_ALIGN == 0);
119 BITMAP_T mark[(db->head->first_free / BLOCK_ALIGN + BITS - 1) / BITS];
120 memset (mark, '\0', sizeof (mark));
122 /* Create an array which can hold pointer to all the entries in hash
123 entries. */
124 struct hashentry *he[db->head->nentries];
125 struct hashentry *he_data[db->head->nentries];
127 size_t cnt = 0;
128 for (size_t idx = 0; idx < db->head->module; ++idx)
130 ref_t *prevp = &db->head->array[idx];
131 ref_t run = *prevp;
133 while (run != ENDREF)
135 assert (cnt < db->head->nentries);
136 he[cnt] = (struct hashentry *) (db->data + run);
138 he[cnt]->prevp = prevp;
139 prevp = &he[cnt]->next;
141 /* This is the hash entry itself. */
142 markrange (mark, run, sizeof (struct hashentry));
144 /* Add the information for the data itself. We do this
145 only for the one special entry marked with FIRST. */
146 if (he[cnt]->first)
148 struct datahead *dh
149 = (struct datahead *) (db->data + he[cnt]->packet);
150 markrange (mark, he[cnt]->packet, dh->allocsize);
153 run = he[cnt]->next;
155 ++cnt;
158 assert (cnt == db->head->nentries);
160 /* Sort the entries by the addresses of the referenced data. All
161 the entries pointing to the same DATAHEAD object will have the
162 same key. Stability of the sorting is unimportant. */
163 memcpy (he_data, he, cnt * sizeof (struct hashentry *));
164 qsort (he_data, cnt, sizeof (struct hashentry *), sort_he_data);
166 /* Sort the entries by their address. */
167 qsort (he, cnt, sizeof (struct hashentry *), sort_he);
169 /* Determine the highest used address. */
170 size_t high = sizeof (mark);
171 while (high > 0 && mark[high - 1] == 0)
172 --high;
174 /* No memory used. */
175 if (high == 0)
177 db->head->first_free = 0;
178 goto out;
181 /* Determine the highest offset. */
182 BITMAP_T mask = HIGHBIT;
183 ref_t highref = (high * BITS - 1) * BLOCK_ALIGN;
184 while ((mark[high - 1] & mask) == 0)
186 mask >>= 1;
187 highref -= BLOCK_ALIGN;
190 /* Now we can iterate over the MARK array and find bits which are not
191 set. These represent memory which can be recovered. */
192 size_t byte = 0;
193 /* Find the first gap. */
194 while (byte < high && mark[byte] == ALLBITS)
195 ++byte;
197 if (byte == high
198 || (byte == high - 1 && (mark[byte] & ~(mask | (mask - 1))) == 0))
199 /* No gap. */
200 goto out;
202 mask = 1;
203 cnt = 0;
204 while ((mark[byte] & mask) != 0)
206 ++cnt;
207 mask <<= 1;
209 ref_t off_free = (byte * BITS + cnt) * BLOCK_ALIGN;
210 assert (off_free <= db->head->first_free);
212 struct hashentry **next_hash = he;
213 struct hashentry **next_data = he_data;
215 /* Skip over the hash entries in the first block which does not get
216 moved. */
217 while (next_hash < &he[db->head->nentries]
218 && *next_hash < (struct hashentry *) (db->data + off_free))
219 ++next_hash;
221 while (next_data < &he_data[db->head->nentries]
222 && (*next_data)->packet < off_free)
223 ++next_data;
226 /* Now we start modifying the data. Make sure all readers of the
227 data are aware of this and temporarily don't use the data. */
228 ++db->head->gc_cycle;
229 assert ((db->head->gc_cycle & 1) == 1);
232 /* We do not perform the move operations right away since the
233 he_data array is not sorted by the address of the data. */
234 struct moveinfo
236 void *from;
237 void *to;
238 size_t size;
239 struct moveinfo *next;
240 } *moves = NULL;
242 while (byte < high)
244 /* Search for the next filled block. BYTE is the index of the
245 entry in MARK, MASK is the bit, and CNT is the bit number.
246 OFF_FILLED is the corresponding offset. */
247 if ((mark[byte] & ~(mask - 1)) == 0)
249 /* No other bit set in the same element of MARK. Search in the
250 following memory. */
252 ++byte;
253 while (byte < high && mark[byte] == 0);
255 if (byte == high)
256 /* That was it. */
257 break;
259 mask = 1;
260 cnt = 0;
262 /* Find the exact bit. */
263 while ((mark[byte] & mask) == 0)
265 ++cnt;
266 mask <<= 1;
269 ref_t off_alloc = (byte * BITS + cnt) * BLOCK_ALIGN;
270 assert (off_alloc <= db->head->first_free);
272 /* Find the end of the used area. */
273 if ((mark[byte] & ~(mask - 1)) == (BITMAP_T) ~(mask - 1))
275 /* All other bits set. Search the next bytes in MARK. */
277 ++byte;
278 while (byte < high && mark[byte] == ALLBITS);
280 mask = 1;
281 cnt = 0;
283 if (byte < high)
285 /* Find the exact bit. */
286 while ((mark[byte] & mask) != 0)
288 ++cnt;
289 mask <<= 1;
293 ref_t off_allocend = (byte * BITS + cnt) * BLOCK_ALIGN;
294 assert (off_allocend <= db->head->first_free);
295 /* Now we know that we can copy the area from OFF_ALLOC to
296 OFF_ALLOCEND (not included) to the memory starting at
297 OFF_FREE. First fix up all the entries for the
298 displacement. */
299 ref_t disp = off_alloc - off_free;
301 struct moveinfo *new_move
302 = (struct moveinfo *) alloca (sizeof (*new_move));
303 new_move->from = db->data + off_alloc;
304 new_move->to = db->data + off_free;
305 new_move->size = off_allocend - off_alloc;
306 /* Create a circular list to be always able to append at the end. */
307 if (moves == NULL)
308 moves = new_move->next = new_move;
309 else
311 new_move->next = moves->next;
312 moves = moves->next = new_move;
315 /* The following loop will prepare to move this much data. */
316 off_free += off_allocend - off_alloc;
318 while (off_alloc < off_allocend)
320 /* Determine whether the next entry is for a hash entry or
321 the data. */
322 if ((struct hashentry *) (db->data + off_alloc) == *next_hash)
324 /* Just correct the forward reference. */
325 *(*next_hash++)->prevp -= disp;
327 off_alloc += ((sizeof (struct hashentry) + BLOCK_ALIGN_M1)
328 & ~BLOCK_ALIGN_M1);
330 else
332 assert (next_data < &he_data[db->head->nentries]);
333 assert ((*next_data)->packet == off_alloc);
335 struct datahead *dh = (struct datahead *) (db->data + off_alloc);
338 assert ((*next_data)->key >= (*next_data)->packet);
339 assert ((*next_data)->key + (*next_data)->len
340 <= (*next_data)->packet + dh->allocsize);
342 (*next_data)->packet -= disp;
343 (*next_data)->key -= disp;
344 ++next_data;
346 while (next_data < &he_data[db->head->nentries]
347 && (*next_data)->packet == off_alloc);
349 off_alloc += (dh->allocsize + BLOCK_ALIGN_M1) & ~BLOCK_ALIGN_M1;
352 assert (off_alloc == off_allocend);
354 assert (off_alloc <= db->head->first_free);
355 if (off_alloc == db->head->first_free)
356 /* We are done, that was the last block. */
357 break;
359 assert (next_hash == &he[db->head->nentries]);
360 assert (next_data == &he_data[db->head->nentries]);
362 /* Now perform the actual moves. */
363 if (moves != NULL)
365 struct moveinfo *runp = moves->next;
368 assert ((char *) runp->to >= db->data);
369 assert ((char *) runp->to + runp->size
370 <= db->data + db->head->first_free);
371 assert ((char *) runp->from >= db->data);
372 assert ((char *) runp->from + runp->size
373 <= db->data + db->head->first_free);
375 /* The regions may overlap. */
376 memmove (runp->to, runp->from, runp->size);
377 runp = runp->next;
379 while (runp != moves->next);
381 if (__builtin_expect (debug_level >= 3, 0))
382 dbg_log (_("freed %zu bytes in %s cache"),
383 db->head->first_free
384 - ((char *) moves->to + moves->size - db->data),
385 dbnames[db - dbs]);
387 /* The byte past the end of the last copied block is the next
388 available byte. */
389 db->head->first_free = (char *) moves->to + moves->size - db->data;
391 /* Consistency check. */
392 if (__builtin_expect (debug_level >= 3, 0))
394 for (size_t idx = 0; idx < db->head->module; ++idx)
396 ref_t run = db->head->array[idx];
397 size_t cnt = 0;
399 while (run != ENDREF)
401 if (run + sizeof (struct hashentry) > db->head->first_free)
403 dbg_log ("entry %zu in hash bucket %zu out of bounds: "
404 "%" PRIu32 "+%zu > %zu\n",
405 cnt, idx, run, sizeof (struct hashentry),
406 (size_t) db->head->first_free);
407 break;
410 struct hashentry *he = (struct hashentry *) (db->data + run);
412 if (he->key + he->len > db->head->first_free)
413 dbg_log ("key of entry %zu in hash bucket %zu out of "
414 "bounds: %" PRIu32 "+%zu > %zu\n",
415 cnt, idx, he->key, (size_t) he->len,
416 (size_t) db->head->first_free);
418 if (he->packet + sizeof (struct datahead)
419 > db->head->first_free)
420 dbg_log ("packet of entry %zu in hash bucket %zu out of "
421 "bounds: %" PRIu32 "+%zu > %zu\n",
422 cnt, idx, he->packet, sizeof (struct datahead),
423 (size_t) db->head->first_free);
424 else
426 struct datahead *dh = (struct datahead *) (db->data
427 + he->packet);
428 if (he->packet + dh->allocsize
429 > db->head->first_free)
430 dbg_log ("full key of entry %zu in hash bucket %zu "
431 "out of bounds: %" PRIu32 "+%zu > %zu",
432 cnt, idx, he->packet, (size_t) dh->allocsize,
433 (size_t) db->head->first_free);
436 run = he->next;
437 ++cnt;
443 /* Make sure the data on disk is updated. */
444 if (db->persistent)
445 msync (db->head, db->data + db->head->first_free - (char *) db->head,
446 MS_ASYNC);
449 /* Now we are done modifying the data. */
450 ++db->head->gc_cycle;
451 assert ((db->head->gc_cycle & 1) == 0);
453 /* We are done. */
454 out:
455 pthread_mutex_unlock (&db->memlock);
456 pthread_rwlock_unlock (&db->lock);
460 void *
461 mempool_alloc (struct database_dyn *db, size_t len)
463 /* Make sure LEN is a multiple of our maximum alignment so we can
464 keep track of used memory is multiples of this alignment value. */
465 if ((len & BLOCK_ALIGN_M1) != 0)
466 len += BLOCK_ALIGN - (len & BLOCK_ALIGN_M1);
468 pthread_mutex_lock (&db->memlock);
470 assert ((db->head->first_free & BLOCK_ALIGN_M1) == 0);
472 bool tried_resize = false;
473 void *res;
474 retry:
475 res = db->data + db->head->first_free;
477 if (__builtin_expect (db->head->first_free + len > db->head->data_size, 0))
479 if (! tried_resize)
481 /* Try to resize the database. Grow size of 1/8th. */
482 size_t oldtotal = (sizeof (struct database_pers_head)
483 + db->head->module * sizeof (ref_t)
484 + db->head->data_size);
485 size_t new_data_size = (db->head->data_size
486 + MAX (2 * len, db->head->data_size / 8));
487 size_t newtotal = (sizeof (struct database_pers_head)
488 + db->head->module * sizeof (ref_t)
489 + new_data_size);
490 if (newtotal > db->max_db_size)
492 new_data_size -= newtotal - db->max_db_size;
493 newtotal = db->max_db_size;
496 if (db->mmap_used && newtotal > oldtotal
497 /* We only have to adjust the file size. The new pages
498 become magically available. */
499 && TEMP_FAILURE_RETRY_VAL (posix_fallocate (db->wr_fd, oldtotal,
500 newtotal
501 - oldtotal)) == 0)
503 db->head->data_size = new_data_size;
504 tried_resize = true;
505 goto retry;
509 if (! db->last_alloc_failed)
511 dbg_log (_("no more memory for database '%s'"), dbnames[db - dbs]);
513 db->last_alloc_failed = true;
516 /* No luck. */
517 res = NULL;
519 else
521 db->head->first_free += len;
523 db->last_alloc_failed = false;
526 pthread_mutex_unlock (&db->memlock);
528 return res;