/* [BZ #5939]
   [glibc.git] / nscd / mem.c
   blob 048e3ddd322df6ffe987c78a9e49eb3a88b99e15  */
/* Cache memory handling.
   Copyright (C) 2004, 2005, 2006 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2004.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
#include <assert.h>
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <inttypes.h>
#include <libintl.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/param.h>

#include "dbg_log.h"
#include "nscd.h"
37 /* Wrapper functions with error checking for standard functions. */
38 extern void *xmalloc (size_t n);
39 extern void *xcalloc (size_t n, size_t s);
/* qsort comparator: order hash entries by their address inside the
   mapped data area.  Used to walk entries in memory order during GC.  */
static int
sort_he (const void *p1, const void *p2)
{
  struct hashentry *a = *(struct hashentry **) p1;
  struct hashentry *b = *(struct hashentry **) p2;

  /* Branchless three-way comparison: 1, -1, or 0.  */
  return (a > b) - (a < b);
}
56 static int
57 sort_he_data (const void *p1, const void *p2)
59 struct hashentry *h1 = *(struct hashentry **) p1;
60 struct hashentry *h2 = *(struct hashentry **) p2;
62 if (h1->packet < h2->packet)
63 return -1;
64 if (h1->packet > h2->packet)
65 return 1;
66 return 0;
/* Basic definitions for the bitmap implementation.  Only BITMAP_T
   needs to be changed to choose a different word size; BITS, ALLBITS,
   and HIGHBIT are all derived from it.  One bit in the bitmap marks
   one BLOCK_ALIGN-sized block of the data area as in use.  */
#define BITMAP_T uint8_t
#define BITS (CHAR_BIT * sizeof (BITMAP_T))
#define ALLBITS ((((BITMAP_T) 1) << BITS) - 1)
#define HIGHBIT (((BITMAP_T) 1) << (BITS - 1))
78 static void
79 markrange (BITMAP_T *mark, ref_t start, size_t len)
81 /* Adjust parameters for block alignment. */
82 start /= BLOCK_ALIGN;
83 len = (len + BLOCK_ALIGN_M1) / BLOCK_ALIGN;
85 size_t elem = start / BITS;
87 if (start % BITS != 0)
89 if (start % BITS + len <= BITS)
91 /* All fits in the partial byte. */
92 mark[elem] |= (ALLBITS >> (BITS - len)) << (start % BITS);
93 return;
96 mark[elem++] |= 0xff << (start % BITS);
97 len -= BITS - (start % BITS);
100 while (len >= BITS)
102 mark[elem++] = ALLBITS;
103 len -= BITS;
106 if (len > 0)
107 mark[elem] |= ALLBITS >> (BITS - len);
111 void
112 gc (struct database_dyn *db)
114 /* We need write access. */
115 pthread_rwlock_wrlock (&db->lock);
117 /* And the memory handling lock. */
118 pthread_mutex_lock (&db->memlock);
120 /* We need an array representing the data area. All memory
121 allocation is BLOCK_ALIGN aligned so this is the level at which
122 we have to look at the memory. We use a mark and sweep algorithm
123 where the marks are placed in this array. */
124 assert (db->head->first_free % BLOCK_ALIGN == 0);
126 BITMAP_T *mark;
127 bool mark_use_malloc;
128 /* In prune_cache we are also using a dynamically allocated array.
129 If the array in the caller is too large we have malloc'ed it. */
130 size_t stack_used = sizeof (bool) * db->head->module;
131 if (__builtin_expect (stack_used > MAX_STACK_USE, 0))
132 stack_used = 0;
133 size_t memory_needed = ((db->head->first_free / BLOCK_ALIGN + BITS - 1)
134 / BITS) * sizeof (BITMAP_T);
135 if (memory_needed <= MAX_STACK_USE)
137 mark = (BITMAP_T *) alloca (memory_needed);
138 mark_use_malloc = false;
139 memset (mark, '\0', memory_needed);
140 stack_used = memory_needed;
142 else
144 mark = (BITMAP_T *) xcalloc (1, memory_needed);
145 mark_use_malloc = true;
148 /* Create an array which can hold pointer to all the entries in hash
149 entries. */
150 memory_needed = 2 * db->head->nentries * sizeof (struct hashentry *);
151 struct hashentry **he;
152 struct hashentry **he_data;
153 bool he_use_malloc;
154 if (stack_used + memory_needed <= MAX_STACK_USE)
156 he = alloca (db->head->nentries * sizeof (struct hashentry *));
157 he_data = alloca (db->head->nentries * sizeof (struct hashentry *));
158 he_use_malloc = false;
160 else
162 he = xmalloc (memory_needed);
163 he_data = &he[db->head->nentries * sizeof (struct hashentry *)];
164 he_use_malloc = true;
167 size_t cnt = 0;
168 for (size_t idx = 0; idx < db->head->module; ++idx)
170 ref_t *prevp = &db->head->array[idx];
171 ref_t run = *prevp;
173 while (run != ENDREF)
175 assert (cnt < db->head->nentries);
176 he[cnt] = (struct hashentry *) (db->data + run);
178 he[cnt]->prevp = prevp;
179 prevp = &he[cnt]->next;
181 /* This is the hash entry itself. */
182 markrange (mark, run, sizeof (struct hashentry));
184 /* Add the information for the data itself. We do this
185 only for the one special entry marked with FIRST. */
186 if (he[cnt]->first)
188 struct datahead *dh
189 = (struct datahead *) (db->data + he[cnt]->packet);
190 markrange (mark, he[cnt]->packet, dh->allocsize);
193 run = he[cnt]->next;
195 ++cnt;
198 assert (cnt == db->head->nentries);
200 /* Sort the entries by the addresses of the referenced data. All
201 the entries pointing to the same DATAHEAD object will have the
202 same key. Stability of the sorting is unimportant. */
203 memcpy (he_data, he, cnt * sizeof (struct hashentry *));
204 qsort (he_data, cnt, sizeof (struct hashentry *), sort_he_data);
206 /* Sort the entries by their address. */
207 qsort (he, cnt, sizeof (struct hashentry *), sort_he);
209 /* Determine the highest used address. */
210 size_t high = sizeof (mark);
211 while (high > 0 && mark[high - 1] == 0)
212 --high;
214 /* No memory used. */
215 if (high == 0)
217 db->head->first_free = 0;
218 goto out;
221 /* Determine the highest offset. */
222 BITMAP_T mask = HIGHBIT;
223 ref_t highref = (high * BITS - 1) * BLOCK_ALIGN;
224 while ((mark[high - 1] & mask) == 0)
226 mask >>= 1;
227 highref -= BLOCK_ALIGN;
230 /* Now we can iterate over the MARK array and find bits which are not
231 set. These represent memory which can be recovered. */
232 size_t byte = 0;
233 /* Find the first gap. */
234 while (byte < high && mark[byte] == ALLBITS)
235 ++byte;
237 if (byte == high
238 || (byte == high - 1 && (mark[byte] & ~(mask | (mask - 1))) == 0))
239 /* No gap. */
240 goto out;
242 mask = 1;
243 cnt = 0;
244 while ((mark[byte] & mask) != 0)
246 ++cnt;
247 mask <<= 1;
249 ref_t off_free = (byte * BITS + cnt) * BLOCK_ALIGN;
250 assert (off_free <= db->head->first_free);
252 struct hashentry **next_hash = he;
253 struct hashentry **next_data = he_data;
255 /* Skip over the hash entries in the first block which does not get
256 moved. */
257 while (next_hash < &he[db->head->nentries]
258 && *next_hash < (struct hashentry *) (db->data + off_free))
259 ++next_hash;
261 while (next_data < &he_data[db->head->nentries]
262 && (*next_data)->packet < off_free)
263 ++next_data;
266 /* Now we start modifying the data. Make sure all readers of the
267 data are aware of this and temporarily don't use the data. */
268 ++db->head->gc_cycle;
269 assert ((db->head->gc_cycle & 1) == 1);
272 /* We do not perform the move operations right away since the
273 he_data array is not sorted by the address of the data. */
274 struct moveinfo
276 void *from;
277 void *to;
278 size_t size;
279 struct moveinfo *next;
280 } *moves = NULL;
282 while (byte < high)
284 /* Search for the next filled block. BYTE is the index of the
285 entry in MARK, MASK is the bit, and CNT is the bit number.
286 OFF_FILLED is the corresponding offset. */
287 if ((mark[byte] & ~(mask - 1)) == 0)
289 /* No other bit set in the same element of MARK. Search in the
290 following memory. */
292 ++byte;
293 while (byte < high && mark[byte] == 0);
295 if (byte == high)
296 /* That was it. */
297 break;
299 mask = 1;
300 cnt = 0;
302 /* Find the exact bit. */
303 while ((mark[byte] & mask) == 0)
305 ++cnt;
306 mask <<= 1;
309 ref_t off_alloc = (byte * BITS + cnt) * BLOCK_ALIGN;
310 assert (off_alloc <= db->head->first_free);
312 /* Find the end of the used area. */
313 if ((mark[byte] & ~(mask - 1)) == (BITMAP_T) ~(mask - 1))
315 /* All other bits set. Search the next bytes in MARK. */
317 ++byte;
318 while (byte < high && mark[byte] == ALLBITS);
320 mask = 1;
321 cnt = 0;
323 if (byte < high)
325 /* Find the exact bit. */
326 while ((mark[byte] & mask) != 0)
328 ++cnt;
329 mask <<= 1;
333 ref_t off_allocend = (byte * BITS + cnt) * BLOCK_ALIGN;
334 assert (off_allocend <= db->head->first_free);
335 /* Now we know that we can copy the area from OFF_ALLOC to
336 OFF_ALLOCEND (not included) to the memory starting at
337 OFF_FREE. First fix up all the entries for the
338 displacement. */
339 ref_t disp = off_alloc - off_free;
341 struct moveinfo *new_move
342 = (struct moveinfo *) alloca (sizeof (*new_move));
343 new_move->from = db->data + off_alloc;
344 new_move->to = db->data + off_free;
345 new_move->size = off_allocend - off_alloc;
346 /* Create a circular list to be always able to append at the end. */
347 if (moves == NULL)
348 moves = new_move->next = new_move;
349 else
351 new_move->next = moves->next;
352 moves = moves->next = new_move;
355 /* The following loop will prepare to move this much data. */
356 off_free += off_allocend - off_alloc;
358 while (off_alloc < off_allocend)
360 /* Determine whether the next entry is for a hash entry or
361 the data. */
362 if ((struct hashentry *) (db->data + off_alloc) == *next_hash)
364 /* Just correct the forward reference. */
365 *(*next_hash++)->prevp -= disp;
367 off_alloc += ((sizeof (struct hashentry) + BLOCK_ALIGN_M1)
368 & ~BLOCK_ALIGN_M1);
370 else
372 assert (next_data < &he_data[db->head->nentries]);
373 assert ((*next_data)->packet == off_alloc);
375 struct datahead *dh = (struct datahead *) (db->data + off_alloc);
378 assert ((*next_data)->key >= (*next_data)->packet);
379 assert ((*next_data)->key + (*next_data)->len
380 <= (*next_data)->packet + dh->allocsize);
382 (*next_data)->packet -= disp;
383 (*next_data)->key -= disp;
384 ++next_data;
386 while (next_data < &he_data[db->head->nentries]
387 && (*next_data)->packet == off_alloc);
389 off_alloc += (dh->allocsize + BLOCK_ALIGN_M1) & ~BLOCK_ALIGN_M1;
392 assert (off_alloc == off_allocend);
394 assert (off_alloc <= db->head->first_free);
395 if (off_alloc == db->head->first_free)
396 /* We are done, that was the last block. */
397 break;
399 assert (next_hash == &he[db->head->nentries]);
400 assert (next_data == &he_data[db->head->nentries]);
402 /* Now perform the actual moves. */
403 if (moves != NULL)
405 struct moveinfo *runp = moves->next;
408 assert ((char *) runp->to >= db->data);
409 assert ((char *) runp->to + runp->size
410 <= db->data + db->head->first_free);
411 assert ((char *) runp->from >= db->data);
412 assert ((char *) runp->from + runp->size
413 <= db->data + db->head->first_free);
415 /* The regions may overlap. */
416 memmove (runp->to, runp->from, runp->size);
417 runp = runp->next;
419 while (runp != moves->next);
421 if (__builtin_expect (debug_level >= 3, 0))
422 dbg_log (_("freed %zu bytes in %s cache"),
423 db->head->first_free
424 - ((char *) moves->to + moves->size - db->data),
425 dbnames[db - dbs]);
427 /* The byte past the end of the last copied block is the next
428 available byte. */
429 db->head->first_free = (char *) moves->to + moves->size - db->data;
431 /* Consistency check. */
432 if (__builtin_expect (debug_level >= 3, 0))
434 for (size_t idx = 0; idx < db->head->module; ++idx)
436 ref_t run = db->head->array[idx];
437 size_t cnt = 0;
439 while (run != ENDREF)
441 if (run + sizeof (struct hashentry) > db->head->first_free)
443 dbg_log ("entry %zu in hash bucket %zu out of bounds: "
444 "%" PRIu32 "+%zu > %zu\n",
445 cnt, idx, run, sizeof (struct hashentry),
446 (size_t) db->head->first_free);
447 break;
450 struct hashentry *he = (struct hashentry *) (db->data + run);
452 if (he->key + he->len > db->head->first_free)
453 dbg_log ("key of entry %zu in hash bucket %zu out of "
454 "bounds: %" PRIu32 "+%zu > %zu\n",
455 cnt, idx, he->key, (size_t) he->len,
456 (size_t) db->head->first_free);
458 if (he->packet + sizeof (struct datahead)
459 > db->head->first_free)
460 dbg_log ("packet of entry %zu in hash bucket %zu out of "
461 "bounds: %" PRIu32 "+%zu > %zu\n",
462 cnt, idx, he->packet, sizeof (struct datahead),
463 (size_t) db->head->first_free);
464 else
466 struct datahead *dh = (struct datahead *) (db->data
467 + he->packet);
468 if (he->packet + dh->allocsize
469 > db->head->first_free)
470 dbg_log ("full key of entry %zu in hash bucket %zu "
471 "out of bounds: %" PRIu32 "+%zu > %zu",
472 cnt, idx, he->packet, (size_t) dh->allocsize,
473 (size_t) db->head->first_free);
476 run = he->next;
477 ++cnt;
483 /* Make sure the data on disk is updated. */
484 if (db->persistent)
485 msync (db->head, db->data + db->head->first_free - (char *) db->head,
486 MS_ASYNC);
489 /* Now we are done modifying the data. */
490 ++db->head->gc_cycle;
491 assert ((db->head->gc_cycle & 1) == 0);
493 /* We are done. */
494 out:
495 pthread_mutex_unlock (&db->memlock);
496 pthread_rwlock_unlock (&db->lock);
498 if (he_use_malloc)
499 free (he);
500 if (mark_use_malloc)
501 free (mark);
505 void *
506 mempool_alloc (struct database_dyn *db, size_t len)
508 /* Make sure LEN is a multiple of our maximum alignment so we can
509 keep track of used memory is multiples of this alignment value. */
510 if ((len & BLOCK_ALIGN_M1) != 0)
511 len += BLOCK_ALIGN - (len & BLOCK_ALIGN_M1);
513 pthread_mutex_lock (&db->memlock);
515 assert ((db->head->first_free & BLOCK_ALIGN_M1) == 0);
517 bool tried_resize = false;
518 void *res;
519 retry:
520 res = db->data + db->head->first_free;
522 if (__builtin_expect (db->head->first_free + len > db->head->data_size, 0))
524 if (! tried_resize)
526 /* Try to resize the database. Grow size of 1/8th. */
527 size_t oldtotal = (sizeof (struct database_pers_head)
528 + roundup (db->head->module * sizeof (ref_t),
529 ALIGN)
530 + db->head->data_size);
531 size_t new_data_size = (db->head->data_size
532 + MAX (2 * len, db->head->data_size / 8));
533 size_t newtotal = (sizeof (struct database_pers_head)
534 + roundup (db->head->module * sizeof (ref_t), ALIGN)
535 + new_data_size);
536 if (newtotal > db->max_db_size)
538 new_data_size -= newtotal - db->max_db_size;
539 newtotal = db->max_db_size;
542 if (db->mmap_used && newtotal > oldtotal
543 /* We only have to adjust the file size. The new pages
544 become magically available. */
545 && TEMP_FAILURE_RETRY_VAL (posix_fallocate (db->wr_fd, oldtotal,
546 newtotal
547 - oldtotal)) == 0)
549 db->head->data_size = new_data_size;
550 tried_resize = true;
551 goto retry;
555 if (! db->last_alloc_failed)
557 dbg_log (_("no more memory for database '%s'"), dbnames[db - dbs]);
559 db->last_alloc_failed = true;
562 /* No luck. */
563 res = NULL;
565 else
567 db->head->first_free += len;
569 db->last_alloc_failed = false;
572 pthread_mutex_unlock (&db->memlock);
574 return res;