/* Cache memory handling.
   Copyright (C) 2004, 2005, 2006, 2008 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2004.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */

#include <assert.h>
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <inttypes.h>
#include <libintl.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/param.h>

#include "dbg_log.h"
#include "nscd.h"

/* Wrapper functions with error checking for standard functions.  */
extern void *xmalloc (size_t n);
extern void *xcalloc (size_t n, size_t s);

/* Comparison function for qsort: order hash entries by their address
   in the data area.  */
static int
sort_he (const void *p1, const void *p2)
{
  struct hashentry *h1 = *(struct hashentry **) p1;
  struct hashentry *h2 = *(struct hashentry **) p2;

  if (h1 < h2)
    return -1;
  if (h1 > h2)
    return 1;
  return 0;
}

/* Comparison function for qsort: order hash entries by the address
   of the data record they reference.  */
static int
sort_he_data (const void *p1, const void *p2)
{
  struct hashentry *h1 = *(struct hashentry **) p1;
  struct hashentry *h2 = *(struct hashentry **) p2;

  if (h1->packet < h2->packet)
    return -1;
  if (h1->packet > h2->packet)
    return 1;
  return 0;
}

/* Basic definitions for the bitmap implementation.  Only BITMAP_T
   needs to be changed to choose a different word size.  */
#define BITMAP_T uint8_t
#define BITS (CHAR_BIT * sizeof (BITMAP_T))
#define ALLBITS ((((BITMAP_T) 1) << BITS) - 1)
#define HIGHBIT (((BITMAP_T) 1) << (BITS - 1))

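/* Illustration (not part of the original code): with BITMAP_T being
   uint8_t, BITS is 8, ALLBITS is 0xff and HIGHBIT is 0x80.  Choosing
   uint32_t instead would give BITS == 32, ALLBITS == 0xffffffff and
   HIGHBIT == 0x80000000.  */
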
/* Mark the blocks occupied by the LEN bytes starting at offset START
   in the bitmap MARK.  */
static void
markrange (BITMAP_T *mark, ref_t start, size_t len)
{
  /* Adjust parameters for block alignment.  */
  start /= BLOCK_ALIGN;
  len = (len + BLOCK_ALIGN_M1) / BLOCK_ALIGN;

  size_t elem = start / BITS;

  if (start % BITS != 0)
    {
      if (start % BITS + len <= BITS)
        {
          /* All fits in the partial byte.  */
          mark[elem] |= (ALLBITS >> (BITS - len)) << (start % BITS);
          return;
        }

      mark[elem++] |= ALLBITS << (start % BITS);
      len -= BITS - (start % BITS);
    }

  while (len >= BITS)
    {
      mark[elem++] = ALLBITS;
      len -= BITS;
    }

  if (len > 0)
    mark[elem] |= ALLBITS >> (BITS - len);
}

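/* Worked example (illustrative figures): assume BLOCK_ALIGN == 16 and
   BITMAP_T == uint8_t.  markrange (mark, 48, 40) marks blocks 3, 4
   and 5 (40 bytes round up to three blocks), i.e. it ORs 0x38 into
   mark[0].  */
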
/* Run a garbage collection cycle on the cache DB: mark all reachable
   blocks, then compact the data area by moving used blocks down.  */
void
gc (struct database_dyn *db)
{
  /* We need write access.  */
  pthread_rwlock_wrlock (&db->lock);

  /* And the memory handling lock.  */
  pthread_mutex_lock (&db->memlock);

  /* We need an array representing the data area.  All memory
     allocation is BLOCK_ALIGN aligned so this is the level at which
     we have to look at the memory.  We use a mark and sweep algorithm
     where the marks are placed in this array.  */
  assert (db->head->first_free % BLOCK_ALIGN == 0);

  BITMAP_T *mark;
  bool mark_use_malloc;
  /* In prune_cache we are also using a dynamically allocated array.
     If the array in the caller is too large we have malloc'ed it.  */
  size_t stack_used = sizeof (bool) * db->head->module;
  if (__builtin_expect (stack_used > MAX_STACK_USE, 0))
    stack_used = 0;
  /* Remember the number of bitmap elements; it is needed again below
     to determine the highest used address.  */
  size_t nmark = (db->head->first_free / BLOCK_ALIGN + BITS - 1) / BITS;
  size_t memory_needed = nmark * sizeof (BITMAP_T);
  if (memory_needed <= MAX_STACK_USE)
    {
      mark = (BITMAP_T *) alloca (memory_needed);
      mark_use_malloc = false;
      memset (mark, '\0', memory_needed);
      stack_used = memory_needed;
    }
  else
    {
      mark = (BITMAP_T *) xcalloc (1, memory_needed);
      mark_use_malloc = true;
    }

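  /* Illustration (made-up figures): with first_free == 4096,
     BLOCK_ALIGN == 16 and BITS == 8 the data area consists of 256
     blocks, so nmark == 32 and a 32-byte bitmap covers everything.  */
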
  /* Create an array which can hold pointers to all the hash table
     entries.  */
  memory_needed = 2 * db->head->nentries * sizeof (struct hashentry *);
  struct hashentry **he;
  struct hashentry **he_data;
  bool he_use_malloc;
  if (stack_used + memory_needed <= MAX_STACK_USE)
    {
      he = alloca (db->head->nentries * sizeof (struct hashentry *));
      he_data = alloca (db->head->nentries * sizeof (struct hashentry *));
      he_use_malloc = false;
    }
  else
    {
      /* The second half of the block serves as the HE_DATA array.  */
      he = xmalloc (memory_needed);
      he_data = &he[db->head->nentries];
      he_use_malloc = true;
    }

  size_t cnt = 0;
  for (size_t idx = 0; idx < db->head->module; ++idx)
    {
      ref_t *prevp = &db->head->array[idx];
      ref_t run = *prevp;

      while (run != ENDREF)
        {
          assert (cnt < db->head->nentries);
          he[cnt] = (struct hashentry *) (db->data + run);

          he[cnt]->prevp = prevp;
          prevp = &he[cnt]->next;

          /* This is the hash entry itself.  */
          markrange (mark, run, sizeof (struct hashentry));

          /* Add the information for the data itself.  We do this
             only for the one special entry marked with FIRST.  */
          if (he[cnt]->first)
            {
              struct datahead *dh
                = (struct datahead *) (db->data + he[cnt]->packet);
              markrange (mark, he[cnt]->packet, dh->allocsize);
            }

          run = he[cnt]->next;

          ++cnt;
        }
    }
  assert (cnt == db->head->nentries);

  /* Go through the list of in-flight memory blocks.  */
  struct mem_in_flight *mrunp = mem_in_flight_list;
  while (mrunp != NULL)
    {
      /* NB: There can be no race between this test and another thread
         setting the field to the index we are looking for because
         this would require the other thread to also have the memlock
         for the database.

         NB2: we do not have to look at later blocks (higher indices) if
         earlier blocks are not in flight.  They are always allocated in
         sequence.  */
      for (enum in_flight idx = IDX_result_data;
           idx < IDX_last && mrunp->block[idx].dbidx == db - dbs; ++idx)
        {
          assert ((char *) mrunp->block[idx].blockaddr > db->data);
          assert ((char *) mrunp->block[idx].blockaddr
                  + mrunp->block[idx].blocklen <= db->data + db->memsize);
          markrange (mark, (char *) mrunp->block[idx].blockaddr - db->data,
                     mrunp->block[idx].blocklen);
        }

      mrunp = mrunp->next;
    }

  /* Sort the entries by the addresses of the referenced data.  All
     the entries pointing to the same DATAHEAD object will have the
     same key.  Stability of the sorting is unimportant.  */
  memcpy (he_data, he, cnt * sizeof (struct hashentry *));
  qsort (he_data, cnt, sizeof (struct hashentry *), sort_he_data);

  /* Sort the entries by their address.  */
  qsort (he, cnt, sizeof (struct hashentry *), sort_he);

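  /* The two orderings serve the compaction loop below: HE is walked
     in hash entry address order, HE_DATA in data (packet) address
     order, and both walks advance in lock step with OFF_ALLOC so each
     block encountered can be classified as either a hash entry or a
     data record.  */
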
  /* Determine the highest used address.  */
  size_t high = nmark;
  while (high > 0 && mark[high - 1] == 0)
    --high;

  /* No memory used.  */
  if (high == 0)
    {
      db->head->first_free = 0;
      goto out;
    }

  /* Determine the highest offset.  */
  BITMAP_T mask = HIGHBIT;
  ref_t highref = (high * BITS - 1) * BLOCK_ALIGN;
  while ((mark[high - 1] & mask) == 0)
    {
      mask >>= 1;
      highref -= BLOCK_ALIGN;
    }

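  /* Illustration (made-up figures): with BITS == 8, if the topmost
     set bit in mark[high - 1] is bit 5, the loop shifts MASK down
     twice and HIGHREF ends up at ((high - 1) * 8 + 5) * BLOCK_ALIGN,
     the offset of the highest block in use.  */
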
  /* Now we can iterate over the MARK array and find bits which are not
     set.  These represent memory which can be recovered.  */
  size_t byte = 0;
  /* Find the first gap.  */
  while (byte < high && mark[byte] == ALLBITS)
    ++byte;

  if (byte == high
      || (byte == high - 1 && (mark[byte] & ~(mask | (mask - 1))) == 0))
    /* No gap.  */
    goto out;

  mask = 1;
  cnt = 0;
  while ((mark[byte] & mask) != 0)
    {
      ++cnt;
      mask <<= 1;
    }
  ref_t off_free = (byte * BITS + cnt) * BLOCK_ALIGN;
  assert (off_free <= db->head->first_free);

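  /* Illustration (made-up figures): if mark[0] == 0x07 and further
     blocks are marked higher up, only the first three blocks of the
     first bitmap element are in use, so the loop above stops with
     cnt == 3 and OFF_FREE == 3 * BLOCK_ALIGN, the first recoverable
     byte.  */
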
  struct hashentry **next_hash = he;
  struct hashentry **next_data = he_data;

  /* Skip over the hash entries in the first block which does not get
     moved.  */
  while (next_hash < &he[db->head->nentries]
         && *next_hash < (struct hashentry *) (db->data + off_free))
    ++next_hash;

  while (next_data < &he_data[db->head->nentries]
         && (*next_data)->packet < off_free)
    ++next_data;


  /* Now we start modifying the data.  Make sure all readers of the
     data are aware of this and temporarily don't use the data.  */
  ++db->head->gc_cycle;
  assert ((db->head->gc_cycle & 1) == 1);

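  /* While GC_CYCLE is odd the mapping is in flux: readers which see
     an odd value, or a value that changed while they were reading,
     are expected to retry.  The second increment after the moves
     below makes the counter even again, signaling consistent data.  */
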
  /* We do not perform the move operations right away since the
     he_data array is not sorted by the address of the data.  */
  struct moveinfo
  {
    void *from;
    void *to;
    size_t size;
    struct moveinfo *next;
  } *moves = NULL;

  while (byte < high)
    {
      /* Search for the next filled block.  BYTE is the index of the
         entry in MARK, MASK is the bit, and CNT is the bit number.
         OFF_FILLED is the corresponding offset.  */
      if ((mark[byte] & ~(mask - 1)) == 0)
        {
          /* No other bit set in the same element of MARK.  Search in the
             following memory.  */
          do
            ++byte;
          while (byte < high && mark[byte] == 0);

          if (byte == high)
            /* That was it.  */
            break;

          mask = 1;
          cnt = 0;
        }

      /* Find the exact bit.  */
      while ((mark[byte] & mask) == 0)
        {
          ++cnt;
          mask <<= 1;
        }

      ref_t off_alloc = (byte * BITS + cnt) * BLOCK_ALIGN;
      assert (off_alloc <= db->head->first_free);

      /* Find the end of the used area.  */
      if ((mark[byte] & ~(mask - 1)) == (BITMAP_T) ~(mask - 1))
        {
          /* All other bits set.  Search the next bytes in MARK.  */
          do
            ++byte;
          while (byte < high && mark[byte] == ALLBITS);

          mask = 1;
          cnt = 0;
        }
      if (byte < high)
        {
          /* Find the exact bit.  */
          while ((mark[byte] & mask) != 0)
            {
              ++cnt;
              mask <<= 1;
            }
        }

360 /* Now we know that we can copy the area from OFF_ALLOC to
361 OFF_ALLOCEND (not included) to the memory starting at
362 OFF_FREE. First fix up all the entries for the
363 displacement. */
364 ref_t disp = off_alloc - off_free;
366 struct moveinfo *new_move
367 = (struct moveinfo *) alloca (sizeof (*new_move));
368 new_move->from = db->data + off_alloc;
369 new_move->to = db->data + off_free;
370 new_move->size = off_allocend - off_alloc;
371 /* Create a circular list to be always able to append at the end. */
372 if (moves == NULL)
373 moves = new_move->next = new_move;
374 else
376 new_move->next = moves->next;
377 moves = moves->next = new_move;
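      /* MOVES always points at the tail of the circular list, so
         MOVES->next is its head: appending stays O(1) and the moves
         can later be replayed in insertion order.  A single node
         simply points at itself.  */
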
      /* The following loop will prepare to move this much data.  */
      off_free += off_allocend - off_alloc;

      while (off_alloc < off_allocend)
        {
          /* Determine whether the next entry is for a hash entry or
             the data.  */
          if ((struct hashentry *) (db->data + off_alloc) == *next_hash)
            {
              /* Just correct the forward reference.  */
              *(*next_hash++)->prevp -= disp;

              off_alloc += ((sizeof (struct hashentry) + BLOCK_ALIGN_M1)
                            & ~BLOCK_ALIGN_M1);
            }
          else
            {
              assert (next_data < &he_data[db->head->nentries]);
              assert ((*next_data)->packet == off_alloc);

              struct datahead *dh = (struct datahead *) (db->data + off_alloc);
              do
                {
                  assert ((*next_data)->key >= (*next_data)->packet);
                  assert ((*next_data)->key + (*next_data)->len
                          <= (*next_data)->packet + dh->allocsize);

                  (*next_data)->packet -= disp;
                  (*next_data)->key -= disp;
                  ++next_data;
                }
              while (next_data < &he_data[db->head->nentries]
                     && (*next_data)->packet == off_alloc);

              off_alloc += (dh->allocsize + BLOCK_ALIGN_M1) & ~BLOCK_ALIGN_M1;
            }
        }
      assert (off_alloc == off_allocend);

      assert (off_alloc <= db->head->first_free);
      if (off_alloc == db->head->first_free)
        /* We are done, that was the last block.  */
        break;
    }
  assert (next_hash == &he[db->head->nentries]);
  assert (next_data == &he_data[db->head->nentries]);

  /* Now perform the actual moves.  */
  if (moves != NULL)
    {
      struct moveinfo *runp = moves->next;
      do
        {
          assert ((char *) runp->to >= db->data);
          assert ((char *) runp->to + runp->size
                  <= db->data + db->head->first_free);
          assert ((char *) runp->from >= db->data);
          assert ((char *) runp->from + runp->size
                  <= db->data + db->head->first_free);

          /* The regions may overlap.  */
          memmove (runp->to, runp->from, runp->size);
          runp = runp->next;
        }
      while (runp != moves->next);

      if (__builtin_expect (debug_level >= 3, 0))
        dbg_log (_("freed %zu bytes in %s cache"),
                 (size_t) (db->head->first_free
                           - ((char *) moves->to + moves->size - db->data)),
                 dbnames[db - dbs]);

      /* The byte past the end of the last copied block is the next
         available byte.  */
      db->head->first_free = (char *) moves->to + moves->size - db->data;

      /* Consistency check.  */
      if (__builtin_expect (debug_level >= 3, 0))
        {
          for (size_t idx = 0; idx < db->head->module; ++idx)
            {
              ref_t run = db->head->array[idx];
              size_t cnt = 0;

              while (run != ENDREF)
                {
                  if (run + sizeof (struct hashentry) > db->head->first_free)
                    {
                      dbg_log ("entry %zu in hash bucket %zu out of bounds: "
                               "%" PRIu32 "+%zu > %zu\n",
                               cnt, idx, run, sizeof (struct hashentry),
                               (size_t) db->head->first_free);
                      break;
                    }

                  struct hashentry *he = (struct hashentry *) (db->data + run);

                  if (he->key + he->len > db->head->first_free)
                    dbg_log ("key of entry %zu in hash bucket %zu out of "
                             "bounds: %" PRIu32 "+%zu > %zu\n",
                             cnt, idx, he->key, (size_t) he->len,
                             (size_t) db->head->first_free);

                  if (he->packet + sizeof (struct datahead)
                      > db->head->first_free)
                    dbg_log ("packet of entry %zu in hash bucket %zu out of "
                             "bounds: %" PRIu32 "+%zu > %zu\n",
                             cnt, idx, he->packet, sizeof (struct datahead),
                             (size_t) db->head->first_free);
                  else
                    {
                      struct datahead *dh = (struct datahead *) (db->data
                                                                 + he->packet);
                      if (he->packet + dh->allocsize
                          > db->head->first_free)
                        dbg_log ("full key of entry %zu in hash bucket %zu "
                                 "out of bounds: %" PRIu32 "+%zu > %zu",
                                 cnt, idx, he->packet, (size_t) dh->allocsize,
                                 (size_t) db->head->first_free);
                    }

                  run = he->next;
                  ++cnt;
                }
            }
        }
    }

  /* Make sure the data on disk is updated.  */
  if (db->persistent)
    msync (db->head, db->data + db->head->first_free - (char *) db->head,
           MS_ASYNC);


  /* Now we are done modifying the data.  */
  ++db->head->gc_cycle;
  assert ((db->head->gc_cycle & 1) == 0);

  /* We are done.  */
 out:
  pthread_mutex_unlock (&db->memlock);
  pthread_rwlock_unlock (&db->lock);

  if (he_use_malloc)
    free (he);
  if (mark_use_malloc)
    free (mark);
}

void *
mempool_alloc (struct database_dyn *db, size_t len, enum in_flight idx)
{
  /* Make sure LEN is a multiple of our maximum alignment so we can
     keep track of used memory in multiples of this alignment value.  */
  if ((len & BLOCK_ALIGN_M1) != 0)
    len += BLOCK_ALIGN - (len & BLOCK_ALIGN_M1);

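  /* Illustration (made-up figures): with BLOCK_ALIGN == 16 a request
     for 10 bytes is padded to 16 while a request for 16 bytes stays
     unchanged.  */
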
  pthread_mutex_lock (&db->memlock);

  assert ((db->head->first_free & BLOCK_ALIGN_M1) == 0);

  bool tried_resize = false;
  void *res;
 retry:
  res = db->data + db->head->first_free;

  if (__builtin_expect (db->head->first_free + len > db->head->data_size, 0))
    {
      if (! tried_resize)
        {
          /* Try to resize the database.  Grow the data area by 1/8th
             of its current size or by 2 * LEN, whichever is larger.  */
          size_t oldtotal = (sizeof (struct database_pers_head)
                             + roundup (db->head->module * sizeof (ref_t),
                                        ALIGN)
                             + db->head->data_size);
          size_t new_data_size = (db->head->data_size
                                  + MAX (2 * len, db->head->data_size / 8));
          size_t newtotal = (sizeof (struct database_pers_head)
                             + roundup (db->head->module * sizeof (ref_t),
                                        ALIGN)
                             + new_data_size);
          if (newtotal > db->max_db_size)
            {
              new_data_size -= newtotal - db->max_db_size;
              newtotal = db->max_db_size;
            }

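          /* Illustration (made-up figures): a 1 MiB data area asked
             for a 256-byte block grows by MAX (512, 131072), i.e. by
             128 KiB, unless that would exceed max_db_size.  */
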
          if (db->mmap_used && newtotal > oldtotal
              /* We only have to adjust the file size.  The new pages
                 become magically available.  */
              && TEMP_FAILURE_RETRY_VAL (posix_fallocate (db->wr_fd, oldtotal,
                                                          newtotal
                                                          - oldtotal)) == 0)
            {
              db->head->data_size = new_data_size;
              tried_resize = true;
              goto retry;
            }
        }

      if (! db->last_alloc_failed)
        {
          dbg_log (_("no more memory for database '%s'"), dbnames[db - dbs]);

          db->last_alloc_failed = true;
        }

      /* No luck.  */
      res = NULL;
    }
  else
    {
      db->head->first_free += len;

      db->last_alloc_failed = false;

      /* Remember that we have allocated this memory.  */
      assert (idx >= 0 && idx < IDX_last);
      mem_in_flight.block[idx].dbidx = db - dbs;
      mem_in_flight.block[idx].blocklen = len;
      mem_in_flight.block[idx].blockaddr = res;
    }

  pthread_mutex_unlock (&db->memlock);

  return res;
}