nscd/mem.c
/* Cache memory handling.
   Copyright (C) 2004, 2005, 2006, 2008 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2004.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
#include <assert.h>
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <inttypes.h>
#include <libintl.h>
#include <limits.h>
#include <obstack.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/param.h>

#include "dbg_log.h"
#include "nscd.h"
/* Wrapper functions with error checking for standard functions.  */
extern void *xmalloc (size_t n);
extern void *xcalloc (size_t n, size_t s);
static int
sort_he (const void *p1, const void *p2)
{
  struct hashentry *h1 = *(struct hashentry **) p1;
  struct hashentry *h2 = *(struct hashentry **) p2;

  if (h1 < h2)
    return -1;
  if (h1 > h2)
    return 1;
  return 0;
}
static int
sort_he_data (const void *p1, const void *p2)
{
  struct hashentry *h1 = *(struct hashentry **) p1;
  struct hashentry *h2 = *(struct hashentry **) p2;

  if (h1->packet < h2->packet)
    return -1;
  if (h1->packet > h2->packet)
    return 1;
  return 0;
}
/* Basic definitions for the bitmap implementation.  Only BITMAP_T
   needs to be changed to choose a different word size.  */
#define BITMAP_T uint8_t
#define BITS (CHAR_BIT * sizeof (BITMAP_T))
#define ALLBITS ((((BITMAP_T) 1) << BITS) - 1)
#define HIGHBIT (((BITMAP_T) 1) << (BITS - 1))
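
/* Each bit in the mark bitmap describes one BLOCK_ALIGN-sized block
   of the data area, so one BITMAP_T word covers BITS consecutive
   blocks.  */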

static void
markrange (BITMAP_T *mark, ref_t start, size_t len)
{
  /* Adjust parameters for block alignment.  */
  assert ((start & BLOCK_ALIGN_M1) == 0);
  start /= BLOCK_ALIGN;
  len = (len + BLOCK_ALIGN_M1) / BLOCK_ALIGN;

  size_t elem = start / BITS;

  if (start % BITS != 0)
    {
      if (start % BITS + len <= BITS)
	{
	  /* All fits in the partial byte.  */
	  mark[elem] |= (ALLBITS >> (BITS - len)) << (start % BITS);
	  return;
	}

      mark[elem++] |= ALLBITS << (start % BITS);
      len -= BITS - (start % BITS);
    }

  while (len >= BITS)
    {
      mark[elem++] = ALLBITS;
      len -= BITS;
    }

  if (len > 0)
    mark[elem] |= ALLBITS >> (BITS - len);
}
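
/* For illustration, assume BLOCK_ALIGN == 8 (hence BITS == 8 with
   uint8_t).  Then markrange (mark, 16, 20) marks blocks 2, 3 and 4:
   start 16 is block index 2, and len 20 rounds up to 3 blocks, so it
   ORs binary 00011100 into mark[0].  Ranges crossing a word boundary
   go through the partial-word, full-word, and tail steps above.  */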

void
gc (struct database_dyn *db)
{
  /* We need write access.  */
  pthread_rwlock_wrlock (&db->lock);

  /* And the memory handling lock.  */
  pthread_mutex_lock (&db->memlock);

  /* The obstack is used further down for the move descriptors.
     Initialize it up front so the cleanup at OUT below can call
     obstack_free unconditionally even on the early exits.  */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
  struct obstack ob;
  obstack_init (&ob);

  /* We need an array representing the data area.  All memory
     allocation is BLOCK_ALIGN aligned so this is the level at which
     we have to look at the memory.  We use a mark and sweep algorithm
     where the marks are placed in this array.  */
  assert (db->head->first_free % BLOCK_ALIGN == 0);

  BITMAP_T *mark;
  bool mark_use_malloc;
  /* In prune_cache we are also using a dynamically allocated array.
     If the array in the caller is too large we have malloc'ed it.  */
  size_t stack_used = sizeof (bool) * db->head->module;
  if (__builtin_expect (stack_used > MAX_STACK_USE, 0))
    stack_used = 0;
  size_t nmark = (db->head->first_free / BLOCK_ALIGN + BITS - 1) / BITS;
  size_t memory_needed = nmark * sizeof (BITMAP_T);
  if (stack_used + memory_needed <= MAX_STACK_USE)
    {
      mark = (BITMAP_T *) alloca (memory_needed);
      mark_use_malloc = false;
      memset (mark, '\0', memory_needed);
      stack_used += memory_needed;
    }
  else
    {
      mark = (BITMAP_T *) xcalloc (1, memory_needed);
      mark_use_malloc = true;
    }

  /* Create an array which can hold pointers to all the hash
     entries.  */
  memory_needed = 2 * db->head->nentries * sizeof (struct hashentry *);
  struct hashentry **he;
  struct hashentry **he_data;
  bool he_use_malloc;
  if (stack_used + memory_needed <= MAX_STACK_USE)
    {
      he = alloca (db->head->nentries * sizeof (struct hashentry *));
      he_data = alloca (db->head->nentries * sizeof (struct hashentry *));
      he_use_malloc = false;
      stack_used += memory_needed;
    }
  else
    {
      he = xmalloc (memory_needed);
      /* The second half of the block serves as HE_DATA.  Indexing
	 with `db->head->nentries * sizeof (struct hashentry *)' as
	 the original code did overshoots the allocation; pointer
	 arithmetic already scales by the element size.  */
      he_data = &he[db->head->nentries];
      he_use_malloc = true;
    }

  size_t cnt = 0;
  for (size_t idx = 0; idx < db->head->module; ++idx)
    {
      ref_t *prevp = &db->head->array[idx];
      ref_t run = *prevp;

      while (run != ENDREF)
	{
	  assert (cnt < db->head->nentries);
	  he[cnt] = (struct hashentry *) (db->data + run);

	  he[cnt]->prevp = prevp;
	  prevp = &he[cnt]->next;

	  /* This is the hash entry itself.  */
	  markrange (mark, run, sizeof (struct hashentry));

	  /* Add the information for the data itself.  We do this
	     only for the one special entry marked with FIRST.  */
	  if (he[cnt]->first)
	    {
	      struct datahead *dh
		= (struct datahead *) (db->data + he[cnt]->packet);
	      markrange (mark, he[cnt]->packet, dh->allocsize);
	    }

	  run = he[cnt]->next;

	  ++cnt;
	}
    }
  assert (cnt == db->head->nentries);

  /* Go through the list of in-flight memory blocks.  */
  struct mem_in_flight *mrunp = mem_in_flight_list;
  while (mrunp != NULL)
    {
      /* NB: There can be no race between this test and another thread
	 setting the field to the index we are looking for because
	 this would require the other thread to also have the memlock
	 for the database.

	 NB2: we do not have to look at later blocks (higher indices) if
	 earlier blocks are not in flight.  They are always allocated in
	 sequence.  */
      for (enum in_flight idx = IDX_result_data;
	   idx < IDX_last && mrunp->block[idx].dbidx == db - dbs; ++idx)
	{
	  assert (mrunp->block[idx].blockoff >= 0);
	  assert (mrunp->block[idx].blocklen < db->memsize);
	  assert (mrunp->block[idx].blockoff
		  + mrunp->block[idx].blocklen <= db->memsize);
	  markrange (mark, mrunp->block[idx].blockoff,
		     mrunp->block[idx].blocklen);
	}

      mrunp = mrunp->next;
    }

  /* Sort the entries by the addresses of the referenced data.  All
     the entries pointing to the same DATAHEAD object will have the
     same key.  Stability of the sorting is unimportant.  */
  memcpy (he_data, he, cnt * sizeof (struct hashentry *));
  qsort (he_data, cnt, sizeof (struct hashentry *), sort_he_data);

  /* Sort the entries by their address.  */
  qsort (he, cnt, sizeof (struct hashentry *), sort_he);
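
  /* From here on HE (sorted by entry address) is used to walk the
     hash entries in memory order while HE_DATA (sorted by packet
     address) is used to walk the data records; the compaction loop
     below advances both cursors in step with the blocks it moves.  */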

  /* Determine the highest used address.  */
  size_t high = nmark;
  while (high > 0 && mark[high - 1] == 0)
    --high;

  /* No memory used.  */
  if (high == 0)
    {
      db->head->first_free = 0;
      goto out;
    }

  /* Determine the highest offset.  */
  BITMAP_T mask = HIGHBIT;
  ref_t highref = (high * BITS - 1) * BLOCK_ALIGN;
  while ((mark[high - 1] & mask) == 0)
    {
      mask >>= 1;
      highref -= BLOCK_ALIGN;
    }
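
  /* MASK now has exactly the bit for the highest used block of
     MARK[HIGH - 1] set; the no-gap test below relies on this.  */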

  /* Now we can iterate over the MARK array and find bits which are not
     set.  These represent memory which can be recovered.  */
  size_t byte = 0;
  /* Find the first gap.  */
  while (byte < high && mark[byte] == ALLBITS)
    ++byte;

  if (byte == high
      || (byte == high - 1 && (mark[byte] & ~(mask | (mask - 1))) == 0))
    /* No gap.  */
    goto out;

  mask = 1;
  cnt = 0;
  while ((mark[byte] & mask) != 0)
    {
      ++cnt;
      mask <<= 1;
    }
  ref_t off_free = (byte * BITS + cnt) * BLOCK_ALIGN;
  assert (off_free <= db->head->first_free);

  struct hashentry **next_hash = he;
  struct hashentry **next_data = he_data;

  /* Skip over the hash entries in the first block which does not get
     moved.  */
  while (next_hash < &he[db->head->nentries]
	 && *next_hash < (struct hashentry *) (db->data + off_free))
    ++next_hash;

  while (next_data < &he_data[db->head->nentries]
	 && (*next_data)->packet < off_free)
    ++next_data;


  /* Now we start modifying the data.  Make sure all readers of the
     data are aware of this and temporarily don't use the data.  */
  ++db->head->gc_cycle;
  assert ((db->head->gc_cycle & 1) == 1);
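
  /* An odd GC_CYCLE value signals clients mapping the database
     read-only that a compaction is in progress; they reread the
     counter after copying out data and discard the result if it
     changed or is odd (the client side of this protocol lives in
     nscd-client.h).  */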

  /* We do not perform the move operations right away since the
     he_data array is not sorted by the address of the data.  The
     obstack feeding the move descriptors was initialized at the top
     of this function.  */
  struct moveinfo
  {
    void *from;
    void *to;
    size_t size;
    struct moveinfo *next;
  } *moves = NULL;

  while (byte < high)
    {
      /* Search for the next filled block.  BYTE is the index of the
	 entry in MARK, MASK is the bit, and CNT is the bit number.
	 OFF_ALLOC will be the corresponding offset.  */
      if ((mark[byte] & ~(mask - 1)) == 0)
	{
	  /* No other bit set in the same element of MARK.  Search in the
	     following memory.  */
	  do
	    ++byte;
	  while (byte < high && mark[byte] == 0);

	  if (byte == high)
	    /* That was it.  */
	    break;

	  mask = 1;
	  cnt = 0;
	}
      /* Find the exact bit.  */
      while ((mark[byte] & mask) == 0)
	{
	  ++cnt;
	  mask <<= 1;
	}

      ref_t off_alloc = (byte * BITS + cnt) * BLOCK_ALIGN;
      assert (off_alloc <= db->head->first_free);

      /* Find the end of the used area.  */
      if ((mark[byte] & ~(mask - 1)) == (BITMAP_T) ~(mask - 1))
	{
	  /* All other bits set.  Search the next bytes in MARK.  */
	  do
	    ++byte;
	  while (byte < high && mark[byte] == ALLBITS);

	  mask = 1;
	  cnt = 0;
	}
      if (byte < high)
	{
	  /* Find the exact bit.  */
	  while ((mark[byte] & mask) != 0)
	    {
	      ++cnt;
	      mask <<= 1;
	    }
	}

      ref_t off_allocend = (byte * BITS + cnt) * BLOCK_ALIGN;
      assert (off_allocend <= db->head->first_free);
      /* Now we know that we can copy the area from OFF_ALLOC to
	 OFF_ALLOCEND (not included) to the memory starting at
	 OFF_FREE.  First fix up all the entries for the
	 displacement.  */
      ref_t disp = off_alloc - off_free;

      struct moveinfo *new_move;
      if (stack_used + sizeof (*new_move) <= MAX_STACK_USE)
	{
	  new_move = alloca (sizeof (*new_move));
	  stack_used += sizeof (*new_move);
	}
      else
	new_move = obstack_alloc (&ob, sizeof (*new_move));
      new_move->from = db->data + off_alloc;
      new_move->to = db->data + off_free;
      new_move->size = off_allocend - off_alloc;
      /* Create a circular list to be always able to append at the end.  */
      if (moves == NULL)
	moves = new_move->next = new_move;
      else
	{
	  new_move->next = moves->next;
	  moves = moves->next = new_move;
	}
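
      /* MOVES always points at the tail of the circular list, so
	 MOVES->NEXT is its head; appending therefore stays O(1) and
	 the replay loop below visits the moves in insertion order.  */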

      /* The following loop will prepare to move this much data.  */
      off_free += off_allocend - off_alloc;

      while (off_alloc < off_allocend)
	{
	  /* Determine whether the next entry is for a hash entry or
	     the data.  */
	  if ((struct hashentry *) (db->data + off_alloc) == *next_hash)
	    {
	      /* Just correct the forward reference.  */
	      *(*next_hash++)->prevp -= disp;

	      off_alloc += ((sizeof (struct hashentry) + BLOCK_ALIGN_M1)
			    & ~BLOCK_ALIGN_M1);
	    }
	  else
	    {
	      assert (next_data < &he_data[db->head->nentries]);
	      assert ((*next_data)->packet == off_alloc);

	      struct datahead *dh = (struct datahead *) (db->data + off_alloc);
	      do
		{
		  assert ((*next_data)->key >= (*next_data)->packet);
		  assert ((*next_data)->key + (*next_data)->len
			  <= (*next_data)->packet + dh->allocsize);

		  (*next_data)->packet -= disp;
		  (*next_data)->key -= disp;
		  ++next_data;
		}
	      while (next_data < &he_data[db->head->nentries]
		     && (*next_data)->packet == off_alloc);

	      off_alloc += (dh->allocsize + BLOCK_ALIGN_M1) & ~BLOCK_ALIGN_M1;
	    }
	}
      assert (off_alloc == off_allocend);

      assert (off_alloc <= db->head->first_free);
      if (off_alloc == db->head->first_free)
	/* We are done, that was the last block.  */
	break;
    }
  assert (next_hash == &he[db->head->nentries]);
  assert (next_data == &he_data[db->head->nentries]);

  /* Now perform the actual moves.  */
  if (moves != NULL)
    {
      struct moveinfo *runp = moves->next;
      do
	{
	  assert ((char *) runp->to >= db->data);
	  assert ((char *) runp->to + runp->size
		  <= db->data + db->head->first_free);
	  assert ((char *) runp->from >= db->data);
	  assert ((char *) runp->from + runp->size
		  <= db->data + db->head->first_free);

	  /* The regions may overlap.  */
	  memmove (runp->to, runp->from, runp->size);
	  runp = runp->next;
	}
      while (runp != moves->next);

      if (__builtin_expect (debug_level >= 3, 0))
	dbg_log (_("freed %zu bytes in %s cache"),
		 db->head->first_free
		 - ((char *) moves->to + moves->size - db->data),
		 dbnames[db - dbs]);

      /* The byte past the end of the last copied block is the next
	 available byte.  */
      db->head->first_free = (char *) moves->to + moves->size - db->data;

      /* Consistency check.  */
      if (__builtin_expect (debug_level >= 3, 0))
	{
	  for (size_t idx = 0; idx < db->head->module; ++idx)
	    {
	      ref_t run = db->head->array[idx];
	      size_t cnt = 0;

	      while (run != ENDREF)
		{
		  if (run + sizeof (struct hashentry) > db->head->first_free)
		    {
		      dbg_log ("entry %zu in hash bucket %zu out of bounds: "
			       "%" PRIu32 "+%zu > %zu\n",
			       cnt, idx, run, sizeof (struct hashentry),
			       (size_t) db->head->first_free);
		      break;
		    }

		  struct hashentry *he = (struct hashentry *) (db->data + run);

		  if (he->key + he->len > db->head->first_free)
		    dbg_log ("key of entry %zu in hash bucket %zu out of "
			     "bounds: %" PRIu32 "+%zu > %zu\n",
			     cnt, idx, he->key, (size_t) he->len,
			     (size_t) db->head->first_free);

		  if (he->packet + sizeof (struct datahead)
		      > db->head->first_free)
		    dbg_log ("packet of entry %zu in hash bucket %zu out of "
			     "bounds: %" PRIu32 "+%zu > %zu\n",
			     cnt, idx, he->packet, sizeof (struct datahead),
			     (size_t) db->head->first_free);
		  else
		    {
		      struct datahead *dh = (struct datahead *) (db->data
								 + he->packet);
		      if (he->packet + dh->allocsize
			  > db->head->first_free)
			dbg_log ("full key of entry %zu in hash bucket %zu "
				 "out of bounds: %" PRIu32 "+%zu > %zu",
				 cnt, idx, he->packet, (size_t) dh->allocsize,
				 (size_t) db->head->first_free);
		    }

		  run = he->next;
		  ++cnt;
		}
	    }
	}
    }

  /* Make sure the data on disk is updated.  */
  if (db->persistent)
    msync (db->head, db->data + db->head->first_free - (char *) db->head,
	   MS_ASYNC);


  /* Now we are done modifying the data.  */
  ++db->head->gc_cycle;
  assert ((db->head->gc_cycle & 1) == 0);

  /* We are done.  */
 out:
  pthread_mutex_unlock (&db->memlock);
  pthread_rwlock_unlock (&db->lock);

  if (he_use_malloc)
    free (he);
  if (mark_use_malloc)
    free (mark);

  obstack_free (&ob, NULL);
}

void *
mempool_alloc (struct database_dyn *db, size_t len, enum in_flight idx)
{
  /* Make sure LEN is a multiple of our maximum alignment so we can
     keep track of used memory in multiples of this alignment value.  */
  if ((len & BLOCK_ALIGN_M1) != 0)
    len += BLOCK_ALIGN - (len & BLOCK_ALIGN_M1);
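
  /* For example, with BLOCK_ALIGN == 8 a request for 10 bytes is
     rounded up to 16, keeping FIRST_FREE block-aligned.  */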

  pthread_mutex_lock (&db->memlock);

  assert ((db->head->first_free & BLOCK_ALIGN_M1) == 0);

  bool tried_resize = false;
  void *res;
 retry:
  res = db->data + db->head->first_free;

  if (__builtin_expect (db->head->first_free + len > db->head->data_size, 0))
    {
      if (! tried_resize)
	{
	  /* Try to resize the database.  Grow the size by 1/8th.  */
	  size_t oldtotal = (sizeof (struct database_pers_head)
			     + roundup (db->head->module * sizeof (ref_t),
					ALIGN)
			     + db->head->data_size);
	  size_t new_data_size = (db->head->data_size
				  + MAX (2 * len, db->head->data_size / 8));
	  size_t newtotal = (sizeof (struct database_pers_head)
			     + roundup (db->head->module * sizeof (ref_t),
					ALIGN)
			     + new_data_size);
	  if (newtotal > db->max_db_size)
	    {
	      new_data_size -= newtotal - db->max_db_size;
	      newtotal = db->max_db_size;
	    }

	  if (db->mmap_used && newtotal > oldtotal
	      /* We only have to adjust the file size.  The new pages
		 become available right away because the mapping
		 already covers the maximum database size.  */
	      && TEMP_FAILURE_RETRY_VAL (posix_fallocate (db->wr_fd, oldtotal,
							  newtotal
							  - oldtotal)) == 0)
	    {
	      db->head->data_size = new_data_size;
	      tried_resize = true;
	      goto retry;
	    }
	}

      if (! db->last_alloc_failed)
	{
	  dbg_log (_("no more memory for database '%s'"), dbnames[db - dbs]);

	  db->last_alloc_failed = true;
	}

      /* No luck.  */
      res = NULL;
    }
  else
    {
      /* Remember that we have allocated this memory.  */
      assert (idx >= 0 && idx < IDX_last);
      mem_in_flight.block[idx].dbidx = db - dbs;
      mem_in_flight.block[idx].blocklen = len;
      mem_in_flight.block[idx].blockoff = db->head->first_free;

      db->head->first_free += len;

      db->last_alloc_failed = false;
    }

  pthread_mutex_unlock (&db->memlock);

  return res;
}
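
/* A minimal usage sketch (hypothetical caller; the real callers live
   in cache.c and the per-service modules):

     struct datahead *dh = mempool_alloc (db, total, IDX_result_data);
     if (dh == NULL)
       ... fall back to a malloc'ed, non-shared record ...

   Recording the block in the calling thread's MEM_IN_FLIGHT slot is
   what lets a concurrent gc () run keep the not-yet-linked block
   alive; see the in-flight walk in gc () above.  */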