Optimize x86-64 syscall cancellation handling.
[glibc.git] / nscd / mem.c
blobfcea6dbd03c7ad22ce7a6c874b2e152a0afbd4ff
1 /* Cache memory handling.
2 Copyright (C) 2004, 2005, 2006, 2008, 2009 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@redhat.com>, 2004.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20 #include <assert.h>
21 #include <errno.h>
22 #include <error.h>
23 #include <fcntl.h>
24 #include <inttypes.h>
25 #include <libintl.h>
26 #include <limits.h>
27 #include <obstack.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <sys/mman.h>
32 #include <sys/param.h>
34 #include "dbg_log.h"
35 #include "nscd.h"
38 /* Wrapper functions with error checking for standard functions. */
39 extern void *xmalloc (size_t n);
40 extern void *xcalloc (size_t n, size_t s);
43 static int
44 sort_he (const void *p1, const void *p2)
46 struct hashentry *h1 = *(struct hashentry **) p1;
47 struct hashentry *h2 = *(struct hashentry **) p2;
49 if (h1 < h2)
50 return -1;
51 if (h1 > h2)
52 return 1;
53 return 0;
57 static int
58 sort_he_data (const void *p1, const void *p2)
60 struct hashentry *h1 = *(struct hashentry **) p1;
61 struct hashentry *h2 = *(struct hashentry **) p2;
63 if (h1->packet < h2->packet)
64 return -1;
65 if (h1->packet > h2->packet)
66 return 1;
67 return 0;
71 /* Basic definitions for the bitmap implementation. Only BITMAP_T
72 needs to be changed to choose a different word size. */
73 #define BITMAP_T uint8_t
74 #define BITS (CHAR_BIT * sizeof (BITMAP_T))
75 #define ALLBITS ((((BITMAP_T) 1) << BITS) - 1)
76 #define HIGHBIT (((BITMAP_T) 1) << (BITS - 1))
79 static void
80 markrange (BITMAP_T *mark, ref_t start, size_t len)
82 /* Adjust parameters for block alignment. */
83 assert ((start & BLOCK_ALIGN_M1) == 0);
84 start /= BLOCK_ALIGN;
85 len = (len + BLOCK_ALIGN_M1) / BLOCK_ALIGN;
87 size_t elem = start / BITS;
89 if (start % BITS != 0)
91 if (start % BITS + len <= BITS)
93 /* All fits in the partial byte. */
94 mark[elem] |= (ALLBITS >> (BITS - len)) << (start % BITS);
95 return;
98 mark[elem++] |= ALLBITS << (start % BITS);
99 len -= BITS - (start % BITS);
102 while (len >= BITS)
104 mark[elem++] = ALLBITS;
105 len -= BITS;
108 if (len > 0)
109 mark[elem] |= ALLBITS >> (BITS - len);
113 void
114 gc (struct database_dyn *db)
116 /* We need write access. */
117 pthread_rwlock_wrlock (&db->lock);
119 /* And the memory handling lock. */
120 pthread_mutex_lock (&db->memlock);
122 /* We need an array representing the data area. All memory
123 allocation is BLOCK_ALIGN aligned so this is the level at which
124 we have to look at the memory. We use a mark and sweep algorithm
125 where the marks are placed in this array. */
126 assert (db->head->first_free % BLOCK_ALIGN == 0);
128 BITMAP_T *mark;
129 bool mark_use_malloc;
130 /* In prune_cache we are also using a dynamically allocated array.
131 If the array in the caller is too large we have malloc'ed it. */
132 size_t stack_used = sizeof (bool) * db->head->module;
133 if (__builtin_expect (stack_used > MAX_STACK_USE, 0))
134 stack_used = 0;
135 size_t nmark = (db->head->first_free / BLOCK_ALIGN + BITS - 1) / BITS;
136 size_t memory_needed = nmark * sizeof (BITMAP_T);
137 if (__builtin_expect (stack_used + memory_needed <= MAX_STACK_USE, 1))
139 mark = (BITMAP_T *) alloca_account (memory_needed, stack_used);
140 mark_use_malloc = false;
141 memset (mark, '\0', memory_needed);
143 else
145 mark = (BITMAP_T *) xcalloc (1, memory_needed);
146 mark_use_malloc = true;
149 /* Create an array which can hold pointer to all the entries in hash
150 entries. */
151 memory_needed = 2 * db->head->nentries * sizeof (struct hashentry *);
152 struct hashentry **he;
153 struct hashentry **he_data;
154 bool he_use_malloc;
155 if (__builtin_expect (stack_used + memory_needed <= MAX_STACK_USE, 1))
157 he = alloca_account (memory_needed, stack_used);
158 he_use_malloc = false;
160 else
162 he = xmalloc (memory_needed);
163 he_use_malloc = true;
165 he_data = &he[db->head->nentries];
167 size_t cnt = 0;
168 for (size_t idx = 0; idx < db->head->module; ++idx)
170 ref_t *prevp = &db->head->array[idx];
171 ref_t run = *prevp;
173 while (run != ENDREF)
175 assert (cnt < db->head->nentries);
176 he[cnt] = (struct hashentry *) (db->data + run);
178 he[cnt]->prevp = prevp;
179 prevp = &he[cnt]->next;
181 /* This is the hash entry itself. */
182 markrange (mark, run, sizeof (struct hashentry));
184 /* Add the information for the data itself. We do this
185 only for the one special entry marked with FIRST. */
186 if (he[cnt]->first)
188 struct datahead *dh
189 = (struct datahead *) (db->data + he[cnt]->packet);
190 markrange (mark, he[cnt]->packet, dh->allocsize);
193 run = he[cnt]->next;
195 ++cnt;
198 assert (cnt == db->head->nentries);
200 /* Sort the entries by the addresses of the referenced data. All
201 the entries pointing to the same DATAHEAD object will have the
202 same key. Stability of the sorting is unimportant. */
203 memcpy (he_data, he, cnt * sizeof (struct hashentry *));
204 qsort (he_data, cnt, sizeof (struct hashentry *), sort_he_data);
206 /* Sort the entries by their address. */
207 qsort (he, cnt, sizeof (struct hashentry *), sort_he);
209 #define obstack_chunk_alloc xmalloc
210 #define obstack_chunk_free free
211 struct obstack ob;
212 obstack_init (&ob);
214 /* Determine the highest used address. */
215 size_t high = nmark;
216 while (high > 0 && mark[high - 1] == 0)
217 --high;
219 /* No memory used. */
220 if (high == 0)
222 db->head->first_free = 0;
223 goto out;
226 /* Determine the highest offset. */
227 BITMAP_T mask = HIGHBIT;
228 ref_t highref = (high * BITS - 1) * BLOCK_ALIGN;
229 while ((mark[high - 1] & mask) == 0)
231 mask >>= 1;
232 highref -= BLOCK_ALIGN;
235 /* Now we can iterate over the MARK array and find bits which are not
236 set. These represent memory which can be recovered. */
237 size_t byte = 0;
238 /* Find the first gap. */
239 while (byte < high && mark[byte] == ALLBITS)
240 ++byte;
242 if (byte == high
243 || (byte == high - 1 && (mark[byte] & ~(mask | (mask - 1))) == 0))
244 /* No gap. */
245 goto out;
247 mask = 1;
248 cnt = 0;
249 while ((mark[byte] & mask) != 0)
251 ++cnt;
252 mask <<= 1;
254 ref_t off_free = (byte * BITS + cnt) * BLOCK_ALIGN;
255 assert (off_free <= db->head->first_free);
257 struct hashentry **next_hash = he;
258 struct hashentry **next_data = he_data;
260 /* Skip over the hash entries in the first block which does not get
261 moved. */
262 while (next_hash < &he[db->head->nentries]
263 && *next_hash < (struct hashentry *) (db->data + off_free))
264 ++next_hash;
266 while (next_data < &he_data[db->head->nentries]
267 && (*next_data)->packet < off_free)
268 ++next_data;
271 /* Now we start modifying the data. Make sure all readers of the
272 data are aware of this and temporarily don't use the data. */
273 ++db->head->gc_cycle;
274 assert ((db->head->gc_cycle & 1) == 1);
277 /* We do not perform the move operations right away since the
278 he_data array is not sorted by the address of the data. */
279 struct moveinfo
281 void *from;
282 void *to;
283 size_t size;
284 struct moveinfo *next;
285 } *moves = NULL;
287 while (byte < high)
289 /* Search for the next filled block. BYTE is the index of the
290 entry in MARK, MASK is the bit, and CNT is the bit number.
291 OFF_FILLED is the corresponding offset. */
292 if ((mark[byte] & ~(mask - 1)) == 0)
294 /* No other bit set in the same element of MARK. Search in the
295 following memory. */
297 ++byte;
298 while (byte < high && mark[byte] == 0);
300 if (byte == high)
301 /* That was it. */
302 break;
304 mask = 1;
305 cnt = 0;
307 /* Find the exact bit. */
308 while ((mark[byte] & mask) == 0)
310 ++cnt;
311 mask <<= 1;
314 ref_t off_alloc = (byte * BITS + cnt) * BLOCK_ALIGN;
315 assert (off_alloc <= db->head->first_free);
317 /* Find the end of the used area. */
318 if ((mark[byte] & ~(mask - 1)) == (BITMAP_T) ~(mask - 1))
320 /* All other bits set. Search the next bytes in MARK. */
322 ++byte;
323 while (byte < high && mark[byte] == ALLBITS);
325 mask = 1;
326 cnt = 0;
328 if (byte < high)
330 /* Find the exact bit. */
331 while ((mark[byte] & mask) != 0)
333 ++cnt;
334 mask <<= 1;
338 ref_t off_allocend = (byte * BITS + cnt) * BLOCK_ALIGN;
339 assert (off_allocend <= db->head->first_free);
340 /* Now we know that we can copy the area from OFF_ALLOC to
341 OFF_ALLOCEND (not included) to the memory starting at
342 OFF_FREE. First fix up all the entries for the
343 displacement. */
344 ref_t disp = off_alloc - off_free;
346 struct moveinfo *new_move;
347 if (__builtin_expect (stack_used + sizeof (*new_move) <= MAX_STACK_USE,
349 new_move = alloca_account (sizeof (*new_move), stack_used);
350 else
351 new_move = obstack_alloc (&ob, sizeof (*new_move));
352 new_move->from = db->data + off_alloc;
353 new_move->to = db->data + off_free;
354 new_move->size = off_allocend - off_alloc;
355 /* Create a circular list to be always able to append at the end. */
356 if (moves == NULL)
357 moves = new_move->next = new_move;
358 else
360 new_move->next = moves->next;
361 moves = moves->next = new_move;
364 /* The following loop will prepare to move this much data. */
365 off_free += off_allocend - off_alloc;
367 while (off_alloc < off_allocend)
369 /* Determine whether the next entry is for a hash entry or
370 the data. */
371 if ((struct hashentry *) (db->data + off_alloc) == *next_hash)
373 /* Just correct the forward reference. */
374 *(*next_hash++)->prevp -= disp;
376 off_alloc += ((sizeof (struct hashentry) + BLOCK_ALIGN_M1)
377 & ~BLOCK_ALIGN_M1);
379 else
381 assert (next_data < &he_data[db->head->nentries]);
382 assert ((*next_data)->packet == off_alloc);
384 struct datahead *dh = (struct datahead *) (db->data + off_alloc);
387 assert ((*next_data)->key >= (*next_data)->packet);
388 assert ((*next_data)->key + (*next_data)->len
389 <= (*next_data)->packet + dh->allocsize);
391 (*next_data)->packet -= disp;
392 (*next_data)->key -= disp;
393 ++next_data;
395 while (next_data < &he_data[db->head->nentries]
396 && (*next_data)->packet == off_alloc);
398 off_alloc += (dh->allocsize + BLOCK_ALIGN_M1) & ~BLOCK_ALIGN_M1;
401 assert (off_alloc == off_allocend);
403 assert (off_alloc <= db->head->first_free);
404 if (off_alloc == db->head->first_free)
405 /* We are done, that was the last block. */
406 break;
408 assert (next_hash == &he[db->head->nentries]);
409 assert (next_data == &he_data[db->head->nentries]);
411 /* Now perform the actual moves. */
412 if (moves != NULL)
414 struct moveinfo *runp = moves->next;
417 assert ((char *) runp->to >= db->data);
418 assert ((char *) runp->to + runp->size
419 <= db->data + db->head->first_free);
420 assert ((char *) runp->from >= db->data);
421 assert ((char *) runp->from + runp->size
422 <= db->data + db->head->first_free);
424 /* The regions may overlap. */
425 memmove (runp->to, runp->from, runp->size);
426 runp = runp->next;
428 while (runp != moves->next);
430 if (__builtin_expect (debug_level >= 3, 0))
431 dbg_log (_("freed %zu bytes in %s cache"),
432 db->head->first_free
433 - ((char *) moves->to + moves->size - db->data),
434 dbnames[db - dbs]);
436 /* The byte past the end of the last copied block is the next
437 available byte. */
438 db->head->first_free = (char *) moves->to + moves->size - db->data;
440 /* Consistency check. */
441 if (__builtin_expect (debug_level >= 3, 0))
443 for (size_t idx = 0; idx < db->head->module; ++idx)
445 ref_t run = db->head->array[idx];
446 size_t cnt = 0;
448 while (run != ENDREF)
450 if (run + sizeof (struct hashentry) > db->head->first_free)
452 dbg_log ("entry %zu in hash bucket %zu out of bounds: "
453 "%" PRIu32 "+%zu > %zu\n",
454 cnt, idx, run, sizeof (struct hashentry),
455 (size_t) db->head->first_free);
456 break;
459 struct hashentry *he = (struct hashentry *) (db->data + run);
461 if (he->key + he->len > db->head->first_free)
462 dbg_log ("key of entry %zu in hash bucket %zu out of "
463 "bounds: %" PRIu32 "+%zu > %zu\n",
464 cnt, idx, he->key, (size_t) he->len,
465 (size_t) db->head->first_free);
467 if (he->packet + sizeof (struct datahead)
468 > db->head->first_free)
469 dbg_log ("packet of entry %zu in hash bucket %zu out of "
470 "bounds: %" PRIu32 "+%zu > %zu\n",
471 cnt, idx, he->packet, sizeof (struct datahead),
472 (size_t) db->head->first_free);
473 else
475 struct datahead *dh = (struct datahead *) (db->data
476 + he->packet);
477 if (he->packet + dh->allocsize
478 > db->head->first_free)
479 dbg_log ("full key of entry %zu in hash bucket %zu "
480 "out of bounds: %" PRIu32 "+%zu > %zu",
481 cnt, idx, he->packet, (size_t) dh->allocsize,
482 (size_t) db->head->first_free);
485 run = he->next;
486 ++cnt;
492 /* Make sure the data on disk is updated. */
493 if (db->persistent)
494 msync (db->head, db->data + db->head->first_free - (char *) db->head,
495 MS_ASYNC);
498 /* Now we are done modifying the data. */
499 ++db->head->gc_cycle;
500 assert ((db->head->gc_cycle & 1) == 0);
502 /* We are done. */
503 out:
504 pthread_mutex_unlock (&db->memlock);
505 pthread_rwlock_unlock (&db->lock);
507 if (he_use_malloc)
508 free (he);
509 if (mark_use_malloc)
510 free (mark);
512 obstack_free (&ob, NULL);
516 void *
517 mempool_alloc (struct database_dyn *db, size_t len, int data_alloc)
519 /* Make sure LEN is a multiple of our maximum alignment so we can
520 keep track of used memory is multiples of this alignment value. */
521 if ((len & BLOCK_ALIGN_M1) != 0)
522 len += BLOCK_ALIGN - (len & BLOCK_ALIGN_M1);
524 if (data_alloc)
525 pthread_rwlock_rdlock (&db->lock);
527 pthread_mutex_lock (&db->memlock);
529 assert ((db->head->first_free & BLOCK_ALIGN_M1) == 0);
531 bool tried_resize = false;
532 void *res;
533 retry:
534 res = db->data + db->head->first_free;
536 if (__builtin_expect (db->head->first_free + len > db->head->data_size, 0))
538 if (! tried_resize)
540 /* Try to resize the database. Grow size of 1/8th. */
541 size_t oldtotal = (sizeof (struct database_pers_head)
542 + roundup (db->head->module * sizeof (ref_t),
543 ALIGN)
544 + db->head->data_size);
545 size_t new_data_size = (db->head->data_size
546 + MAX (2 * len, db->head->data_size / 8));
547 size_t newtotal = (sizeof (struct database_pers_head)
548 + roundup (db->head->module * sizeof (ref_t), ALIGN)
549 + new_data_size);
550 if (newtotal > db->max_db_size)
552 new_data_size -= newtotal - db->max_db_size;
553 newtotal = db->max_db_size;
556 if (db->mmap_used && newtotal > oldtotal
557 /* We only have to adjust the file size. The new pages
558 become magically available. */
559 && TEMP_FAILURE_RETRY_VAL (posix_fallocate (db->wr_fd, oldtotal,
560 newtotal
561 - oldtotal)) == 0)
563 db->head->data_size = new_data_size;
564 tried_resize = true;
565 goto retry;
569 if (data_alloc)
570 pthread_rwlock_unlock (&db->lock);
572 if (! db->last_alloc_failed)
574 dbg_log (_("no more memory for database '%s'"), dbnames[db - dbs]);
576 db->last_alloc_failed = true;
579 ++db->head->addfailed;
581 /* No luck. */
582 res = NULL;
584 else
586 db->head->first_free += len;
588 db->last_alloc_failed = false;
592 pthread_mutex_unlock (&db->memlock);
594 return res;