/* Imported from glibc (fedora-glibc-20071014T1847 snapshot), file nscd/mem.c,
   blob d7c59244aa06f3f366f48f4a072c7c6127edd50b.  */
/* Cache memory handling.
   Copyright (C) 2004, 2005, 2006 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2004.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
#include <alloca.h>
#include <assert.h>
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <inttypes.h>
#include <libintl.h>
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/param.h>

#include "dbg_log.h"
#include "nscd.h"
/* qsort comparison function: order hash entries by the address of the
   entry object itself.  gc uses the resulting order to walk the hash
   entries in memory order while compacting the data area.  Returns
   -1/0/1 in strcmp style.  */
static int
sort_he (const void *p1, const void *p2)
{
  struct hashentry *h1 = *(struct hashentry **) p1;
  struct hashentry *h2 = *(struct hashentry **) p2;

  /* Plain pointer comparison; all entries live inside one mapped data
     area, so relational comparison of the addresses is well defined.  */
  if (h1 < h2)
    return -1;
  if (h1 > h2)
    return 1;
  return 0;
}
51 static int
52 sort_he_data (const void *p1, const void *p2)
54 struct hashentry *h1 = *(struct hashentry **) p1;
55 struct hashentry *h2 = *(struct hashentry **) p2;
57 if (h1->packet < h2->packet)
58 return -1;
59 if (h1->packet > h2->packet)
60 return 1;
61 return 0;
65 /* Basic definitions for the bitmap implementation. Only BITMAP_T
66 needs to be changed to choose a different word size. */
67 #define BITMAP_T uint8_t
68 #define BITS (CHAR_BIT * sizeof (BITMAP_T))
69 #define ALLBITS ((((BITMAP_T) 1) << BITS) - 1)
70 #define HIGHBIT (((BITMAP_T) 1) << (BITS - 1))
73 static void
74 markrange (BITMAP_T *mark, ref_t start, size_t len)
76 /* Adjust parameters for block alignment. */
77 start /= BLOCK_ALIGN;
78 len = (len + BLOCK_ALIGN_M1) / BLOCK_ALIGN;
80 size_t elem = start / BITS;
82 if (start % BITS != 0)
84 if (start % BITS + len <= BITS)
86 /* All fits in the partial byte. */
87 mark[elem] |= (ALLBITS >> (BITS - len)) << (start % BITS);
88 return;
91 mark[elem++] |= 0xff << (start % BITS);
92 len -= BITS - (start % BITS);
95 while (len >= BITS)
97 mark[elem++] = ALLBITS;
98 len -= BITS;
101 if (len > 0)
102 mark[elem] |= ALLBITS >> (BITS - len);
106 void
107 gc (struct database_dyn *db)
109 /* We need write access. */
110 pthread_rwlock_wrlock (&db->lock);
112 /* And the memory handling lock. */
113 pthread_mutex_lock (&db->memlock);
115 /* We need an array representing the data area. All memory
116 allocation is BLOCK_ALIGN aligned so this is the level at which
117 we have to look at the memory. We use a mark and sweep algorithm
118 where the marks are placed in this array. */
119 assert (db->head->first_free % BLOCK_ALIGN == 0);
120 BITMAP_T mark[(db->head->first_free / BLOCK_ALIGN + BITS - 1) / BITS];
121 memset (mark, '\0', sizeof (mark));
123 /* Create an array which can hold pointer to all the entries in hash
124 entries. */
125 struct hashentry *he[db->head->nentries];
126 struct hashentry *he_data[db->head->nentries];
128 size_t cnt = 0;
129 for (size_t idx = 0; idx < db->head->module; ++idx)
131 ref_t *prevp = &db->head->array[idx];
132 ref_t run = *prevp;
134 while (run != ENDREF)
136 assert (cnt < db->head->nentries);
137 he[cnt] = (struct hashentry *) (db->data + run);
139 he[cnt]->prevp = prevp;
140 prevp = &he[cnt]->next;
142 /* This is the hash entry itself. */
143 markrange (mark, run, sizeof (struct hashentry));
145 /* Add the information for the data itself. We do this
146 only for the one special entry marked with FIRST. */
147 if (he[cnt]->first)
149 struct datahead *dh
150 = (struct datahead *) (db->data + he[cnt]->packet);
151 markrange (mark, he[cnt]->packet, dh->allocsize);
154 run = he[cnt]->next;
156 ++cnt;
159 assert (cnt == db->head->nentries);
161 /* Sort the entries by the addresses of the referenced data. All
162 the entries pointing to the same DATAHEAD object will have the
163 same key. Stability of the sorting is unimportant. */
164 memcpy (he_data, he, cnt * sizeof (struct hashentry *));
165 qsort (he_data, cnt, sizeof (struct hashentry *), sort_he_data);
167 /* Sort the entries by their address. */
168 qsort (he, cnt, sizeof (struct hashentry *), sort_he);
170 /* Determine the highest used address. */
171 size_t high = sizeof (mark);
172 while (high > 0 && mark[high - 1] == 0)
173 --high;
175 /* No memory used. */
176 if (high == 0)
178 db->head->first_free = 0;
179 goto out;
182 /* Determine the highest offset. */
183 BITMAP_T mask = HIGHBIT;
184 ref_t highref = (high * BITS - 1) * BLOCK_ALIGN;
185 while ((mark[high - 1] & mask) == 0)
187 mask >>= 1;
188 highref -= BLOCK_ALIGN;
191 /* Now we can iterate over the MARK array and find bits which are not
192 set. These represent memory which can be recovered. */
193 size_t byte = 0;
194 /* Find the first gap. */
195 while (byte < high && mark[byte] == ALLBITS)
196 ++byte;
198 if (byte == high
199 || (byte == high - 1 && (mark[byte] & ~(mask | (mask - 1))) == 0))
200 /* No gap. */
201 goto out;
203 mask = 1;
204 cnt = 0;
205 while ((mark[byte] & mask) != 0)
207 ++cnt;
208 mask <<= 1;
210 ref_t off_free = (byte * BITS + cnt) * BLOCK_ALIGN;
211 assert (off_free <= db->head->first_free);
213 struct hashentry **next_hash = he;
214 struct hashentry **next_data = he_data;
216 /* Skip over the hash entries in the first block which does not get
217 moved. */
218 while (next_hash < &he[db->head->nentries]
219 && *next_hash < (struct hashentry *) (db->data + off_free))
220 ++next_hash;
222 while (next_data < &he_data[db->head->nentries]
223 && (*next_data)->packet < off_free)
224 ++next_data;
227 /* Now we start modifying the data. Make sure all readers of the
228 data are aware of this and temporarily don't use the data. */
229 ++db->head->gc_cycle;
230 assert ((db->head->gc_cycle & 1) == 1);
233 /* We do not perform the move operations right away since the
234 he_data array is not sorted by the address of the data. */
235 struct moveinfo
237 void *from;
238 void *to;
239 size_t size;
240 struct moveinfo *next;
241 } *moves = NULL;
243 while (byte < high)
245 /* Search for the next filled block. BYTE is the index of the
246 entry in MARK, MASK is the bit, and CNT is the bit number.
247 OFF_FILLED is the corresponding offset. */
248 if ((mark[byte] & ~(mask - 1)) == 0)
250 /* No other bit set in the same element of MARK. Search in the
251 following memory. */
253 ++byte;
254 while (byte < high && mark[byte] == 0);
256 if (byte == high)
257 /* That was it. */
258 break;
260 mask = 1;
261 cnt = 0;
263 /* Find the exact bit. */
264 while ((mark[byte] & mask) == 0)
266 ++cnt;
267 mask <<= 1;
270 ref_t off_alloc = (byte * BITS + cnt) * BLOCK_ALIGN;
271 assert (off_alloc <= db->head->first_free);
273 /* Find the end of the used area. */
274 if ((mark[byte] & ~(mask - 1)) == (BITMAP_T) ~(mask - 1))
276 /* All other bits set. Search the next bytes in MARK. */
278 ++byte;
279 while (byte < high && mark[byte] == ALLBITS);
281 mask = 1;
282 cnt = 0;
284 if (byte < high)
286 /* Find the exact bit. */
287 while ((mark[byte] & mask) != 0)
289 ++cnt;
290 mask <<= 1;
294 ref_t off_allocend = (byte * BITS + cnt) * BLOCK_ALIGN;
295 assert (off_allocend <= db->head->first_free);
296 /* Now we know that we can copy the area from OFF_ALLOC to
297 OFF_ALLOCEND (not included) to the memory starting at
298 OFF_FREE. First fix up all the entries for the
299 displacement. */
300 ref_t disp = off_alloc - off_free;
302 struct moveinfo *new_move
303 = (struct moveinfo *) alloca (sizeof (*new_move));
304 new_move->from = db->data + off_alloc;
305 new_move->to = db->data + off_free;
306 new_move->size = off_allocend - off_alloc;
307 /* Create a circular list to be always able to append at the end. */
308 if (moves == NULL)
309 moves = new_move->next = new_move;
310 else
312 new_move->next = moves->next;
313 moves = moves->next = new_move;
316 /* The following loop will prepare to move this much data. */
317 off_free += off_allocend - off_alloc;
319 while (off_alloc < off_allocend)
321 /* Determine whether the next entry is for a hash entry or
322 the data. */
323 if ((struct hashentry *) (db->data + off_alloc) == *next_hash)
325 /* Just correct the forward reference. */
326 *(*next_hash++)->prevp -= disp;
328 off_alloc += ((sizeof (struct hashentry) + BLOCK_ALIGN_M1)
329 & ~BLOCK_ALIGN_M1);
331 else
333 assert (next_data < &he_data[db->head->nentries]);
334 assert ((*next_data)->packet == off_alloc);
336 struct datahead *dh = (struct datahead *) (db->data + off_alloc);
339 assert ((*next_data)->key >= (*next_data)->packet);
340 assert ((*next_data)->key + (*next_data)->len
341 <= (*next_data)->packet + dh->allocsize);
343 (*next_data)->packet -= disp;
344 (*next_data)->key -= disp;
345 ++next_data;
347 while (next_data < &he_data[db->head->nentries]
348 && (*next_data)->packet == off_alloc);
350 off_alloc += (dh->allocsize + BLOCK_ALIGN_M1) & ~BLOCK_ALIGN_M1;
353 assert (off_alloc == off_allocend);
355 assert (off_alloc <= db->head->first_free);
356 if (off_alloc == db->head->first_free)
357 /* We are done, that was the last block. */
358 break;
360 assert (next_hash == &he[db->head->nentries]);
361 assert (next_data == &he_data[db->head->nentries]);
363 /* Now perform the actual moves. */
364 if (moves != NULL)
366 struct moveinfo *runp = moves->next;
369 assert ((char *) runp->to >= db->data);
370 assert ((char *) runp->to + runp->size
371 <= db->data + db->head->first_free);
372 assert ((char *) runp->from >= db->data);
373 assert ((char *) runp->from + runp->size
374 <= db->data + db->head->first_free);
376 /* The regions may overlap. */
377 memmove (runp->to, runp->from, runp->size);
378 runp = runp->next;
380 while (runp != moves->next);
382 if (__builtin_expect (debug_level >= 3, 0))
383 dbg_log (_("freed %zu bytes in %s cache"),
384 db->head->first_free
385 - ((char *) moves->to + moves->size - db->data),
386 dbnames[db - dbs]);
388 /* The byte past the end of the last copied block is the next
389 available byte. */
390 db->head->first_free = (char *) moves->to + moves->size - db->data;
392 /* Consistency check. */
393 if (__builtin_expect (debug_level >= 3, 0))
395 for (size_t idx = 0; idx < db->head->module; ++idx)
397 ref_t run = db->head->array[idx];
398 size_t cnt = 0;
400 while (run != ENDREF)
402 if (run + sizeof (struct hashentry) > db->head->first_free)
404 dbg_log ("entry %zu in hash bucket %zu out of bounds: "
405 "%" PRIu32 "+%zu > %zu\n",
406 cnt, idx, run, sizeof (struct hashentry),
407 (size_t) db->head->first_free);
408 break;
411 struct hashentry *he = (struct hashentry *) (db->data + run);
413 if (he->key + he->len > db->head->first_free)
414 dbg_log ("key of entry %zu in hash bucket %zu out of "
415 "bounds: %" PRIu32 "+%zu > %zu\n",
416 cnt, idx, he->key, (size_t) he->len,
417 (size_t) db->head->first_free);
419 if (he->packet + sizeof (struct datahead)
420 > db->head->first_free)
421 dbg_log ("packet of entry %zu in hash bucket %zu out of "
422 "bounds: %" PRIu32 "+%zu > %zu\n",
423 cnt, idx, he->packet, sizeof (struct datahead),
424 (size_t) db->head->first_free);
425 else
427 struct datahead *dh = (struct datahead *) (db->data
428 + he->packet);
429 if (he->packet + dh->allocsize
430 > db->head->first_free)
431 dbg_log ("full key of entry %zu in hash bucket %zu "
432 "out of bounds: %" PRIu32 "+%zu > %zu",
433 cnt, idx, he->packet, (size_t) dh->allocsize,
434 (size_t) db->head->first_free);
437 run = he->next;
438 ++cnt;
444 /* Make sure the data on disk is updated. */
445 if (db->persistent)
446 msync (db->head, db->data + db->head->first_free - (char *) db->head,
447 MS_ASYNC);
450 /* Now we are done modifying the data. */
451 ++db->head->gc_cycle;
452 assert ((db->head->gc_cycle & 1) == 0);
454 /* We are done. */
455 out:
456 pthread_mutex_unlock (&db->memlock);
457 pthread_rwlock_unlock (&db->lock);
461 void *
462 mempool_alloc (struct database_dyn *db, size_t len)
464 /* Make sure LEN is a multiple of our maximum alignment so we can
465 keep track of used memory is multiples of this alignment value. */
466 if ((len & BLOCK_ALIGN_M1) != 0)
467 len += BLOCK_ALIGN - (len & BLOCK_ALIGN_M1);
469 pthread_mutex_lock (&db->memlock);
471 assert ((db->head->first_free & BLOCK_ALIGN_M1) == 0);
473 bool tried_resize = false;
474 void *res;
475 retry:
476 res = db->data + db->head->first_free;
478 if (__builtin_expect (db->head->first_free + len > db->head->data_size, 0))
480 if (! tried_resize)
482 /* Try to resize the database. Grow size of 1/8th. */
483 size_t oldtotal = (sizeof (struct database_pers_head)
484 + roundup (db->head->module * sizeof (ref_t), ALIGN)
485 + db->head->data_size);
486 size_t new_data_size = (db->head->data_size
487 + MAX (2 * len, db->head->data_size / 8));
488 size_t newtotal = (sizeof (struct database_pers_head)
489 + roundup (db->head->module * sizeof (ref_t), ALIGN)
490 + new_data_size);
491 if (newtotal > db->max_db_size)
493 new_data_size -= newtotal - db->max_db_size;
494 newtotal = db->max_db_size;
497 if (db->mmap_used && newtotal > oldtotal
498 /* We only have to adjust the file size. The new pages
499 become magically available. */
500 && TEMP_FAILURE_RETRY_VAL (posix_fallocate (db->wr_fd, oldtotal,
501 newtotal
502 - oldtotal)) == 0)
504 db->head->data_size = new_data_size;
505 tried_resize = true;
506 goto retry;
510 if (! db->last_alloc_failed)
512 dbg_log (_("no more memory for database '%s'"), dbnames[db - dbs]);
514 db->last_alloc_failed = true;
517 /* No luck. */
518 res = NULL;
520 else
522 db->head->first_free += len;
524 db->last_alloc_failed = false;
527 pthread_mutex_unlock (&db->memlock);
529 return res;