nscd/mem.c

   1 /* Cache memory handling.
   2    Copyright (C) 2004-2022 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published
   7    by the Free Software Foundation; version 2 of the License, or
   8    (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, see <https://www.gnu.org/licenses/>.  */
  17
  18 #include <assert.h>
  19 #include <errno.h>
  20 #include <error.h>
  21 #include <fcntl.h>
  22 #include <inttypes.h>
  23 #include <libintl.h>
  24 #include <limits.h>
  25 #include <obstack.h>
  26 #include <stdlib.h>
  27 #include <string.h>
  28 #include <unistd.h>
  29 #include <sys/mman.h>
  30 #include <sys/param.h>
  31
  32 #include "dbg_log.h"
  33 #include "nscd.h"
  34
  35
  36 static int
  37 sort_he (const void *p1, const void *p2)
  38 {
  39   struct hashentry *h1 = *(struct hashentry **) p1;
  40   struct hashentry *h2 = *(struct hashentry **) p2;
  41
  42   if (h1 < h2)
  43     return -1;
  44   if (h1 > h2)
  45     return 1;
  46   return 0;
  47 }
  48
  49
  50 static int
  51 sort_he_data (const void *p1, const void *p2)
  52 {
  53   struct hashentry *h1 = *(struct hashentry **) p1;
  54   struct hashentry *h2 = *(struct hashentry **) p2;
  55
  56   if (h1->packet < h2->packet)
  57     return -1;
  58   if (h1->packet > h2->packet)
  59     return 1;
  60   return 0;
  61 }
  62
  63
  64 /* Basic definitions for the bitmap implementation.  Only BITMAP_T
  65    needs to be changed to choose a different word size.  */
  66 #define BITMAP_T uint8_t
  67 #define BITS (CHAR_BIT * sizeof (BITMAP_T))
  68 #define ALLBITS ((((BITMAP_T) 1) << BITS) - 1)
  69 #define HIGHBIT (((BITMAP_T) 1) << (BITS - 1))
  70
  71
  72 static void
  73 markrange (BITMAP_T *mark, ref_t start, size_t len)
  74 {
  75   /* Adjust parameters for block alignment.  */
  76   assert ((start & BLOCK_ALIGN_M1) == 0);
  77   start /= BLOCK_ALIGN;
  78   len = (len + BLOCK_ALIGN_M1) / BLOCK_ALIGN;
  79
  80   size_t elem = start / BITS;
  81
  82   if (start % BITS != 0)
  83     {
  84       if (start % BITS + len <= BITS)
  85         {
  86           /* All fits in the partial byte.  */
  87           mark[elem] |= (ALLBITS >> (BITS - len)) << (start % BITS);
  88           return;
  89         }
  90
  91       mark[elem++] |= ALLBITS << (start % BITS);
  92       len -= BITS - (start % BITS);
  93     }
  94
  95   while (len >= BITS)
  96     {
  97       mark[elem++] = ALLBITS;
  98       len -= BITS;
  99     }
 100
 101   if (len > 0)
 102     mark[elem] |= ALLBITS >> (BITS - len);
 103 }
 104
 105
 106 void
 107 gc (struct database_dyn *db)
 108 {
 109   /* We need write access.  */
 110   pthread_rwlock_wrlock (&db->lock);
 111
 112   /* And the memory handling lock.  */
 113   pthread_mutex_lock (&db->memlock);
 114
 115   /* We need an array representing the data area.  All memory
 116      allocation is BLOCK_ALIGN aligned so this is the level at which
 117      we have to look at the memory.  We use a mark and sweep algorithm
 118      where the marks are placed in this array.  */
 119   assert (db->head->first_free % BLOCK_ALIGN == 0);
 120
 121   BITMAP_T *mark;
 122   bool mark_use_malloc;
 123   /* In prune_cache we are also using a dynamically allocated array.
 124      If the array in the caller is too large we have malloc'ed it.  */
 125   size_t stack_used = sizeof (bool) * db->head->module;
 126   if (__glibc_unlikely (stack_used > MAX_STACK_USE))
 127     stack_used = 0;
 128   size_t nmark = (db->head->first_free / BLOCK_ALIGN + BITS - 1) / BITS;
 129   size_t memory_needed = nmark * sizeof (BITMAP_T);
 130   if (__glibc_likely (stack_used + memory_needed <= MAX_STACK_USE))
 131     {
 132       mark = (BITMAP_T *) alloca_account (memory_needed, stack_used);
 133       mark_use_malloc = false;
 134       memset (mark, '\0', memory_needed);
 135     }
 136   else
 137     {
 138       mark = (BITMAP_T *) xcalloc (1, memory_needed);
 139       mark_use_malloc = true;
 140     }
 141
 142   /* Create an array which can hold pointer to all the entries in hash
 143      entries.  */
 144   memory_needed = 2 * db->head->nentries * sizeof (struct hashentry *);
 145   struct hashentry **he;
 146   struct hashentry **he_data;
 147   bool he_use_malloc;
 148   if (__glibc_likely (stack_used + memory_needed <= MAX_STACK_USE))
 149     {
 150       he = alloca_account (memory_needed, stack_used);
 151       he_use_malloc = false;
 152     }
 153   else
 154     {
 155       he = xmalloc (memory_needed);
 156       he_use_malloc = true;
 157     }
 158   he_data = &he[db->head->nentries];
 159
 160   size_t cnt = 0;
 161   for (size_t idx = 0; idx < db->head->module; ++idx)
 162     {
 163       ref_t *prevp = &db->head->array[idx];
 164       ref_t run = *prevp;
 165
 166       while (run != ENDREF)
 167         {
 168           assert (cnt < db->head->nentries);
 169           he[cnt] = (struct hashentry *) (db->data + run);
 170
 171           he[cnt]->prevp = prevp;
 172           prevp = &he[cnt]->next;
 173
 174           /* This is the hash entry itself.  */
 175           markrange (mark, run, sizeof (struct hashentry));
 176
 177           /* Add the information for the data itself.  We do this
 178              only for the one special entry marked with FIRST.  */
 179           if (he[cnt]->first)
 180             {
 181               struct datahead *dh
 182                 = (struct datahead *) (db->data + he[cnt]->packet);
 183               markrange (mark, he[cnt]->packet, dh->allocsize);
 184             }
 185
 186           run = he[cnt]->next;
 187
 188           ++cnt;
 189         }
 190     }
 191   assert (cnt == db->head->nentries);
 192
 193   /* Sort the entries by the addresses of the referenced data.  All
 194      the entries pointing to the same DATAHEAD object will have the
 195      same key.  Stability of the sorting is unimportant.  */
 196   memcpy (he_data, he, cnt * sizeof (struct hashentry *));
 197   qsort (he_data, cnt, sizeof (struct hashentry *), sort_he_data);
 198
 199   /* Sort the entries by their address.  */
 200   qsort (he, cnt, sizeof (struct hashentry *), sort_he);
 201
 202 #define obstack_chunk_alloc xmalloc
 203 #define obstack_chunk_free free
 204   struct obstack ob;
 205   obstack_init (&ob);
 206
 207   /* Determine the highest used address.  */
 208   size_t high = nmark;
 209   while (high > 0 && mark[high - 1] == 0)
 210     --high;
 211
 212   /* No memory used.  */
 213   if (high == 0)
 214     {
 215       db->head->first_free = 0;
 216       goto out;
 217     }
 218
 219   /* Determine the highest offset.  */
 220   BITMAP_T mask = HIGHBIT;
 221   while ((mark[high - 1] & mask) == 0)
 222     mask >>= 1;
 223
 224   /* Now we can iterate over the MARK array and find bits which are not
 225      set.  These represent memory which can be recovered.  */
 226   size_t byte = 0;
 227   /* Find the first gap.  */
 228   while (byte < high && mark[byte] == ALLBITS)
 229     ++byte;
 230
 231   if (byte == high
 232       || (byte == high - 1 && (mark[byte] & ~(mask | (mask - 1))) == 0))
 233     /* No gap.  */
 234     goto out;
 235
 236   mask = 1;
 237   cnt = 0;
 238   while ((mark[byte] & mask) != 0)
 239     {
 240       ++cnt;
 241       mask <<= 1;
 242     }
 243   ref_t off_free = (byte * BITS + cnt) * BLOCK_ALIGN;
 244   assert (off_free <= db->head->first_free);
 245
 246   struct hashentry **next_hash = he;
 247   struct hashentry **next_data = he_data;
 248
 249   /* Skip over the hash entries in the first block which does not get
 250      moved.  */
 251   while (next_hash < &he[db->head->nentries]
 252          && *next_hash < (struct hashentry *) (db->data + off_free))
 253     ++next_hash;
 254
 255   while (next_data < &he_data[db->head->nentries]
 256          && (*next_data)->packet < off_free)
 257     ++next_data;
 258
 259
 260   /* Now we start modifying the data.  Make sure all readers of the
 261      data are aware of this and temporarily don't use the data.  */
 262   atomic_fetch_add_relaxed (&db->head->gc_cycle, 1);
 263   assert ((db->head->gc_cycle & 1) == 1);
 264
 265
 266   /* We do not perform the move operations right away since the
 267      he_data array is not sorted by the address of the data.  */
 268   struct moveinfo
 269   {
 270     void *from;
 271     void *to;
 272     size_t size;
 273     struct moveinfo *next;
 274   } *moves = NULL;
 275
 276   while (byte < high)
 277     {
 278       /* Search for the next filled block.  BYTE is the index of the
 279          entry in MARK, MASK is the bit, and CNT is the bit number.
 280          OFF_FILLED is the corresponding offset.  */
 281       if ((mark[byte] & ~(mask - 1)) == 0)
 282         {
 283           /* No other bit set in the same element of MARK.  Search in the
 284              following memory.  */
 285           do
 286             ++byte;
 287           while (byte < high && mark[byte] == 0);
 288
 289           if (byte == high)
 290             /* That was it.  */
 291             break;
 292
 293           mask = 1;
 294           cnt = 0;
 295         }
 296       /* Find the exact bit.  */
 297       while ((mark[byte] & mask) == 0)
 298         {
 299           ++cnt;
 300           mask <<= 1;
 301         }
 302
 303       ref_t off_alloc = (byte * BITS + cnt) * BLOCK_ALIGN;
 304       assert (off_alloc <= db->head->first_free);
 305
 306       /* Find the end of the used area.  */
 307       if ((mark[byte] & ~(mask - 1)) == (BITMAP_T) ~(mask - 1))
 308         {
 309           /* All other bits set.  Search the next bytes in MARK.  */
 310           do
 311             ++byte;
 312           while (byte < high && mark[byte] == ALLBITS);
 313
 314           mask = 1;
 315           cnt = 0;
 316         }
 317       if (byte < high)
 318         {
 319           /* Find the exact bit.  */
 320           while ((mark[byte] & mask) != 0)
 321             {
 322               ++cnt;
 323               mask <<= 1;
 324             }
 325         }
 326
 327       ref_t off_allocend = (byte * BITS + cnt) * BLOCK_ALIGN;
 328       assert (off_allocend <= db->head->first_free);
 329       /* Now we know that we can copy the area from OFF_ALLOC to
 330          OFF_ALLOCEND (not included) to the memory starting at
 331          OFF_FREE.  First fix up all the entries for the
 332          displacement.  */
 333       ref_t disp = off_alloc - off_free;
 334
 335       struct moveinfo *new_move;
 336       if (__builtin_expect (stack_used + sizeof (*new_move) <= MAX_STACK_USE,
 337                             1))
 338         new_move = alloca_account (sizeof (*new_move), stack_used);
 339       else
 340         new_move = obstack_alloc (&ob, sizeof (*new_move));
 341       new_move->from = db->data + off_alloc;
 342       new_move->to = db->data + off_free;
 343       new_move->size = off_allocend - off_alloc;
 344       /* Create a circular list to be always able to append at the end.  */
 345       if (moves == NULL)
 346         moves = new_move->next = new_move;
 347       else
 348         {
 349           new_move->next = moves->next;
 350           moves = moves->next = new_move;
 351         }
 352
 353       /* The following loop will prepare to move this much data.  */
 354       off_free += off_allocend - off_alloc;
 355
 356       while (off_alloc < off_allocend)
 357         {
 358           /* Determine whether the next entry is for a hash entry or
 359              the data.  */
 360           if ((struct hashentry *) (db->data + off_alloc) == *next_hash)
 361             {
 362               /* Just correct the forward reference.  */
 363               *(*next_hash++)->prevp -= disp;
 364
 365               off_alloc += ((sizeof (struct hashentry) + BLOCK_ALIGN_M1)
 366                             & ~BLOCK_ALIGN_M1);
 367             }
 368           else
 369             {
 370               assert (next_data < &he_data[db->head->nentries]);
 371               assert ((*next_data)->packet == off_alloc);
 372
 373               struct datahead *dh = (struct datahead *) (db->data + off_alloc);
 374               do
 375                 {
 376                   assert ((*next_data)->key >= (*next_data)->packet);
 377                   assert ((*next_data)->key + (*next_data)->len
 378                           <= (*next_data)->packet + dh->allocsize);
 379
 380                   (*next_data)->packet -= disp;
 381                   (*next_data)->key -= disp;
 382                   ++next_data;
 383                 }
 384               while (next_data < &he_data[db->head->nentries]
 385                      && (*next_data)->packet == off_alloc);
 386
 387               off_alloc += (dh->allocsize + BLOCK_ALIGN_M1) & ~BLOCK_ALIGN_M1;
 388             }
 389         }
 390       assert (off_alloc == off_allocend);
 391
 392       assert (off_alloc <= db->head->first_free);
 393       if (off_alloc == db->head->first_free)
 394         /* We are done, that was the last block.  */
 395         break;
 396     }
 397   assert (next_hash == &he[db->head->nentries]);
 398   assert (next_data == &he_data[db->head->nentries]);
 399
 400   /* Now perform the actual moves.  */
 401   if (moves != NULL)
 402     {
 403       struct moveinfo *runp = moves->next;
 404       do
 405         {
 406           assert ((char *) runp->to >= db->data);
 407           assert ((char *) runp->to + runp->size
 408                   <= db->data  + db->head->first_free);
 409           assert ((char *) runp->from >= db->data);
 410           assert ((char *) runp->from + runp->size
 411                   <= db->data  + db->head->first_free);
 412
 413           /* The regions may overlap.  */
 414           memmove (runp->to, runp->from, runp->size);
 415           runp = runp->next;
 416         }
 417       while (runp != moves->next);
 418
 419       if (__glibc_unlikely (debug_level >= 3))
 420         dbg_log (_("freed %zu bytes in %s cache"),
 421                  (size_t) (db->head->first_free
 422                            - ((char *) moves->to + moves->size - db->data)),
 423                  dbnames[db - dbs]);
 424
 425       /* The byte past the end of the last copied block is the next
 426          available byte.  */
 427       db->head->first_free = (char *) moves->to + moves->size - db->data;
 428
 429       /* Consistency check.  */
 430       if (__glibc_unlikely (debug_level >= 3))
 431         {
 432           for (size_t idx = 0; idx < db->head->module; ++idx)
 433             {
 434               ref_t run = db->head->array[idx];
 435               size_t cnt = 0;
 436
 437               while (run != ENDREF)
 438                 {
 439                   if (run + sizeof (struct hashentry) > db->head->first_free)
 440                     {
 441                       dbg_log ("entry %zu in hash bucket %zu out of bounds: "
 442                                "%" PRIu32 "+%zu > %zu\n",
 443                                cnt, idx, run, sizeof (struct hashentry),
 444                                (size_t) db->head->first_free);
 445                       break;
 446                     }
 447
 448                   struct hashentry *he = (struct hashentry *) (db->data + run);
 449
 450                   if (he->key + he->len > db->head->first_free)
 451                     dbg_log ("key of entry %zu in hash bucket %zu out of "
 452                              "bounds: %" PRIu32 "+%zu > %zu\n",
 453                              cnt, idx, he->key, (size_t) he->len,
 454                              (size_t) db->head->first_free);
 455
 456                   if (he->packet + sizeof (struct datahead)
 457                       > db->head->first_free)
 458                     dbg_log ("packet of entry %zu in hash bucket %zu out of "
 459                              "bounds: %" PRIu32 "+%zu > %zu\n",
 460                              cnt, idx, he->packet, sizeof (struct datahead),
 461                              (size_t) db->head->first_free);
 462                   else
 463                     {
 464                       struct datahead *dh = (struct datahead *) (db->data
 465                                                                  + he->packet);
 466                       if (he->packet + dh->allocsize
 467                           > db->head->first_free)
 468                         dbg_log ("full key of entry %zu in hash bucket %zu "
 469                                  "out of bounds: %" PRIu32 "+%zu > %zu",
 470                                  cnt, idx, he->packet, (size_t) dh->allocsize,
 471                                  (size_t) db->head->first_free);
 472                     }
 473
 474                   run = he->next;
 475                   ++cnt;
 476                 }
 477             }
 478         }
 479     }
 480
 481   /* Make sure the data on disk is updated.  */
 482   if (db->persistent)
 483     msync (db->head, db->data + db->head->first_free - (char *) db->head,
 484            MS_ASYNC);
 485
 486
 487   /* Now we are done modifying the data.  */
 488   atomic_fetch_add_relaxed (&db->head->gc_cycle, 1);
 489   assert ((db->head->gc_cycle & 1) == 0);
 490
 491   /* We are done.  */
 492  out:
 493   pthread_mutex_unlock (&db->memlock);
 494   pthread_rwlock_unlock (&db->lock);
 495
 496   if (he_use_malloc)
 497     free (he);
 498   if (mark_use_malloc)
 499     free (mark);
 500
 501   obstack_free (&ob, NULL);
 502 }
 503
 504
 505 void *
 506 mempool_alloc (struct database_dyn *db, size_t len, int data_alloc)
 507 {
 508   /* Make sure LEN is a multiple of our maximum alignment so we can
 509      keep track of used memory is multiples of this alignment value.  */
 510   if ((len & BLOCK_ALIGN_M1) != 0)
 511     len += BLOCK_ALIGN - (len & BLOCK_ALIGN_M1);
 512
 513   if (data_alloc)
 514     pthread_rwlock_rdlock (&db->lock);
 515
 516   pthread_mutex_lock (&db->memlock);
 517
 518   assert ((db->head->first_free & BLOCK_ALIGN_M1) == 0);
 519
 520   bool tried_resize = false;
 521   void *res;
 522  retry:
 523   res = db->data + db->head->first_free;
 524
 525   if (__glibc_unlikely (db->head->first_free + len > db->head->data_size))
 526     {
 527       if (! tried_resize)
 528         {
 529           /* Try to resize the database.  Grow size of 1/8th.  */
 530           size_t oldtotal = (sizeof (struct database_pers_head)
 531                              + roundup (db->head->module * sizeof (ref_t),
 532                                         ALIGN)
 533                              + db->head->data_size);
 534           size_t new_data_size = (db->head->data_size
 535                                   + MAX (2 * len, db->head->data_size / 8));
 536           size_t newtotal = (sizeof (struct database_pers_head)
 537                              + roundup (db->head->module * sizeof (ref_t), ALIGN)
 538                              + new_data_size);
 539           if (newtotal > db->max_db_size)
 540             {
 541               new_data_size -= newtotal - db->max_db_size;
 542               newtotal = db->max_db_size;
 543             }
 544
 545           if (db->mmap_used && newtotal > oldtotal
 546               /* We only have to adjust the file size.  The new pages
 547                  become magically available.  */
 548               && TEMP_FAILURE_RETRY_VAL (posix_fallocate (db->wr_fd, oldtotal,
 549                                                           newtotal
 550                                                           - oldtotal)) == 0)
 551             {
 552               db->head->data_size = new_data_size;
 553               tried_resize = true;
 554               goto retry;
 555             }
 556         }
 557
 558       if (data_alloc)
 559         pthread_rwlock_unlock (&db->lock);
 560
 561       if (! db->last_alloc_failed)
 562         {
 563           dbg_log (_("no more memory for database '%s'"), dbnames[db - dbs]);
 564
 565           db->last_alloc_failed = true;
 566         }
 567
 568       ++db->head->addfailed;
 569
 570       /* No luck.  */
 571       res = NULL;
 572     }
 573   else
 574     {
 575       db->head->first_free += len;
 576
 577       db->last_alloc_failed = false;
 578
 579     }
 580
 581   pthread_mutex_unlock (&db->memlock);
 582
 583   return res;
 584 }