nscd/nscd_helper.c

   1 /* Copyright (C) 1998-2023 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3
   4    The GNU C Library is free software; you can redistribute it and/or
   5    modify it under the terms of the GNU Lesser General Public
   6    License as published by the Free Software Foundation; either
   7    version 2.1 of the License, or (at your option) any later version.
   8
   9    The GNU C Library is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12    Lesser General Public License for more details.
  13
  14    You should have received a copy of the GNU Lesser General Public
  15    License along with the GNU C Library; if not, see
  16    <https://www.gnu.org/licenses/>.  */
  17
  18 #include <assert.h>
  19 #include <errno.h>
  20 #include <fcntl.h>
  21 #include <stdbool.h>
  22 #include <stddef.h>
  23 #include <stdlib.h>
  24 #include <string.h>
  25 #include <time.h>
  26 #include <unistd.h>
  27 #include <stdint.h>
  28 #include <sys/mman.h>
  29 #include <sys/param.h>
  30 #include <sys/poll.h>
  31 #include <sys/socket.h>
  32 #include <sys/stat.h>
  33 #include <sys/time.h>
  34 #include <sys/uio.h>
  35 #include <sys/un.h>
  36 #include <not-cancel.h>
  37 #include <kernel-features.h>
  38 #include <nss.h>
  39 #include <struct___timespec64.h>
  40
  41 #include "nscd-client.h"
  42
  43 /* Extra time we wait if the socket is still receiving data.  This
  44    value is in milliseconds.  Note that the other side is nscd on the
  45    local machine and it is already transmitting data.  So the wait
  46    time need not be long.  */
  47 #define EXTRA_RECEIVE_TIME 200
  48
  49
  50 static int
  51 wait_on_socket (int sock, long int usectmo)
  52 {
  53   struct pollfd fds[1];
  54   fds[0].fd = sock;
  55   fds[0].events = POLLIN | POLLERR | POLLHUP;
  56   int n = __poll (fds, 1, usectmo);
  57   if (n == -1 && __builtin_expect (errno == EINTR, 0))
  58     {
  59       /* Handle the case where the poll() call is interrupted by a
  60          signal.  We cannot just use TEMP_FAILURE_RETRY since it might
  61          lead to infinite loops.  */
  62       struct __timespec64 now;
  63       __clock_gettime64 (CLOCK_REALTIME, &now);
  64       int64_t end = (now.tv_sec * 1000 + usectmo
  65                      + (now.tv_nsec + 500000) / 1000000);
  66       long int timeout = usectmo;
  67       while (1)
  68         {
  69           n = __poll (fds, 1, timeout);
  70           if (n != -1 || errno != EINTR)
  71             break;
  72
  73           /* Recompute the timeout time.  */
  74           __clock_gettime64 (CLOCK_REALTIME, &now);
  75           timeout = end - ((now.tv_sec * 1000
  76                             + (now.tv_nsec + 500000) / 1000000));
  77         }
  78     }
  79
  80   return n;
  81 }
  82
  83
  84 ssize_t
  85 __readall (int fd, void *buf, size_t len)
  86 {
  87   size_t n = len;
  88   ssize_t ret;
  89   do
  90     {
  91     again:
  92       ret = TEMP_FAILURE_RETRY (__read (fd, buf, n));
  93       if (ret <= 0)
  94         {
  95           if (__builtin_expect (ret < 0 && errno == EAGAIN, 0)
  96               /* The socket is still receiving data.  Wait a bit more.  */
  97               && wait_on_socket (fd, EXTRA_RECEIVE_TIME) > 0)
  98             goto again;
  99
 100           break;
 101         }
 102       buf = (char *) buf + ret;
 103       n -= ret;
 104     }
 105   while (n > 0);
 106   return ret < 0 ? ret : len - n;
 107 }
 108
 109
/* Read from FD into the IOVCNT buffers described by IOV until all of
   them are filled.

   If the very first readv returns 0, or fails with anything other than
   EAGAIN, that result is returned directly.  Otherwise the function
   keeps reading until the sum of all iov_len values has been received.
   A readv that fails with EAGAIN is retried after waiting up to
   EXTRA_RECEIVE_TIME milliseconds for more data.

   Returns the total number of bytes read (possibly short if a later
   readv returned 0), or the negative result of a failing readv.  */
ssize_t
__readvall (int fd, const struct iovec *iov, int iovcnt)
{
  ssize_t ret = TEMP_FAILURE_RETRY (__readv (fd, iov, iovcnt));
  if (ret <= 0)
    {
      if (__glibc_likely (ret == 0 || errno != EAGAIN))
        /* A genuine error or no data to read.  */
        return ret;

      /* The data has not all yet been received.  Do as if we have not
         read anything yet.  */
      ret = 0;
    }

  /* Number of bytes the caller expects in total.  */
  size_t total = 0;
  for (int i = 0; i < iovcnt; ++i)
    total += iov[i].iov_len;

  if (ret < total)
    {
      /* IOV is const, so work on a private copy whose entries can be
         advanced past the data already received.  */
      struct iovec iov_buf[iovcnt];
      ssize_t r = ret;

      struct iovec *iovp = memcpy (iov_buf, iov, iovcnt * sizeof (*iov));
      do
        {
          /* Skip the entries that the last read filled completely.  */
          while (iovp->iov_len <= r)
            {
              r -= iovp->iov_len;
              --iovcnt;
              ++iovp;
            }
          /* Adjust the first partially filled entry so that the next
             read continues right behind the data already stored.  */
          iovp->iov_base = (char *) iovp->iov_base + r;
          iovp->iov_len -= r;
        again:
          r = TEMP_FAILURE_RETRY (__readv (fd, iovp, iovcnt));
          if (r <= 0)
            {
              if (__builtin_expect (r < 0 && errno == EAGAIN, 0)
                  /* The socket is still receiving data.  Wait a bit more.  */
                  && wait_on_socket (fd, EXTRA_RECEIVE_TIME) > 0)
                goto again;

              break;
            }
          ret += r;
        }
      while (ret < total);
      /* An error overrides the count of bytes received so far.  */
      if (r < 0)
        ret = r;
    }
  return ret;
}
 164
 165
 166 static int
 167 open_socket (request_type type, const char *key, size_t keylen)
 168 {
 169   int sock;
 170
 171   sock = __socket (PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
 172   if (sock < 0)
 173     return -1;
 174
 175   size_t real_sizeof_reqdata = sizeof (request_header) + keylen;
 176   struct
 177   {
 178     request_header req;
 179     char key[];
 180   } *reqdata = alloca (real_sizeof_reqdata);
 181
 182   struct sockaddr_un sun;
 183   sun.sun_family = AF_UNIX;
 184   strcpy (sun.sun_path, _PATH_NSCDSOCKET);
 185   if (__connect (sock, (struct sockaddr *) &sun, sizeof (sun)) < 0
 186       && errno != EINPROGRESS)
 187     goto out;
 188
 189   reqdata->req.version = NSCD_VERSION;
 190   reqdata->req.type = type;
 191   reqdata->req.key_len = keylen;
 192
 193   memcpy (reqdata->key, key, keylen);
 194
 195   bool first_try = true;
 196   struct __timespec64 tvend = { 0, 0 };
 197   while (1)
 198     {
 199 #ifndef MSG_NOSIGNAL
 200 # define MSG_NOSIGNAL 0
 201 #endif
 202       ssize_t wres = TEMP_FAILURE_RETRY (__send (sock, reqdata,
 203                                                  real_sizeof_reqdata,
 204                                                  MSG_NOSIGNAL));
 205       if (__glibc_likely (wres == (ssize_t) real_sizeof_reqdata))
 206         /* We managed to send the request.  */
 207         return sock;
 208
 209       if (wres != -1 || errno != EAGAIN)
 210         /* Something is really wrong, no chance to continue.  */
 211         break;
 212
 213       /* The daemon is busy wait for it.  */
 214       int to;
 215       struct __timespec64 now;
 216       __clock_gettime64 (CLOCK_REALTIME, &now);
 217       if (first_try)
 218         {
 219           tvend.tv_nsec = now.tv_nsec;
 220           tvend.tv_sec = now.tv_sec + 5;
 221           to = 5 * 1000;
 222           first_try = false;
 223         }
 224       else
 225         to = ((tvend.tv_sec - now.tv_sec) * 1000
 226               + (tvend.tv_nsec - now.tv_nsec) / 1000000);
 227
 228       struct pollfd fds[1];
 229       fds[0].fd = sock;
 230       fds[0].events = POLLOUT | POLLERR | POLLHUP;
 231       if (__poll (fds, 1, to) <= 0)
 232         /* The connection timed out or broke down.  */
 233         break;
 234
 235       /* We try to write again.  */
 236     }
 237
 238  out:
 239   __close_nocancel_nostatus (sock);
 240
 241   return -1;
 242 }
 243
 244
 245 void
 246 __nscd_unmap (struct mapped_database *mapped)
 247 {
 248   assert (mapped->counter == 0);
 249   __munmap ((void *) mapped->head, mapped->mapsize);
 250   free (mapped);
 251 }
 252
 253
/* Try to get a file descriptor for the shared memory segment
   containing the database and map it.

   A request of type TYPE for KEY is sent to nscd.  The reply is
   expected to echo the key, optionally followed by the size of the
   mapping, and to carry the file descriptor as SCM_RIGHTS ancillary
   data.  On success a freshly allocated descriptor with a use count of
   1 is returned, otherwise NO_MAPPING.

   In either case the result is stored in *MAPPEDP.  The reference held
   by the previous value of *MAPPEDP is dropped and that mapping is
   unmapped if this was the last reference.  When SCM_RIGHTS is
   available, errno is restored to the value it had on entry.  */
struct mapped_database *
__nscd_get_mapping (request_type type, const char *key,
                    struct mapped_database **mappedp)
{
  struct mapped_database *result = NO_MAPPING;
#ifdef SCM_RIGHTS
  const size_t keylen = strlen (key) + 1;
  int saved_errno = errno;

  int mapfd = -1;
  char resdata[keylen];

  /* Open a socket and send the request.  */
  int sock = open_socket (type, key, keylen);
  if (sock < 0)
    goto out;

  /* Room for the data sent along with the file descriptor.  We expect
     the key name back.  */
  uint64_t mapsize;
  struct iovec iov[2];
  iov[0].iov_base = resdata;
  iov[0].iov_len = keylen;
  iov[1].iov_base = &mapsize;
  iov[1].iov_len = sizeof (mapsize);

  /* Control buffer with room for exactly one file descriptor.  The
     union gives the byte array the alignment of a struct cmsghdr.  */
  union
  {
    struct cmsghdr hdr;
    char bytes[CMSG_SPACE (sizeof (int))];
  } buf;
  struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
                        .msg_control = buf.bytes,
                        .msg_controllen = sizeof (buf) };
  struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);

  cmsg->cmsg_level = SOL_SOCKET;
  cmsg->cmsg_type = SCM_RIGHTS;
  cmsg->cmsg_len = CMSG_LEN (sizeof (int));

  /* This access is well-aligned since BUF is correctly aligned for an
     int and CMSG_DATA preserves this alignment.  */
  memset (CMSG_DATA (cmsg), '\xff', sizeof (int));

  msg.msg_controllen = cmsg->cmsg_len;

  /* Give the daemon up to five seconds to answer.  */
  if (wait_on_socket (sock, 5 * 1000) <= 0)
    goto out_close2;

# ifndef MSG_CMSG_CLOEXEC
#  define MSG_CMSG_CLOEXEC 0
# endif
  ssize_t n = TEMP_FAILURE_RETRY (__recvmsg (sock, &msg, MSG_CMSG_CLOEXEC));

  /* Without a control message of the expected size there is no
     descriptor to use (and none to close).  */
  if (__builtin_expect (CMSG_FIRSTHDR (&msg) == NULL
                        || (CMSG_FIRSTHDR (&msg)->cmsg_len
                            != CMSG_LEN (sizeof (int))), 0))
    goto out_close2;

  int *ip = (void *) CMSG_DATA (cmsg);
  mapfd = *ip;

  /* The payload is either just the key or the key plus the size.  */
  if (__glibc_unlikely (n != keylen && n != keylen + sizeof (mapsize)))
    goto out_close;

  if (__glibc_unlikely (strcmp (resdata, key) != 0))
    goto out_close;

  if (__glibc_unlikely (n == keylen))
    {
      /* The size was not transmitted; take it from the file itself.  */
      struct __stat64_t64 st;
      if (__glibc_unlikely (__fstat64_time64 (mapfd, &st) != 0)
          || __builtin_expect (st.st_size < sizeof (struct database_pers_head),
                               0))
        goto out_close;

      mapsize = st.st_size;
    }

  /* The file is large enough, map it now.  */
  void *mapping = __mmap (NULL, mapsize, PROT_READ, MAP_SHARED, mapfd, 0);
  if (__glibc_likely (mapping != MAP_FAILED))
    {
      /* Check whether the database is correct and up-to-date.  */
      struct database_pers_head *head = mapping;

      if (__builtin_expect (head->version != DB_VERSION, 0)
          || __builtin_expect (head->header_size != sizeof (*head), 0)
          /* Catch some misconfiguration.  The server should catch
             them now but some older versions did not.  */
          || __builtin_expect (head->module == 0, 0)
          /* This really should not happen but who knows, maybe the update
             thread got stuck.  */
          || __builtin_expect (! head->nscd_certainly_running
                               && (head->timestamp + MAPPING_TIMEOUT
                                   < time_now ()), 0))
        {
        out_unmap:
          __munmap (mapping, mapsize);
          goto out_close;
        }

      /* Bytes needed for the header, the hash table and the data area.  */
      size_t size = (sizeof (*head) + roundup (head->module * sizeof (ref_t),
                                               ALIGN)
                     + head->data_size);

      if (__glibc_unlikely (mapsize < size))
        goto out_unmap;

      /* Allocate a record for the mapping.  */
      struct mapped_database *newp = malloc (sizeof (*newp));
      if (newp == NULL)
        /* Ugh, after all we went through the memory allocation failed.  */
        goto out_unmap;

      newp->head = mapping;
      newp->data = ((char *) mapping + head->header_size
                    + roundup (head->module * sizeof (ref_t), ALIGN));
      newp->mapsize = size;
      newp->datasize = head->data_size;
      /* Set counter to 1 to show it is usable.  */
      newp->counter = 1;

      result = newp;
    }

  /* The descriptors are closed on the success path as well.  */
 out_close:
  __close (mapfd);
 out_close2:
  __close (sock);
 out:
  __set_errno (saved_errno);
#endif  /* SCM_RIGHTS */

  /* Publish the result and drop the reference to the old mapping.  */
  struct mapped_database *oldval = *mappedp;
  *mappedp = result;

  if (oldval != NULL && atomic_fetch_add_relaxed (&oldval->counter, -1) == 1)
    __nscd_unmap (oldval);

  return result;
}
 398
 399 struct mapped_database *
 400 __nscd_get_map_ref (request_type type, const char *name,
 401                     volatile struct locked_map_ptr *mapptr, int *gc_cyclep)
 402 {
 403   struct mapped_database *cur = mapptr->mapped;
 404   if (cur == NO_MAPPING)
 405     return cur;
 406
 407   if (!__nscd_acquire_maplock (mapptr))
 408     return NO_MAPPING;
 409
 410   cur = mapptr->mapped;
 411
 412   if (__glibc_likely (cur != NO_MAPPING))
 413     {
 414       /* If not mapped or timestamp not updated, request new map.  */
 415       if (cur == NULL
 416           || (cur->head->nscd_certainly_running == 0
 417               && cur->head->timestamp + MAPPING_TIMEOUT < time_now ())
 418           || cur->head->data_size > cur->datasize)
 419         cur = __nscd_get_mapping (type, name,
 420                                   (struct mapped_database **) &mapptr->mapped);
 421
 422       if (__glibc_likely (cur != NO_MAPPING))
 423         {
 424           if (__builtin_expect (((*gc_cyclep = cur->head->gc_cycle) & 1) != 0,
 425                                 0))
 426             cur = NO_MAPPING;
 427           else
 428             atomic_fetch_add_relaxed (&cur->counter, 1);
 429         }
 430     }
 431
 432   mapptr->lock = 0;
 433
 434   return cur;
 435 }
 436
 437
/* Using sizeof (hashentry) is not always correct to determine the size of
   the data structure as found in the nscd cache.  The program could be
   a 64-bit process while nscd is a 32-bit process.  In this case
   sizeof (hashentry) would overestimate the size.  The following is
   the minimum size of such an entry, good enough for our tests here:
   everything up to and including a 32-bit value at the position of the
   DELLIST member.  */
#define MINIMUM_HASHENTRY_SIZE \
  (offsetof (struct hashentry, dellist) + sizeof (int32_t))
 445
/* Look up the entry of type TYPE with the KEYLEN bytes at KEY in the
   mapped database MAPPED.

   The matching hash chain is walked and the first entry is returned
   whose type, key length and key bytes match, whose data record is
   marked usable, and whose record (including DATALEN bytes of payload
   after the datahead header) lies within the mapped data area.
   Returns NULL if no such entry is found or if the data looks
   inconsistent.

   Don't return const struct datahead *, as even though the record
   is normally constant, it can change arbitrarily during nscd
   garbage collection.  */
struct datahead *
__nscd_cache_search (request_type type, const char *key, size_t keylen,
                     const struct mapped_database *mapped, size_t datalen)
{
  unsigned long int hash = __nss_hash (key, keylen) % mapped->head->module;
  size_t datasize = mapped->datasize;

  /* TRAIL follows WORK at half speed through the chain.  If the two
     ever meet the chain contains a cycle.  */
  ref_t trail = mapped->head->array[hash];
  trail = atomic_forced_read (trail);
  ref_t work = trail;
  /* Upper bound for the number of entries we are willing to visit.  */
  size_t loop_cnt = datasize / (MINIMUM_HASHENTRY_SIZE
                                + offsetof (struct datahead, data) / 2);
  int tick = 0;

  while (work != ENDREF && work + MINIMUM_HASHENTRY_SIZE <= datasize)
    {
      struct hashentry *here = (struct hashentry *) (mapped->data + work);
      ref_t here_key, here_packet;

#if !_STRING_ARCH_unaligned
      /* Although during garbage collection when moving struct hashentry
         records around we first copy from old to new location and then
         adjust pointer from previous hashentry to it, there is no barrier
         between those memory writes.  It is very unlikely to hit it,
         so check alignment only if a misaligned load can crash the
         application.  */
      if ((uintptr_t) here & (__alignof__ (*here) - 1))
        return NULL;
#endif

      /* Every offset read from the shared memory is checked against
         the size of the data area before it is used.  */
      if (type == here->type
          && keylen == here->len
          && (here_key = atomic_forced_read (here->key)) + keylen <= datasize
          && memcmp (key, mapped->data + here_key, keylen) == 0
          && ((here_packet = atomic_forced_read (here->packet))
              + sizeof (struct datahead) <= datasize))
        {
          /* The key matches; locate the data record it refers to.  */
          struct datahead *dh
            = (struct datahead *) (mapped->data + here_packet);

#if !_STRING_ARCH_unaligned
          if ((uintptr_t) dh & (__alignof__ (*dh) - 1))
            return NULL;
#endif

          /* See whether we must ignore the entry or whether something
             is wrong because garbage collection is in progress.  */
          if (dh->usable
              && here_packet + dh->allocsize <= datasize
              && (here_packet + offsetof (struct datahead, data) + datalen
                  <= datasize))
            return dh;
        }

      work = atomic_forced_read (here->next);
      /* Prevent endless loops.  This should never happen but perhaps
         the database got corrupted, accidentally or deliberately.  */
      if (work == trail || loop_cnt-- == 0)
        break;
      /* Advance TRAIL only on every second iteration.  */
      if (tick)
        {
          struct hashentry *trailelem;
          trailelem = (struct hashentry *) (mapped->data + trail);

#if !_STRING_ARCH_unaligned
          /* We have to redo the checks.  Maybe the data changed.  */
          if ((uintptr_t) trailelem & (__alignof__ (*trailelem) - 1))
            return NULL;
#endif

          if (trail + MINIMUM_HASHENTRY_SIZE > datasize)
            return NULL;

          trail = atomic_forced_read (trailelem->next);
        }
      tick = 1 - tick;
    }

  return NULL;
}
 530
 531
 532 /* Create a socket connected to a name. */
 533 int
 534 __nscd_open_socket (const char *key, size_t keylen, request_type type,
 535                     void *response, size_t responselen)
 536 {
 537   /* This should never happen and it is something the nscd daemon
 538      enforces, too.  He it helps to limit the amount of stack
 539      used.  */
 540   if (keylen > MAXKEYLEN)
 541     return -1;
 542
 543   int saved_errno = errno;
 544
 545   int sock = open_socket (type, key, keylen);
 546   if (sock >= 0)
 547     {
 548       /* Wait for data.  */
 549       if (wait_on_socket (sock, 5 * 1000) > 0)
 550         {
 551           ssize_t nbytes = TEMP_FAILURE_RETRY (__read (sock, response,
 552                                                        responselen));
 553           if (nbytes == (ssize_t) responselen)
 554             return sock;
 555         }
 556
 557       __close_nocancel_nostatus (sock);
 558     }
 559
 560   __set_errno (saved_errno);
 561
 562   return -1;
 563 }