nscd/nscd_helper.c

   1 /* Copyright (C) 1998-2021 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, see
  17    <https://www.gnu.org/licenses/>.  */
  18
  19 #include <assert.h>
  20 #include <errno.h>
  21 #include <fcntl.h>
  22 #include <stdbool.h>
  23 #include <stddef.h>
  24 #include <stdlib.h>
  25 #include <string.h>
  26 #include <time.h>
  27 #include <unistd.h>
  28 #include <stdint.h>
  29 #include <sys/mman.h>
  30 #include <sys/param.h>
  31 #include <sys/poll.h>
  32 #include <sys/socket.h>
  33 #include <sys/stat.h>
  34 #include <sys/time.h>
  35 #include <sys/uio.h>
  36 #include <sys/un.h>
  37 #include <not-cancel.h>
  38 #include <kernel-features.h>
  39 #include <nss.h>
  40 #include <struct___timespec64.h>
  41
  42 #include "nscd-client.h"
  43
  44 /* Extra time we wait if the socket is still receiving data.  This
  45    value is in milliseconds.  Note that the other side is nscd on the
  46    local machine and it is already transmitting data.  So the wait
  47    time need not be long.  */
  48 #define EXTRA_RECEIVE_TIME 200
  49
  50
  51 static int
  52 wait_on_socket (int sock, long int usectmo)
  53 {
  54   struct pollfd fds[1];
  55   fds[0].fd = sock;
  56   fds[0].events = POLLIN | POLLERR | POLLHUP;
  57   int n = __poll (fds, 1, usectmo);
  58   if (n == -1 && __builtin_expect (errno == EINTR, 0))
  59     {
  60       /* Handle the case where the poll() call is interrupted by a
  61          signal.  We cannot just use TEMP_FAILURE_RETRY since it might
  62          lead to infinite loops.  */
  63       struct __timespec64 now;
  64       __clock_gettime64 (CLOCK_REALTIME, &now);
  65       int64_t end = (now.tv_sec * 1000 + usectmo
  66                      + (now.tv_nsec + 500000) / 1000000);
  67       long int timeout = usectmo;
  68       while (1)
  69         {
  70           n = __poll (fds, 1, timeout);
  71           if (n != -1 || errno != EINTR)
  72             break;
  73
  74           /* Recompute the timeout time.  */
  75           __clock_gettime64 (CLOCK_REALTIME, &now);
  76           timeout = end - ((now.tv_sec * 1000
  77                             + (now.tv_nsec + 500000) / 1000000));
  78         }
  79     }
  80
  81   return n;
  82 }
  83
  84
  85 ssize_t
  86 __readall (int fd, void *buf, size_t len)
  87 {
  88   size_t n = len;
  89   ssize_t ret;
  90   do
  91     {
  92     again:
  93       ret = TEMP_FAILURE_RETRY (__read (fd, buf, n));
  94       if (ret <= 0)
  95         {
  96           if (__builtin_expect (ret < 0 && errno == EAGAIN, 0)
  97               /* The socket is still receiving data.  Wait a bit more.  */
  98               && wait_on_socket (fd, EXTRA_RECEIVE_TIME) > 0)
  99             goto again;
 100
 101           break;
 102         }
 103       buf = (char *) buf + ret;
 104       n -= ret;
 105     }
 106   while (n > 0);
 107   return ret < 0 ? ret : len - n;
 108 }
 109
 110
 111 ssize_t
 112 __readvall (int fd, const struct iovec *iov, int iovcnt)
 113 {
 114   ssize_t ret = TEMP_FAILURE_RETRY (__readv (fd, iov, iovcnt));
 115   if (ret <= 0)
 116     {
 117       if (__glibc_likely (ret == 0 || errno != EAGAIN))
 118         /* A genuine error or no data to read.  */
 119         return ret;
 120
 121       /* The data has not all yet been received.  Do as if we have not
 122          read anything yet.  */
 123       ret = 0;
 124     }
 125
 126   size_t total = 0;
 127   for (int i = 0; i < iovcnt; ++i)
 128     total += iov[i].iov_len;
 129
 130   if (ret < total)
 131     {
 132       struct iovec iov_buf[iovcnt];
 133       ssize_t r = ret;
 134
 135       struct iovec *iovp = memcpy (iov_buf, iov, iovcnt * sizeof (*iov));
 136       do
 137         {
 138           while (iovp->iov_len <= r)
 139             {
 140               r -= iovp->iov_len;
 141               --iovcnt;
 142               ++iovp;
 143             }
 144           iovp->iov_base = (char *) iovp->iov_base + r;
 145           iovp->iov_len -= r;
 146         again:
 147           r = TEMP_FAILURE_RETRY (__readv (fd, iovp, iovcnt));
 148           if (r <= 0)
 149             {
 150               if (__builtin_expect (r < 0 && errno == EAGAIN, 0)
 151                   /* The socket is still receiving data.  Wait a bit more.  */
 152                   && wait_on_socket (fd, EXTRA_RECEIVE_TIME) > 0)
 153                 goto again;
 154
 155               break;
 156             }
 157           ret += r;
 158         }
 159       while (ret < total);
 160       if (r < 0)
 161         ret = r;
 162     }
 163   return ret;
 164 }
 165
 166
 167 static int
 168 open_socket (request_type type, const char *key, size_t keylen)
 169 {
 170   int sock;
 171
 172   sock = __socket (PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
 173   if (sock < 0)
 174     return -1;
 175
 176   size_t real_sizeof_reqdata = sizeof (request_header) + keylen;
 177   struct
 178   {
 179     request_header req;
 180     char key[];
 181   } *reqdata = alloca (real_sizeof_reqdata);
 182
 183   struct sockaddr_un sun;
 184   sun.sun_family = AF_UNIX;
 185   strcpy (sun.sun_path, _PATH_NSCDSOCKET);
 186   if (__connect (sock, (struct sockaddr *) &sun, sizeof (sun)) < 0
 187       && errno != EINPROGRESS)
 188     goto out;
 189
 190   reqdata->req.version = NSCD_VERSION;
 191   reqdata->req.type = type;
 192   reqdata->req.key_len = keylen;
 193
 194   memcpy (reqdata->key, key, keylen);
 195
 196   bool first_try = true;
 197   struct __timespec64 tvend = { 0, 0 };
 198   while (1)
 199     {
 200 #ifndef MSG_NOSIGNAL
 201 # define MSG_NOSIGNAL 0
 202 #endif
 203       ssize_t wres = TEMP_FAILURE_RETRY (__send (sock, reqdata,
 204                                                  real_sizeof_reqdata,
 205                                                  MSG_NOSIGNAL));
 206       if (__glibc_likely (wres == (ssize_t) real_sizeof_reqdata))
 207         /* We managed to send the request.  */
 208         return sock;
 209
 210       if (wres != -1 || errno != EAGAIN)
 211         /* Something is really wrong, no chance to continue.  */
 212         break;
 213
 214       /* The daemon is busy wait for it.  */
 215       int to;
 216       struct __timespec64 now;
 217       __clock_gettime64 (CLOCK_REALTIME, &now);
 218       if (first_try)
 219         {
 220           tvend.tv_nsec = now.tv_nsec;
 221           tvend.tv_sec = now.tv_sec + 5;
 222           to = 5 * 1000;
 223           first_try = false;
 224         }
 225       else
 226         to = ((tvend.tv_sec - now.tv_sec) * 1000
 227               + (tvend.tv_nsec - now.tv_nsec) / 1000000);
 228
 229       struct pollfd fds[1];
 230       fds[0].fd = sock;
 231       fds[0].events = POLLOUT | POLLERR | POLLHUP;
 232       if (__poll (fds, 1, to) <= 0)
 233         /* The connection timed out or broke down.  */
 234         break;
 235
 236       /* We try to write again.  */
 237     }
 238
 239  out:
 240   __close_nocancel_nostatus (sock);
 241
 242   return -1;
 243 }
 244
 245
 246 void
 247 __nscd_unmap (struct mapped_database *mapped)
 248 {
 249   assert (mapped->counter == 0);
 250   __munmap ((void *) mapped->head, mapped->mapsize);
 251   free (mapped);
 252 }
 253
 254
 255 /* Try to get a file descriptor for the shared meory segment
 256    containing the database.  */
 257 struct mapped_database *
 258 __nscd_get_mapping (request_type type, const char *key,
 259                     struct mapped_database **mappedp)
 260 {
 261   struct mapped_database *result = NO_MAPPING;
 262 #ifdef SCM_RIGHTS
 263   const size_t keylen = strlen (key) + 1;
 264   int saved_errno = errno;
 265
 266   int mapfd = -1;
 267   char resdata[keylen];
 268
 269   /* Open a socket and send the request.  */
 270   int sock = open_socket (type, key, keylen);
 271   if (sock < 0)
 272     goto out;
 273
 274   /* Room for the data sent along with the file descriptor.  We expect
 275      the key name back.  */
 276   uint64_t mapsize;
 277   struct iovec iov[2];
 278   iov[0].iov_base = resdata;
 279   iov[0].iov_len = keylen;
 280   iov[1].iov_base = &mapsize;
 281   iov[1].iov_len = sizeof (mapsize);
 282
 283   union
 284   {
 285     struct cmsghdr hdr;
 286     char bytes[CMSG_SPACE (sizeof (int))];
 287   } buf;
 288   struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
 289                         .msg_control = buf.bytes,
 290                         .msg_controllen = sizeof (buf) };
 291   struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
 292
 293   cmsg->cmsg_level = SOL_SOCKET;
 294   cmsg->cmsg_type = SCM_RIGHTS;
 295   cmsg->cmsg_len = CMSG_LEN (sizeof (int));
 296
 297   /* This access is well-aligned since BUF is correctly aligned for an
 298      int and CMSG_DATA preserves this alignment.  */
 299   memset (CMSG_DATA (cmsg), '\xff', sizeof (int));
 300
 301   msg.msg_controllen = cmsg->cmsg_len;
 302
 303   if (wait_on_socket (sock, 5 * 1000) <= 0)
 304     goto out_close2;
 305
 306 # ifndef MSG_CMSG_CLOEXEC
 307 #  define MSG_CMSG_CLOEXEC 0
 308 # endif
 309   ssize_t n = TEMP_FAILURE_RETRY (__recvmsg (sock, &msg, MSG_CMSG_CLOEXEC));
 310
 311   if (__builtin_expect (CMSG_FIRSTHDR (&msg) == NULL
 312                         || (CMSG_FIRSTHDR (&msg)->cmsg_len
 313                             != CMSG_LEN (sizeof (int))), 0))
 314     goto out_close2;
 315
 316   int *ip = (void *) CMSG_DATA (cmsg);
 317   mapfd = *ip;
 318
 319   if (__glibc_unlikely (n != keylen && n != keylen + sizeof (mapsize)))
 320     goto out_close;
 321
 322   if (__glibc_unlikely (strcmp (resdata, key) != 0))
 323     goto out_close;
 324
 325   if (__glibc_unlikely (n == keylen))
 326     {
 327       struct stat64 st;
 328       if (__builtin_expect (__fstat64 (mapfd, &st) != 0, 0)
 329           || __builtin_expect (st.st_size < sizeof (struct database_pers_head),
 330                                0))
 331         goto out_close;
 332
 333       mapsize = st.st_size;
 334     }
 335
 336   /* The file is large enough, map it now.  */
 337   void *mapping = __mmap (NULL, mapsize, PROT_READ, MAP_SHARED, mapfd, 0);
 338   if (__glibc_likely (mapping != MAP_FAILED))
 339     {
 340       /* Check whether the database is correct and up-to-date.  */
 341       struct database_pers_head *head = mapping;
 342
 343       if (__builtin_expect (head->version != DB_VERSION, 0)
 344           || __builtin_expect (head->header_size != sizeof (*head), 0)
 345           /* Catch some misconfiguration.  The server should catch
 346              them now but some older versions did not.  */
 347           || __builtin_expect (head->module == 0, 0)
 348           /* This really should not happen but who knows, maybe the update
 349              thread got stuck.  */
 350           || __builtin_expect (! head->nscd_certainly_running
 351                                && (head->timestamp + MAPPING_TIMEOUT
 352                                    < time_now ()), 0))
 353         {
 354         out_unmap:
 355           __munmap (mapping, mapsize);
 356           goto out_close;
 357         }
 358
 359       size_t size = (sizeof (*head) + roundup (head->module * sizeof (ref_t),
 360                                                ALIGN)
 361                      + head->data_size);
 362
 363       if (__glibc_unlikely (mapsize < size))
 364         goto out_unmap;
 365
 366       /* Allocate a record for the mapping.  */
 367       struct mapped_database *newp = malloc (sizeof (*newp));
 368       if (newp == NULL)
 369         /* Ugh, after all we went through the memory allocation failed.  */
 370         goto out_unmap;
 371
 372       newp->head = mapping;
 373       newp->data = ((char *) mapping + head->header_size
 374                     + roundup (head->module * sizeof (ref_t), ALIGN));
 375       newp->mapsize = size;
 376       newp->datasize = head->data_size;
 377       /* Set counter to 1 to show it is usable.  */
 378       newp->counter = 1;
 379
 380       result = newp;
 381     }
 382
 383  out_close:
 384   __close (mapfd);
 385  out_close2:
 386   __close (sock);
 387  out:
 388   __set_errno (saved_errno);
 389 #endif  /* SCM_RIGHTS */
 390
 391   struct mapped_database *oldval = *mappedp;
 392   *mappedp = result;
 393
 394   if (oldval != NULL && atomic_decrement_val (&oldval->counter) == 0)
 395     __nscd_unmap (oldval);
 396
 397   return result;
 398 }
 399
 400 struct mapped_database *
 401 __nscd_get_map_ref (request_type type, const char *name,
 402                     volatile struct locked_map_ptr *mapptr, int *gc_cyclep)
 403 {
 404   struct mapped_database *cur = mapptr->mapped;
 405   if (cur == NO_MAPPING)
 406     return cur;
 407
 408   if (!__nscd_acquire_maplock (mapptr))
 409     return NO_MAPPING;
 410
 411   cur = mapptr->mapped;
 412
 413   if (__glibc_likely (cur != NO_MAPPING))
 414     {
 415       /* If not mapped or timestamp not updated, request new map.  */
 416       if (cur == NULL
 417           || (cur->head->nscd_certainly_running == 0
 418               && cur->head->timestamp + MAPPING_TIMEOUT < time_now ())
 419           || cur->head->data_size > cur->datasize)
 420         cur = __nscd_get_mapping (type, name,
 421                                   (struct mapped_database **) &mapptr->mapped);
 422
 423       if (__glibc_likely (cur != NO_MAPPING))
 424         {
 425           if (__builtin_expect (((*gc_cyclep = cur->head->gc_cycle) & 1) != 0,
 426                                 0))
 427             cur = NO_MAPPING;
 428           else
 429             atomic_increment (&cur->counter);
 430         }
 431     }
 432
 433   mapptr->lock = 0;
 434
 435   return cur;
 436 }
 437
 438
 439 /* Using sizeof (hashentry) is not always correct to determine the size of
 440    the data structure as found in the nscd cache.  The program could be
 441    a 64-bit process and nscd could be a 32-bit process.  In this case
 442    sizeof (hashentry) would overestimate the size.  The following is
 443    the minimum size of such an entry, good enough for our tests here.  */
 444 #define MINIMUM_HASHENTRY_SIZE \
 445   (offsetof (struct hashentry, dellist) + sizeof (int32_t))
 446
 447 /* Don't return const struct datahead *, as eventhough the record
 448    is normally constant, it can change arbitrarily during nscd
 449    garbage collection.  */
 450 struct datahead *
 451 __nscd_cache_search (request_type type, const char *key, size_t keylen,
 452                      const struct mapped_database *mapped, size_t datalen)
 453 {
 454   unsigned long int hash = __nss_hash (key, keylen) % mapped->head->module;
 455   size_t datasize = mapped->datasize;
 456
 457   ref_t trail = mapped->head->array[hash];
 458   trail = atomic_forced_read (trail);
 459   ref_t work = trail;
 460   size_t loop_cnt = datasize / (MINIMUM_HASHENTRY_SIZE
 461                                 + offsetof (struct datahead, data) / 2);
 462   int tick = 0;
 463
 464   while (work != ENDREF && work + MINIMUM_HASHENTRY_SIZE <= datasize)
 465     {
 466       struct hashentry *here = (struct hashentry *) (mapped->data + work);
 467       ref_t here_key, here_packet;
 468
 469 #if !_STRING_ARCH_unaligned
 470       /* Although during garbage collection when moving struct hashentry
 471          records around we first copy from old to new location and then
 472          adjust pointer from previous hashentry to it, there is no barrier
 473          between those memory writes.  It is very unlikely to hit it,
 474          so check alignment only if a misaligned load can crash the
 475          application.  */
 476       if ((uintptr_t) here & (__alignof__ (*here) - 1))
 477         return NULL;
 478 #endif
 479
 480       if (type == here->type
 481           && keylen == here->len
 482           && (here_key = atomic_forced_read (here->key)) + keylen <= datasize
 483           && memcmp (key, mapped->data + here_key, keylen) == 0
 484           && ((here_packet = atomic_forced_read (here->packet))
 485               + sizeof (struct datahead) <= datasize))
 486         {
 487           /* We found the entry.  Increment the appropriate counter.  */
 488           struct datahead *dh
 489             = (struct datahead *) (mapped->data + here_packet);
 490
 491 #if !_STRING_ARCH_unaligned
 492           if ((uintptr_t) dh & (__alignof__ (*dh) - 1))
 493             return NULL;
 494 #endif
 495
 496           /* See whether we must ignore the entry or whether something
 497              is wrong because garbage collection is in progress.  */
 498           if (dh->usable
 499               && here_packet + dh->allocsize <= datasize
 500               && (here_packet + offsetof (struct datahead, data) + datalen
 501                   <= datasize))
 502             return dh;
 503         }
 504
 505       work = atomic_forced_read (here->next);
 506       /* Prevent endless loops.  This should never happen but perhaps
 507          the database got corrupted, accidentally or deliberately.  */
 508       if (work == trail || loop_cnt-- == 0)
 509         break;
 510       if (tick)
 511         {
 512           struct hashentry *trailelem;
 513           trailelem = (struct hashentry *) (mapped->data + trail);
 514
 515 #if !_STRING_ARCH_unaligned
 516           /* We have to redo the checks.  Maybe the data changed.  */
 517           if ((uintptr_t) trailelem & (__alignof__ (*trailelem) - 1))
 518             return NULL;
 519 #endif
 520
 521           if (trail + MINIMUM_HASHENTRY_SIZE > datasize)
 522             return NULL;
 523
 524           trail = atomic_forced_read (trailelem->next);
 525         }
 526       tick = 1 - tick;
 527     }
 528
 529   return NULL;
 530 }
 531
 532
 533 /* Create a socket connected to a name. */
 534 int
 535 __nscd_open_socket (const char *key, size_t keylen, request_type type,
 536                     void *response, size_t responselen)
 537 {
 538   /* This should never happen and it is something the nscd daemon
 539      enforces, too.  He it helps to limit the amount of stack
 540      used.  */
 541   if (keylen > MAXKEYLEN)
 542     return -1;
 543
 544   int saved_errno = errno;
 545
 546   int sock = open_socket (type, key, keylen);
 547   if (sock >= 0)
 548     {
 549       /* Wait for data.  */
 550       if (wait_on_socket (sock, 5 * 1000) > 0)
 551         {
 552           ssize_t nbytes = TEMP_FAILURE_RETRY (__read (sock, response,
 553                                                        responselen));
 554           if (nbytes == (ssize_t) responselen)
 555             return sock;
 556         }
 557
 558       __close_nocancel_nostatus (sock);
 559     }
 560
 561   __set_errno (saved_errno);
 562
 563   return -1;
 564 }