nscd/nscd_helper.c

   1 /* Copyright (C) 1998-2015 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, see
  17    <http://www.gnu.org/licenses/>.  */
  18
  19 #include <assert.h>
  20 #include <errno.h>
  21 #include <fcntl.h>
  22 #include <stdbool.h>
  23 #include <stddef.h>
  24 #include <stdlib.h>
  25 #include <string.h>
  26 #include <time.h>
  27 #include <unistd.h>
  28 #include <stdint.h>
  29 #include <sys/mman.h>
  30 #include <sys/poll.h>
  31 #include <sys/socket.h>
  32 #include <sys/stat.h>
  33 #include <sys/time.h>
  34 #include <sys/uio.h>
  35 #include <sys/un.h>
  36 #include <not-cancel.h>
  37 #include <nis/rpcsvc/nis.h>
  38 #include <kernel-features.h>
  39
  40 #include "nscd-client.h"
  41
  42
  43 /* Extra time we wait if the socket is still receiving data.  This
  44    value is in milliseconds.  Note that the other side is nscd on the
  45    local machine and it is already transmitting data.  So the wait
  46    time need not be long.  */
  47 #define EXTRA_RECEIVE_TIME 200
  48
  49
  50 static int
  51 wait_on_socket (int sock, long int usectmo)
  52 {
  53   struct pollfd fds[1];
  54   fds[0].fd = sock;
  55   fds[0].events = POLLIN | POLLERR | POLLHUP;
  56   int n = __poll (fds, 1, usectmo);
  57   if (n == -1 && __builtin_expect (errno == EINTR, 0))
  58     {
  59       /* Handle the case where the poll() call is interrupted by a
  60          signal.  We cannot just use TEMP_FAILURE_RETRY since it might
  61          lead to infinite loops.  */
  62       struct timeval now;
  63       (void) __gettimeofday (&now, NULL);
  64       long int end = now.tv_sec * 1000 + usectmo + (now.tv_usec + 500) / 1000;
  65       long int timeout = usectmo;
  66       while (1)
  67         {
  68           n = __poll (fds, 1, timeout);
  69           if (n != -1 || errno != EINTR)
  70             break;
  71
  72           /* Recompute the timeout time.  */
  73           (void) __gettimeofday (&now, NULL);
  74           timeout = end - (now.tv_sec * 1000 + (now.tv_usec + 500) / 1000);
  75         }
  76     }
  77
  78   return n;
  79 }
  80
  81
  82 ssize_t
  83 __readall (int fd, void *buf, size_t len)
  84 {
  85   size_t n = len;
  86   ssize_t ret;
  87   do
  88     {
  89     again:
  90       ret = TEMP_FAILURE_RETRY (__read (fd, buf, n));
  91       if (ret <= 0)
  92         {
  93           if (__builtin_expect (ret < 0 && errno == EAGAIN, 0)
  94               /* The socket is still receiving data.  Wait a bit more.  */
  95               && wait_on_socket (fd, EXTRA_RECEIVE_TIME) > 0)
  96             goto again;
  97
  98           break;
  99         }
 100       buf = (char *) buf + ret;
 101       n -= ret;
 102     }
 103   while (n > 0);
 104   return ret < 0 ? ret : len - n;
 105 }
 106
 107
 108 ssize_t
 109 __readvall (int fd, const struct iovec *iov, int iovcnt)
 110 {
 111   ssize_t ret = TEMP_FAILURE_RETRY (__readv (fd, iov, iovcnt));
 112   if (ret <= 0)
 113     {
 114       if (__glibc_likely (ret == 0 || errno != EAGAIN))
 115         /* A genuine error or no data to read.  */
 116         return ret;
 117
 118       /* The data has not all yet been received.  Do as if we have not
 119          read anything yet.  */
 120       ret = 0;
 121     }
 122
 123   size_t total = 0;
 124   for (int i = 0; i < iovcnt; ++i)
 125     total += iov[i].iov_len;
 126
 127   if (ret < total)
 128     {
 129       struct iovec iov_buf[iovcnt];
 130       ssize_t r = ret;
 131
 132       struct iovec *iovp = memcpy (iov_buf, iov, iovcnt * sizeof (*iov));
 133       do
 134         {
 135           while (iovp->iov_len <= r)
 136             {
 137               r -= iovp->iov_len;
 138               --iovcnt;
 139               ++iovp;
 140             }
 141           iovp->iov_base = (char *) iovp->iov_base + r;
 142           iovp->iov_len -= r;
 143         again:
 144           r = TEMP_FAILURE_RETRY (__readv (fd, iovp, iovcnt));
 145           if (r <= 0)
 146             {
 147               if (__builtin_expect (r < 0 && errno == EAGAIN, 0)
 148                   /* The socket is still receiving data.  Wait a bit more.  */
 149                   && wait_on_socket (fd, EXTRA_RECEIVE_TIME) > 0)
 150                 goto again;
 151
 152               break;
 153             }
 154           ret += r;
 155         }
 156       while (ret < total);
 157       if (r < 0)
 158         ret = r;
 159     }
 160   return ret;
 161 }
 162
 163
 164 static int
 165 open_socket (request_type type, const char *key, size_t keylen)
 166 {
 167   int sock;
 168
 169 #ifdef SOCK_CLOEXEC
 170 # ifndef __ASSUME_SOCK_CLOEXEC
 171   if (__have_sock_cloexec >= 0)
 172 # endif
 173     {
 174       sock = __socket (PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
 175 # ifndef __ASSUME_SOCK_CLOEXEC
 176       if (__have_sock_cloexec == 0)
 177         __have_sock_cloexec = sock != -1 || errno != EINVAL ? 1 : -1;
 178 # endif
 179     }
 180 #endif
 181 #ifndef __ASSUME_SOCK_CLOEXEC
 182 # ifdef SOCK_CLOEXEC
 183   if (__have_sock_cloexec < 0)
 184 # endif
 185     sock = __socket (PF_UNIX, SOCK_STREAM, 0);
 186 #endif
 187   if (sock < 0)
 188     return -1;
 189
 190   size_t real_sizeof_reqdata = sizeof (request_header) + keylen;
 191   struct
 192   {
 193     request_header req;
 194     char key[];
 195   } *reqdata = alloca (real_sizeof_reqdata);
 196
 197 #ifndef __ASSUME_SOCK_CLOEXEC
 198 # ifdef SOCK_NONBLOCK
 199   if (__have_sock_cloexec < 0)
 200 # endif
 201     /* Make socket non-blocking.  */
 202     __fcntl (sock, F_SETFL, O_RDWR | O_NONBLOCK);
 203 #endif
 204
 205   struct sockaddr_un sun;
 206   sun.sun_family = AF_UNIX;
 207   strcpy (sun.sun_path, _PATH_NSCDSOCKET);
 208   if (__connect (sock, (struct sockaddr *) &sun, sizeof (sun)) < 0
 209       && errno != EINPROGRESS)
 210     goto out;
 211
 212   reqdata->req.version = NSCD_VERSION;
 213   reqdata->req.type = type;
 214   reqdata->req.key_len = keylen;
 215
 216   memcpy (reqdata->key, key, keylen);
 217
 218   bool first_try = true;
 219   struct timeval tvend;
 220   /* Fake initializing tvend.  */
 221   asm ("" : "=m" (tvend));
 222   while (1)
 223     {
 224 #ifndef MSG_NOSIGNAL
 225 # define MSG_NOSIGNAL 0
 226 #endif
 227       ssize_t wres = TEMP_FAILURE_RETRY (__send (sock, reqdata,
 228                                                  real_sizeof_reqdata,
 229                                                  MSG_NOSIGNAL));
 230       if (__glibc_likely (wres == (ssize_t) real_sizeof_reqdata))
 231         /* We managed to send the request.  */
 232         return sock;
 233
 234       if (wres != -1 || errno != EAGAIN)
 235         /* Something is really wrong, no chance to continue.  */
 236         break;
 237
 238       /* The daemon is busy wait for it.  */
 239       int to;
 240       struct timeval now;
 241       (void) __gettimeofday (&now, NULL);
 242       if (first_try)
 243         {
 244           tvend.tv_usec = now.tv_usec;
 245           tvend.tv_sec = now.tv_sec + 5;
 246           to = 5 * 1000;
 247           first_try = false;
 248         }
 249       else
 250         to = ((tvend.tv_sec - now.tv_sec) * 1000
 251               + (tvend.tv_usec - now.tv_usec) / 1000);
 252
 253       struct pollfd fds[1];
 254       fds[0].fd = sock;
 255       fds[0].events = POLLOUT | POLLERR | POLLHUP;
 256       if (__poll (fds, 1, to) <= 0)
 257         /* The connection timed out or broke down.  */
 258         break;
 259
 260       /* We try to write again.  */
 261     }
 262
 263  out:
 264   close_not_cancel_no_status (sock);
 265
 266   return -1;
 267 }
 268
 269
 270 void
 271 __nscd_unmap (struct mapped_database *mapped)
 272 {
 273   assert (mapped->counter == 0);
 274   __munmap ((void *) mapped->head, mapped->mapsize);
 275   free (mapped);
 276 }
 277
 278
 279 /* Try to get a file descriptor for the shared meory segment
 280    containing the database.  */
 281 struct mapped_database *
 282 __nscd_get_mapping (request_type type, const char *key,
 283                     struct mapped_database **mappedp)
 284 {
 285   struct mapped_database *result = NO_MAPPING;
 286 #ifdef SCM_RIGHTS
 287   const size_t keylen = strlen (key) + 1;
 288   int saved_errno = errno;
 289
 290   int mapfd = -1;
 291   char resdata[keylen];
 292
 293   /* Open a socket and send the request.  */
 294   int sock = open_socket (type, key, keylen);
 295   if (sock < 0)
 296     goto out;
 297
 298   /* Room for the data sent along with the file descriptor.  We expect
 299      the key name back.  */
 300   uint64_t mapsize;
 301   struct iovec iov[2];
 302   iov[0].iov_base = resdata;
 303   iov[0].iov_len = keylen;
 304   iov[1].iov_base = &mapsize;
 305   iov[1].iov_len = sizeof (mapsize);
 306
 307   union
 308   {
 309     struct cmsghdr hdr;
 310     char bytes[CMSG_SPACE (sizeof (int))];
 311   } buf;
 312   struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
 313                         .msg_control = buf.bytes,
 314                         .msg_controllen = sizeof (buf) };
 315   struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
 316
 317   cmsg->cmsg_level = SOL_SOCKET;
 318   cmsg->cmsg_type = SCM_RIGHTS;
 319   cmsg->cmsg_len = CMSG_LEN (sizeof (int));
 320
 321   /* This access is well-aligned since BUF is correctly aligned for an
 322      int and CMSG_DATA preserves this alignment.  */
 323   memset (CMSG_DATA (cmsg), '\xff', sizeof (int));
 324
 325   msg.msg_controllen = cmsg->cmsg_len;
 326
 327   if (wait_on_socket (sock, 5 * 1000) <= 0)
 328     goto out_close2;
 329
 330 # ifndef MSG_CMSG_CLOEXEC
 331 #  define MSG_CMSG_CLOEXEC 0
 332 # endif
 333   ssize_t n = TEMP_FAILURE_RETRY (__recvmsg (sock, &msg, MSG_CMSG_CLOEXEC));
 334
 335   if (__builtin_expect (CMSG_FIRSTHDR (&msg) == NULL
 336                         || (CMSG_FIRSTHDR (&msg)->cmsg_len
 337                             != CMSG_LEN (sizeof (int))), 0))
 338     goto out_close2;
 339
 340   int *ip = (void *) CMSG_DATA (cmsg);
 341   mapfd = *ip;
 342
 343   if (__glibc_unlikely (n != keylen && n != keylen + sizeof (mapsize)))
 344     goto out_close;
 345
 346   if (__glibc_unlikely (strcmp (resdata, key) != 0))
 347     goto out_close;
 348
 349   if (__glibc_unlikely (n == keylen))
 350     {
 351       struct stat64 st;
 352       if (__builtin_expect (fstat64 (mapfd, &st) != 0, 0)
 353           || __builtin_expect (st.st_size < sizeof (struct database_pers_head),
 354                                0))
 355         goto out_close;
 356
 357       mapsize = st.st_size;
 358     }
 359
 360   /* The file is large enough, map it now.  */
 361   void *mapping = __mmap (NULL, mapsize, PROT_READ, MAP_SHARED, mapfd, 0);
 362   if (__glibc_likely (mapping != MAP_FAILED))
 363     {
 364       /* Check whether the database is correct and up-to-date.  */
 365       struct database_pers_head *head = mapping;
 366
 367       if (__builtin_expect (head->version != DB_VERSION, 0)
 368           || __builtin_expect (head->header_size != sizeof (*head), 0)
 369           /* Catch some misconfiguration.  The server should catch
 370              them now but some older versions did not.  */
 371           || __builtin_expect (head->module == 0, 0)
 372           /* This really should not happen but who knows, maybe the update
 373              thread got stuck.  */
 374           || __builtin_expect (! head->nscd_certainly_running
 375                                && (head->timestamp + MAPPING_TIMEOUT
 376                                    < time (NULL)), 0))
 377         {
 378         out_unmap:
 379           __munmap (mapping, mapsize);
 380           goto out_close;
 381         }
 382
 383       size_t size = (sizeof (*head) + roundup (head->module * sizeof (ref_t),
 384                                                ALIGN)
 385                      + head->data_size);
 386
 387       if (__glibc_unlikely (mapsize < size))
 388         goto out_unmap;
 389
 390       /* Allocate a record for the mapping.  */
 391       struct mapped_database *newp = malloc (sizeof (*newp));
 392       if (newp == NULL)
 393         /* Ugh, after all we went through the memory allocation failed.  */
 394         goto out_unmap;
 395
 396       newp->head = mapping;
 397       newp->data = ((char *) mapping + head->header_size
 398                     + roundup (head->module * sizeof (ref_t), ALIGN));
 399       newp->mapsize = size;
 400       newp->datasize = head->data_size;
 401       /* Set counter to 1 to show it is usable.  */
 402       newp->counter = 1;
 403
 404       result = newp;
 405     }
 406
 407  out_close:
 408   __close (mapfd);
 409  out_close2:
 410   __close (sock);
 411  out:
 412   __set_errno (saved_errno);
 413 #endif  /* SCM_RIGHTS */
 414
 415   struct mapped_database *oldval = *mappedp;
 416   *mappedp = result;
 417
 418   if (oldval != NULL && atomic_decrement_val (&oldval->counter) == 0)
 419     __nscd_unmap (oldval);
 420
 421   return result;
 422 }
 423
 424 struct mapped_database *
 425 __nscd_get_map_ref (request_type type, const char *name,
 426                     volatile struct locked_map_ptr *mapptr, int *gc_cyclep)
 427 {
 428   struct mapped_database *cur = mapptr->mapped;
 429   if (cur == NO_MAPPING)
 430     return cur;
 431
 432   if (!__nscd_acquire_maplock (mapptr))
 433     return NO_MAPPING;
 434
 435   cur = mapptr->mapped;
 436
 437   if (__glibc_likely (cur != NO_MAPPING))
 438     {
 439       /* If not mapped or timestamp not updated, request new map.  */
 440       if (cur == NULL
 441           || (cur->head->nscd_certainly_running == 0
 442               && cur->head->timestamp + MAPPING_TIMEOUT < time (NULL))
 443           || cur->head->data_size > cur->datasize)
 444         cur = __nscd_get_mapping (type, name,
 445                                   (struct mapped_database **) &mapptr->mapped);
 446
 447       if (__glibc_likely (cur != NO_MAPPING))
 448         {
 449           if (__builtin_expect (((*gc_cyclep = cur->head->gc_cycle) & 1) != 0,
 450                                 0))
 451             cur = NO_MAPPING;
 452           else
 453             atomic_increment (&cur->counter);
 454         }
 455     }
 456
 457   mapptr->lock = 0;
 458
 459   return cur;
 460 }
 461
 462
 463 /* Using sizeof (hashentry) is not always correct to determine the size of
 464    the data structure as found in the nscd cache.  The program could be
 465    a 64-bit process and nscd could be a 32-bit process.  In this case
 466    sizeof (hashentry) would overestimate the size.  The following is
 467    the minimum size of such an entry, good enough for our tests here.  */
 468 #define MINIMUM_HASHENTRY_SIZE \
 469   (offsetof (struct hashentry, dellist) + sizeof (int32_t))
 470
 471
 472 /* Don't return const struct datahead *, as eventhough the record
 473    is normally constant, it can change arbitrarily during nscd
 474    garbage collection.  */
 475 struct datahead *
 476 __nscd_cache_search (request_type type, const char *key, size_t keylen,
 477                      const struct mapped_database *mapped, size_t datalen)
 478 {
 479   unsigned long int hash = __nis_hash (key, keylen) % mapped->head->module;
 480   size_t datasize = mapped->datasize;
 481
 482   ref_t trail = mapped->head->array[hash];
 483   trail = atomic_forced_read (trail);
 484   ref_t work = trail;
 485   size_t loop_cnt = datasize / (MINIMUM_HASHENTRY_SIZE
 486                                 + offsetof (struct datahead, data) / 2);
 487   int tick = 0;
 488
 489   while (work != ENDREF && work + MINIMUM_HASHENTRY_SIZE <= datasize)
 490     {
 491       struct hashentry *here = (struct hashentry *) (mapped->data + work);
 492       ref_t here_key, here_packet;
 493
 494 #if !_STRING_ARCH_unaligned
 495       /* Although during garbage collection when moving struct hashentry
 496          records around we first copy from old to new location and then
 497          adjust pointer from previous hashentry to it, there is no barrier
 498          between those memory writes.  It is very unlikely to hit it,
 499          so check alignment only if a misaligned load can crash the
 500          application.  */
 501       if ((uintptr_t) here & (__alignof__ (*here) - 1))
 502         return NULL;
 503 #endif
 504
 505       if (type == here->type
 506           && keylen == here->len
 507           && (here_key = atomic_forced_read (here->key)) + keylen <= datasize
 508           && memcmp (key, mapped->data + here_key, keylen) == 0
 509           && ((here_packet = atomic_forced_read (here->packet))
 510               + sizeof (struct datahead) <= datasize))
 511         {
 512           /* We found the entry.  Increment the appropriate counter.  */
 513           struct datahead *dh
 514             = (struct datahead *) (mapped->data + here_packet);
 515
 516 #if !_STRING_ARCH_unaligned
 517           if ((uintptr_t) dh & (__alignof__ (*dh) - 1))
 518             return NULL;
 519 #endif
 520
 521           /* See whether we must ignore the entry or whether something
 522              is wrong because garbage collection is in progress.  */
 523           if (dh->usable
 524               && here_packet + dh->allocsize <= datasize
 525               && (here_packet + offsetof (struct datahead, data) + datalen
 526                   <= datasize))
 527             return dh;
 528         }
 529
 530       work = atomic_forced_read (here->next);
 531       /* Prevent endless loops.  This should never happen but perhaps
 532          the database got corrupted, accidentally or deliberately.  */
 533       if (work == trail || loop_cnt-- == 0)
 534         break;
 535       if (tick)
 536         {
 537           struct hashentry *trailelem;
 538           trailelem = (struct hashentry *) (mapped->data + trail);
 539
 540 #if !_STRING_ARCH_unaligned
 541           /* We have to redo the checks.  Maybe the data changed.  */
 542           if ((uintptr_t) trailelem & (__alignof__ (*trailelem) - 1))
 543             return NULL;
 544 #endif
 545
 546           if (trail + MINIMUM_HASHENTRY_SIZE > datasize)
 547             return NULL;
 548
 549           trail = atomic_forced_read (trailelem->next);
 550         }
 551       tick = 1 - tick;
 552     }
 553
 554   return NULL;
 555 }
 556
 557
 558 /* Create a socket connected to a name. */
 559 int
 560 __nscd_open_socket (const char *key, size_t keylen, request_type type,
 561                     void *response, size_t responselen)
 562 {
 563   /* This should never happen and it is something the nscd daemon
 564      enforces, too.  He it helps to limit the amount of stack
 565      used.  */
 566   if (keylen > MAXKEYLEN)
 567     return -1;
 568
 569   int saved_errno = errno;
 570
 571   int sock = open_socket (type, key, keylen);
 572   if (sock >= 0)
 573     {
 574       /* Wait for data.  */
 575       if (wait_on_socket (sock, 5 * 1000) > 0)
 576         {
 577           ssize_t nbytes = TEMP_FAILURE_RETRY (__read (sock, response,
 578                                                        responselen));
 579           if (nbytes == (ssize_t) responselen)
 580             return sock;
 581         }
 582
 583       close_not_cancel_no_status (sock);
 584     }
 585
 586   __set_errno (saved_errno);
 587
 588   return -1;
 589 }