nscd/connections.c

   1 /* Inner loops of cache daemon.
   2    Copyright (C) 1998-2018 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
   5
   6    This program is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published
   8    by the Free Software Foundation; version 2 of the License, or
   9    (at your option) any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program; if not, see <http://www.gnu.org/licenses/>.  */
  18
  19 #include <alloca.h>
  20 #include <assert.h>
  21 #include <atomic.h>
  22 #include <error.h>
  23 #include <errno.h>
  24 #include <fcntl.h>
  25 #include <grp.h>
  26 #include <ifaddrs.h>
  27 #include <libintl.h>
  28 #include <pthread.h>
  29 #include <pwd.h>
  30 #include <resolv.h>
  31 #include <stdio.h>
  32 #include <stdlib.h>
  33 #include <unistd.h>
  34 #include <stdint.h>
  35 #include <arpa/inet.h>
  36 #ifdef HAVE_NETLINK
  37 # include <linux/netlink.h>
  38 # include <linux/rtnetlink.h>
  39 #endif
  40 #ifdef HAVE_EPOLL
  41 # include <sys/epoll.h>
  42 #endif
  43 #ifdef HAVE_INOTIFY
  44 # include <sys/inotify.h>
  45 #endif
  46 #include <sys/mman.h>
  47 #include <sys/param.h>
  48 #include <sys/poll.h>
  49 #include <sys/socket.h>
  50 #include <sys/stat.h>
  51 #include <sys/un.h>
  52
  53 #include "nscd.h"
  54 #include "dbg_log.h"
  55 #include "selinux.h"
  56 #include <resolv/resolv.h>
  57
  58 #include <kernel-features.h>
  59 #include <libc-diag.h>
  60
  61
/* Support to run nscd as an unprivileged user */

/* Name of the user to run as.  When this is non-NULL, nscd_init calls
   begin_drop_privileges before it sets up the databases and socket and
   finish_drop_privileges once setup is complete.  */
const char *server_user;
/* NOTE(review): presumably the uid/gid looked up for SERVER_USER; they are
   not assigned in this part of the file -- confirm against
   begin_drop_privileges.  */
static uid_t server_uid;
static gid_t server_gid;
/* NOTE(review): presumably the user (and its uid) allowed to query
   statistics; not used in this part of the file -- confirm.  */
const char *stat_user;
uid_t stat_uid;
/* NOTE(review): presumably the supplementary groups of SERVER_USER and
   their count; filled in outside this part of the file -- confirm.  */
static gid_t *server_groups;
/* Fallback for systems whose headers do not define NGROUPS.  */
#ifndef NGROUPS
# define NGROUPS 32
#endif
static int server_ngroups;

/* Thread attributes shared within this file; not used in this part of
   the file.  */
static pthread_attr_t attr;

/* The two halves of dropping privileges; see the calls in nscd_init.  */
static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
  78
  79 /* Map request type to a string.  */
  80 const char *const serv2str[LASTREQ] =
  81 {
  82   [GETPWBYNAME] = "GETPWBYNAME",
  83   [GETPWBYUID] = "GETPWBYUID",
  84   [GETGRBYNAME] = "GETGRBYNAME",
  85   [GETGRBYGID] = "GETGRBYGID",
  86   [GETHOSTBYNAME] = "GETHOSTBYNAME",
  87   [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
  88   [GETHOSTBYADDR] = "GETHOSTBYADDR",
  89   [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
  90   [SHUTDOWN] = "SHUTDOWN",
  91   [GETSTAT] = "GETSTAT",
  92   [INVALIDATE] = "INVALIDATE",
  93   [GETFDPW] = "GETFDPW",
  94   [GETFDGR] = "GETFDGR",
  95   [GETFDHST] = "GETFDHST",
  96   [GETAI] = "GETAI",
  97   [INITGROUPS] = "INITGROUPS",
  98   [GETSERVBYNAME] = "GETSERVBYNAME",
  99   [GETSERVBYPORT] = "GETSERVBYPORT",
 100   [GETFDSERV] = "GETFDSERV",
 101   [GETNETGRENT] = "GETNETGRENT",
 102   [INNETGR] = "INNETGR",
 103   [GETFDNETGR] = "GETFDNETGR"
 104 };
 105
 106 #ifdef PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
 107 # define RWLOCK_INITIALIZER PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
 108 #else
 109 # define RWLOCK_INITIALIZER PTHREAD_RWLOCK_INITIALIZER
 110 #endif
 111
 112 /* The control data structures for the services.  */
 113 struct database_dyn dbs[lastdb] =
 114 {
 115   [pwddb] = {
 116     .lock = RWLOCK_INITIALIZER,
 117     .prune_lock = PTHREAD_MUTEX_INITIALIZER,
 118     .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
 119     .enabled = 0,
 120     .check_file = 1,
 121     .persistent = 0,
 122     .propagate = 1,
 123     .shared = 0,
 124     .max_db_size = DEFAULT_MAX_DB_SIZE,
 125     .suggested_module = DEFAULT_SUGGESTED_MODULE,
 126     .db_filename = _PATH_NSCD_PASSWD_DB,
 127     .disabled_iov = &pwd_iov_disabled,
 128     .postimeout = 3600,
 129     .negtimeout = 20,
 130     .wr_fd = -1,
 131     .ro_fd = -1,
 132     .mmap_used = false
 133   },
 134   [grpdb] = {
 135     .lock = RWLOCK_INITIALIZER,
 136     .prune_lock = PTHREAD_MUTEX_INITIALIZER,
 137     .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
 138     .enabled = 0,
 139     .check_file = 1,
 140     .persistent = 0,
 141     .propagate = 1,
 142     .shared = 0,
 143     .max_db_size = DEFAULT_MAX_DB_SIZE,
 144     .suggested_module = DEFAULT_SUGGESTED_MODULE,
 145     .db_filename = _PATH_NSCD_GROUP_DB,
 146     .disabled_iov = &grp_iov_disabled,
 147     .postimeout = 3600,
 148     .negtimeout = 60,
 149     .wr_fd = -1,
 150     .ro_fd = -1,
 151     .mmap_used = false
 152   },
 153   [hstdb] = {
 154     .lock = RWLOCK_INITIALIZER,
 155     .prune_lock = PTHREAD_MUTEX_INITIALIZER,
 156     .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
 157     .enabled = 0,
 158     .check_file = 1,
 159     .persistent = 0,
 160     .propagate = 0,             /* Not used.  */
 161     .shared = 0,
 162     .max_db_size = DEFAULT_MAX_DB_SIZE,
 163     .suggested_module = DEFAULT_SUGGESTED_MODULE,
 164     .db_filename = _PATH_NSCD_HOSTS_DB,
 165     .disabled_iov = &hst_iov_disabled,
 166     .postimeout = 3600,
 167     .negtimeout = 20,
 168     .wr_fd = -1,
 169     .ro_fd = -1,
 170     .mmap_used = false
 171   },
 172   [servdb] = {
 173     .lock = RWLOCK_INITIALIZER,
 174     .prune_lock = PTHREAD_MUTEX_INITIALIZER,
 175     .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
 176     .enabled = 0,
 177     .check_file = 1,
 178     .persistent = 0,
 179     .propagate = 0,             /* Not used.  */
 180     .shared = 0,
 181     .max_db_size = DEFAULT_MAX_DB_SIZE,
 182     .suggested_module = DEFAULT_SUGGESTED_MODULE,
 183     .db_filename = _PATH_NSCD_SERVICES_DB,
 184     .disabled_iov = &serv_iov_disabled,
 185     .postimeout = 28800,
 186     .negtimeout = 20,
 187     .wr_fd = -1,
 188     .ro_fd = -1,
 189     .mmap_used = false
 190   },
 191   [netgrdb] = {
 192     .lock = RWLOCK_INITIALIZER,
 193     .prune_lock = PTHREAD_MUTEX_INITIALIZER,
 194     .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
 195     .enabled = 0,
 196     .check_file = 1,
 197     .persistent = 0,
 198     .propagate = 0,             /* Not used.  */
 199     .shared = 0,
 200     .max_db_size = DEFAULT_MAX_DB_SIZE,
 201     .suggested_module = DEFAULT_SUGGESTED_MODULE,
 202     .db_filename = _PATH_NSCD_NETGROUP_DB,
 203     .disabled_iov = &netgroup_iov_disabled,
 204     .postimeout = 28800,
 205     .negtimeout = 20,
 206     .wr_fd = -1,
 207     .ro_fd = -1,
 208     .mmap_used = false
 209   }
 210 };
 211
 212
 213 /* Mapping of request type to database.  */
 214 static struct
 215 {
 216   bool data_request;
 217   struct database_dyn *db;
 218 } const reqinfo[LASTREQ] =
 219 {
 220   [GETPWBYNAME] = { true, &dbs[pwddb] },
 221   [GETPWBYUID] = { true, &dbs[pwddb] },
 222   [GETGRBYNAME] = { true, &dbs[grpdb] },
 223   [GETGRBYGID] = { true, &dbs[grpdb] },
 224   [GETHOSTBYNAME] = { true, &dbs[hstdb] },
 225   [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
 226   [GETHOSTBYADDR] = { true, &dbs[hstdb] },
 227   [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
 228   [SHUTDOWN] = { false, NULL },
 229   [GETSTAT] = { false, NULL },
 230   [SHUTDOWN] = { false, NULL },
 231   [GETFDPW] = { false, &dbs[pwddb] },
 232   [GETFDGR] = { false, &dbs[grpdb] },
 233   [GETFDHST] = { false, &dbs[hstdb] },
 234   [GETAI] = { true, &dbs[hstdb] },
 235   [INITGROUPS] = { true, &dbs[grpdb] },
 236   [GETSERVBYNAME] = { true, &dbs[servdb] },
 237   [GETSERVBYPORT] = { true, &dbs[servdb] },
 238   [GETFDSERV] = { false, &dbs[servdb] },
 239   [GETNETGRENT] = { true, &dbs[netgrdb] },
 240   [INNETGR] = { true, &dbs[netgrdb] },
 241   [GETFDNETGR] = { false, &dbs[netgrdb] }
 242 };
 243
 244
/* Initial number of threads to use.  -1 means no value was configured;
   nscd_init then substitutes its default of 4.  */
int nthreads = -1;
/* Maximum number of threads to use.  */
int max_nthreads = 32;

/* Socket for incoming connections.  nscd_init creates it, binds it to
   _PATH_NSCDSOCKET and puts it into listening state.  */
static int sock;

#ifdef HAVE_INOTIFY
/* Inotify descriptor.  install_watches registers its watches on it.  */
int inotify_fd = -1;
#endif

#ifdef HAVE_NETLINK
/* Descriptor for netlink status updates.  Stays -1 unless nscd_init
   manages to open and bind the netlink socket.  */
static int nl_status_fd = -1;
#endif

/* Number of times clients had to wait.  */
unsigned long int client_queued;
 265
 266
 267 ssize_t
 268 writeall (int fd, const void *buf, size_t len)
 269 {
 270   size_t n = len;
 271   ssize_t ret;
 272   do
 273     {
 274       ret = TEMP_FAILURE_RETRY (send (fd, buf, n, MSG_NOSIGNAL));
 275       if (ret <= 0)
 276         break;
 277       buf = (const char *) buf + ret;
 278       n -= ret;
 279     }
 280   while (n > 0);
 281   return ret < 0 ? ret : len - n;
 282 }
 283
 284
 285 enum usekey
 286   {
 287     use_not = 0,
 288     /* The following three are not really used, they are symbolic constants.  */
 289     use_first = 16,
 290     use_begin = 32,
 291     use_end = 64,
 292
 293     use_he = 1,
 294     use_he_begin = use_he | use_begin,
 295     use_he_end = use_he | use_end,
 296     use_data = 3,
 297     use_data_begin = use_data | use_begin,
 298     use_data_end = use_data | use_end,
 299     use_data_first = use_data_begin | use_first
 300   };
 301
 302
 303 static int
 304 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
 305            enum usekey use, ref_t start, size_t len)
 306 {
 307   assert (len >= 2);
 308
 309   if (start > first_free || start + len > first_free
 310       || (start & BLOCK_ALIGN_M1))
 311     return 0;
 312
 313   if (usemap[start] == use_not)
 314     {
 315       /* Add the start marker.  */
 316       usemap[start] = use | use_begin;
 317       use &= ~use_first;
 318
 319       while (--len > 0)
 320         if (usemap[++start] != use_not)
 321           return 0;
 322         else
 323           usemap[start] = use;
 324
 325       /* Add the end marker.  */
 326       usemap[start] = use | use_end;
 327     }
 328   else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
 329     {
 330       /* Hash entries can't be shared.  */
 331       if (use == use_he)
 332         return 0;
 333
 334       usemap[start] |= (use & use_first);
 335       use &= ~use_first;
 336
 337       while (--len > 1)
 338         if (usemap[++start] != use)
 339           return 0;
 340
 341       if (usemap[++start] != (use | use_end))
 342         return 0;
 343     }
 344   else
 345     /* Points to a wrong object or somewhere in the middle.  */
 346     return 0;
 347
 348   return 1;
 349 }
 350
 351
 352 /* Verify data in persistent database.  */
 353 static int
 354 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
 355 {
 356   assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb
 357           || dbnr == netgrdb);
 358
 359   time_t now = time (NULL);
 360
 361   struct database_pers_head *head = mem;
 362   struct database_pers_head head_copy = *head;
 363
 364   /* Check that the header that was read matches the head in the database.  */
 365   if (memcmp (head, readhead, sizeof (*head)) != 0)
 366     return 0;
 367
 368   /* First some easy tests: make sure the database header is sane.  */
 369   if (head->version != DB_VERSION
 370       || head->header_size != sizeof (*head)
 371       /* We allow a timestamp to be one hour ahead of the current time.
 372          This should cover daylight saving time changes.  */
 373       || head->timestamp > now + 60 * 60 + 60
 374       || (head->gc_cycle & 1)
 375       || head->module == 0
 376       || (size_t) head->module > INT32_MAX / sizeof (ref_t)
 377       || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
 378       || head->first_free < 0
 379       || head->first_free > head->data_size
 380       || (head->first_free & BLOCK_ALIGN_M1) != 0
 381       || head->maxnentries < 0
 382       || head->maxnsearched < 0)
 383     return 0;
 384
 385   uint8_t *usemap = calloc (head->first_free, 1);
 386   if (usemap == NULL)
 387     return 0;
 388
 389   const char *data = (char *) &head->array[roundup (head->module,
 390                                                     ALIGN / sizeof (ref_t))];
 391
 392   nscd_ssize_t he_cnt = 0;
 393   for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
 394     {
 395       ref_t trail = head->array[cnt];
 396       ref_t work = trail;
 397       int tick = 0;
 398
 399       while (work != ENDREF)
 400         {
 401           if (! check_use (data, head->first_free, usemap, use_he, work,
 402                            sizeof (struct hashentry)))
 403             goto fail;
 404
 405           /* Now we know we can dereference the record.  */
 406           struct hashentry *here = (struct hashentry *) (data + work);
 407
 408           ++he_cnt;
 409
 410           /* Make sure the record is for this type of service.  */
 411           if (here->type >= LASTREQ
 412               || reqinfo[here->type].db != &dbs[dbnr])
 413             goto fail;
 414
 415           /* Validate boolean field value.  */
 416           if (here->first != false && here->first != true)
 417             goto fail;
 418
 419           if (here->len < 0)
 420             goto fail;
 421
 422           /* Now the data.  */
 423           if (here->packet < 0
 424               || here->packet > head->first_free
 425               || here->packet + sizeof (struct datahead) > head->first_free)
 426             goto fail;
 427
 428           struct datahead *dh = (struct datahead *) (data + here->packet);
 429
 430           if (! check_use (data, head->first_free, usemap,
 431                            use_data | (here->first ? use_first : 0),
 432                            here->packet, dh->allocsize))
 433             goto fail;
 434
 435           if (dh->allocsize < sizeof (struct datahead)
 436               || dh->recsize > dh->allocsize
 437               || (dh->notfound != false && dh->notfound != true)
 438               || (dh->usable != false && dh->usable != true))
 439             goto fail;
 440
 441           if (here->key < here->packet + sizeof (struct datahead)
 442               || here->key > here->packet + dh->allocsize
 443               || here->key + here->len > here->packet + dh->allocsize)
 444             goto fail;
 445
 446           work = here->next;
 447
 448           if (work == trail)
 449             /* A circular list, this must not happen.  */
 450             goto fail;
 451           if (tick)
 452             trail = ((struct hashentry *) (data + trail))->next;
 453           tick = 1 - tick;
 454         }
 455     }
 456
 457   if (he_cnt != head->nentries)
 458     goto fail;
 459
 460   /* See if all data and keys had at least one reference from
 461      he->first == true hashentry.  */
 462   for (ref_t idx = 0; idx < head->first_free; ++idx)
 463     {
 464       if (usemap[idx] == use_data_begin)
 465         goto fail;
 466     }
 467
 468   /* Finally, make sure the database hasn't changed since the first test.  */
 469   if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
 470     goto fail;
 471
 472   free (usemap);
 473   return 1;
 474
 475 fail:
 476   free (usemap);
 477   return 0;
 478 }
 479
 480
 481 /* Initialize database information structures.  */
 482 void
 483 nscd_init (void)
 484 {
 485   /* Look up unprivileged uid/gid/groups before we start listening on the
 486      socket  */
 487   if (server_user != NULL)
 488     begin_drop_privileges ();
 489
 490   if (nthreads == -1)
 491     /* No configuration for this value, assume a default.  */
 492     nthreads = 4;
 493
 494   for (size_t cnt = 0; cnt < lastdb; ++cnt)
 495     if (dbs[cnt].enabled)
 496       {
 497         pthread_rwlock_init (&dbs[cnt].lock, NULL);
 498         pthread_mutex_init (&dbs[cnt].memlock, NULL);
 499
 500         if (dbs[cnt].persistent)
 501           {
 502             /* Try to open the appropriate file on disk.  */
 503             int fd = open (dbs[cnt].db_filename, O_RDWR | O_CLOEXEC);
 504             if (fd != -1)
 505               {
 506                 char *msg = NULL;
 507                 struct stat64 st;
 508                 void *mem;
 509                 size_t total;
 510                 struct database_pers_head head;
 511                 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
 512                                                       sizeof (head)));
 513                 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
 514                   {
 515                   fail_db_errno:
 516                     /* The code is single-threaded at this point so
 517                        using strerror is just fine.  */
 518                     msg = strerror (errno);
 519                   fail_db:
 520                     dbg_log (_("invalid persistent database file \"%s\": %s"),
 521                              dbs[cnt].db_filename, msg);
 522                     unlink (dbs[cnt].db_filename);
 523                   }
 524                 else if (head.module == 0 && head.data_size == 0)
 525                   {
 526                     /* The file has been created, but the head has not
 527                        been initialized yet.  */
 528                     msg = _("uninitialized header");
 529                     goto fail_db;
 530                   }
 531                 else if (head.header_size != (int) sizeof (head))
 532                   {
 533                     msg = _("header size does not match");
 534                     goto fail_db;
 535                   }
 536                 else if ((total = (sizeof (head)
 537                                    + roundup (head.module * sizeof (ref_t),
 538                                               ALIGN)
 539                                    + head.data_size))
 540                          > st.st_size
 541                          || total < sizeof (head))
 542                   {
 543                     msg = _("file size does not match");
 544                     goto fail_db;
 545                   }
 546                 /* Note we map with the maximum size allowed for the
 547                    database.  This is likely much larger than the
 548                    actual file size.  This is OK on most OSes since
 549                    extensions of the underlying file will
 550                    automatically translate more pages available for
 551                    memory access.  */
 552                 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
 553                                       PROT_READ | PROT_WRITE,
 554                                       MAP_SHARED, fd, 0))
 555                          == MAP_FAILED)
 556                   goto fail_db_errno;
 557                 else if (!verify_persistent_db (mem, &head, cnt))
 558                   {
 559                     munmap (mem, total);
 560                     msg = _("verification failed");
 561                     goto fail_db;
 562                   }
 563                 else
 564                   {
 565                     /* Success.  We have the database.  */
 566                     dbs[cnt].head = mem;
 567                     dbs[cnt].memsize = total;
 568                     dbs[cnt].data = (char *)
 569                       &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
 570                                                      ALIGN / sizeof (ref_t))];
 571                     dbs[cnt].mmap_used = true;
 572
 573                     if (dbs[cnt].suggested_module > head.module)
 574                       dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
 575                                dbnames[cnt]);
 576
 577                     dbs[cnt].wr_fd = fd;
 578                     fd = -1;
 579                     /* We also need a read-only descriptor.  */
 580                     if (dbs[cnt].shared)
 581                       {
 582                         dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
 583                                                O_RDONLY | O_CLOEXEC);
 584                         if (dbs[cnt].ro_fd == -1)
 585                           dbg_log (_("\
 586 cannot create read-only descriptor for \"%s\"; no mmap"),
 587                                    dbs[cnt].db_filename);
 588                       }
 589
 590                     // XXX Shall we test whether the descriptors actually
 591                     // XXX point to the same file?
 592                   }
 593
 594                 /* Close the file descriptors in case something went
 595                    wrong in which case the variable have not been
 596                    assigned -1.  */
 597                 if (fd != -1)
 598                   close (fd);
 599               }
 600             else if (errno == EACCES)
 601               do_exit (EXIT_FAILURE, 0, _("cannot access '%s'"),
 602                        dbs[cnt].db_filename);
 603           }
 604
 605         if (dbs[cnt].head == NULL)
 606           {
 607             /* No database loaded.  Allocate the data structure,
 608                possibly on disk.  */
 609             struct database_pers_head head;
 610             size_t total = (sizeof (head)
 611                             + roundup (dbs[cnt].suggested_module
 612                                        * sizeof (ref_t), ALIGN)
 613                             + (dbs[cnt].suggested_module
 614                                * DEFAULT_DATASIZE_PER_BUCKET));
 615
 616             /* Try to create the database.  If we do not need a
 617                persistent database create a temporary file.  */
 618             int fd;
 619             int ro_fd = -1;
 620             if (dbs[cnt].persistent)
 621               {
 622                 fd = open (dbs[cnt].db_filename,
 623                            O_RDWR | O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC,
 624                            S_IRUSR | S_IWUSR);
 625                 if (fd != -1 && dbs[cnt].shared)
 626                   ro_fd = open (dbs[cnt].db_filename,
 627                                 O_RDONLY | O_CLOEXEC);
 628               }
 629             else
 630               {
 631                 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
 632                 fd = mkostemp (fname, O_CLOEXEC);
 633
 634                 /* We do not need the file name anymore after we
 635                    opened another file descriptor in read-only mode.  */
 636                 if (fd != -1)
 637                   {
 638                     if (dbs[cnt].shared)
 639                       ro_fd = open (fname, O_RDONLY | O_CLOEXEC);
 640
 641                     unlink (fname);
 642                   }
 643               }
 644
 645             if (fd == -1)
 646               {
 647                 if (errno == EEXIST)
 648                   {
 649                     dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
 650                              dbnames[cnt], dbs[cnt].db_filename);
 651                     do_exit (1, 0, NULL);
 652                   }
 653
 654                 if  (dbs[cnt].persistent)
 655                   dbg_log (_("cannot create %s; no persistent database used"),
 656                            dbs[cnt].db_filename);
 657                 else
 658                   dbg_log (_("cannot create %s; no sharing possible"),
 659                            dbs[cnt].db_filename);
 660
 661                 dbs[cnt].persistent = 0;
 662                 // XXX remember: no mmap
 663               }
 664             else
 665               {
 666                 /* Tell the user if we could not create the read-only
 667                    descriptor.  */
 668                 if (ro_fd == -1 && dbs[cnt].shared)
 669                   dbg_log (_("\
 670 cannot create read-only descriptor for \"%s\"; no mmap"),
 671                            dbs[cnt].db_filename);
 672
 673                 /* Before we create the header, initialize the hash
 674                    table.  That way if we get interrupted while writing
 675                    the header we can recognize a partially initialized
 676                    database.  */
 677                 size_t ps = sysconf (_SC_PAGESIZE);
 678                 char tmpbuf[ps];
 679                 assert (~ENDREF == 0);
 680                 memset (tmpbuf, '\xff', ps);
 681
 682                 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
 683                 off_t offset = sizeof (head);
 684
 685                 size_t towrite;
 686                 if (offset % ps != 0)
 687                   {
 688                     towrite = MIN (remaining, ps - (offset % ps));
 689                     if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
 690                       goto write_fail;
 691                     offset += towrite;
 692                     remaining -= towrite;
 693                   }
 694
 695                 while (remaining > ps)
 696                   {
 697                     if (pwrite (fd, tmpbuf, ps, offset) == -1)
 698                       goto write_fail;
 699                     offset += ps;
 700                     remaining -= ps;
 701                   }
 702
 703                 if (remaining > 0
 704                     && pwrite (fd, tmpbuf, remaining, offset) != remaining)
 705                   goto write_fail;
 706
 707                 /* Create the header of the file.  */
 708                 struct database_pers_head head =
 709                   {
 710                     .version = DB_VERSION,
 711                     .header_size = sizeof (head),
 712                     .module = dbs[cnt].suggested_module,
 713                     .data_size = (dbs[cnt].suggested_module
 714                                   * DEFAULT_DATASIZE_PER_BUCKET),
 715                     .first_free = 0
 716                   };
 717                 void *mem;
 718
 719                 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
 720                      != sizeof (head))
 721                     || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
 722                         != 0)
 723                     || (mem = mmap (NULL, dbs[cnt].max_db_size,
 724                                     PROT_READ | PROT_WRITE,
 725                                     MAP_SHARED, fd, 0)) == MAP_FAILED)
 726                   {
 727                   write_fail:
 728                     unlink (dbs[cnt].db_filename);
 729                     dbg_log (_("cannot write to database file %s: %s"),
 730                              dbs[cnt].db_filename, strerror (errno));
 731                     dbs[cnt].persistent = 0;
 732                   }
 733                 else
 734                   {
 735                     /* Success.  */
 736                     dbs[cnt].head = mem;
 737                     dbs[cnt].data = (char *)
 738                       &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
 739                                                      ALIGN / sizeof (ref_t))];
 740                     dbs[cnt].memsize = total;
 741                     dbs[cnt].mmap_used = true;
 742
 743                     /* Remember the descriptors.  */
 744                     dbs[cnt].wr_fd = fd;
 745                     dbs[cnt].ro_fd = ro_fd;
 746                     fd = -1;
 747                     ro_fd = -1;
 748                   }
 749
 750                 if (fd != -1)
 751                   close (fd);
 752                 if (ro_fd != -1)
 753                   close (ro_fd);
 754               }
 755           }
 756
 757         if (dbs[cnt].head == NULL)
 758           {
 759             /* We do not use the persistent database.  Just
 760                create an in-memory data structure.  */
 761             assert (! dbs[cnt].persistent);
 762
 763             dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
 764                                      + (dbs[cnt].suggested_module
 765                                         * sizeof (ref_t)));
 766             memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
 767             assert (~ENDREF == 0);
 768             memset (dbs[cnt].head->array, '\xff',
 769                     dbs[cnt].suggested_module * sizeof (ref_t));
 770             dbs[cnt].head->module = dbs[cnt].suggested_module;
 771             dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
 772                                         * dbs[cnt].head->module);
 773             dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
 774             dbs[cnt].head->first_free = 0;
 775
 776             dbs[cnt].shared = 0;
 777             assert (dbs[cnt].ro_fd == -1);
 778           }
 779       }
 780
 781   /* Create the socket.  */
 782   sock = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
 783   if (sock < 0)
 784     {
 785       dbg_log (_("cannot open socket: %s"), strerror (errno));
 786       do_exit (errno == EACCES ? 4 : 1, 0, NULL);
 787     }
 788   /* Bind a name to the socket.  */
 789   struct sockaddr_un sock_addr;
 790   sock_addr.sun_family = AF_UNIX;
 791   strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
 792   if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
 793     {
 794       dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
 795       do_exit (errno == EACCES ? 4 : 1, 0, NULL);
 796     }
 797
 798   /* Set permissions for the socket.  */
 799   chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
 800
 801   /* Set the socket up to accept connections.  */
 802   if (listen (sock, SOMAXCONN) < 0)
 803     {
 804       dbg_log (_("cannot enable socket to accept connections: %s"),
 805                strerror (errno));
 806       do_exit (1, 0, NULL);
 807     }
 808
 809 #ifdef HAVE_NETLINK
 810   if (dbs[hstdb].enabled)
 811     {
 812       /* Try to open netlink socket to monitor network setting changes.  */
 813       nl_status_fd = socket (AF_NETLINK,
 814                              SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK,
 815                              NETLINK_ROUTE);
 816       if (nl_status_fd != -1)
 817         {
 818           struct sockaddr_nl snl;
 819           memset (&snl, '\0', sizeof (snl));
 820           snl.nl_family = AF_NETLINK;
 821           /* XXX Is this the best set to use?  */
 822           snl.nl_groups = (RTMGRP_IPV4_IFADDR | RTMGRP_TC | RTMGRP_IPV4_MROUTE
 823                            | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_RULE
 824                            | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_MROUTE
 825                            | RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFINFO
 826                            | RTMGRP_IPV6_PREFIX);
 827
 828           if (bind (nl_status_fd, (struct sockaddr *) &snl, sizeof (snl)) != 0)
 829             {
 830               close (nl_status_fd);
 831               nl_status_fd = -1;
 832             }
 833           else
 834             {
 835               /* Start the timestamp process.  */
 836               dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
 837                 = __bump_nl_timestamp ();
 838             }
 839         }
 840     }
 841 #endif
 842
 843   /* Change to unprivileged uid/gid/groups if specified in config file */
 844   if (server_user != NULL)
 845     finish_drop_privileges ();
 846 }
 847
 848 #ifdef HAVE_INOTIFY
 849 #define TRACED_FILE_MASK (IN_DELETE_SELF | IN_CLOSE_WRITE | IN_MOVE_SELF)
 850 #define TRACED_DIR_MASK (IN_DELETE_SELF | IN_CREATE | IN_MOVED_TO | IN_MOVE_SELF)
 851 void
 852 install_watches (struct traced_file *finfo)
 853 {
 854   /* Use inotify support if we have it.  */
 855   if (finfo->inotify_descr[TRACED_FILE] < 0)
 856     finfo->inotify_descr[TRACED_FILE] = inotify_add_watch (inotify_fd,
 857                                                            finfo->fname,
 858                                                            TRACED_FILE_MASK);
 859   if (finfo->inotify_descr[TRACED_FILE] < 0)
 860     {
 861       dbg_log (_("disabled inotify-based monitoring for file `%s': %s"),
 862                  finfo->fname, strerror (errno));
 863       return;
 864     }
 865   dbg_log (_("monitoring file `%s` (%d)"),
 866            finfo->fname, finfo->inotify_descr[TRACED_FILE]);
 867   /* Additionally listen for events in the file's parent directory.
 868      We do this because the file to be watched might be
 869      deleted and then added back again.  When it is added back again
 870      we must re-add the watch.  We must also cover IN_MOVED_TO to
 871      detect a file being moved into the directory.  */
 872   if (finfo->inotify_descr[TRACED_DIR] < 0)
 873     finfo->inotify_descr[TRACED_DIR] = inotify_add_watch (inotify_fd,
 874                                                           finfo->dname,
 875                                                           TRACED_DIR_MASK);
 876   if (finfo->inotify_descr[TRACED_DIR] < 0)
 877     {
 878       dbg_log (_("disabled inotify-based monitoring for directory `%s': %s"),
 879                  finfo->fname, strerror (errno));
 880       return;
 881     }
 882   dbg_log (_("monitoring directory `%s` (%d)"),
 883            finfo->dname, finfo->inotify_descr[TRACED_DIR]);
 884 }
 885 #endif
 886
 887 /* Register the file in FINFO as a traced file for the database DBS[DBIX].
 888
 889    We support registering multiple files per database. Each call to
 890    register_traced_file adds to the list of registered files.
 891
 892    When we prune the database, either through timeout or a request to
 893    invalidate, we will check to see if any of the registered files has changed.
 894    When we accept new connections to handle a cache request we will also
 895    check to see if any of the registered files has changed.
 896
 897    If we have inotify support then we install an inotify fd to notify us of
 898    file deletion or modification, both of which will require we invalidate
 899    the cache for the database.  Without inotify support we stat the file and
 900    store st_mtime to determine if the file has been modified.  */
 901 void
 902 register_traced_file (size_t dbidx, struct traced_file *finfo)
 903 {
 904   /* If the database is disabled or file checking is disabled
 905      then ignore the registration.  */
 906   if (! dbs[dbidx].enabled || ! dbs[dbidx].check_file)
 907     return;
 908
 909   if (__glibc_unlikely (debug_level > 0))
 910     dbg_log (_("monitoring file %s for database %s"),
 911              finfo->fname, dbnames[dbidx]);
 912
 913 #ifdef HAVE_INOTIFY
 914   install_watches (finfo);
 915 #endif
 916   struct stat64 st;
 917   if (stat64 (finfo->fname, &st) < 0)
 918     {
 919       /* We cannot stat() the file. Set mtime to zero and try again later.  */
 920       dbg_log (_("stat failed for file `%s'; will try again later: %s"),
 921                finfo->fname, strerror (errno));
 922       finfo->mtime = 0;
 923     }
 924   else
 925     finfo->mtime = st.st_mtime;
 926
 927   /* Queue up the file name.  */
 928   finfo->next = dbs[dbidx].traced_files;
 929   dbs[dbidx].traced_files = finfo;
 930 }
 931
 932
 933 /* Close the connections.  */
 934 void
 935 close_sockets (void)
 936 {
 937   close (sock);
 938 }
 939
 940
 941 static void
 942 invalidate_cache (char *key, int fd)
 943 {
 944   dbtype number;
 945   int32_t resp;
 946
 947   for (number = pwddb; number < lastdb; ++number)
 948     if (strcmp (key, dbnames[number]) == 0)
 949       {
 950         struct traced_file *runp = dbs[number].traced_files;
 951         while (runp != NULL)
 952           {
 953             /* Make sure we reload from file when checking mtime.  */
 954             runp->mtime = 0;
 955 #ifdef HAVE_INOTIFY
 956             /* During an invalidation we try to reload the traced
 957                file watches.  This allows the user to re-sync if
 958                inotify events were lost.  Similar to what we do during
 959                pruning.  */
 960             install_watches (runp);
 961 #endif
 962             if (runp->call_res_init)
 963               {
 964                 res_init ();
 965                 break;
 966               }
 967             runp = runp->next;
 968           }
 969         break;
 970       }
 971
 972   if (number == lastdb)
 973     {
 974       resp = EINVAL;
 975       writeall (fd, &resp, sizeof (resp));
 976       return;
 977     }
 978
 979   if (dbs[number].enabled)
 980     {
 981       pthread_mutex_lock (&dbs[number].prune_run_lock);
 982       prune_cache (&dbs[number], LONG_MAX, fd);
 983       pthread_mutex_unlock (&dbs[number].prune_run_lock);
 984     }
 985   else
 986     {
 987       resp = 0;
 988       writeall (fd, &resp, sizeof (resp));
 989     }
 990 }
 991
 992
 993 #ifdef SCM_RIGHTS
 994 static void
 995 send_ro_fd (struct database_dyn *db, char *key, int fd)
 996 {
 997   /* If we do not have an read-only file descriptor do nothing.  */
 998   if (db->ro_fd == -1)
 999     return;
1000
1001   /* We need to send some data along with the descriptor.  */
1002   uint64_t mapsize = (db->head->data_size
1003                       + roundup (db->head->module * sizeof (ref_t), ALIGN)
1004                       + sizeof (struct database_pers_head));
1005   struct iovec iov[2];
1006   iov[0].iov_base = key;
1007   iov[0].iov_len = strlen (key) + 1;
1008   iov[1].iov_base = &mapsize;
1009   iov[1].iov_len = sizeof (mapsize);
1010
1011   /* Prepare the control message to transfer the descriptor.  */
1012   union
1013   {
1014     struct cmsghdr hdr;
1015     char bytes[CMSG_SPACE (sizeof (int))];
1016   } buf;
1017   struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
1018                         .msg_control = buf.bytes,
1019                         .msg_controllen = sizeof (buf) };
1020   struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
1021
1022   cmsg->cmsg_level = SOL_SOCKET;
1023   cmsg->cmsg_type = SCM_RIGHTS;
1024   cmsg->cmsg_len = CMSG_LEN (sizeof (int));
1025
1026   int *ip = (int *) CMSG_DATA (cmsg);
1027   *ip = db->ro_fd;
1028
1029   msg.msg_controllen = cmsg->cmsg_len;
1030
1031   /* Send the control message.  We repeat when we are interrupted but
1032      everything else is ignored.  */
1033 #ifndef MSG_NOSIGNAL
1034 # define MSG_NOSIGNAL 0
1035 #endif
1036   (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
1037
1038   if (__glibc_unlikely (debug_level > 0))
1039     dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
1040 }
1041 #endif  /* SCM_RIGHTS */
1042
1043
/* Handle new request.

   FD is the connection to the client, REQ the request header already
   read from it and KEY the key data which followed the header.  UID is
   handed on to the cache lookup and to the add* functions; for the
   GETSTAT, SHUTDOWN and INVALIDATE requests it is replaced by the
   caller's uid obtained through SO_PEERCRED (or by 0 when SO_PEERCRED
   is not available).  PID is only used in the debug message printed
   when the SELinux check rejects the request.

   The steps are:
   - requests with a version other than NSCD_VERSION are dropped;
   - requests refused by the SELinux check are dropped;
   - for data requests the answer is sent from the cache if the entry
     is found there, or the prepared "disabled" record is sent if the
     database is not enabled;
   - everything else is dispatched according to the request type.

   FD is not closed by this function.  */
static void
handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
{
  /* Only the current protocol version is understood.  No reply is
     sent for anything else.  */
  if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
    {
      if (debug_level > 0)
        dbg_log (_("\
cannot handle old request version %d; current version is %d"),
                 req->version, NSCD_VERSION);
      return;
    }

  /* Perform the SELinux check before we go on to the standard checks.  */
  if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
    {
      if (debug_level > 0)
        {
#ifdef SO_PEERCRED
          /* Try to name the requesting program by resolving the
             /proc/PID/exe link.  PBUF is sized for the fixed parts of
             the path plus a generous bound for the decimal PID.  */
          char pbuf[sizeof ("/proc//exe") + 3 * sizeof (long int)];
# ifdef PATH_MAX
          char buf[PATH_MAX];
# else
          char buf[4096];
# endif

          snprintf (pbuf, sizeof (pbuf), "/proc/%ld/exe", (long int) pid);
          /* readlink does not NUL-terminate, hence one byte is kept
             in reserve.  */
          ssize_t n = readlink (pbuf, buf, sizeof (buf) - 1);

          if (n <= 0)
            dbg_log (_("\
request from %ld not handled due to missing permission"), (long int) pid);
          else
            {
              buf[n] = '\0';
              dbg_log (_("\
request from '%s' [%ld] not handled due to missing permission"),
                       buf, (long int) pid);
            }
#else
          dbg_log (_("request not handled due to missing permission"));
#endif
        }
      return;
    }

  /* The database this request type works on, according to REQINFO.  It
     is only dereferenced for data requests below.  */
  struct database_dyn *db = reqinfo[req->type].db;

  /* See whether we can service the request from the cache.  */
  if (__builtin_expect (reqinfo[req->type].data_request, true))
    {
      if (__builtin_expect (debug_level, 0) > 0)
        {
          /* Address lookups carry a binary address as key, which is
             converted to text for the log.  All other keys are
             printed as strings.  */
          if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
            {
              char buf[INET6_ADDRSTRLEN];

              dbg_log ("\t%s (%s)", serv2str[req->type],
                       inet_ntop (req->type == GETHOSTBYADDR
                                  ? AF_INET : AF_INET6,
                                  key, buf, sizeof (buf)));
            }
          else
            dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
        }

      /* Is this service enabled?  */
      if (__glibc_unlikely (!db->enabled))
        {
          /* No, sent the prepared record.  */
          if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
                                        db->disabled_iov->iov_len,
                                        MSG_NOSIGNAL))
              != (ssize_t) db->disabled_iov->iov_len
              && __builtin_expect (debug_level, 0) > 0)
            {
              /* We have problems sending the result.  */
              char buf[256];
              dbg_log (_("cannot write result: %s"),
                       strerror_r (errno, buf, sizeof (buf)));
            }

          return;
        }

      /* Be sure we can read the data.  */
      if (__glibc_unlikely (pthread_rwlock_tryrdlock (&db->lock) != 0))
        {
          /* The lock was not immediately available.  Count the event
             and then wait for the lock.  */
          ++db->head->rdlockdelayed;
          pthread_rwlock_rdlock (&db->lock);
        }

      /* See whether we can handle it from the cache.  */
      struct datahead *cached;
      cached = (struct datahead *) cache_search (req->type, key, req->key_len,
                                                 db, uid);
      if (cached != NULL)
        {
          /* Hurray it's in the cache.  */
          if (writeall (fd, cached->data, cached->recsize) != cached->recsize
              && __glibc_unlikely (debug_level > 0))
            {
              /* We have problems sending the result.  */
              char buf[256];
              dbg_log (_("cannot write result: %s"),
                       strerror_r (errno, buf, sizeof (buf)));
            }

          /* The read lock is held until the record has been written.  */
          pthread_rwlock_unlock (&db->lock);

          return;
        }

      /* Not cached.  Drop the lock before the request is dispatched
         below.  */
      pthread_rwlock_unlock (&db->lock);
    }
  else if (__builtin_expect (debug_level, 0) > 0)
    {
      /* Of the non-data requests only INVALIDATE has its key logged.  */
      if (req->type == INVALIDATE)
        dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
      else
        dbg_log ("\t%s", serv2str[req->type]);
    }

  /* Handle the request.  */
  switch (req->type)
    {
    case GETPWBYNAME:
      addpwbyname (db, fd, req, key, uid);
      break;

    case GETPWBYUID:
      addpwbyuid (db, fd, req, key, uid);
      break;

    case GETGRBYNAME:
      addgrbyname (db, fd, req, key, uid);
      break;

    case GETGRBYGID:
      addgrbygid (db, fd, req, key, uid);
      break;

    case GETHOSTBYNAME:
      addhstbyname (db, fd, req, key, uid);
      break;

    case GETHOSTBYNAMEv6:
      addhstbynamev6 (db, fd, req, key, uid);
      break;

    case GETHOSTBYADDR:
      addhstbyaddr (db, fd, req, key, uid);
      break;

    case GETHOSTBYADDRv6:
      addhstbyaddrv6 (db, fd, req, key, uid);
      break;

    case GETAI:
      addhstai (db, fd, req, key, uid);
      break;

    case INITGROUPS:
      addinitgroups (db, fd, req, key, uid);
      break;

    case GETSERVBYNAME:
      addservbyname (db, fd, req, key, uid);
      break;

    case GETSERVBYPORT:
      addservbyport (db, fd, req, key, uid);
      break;

    case GETNETGRENT:
      addgetnetgrent (db, fd, req, key, uid);
      break;

    case INNETGR:
      addinnetgr (db, fd, req, key, uid);
      break;

    case GETSTAT:
    case SHUTDOWN:
    case INVALIDATE:
      {
        /* Get the callers credentials.  */
#ifdef SO_PEERCRED
        struct ucred caller;
        socklen_t optlen = sizeof (caller);

        if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
          {
            char buf[256];

            dbg_log (_("error getting caller's id: %s"),
                     strerror_r (errno, buf, sizeof (buf)));
            /* This leaves the switch: without credentials the request
               is not carried out.  */
            break;
          }

        uid = caller.uid;
#else
        /* Some systems have no SO_PEERCRED implementation.  They don't
           care about security so we don't as well.  */
        uid = 0;
#endif
      }

      /* Accept shutdown, getstat and invalidate only from root.  For
         the stat call also allow the user specified in the config file.  */
      if (req->type == GETSTAT)
        {
          if (uid == 0 || uid == stat_uid)
            send_stats (fd, dbs);
        }
      else if (uid == 0)
        {
          if (req->type == INVALIDATE)
            invalidate_cache (key, fd);
          else
            termination_handler (0);
        }
      break;

    case GETFDPW:
    case GETFDGR:
    case GETFDHST:
    case GETFDSERV:
    case GETFDNETGR:
      /* Without SCM_RIGHTS these requests are silently ignored.  */
#ifdef SCM_RIGHTS
      send_ro_fd (reqinfo[req->type].db, key, fd);
#endif
      break;

    default:
      /* Ignore the command, it's nothing we know.  */
      break;
    }
}
1283
1284
1285 /* Restart the process.  */
1286 static void
1287 restart (void)
1288 {
1289   /* First determine the parameters.  We do not use the parameters
1290      passed to main() since in case nscd is started by running the
1291      dynamic linker this will not work.  Yes, this is not the usual
1292      case but nscd is part of glibc and we occasionally do this.  */
1293   size_t buflen = 1024;
1294   char *buf = alloca (buflen);
1295   size_t readlen = 0;
1296   int fd = open ("/proc/self/cmdline", O_RDONLY);
1297   if (fd == -1)
1298     {
1299       dbg_log (_("\
1300 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1301                strerror (errno));
1302
1303       paranoia = 0;
1304       return;
1305     }
1306
1307   while (1)
1308     {
1309       ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1310                                             buflen - readlen));
1311       if (n == -1)
1312         {
1313           dbg_log (_("\
1314 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1315                    strerror (errno));
1316
1317           close (fd);
1318           paranoia = 0;
1319           return;
1320         }
1321
1322       readlen += n;
1323
1324       if (readlen < buflen)
1325         break;
1326
1327       /* We might have to extend the buffer.  */
1328       size_t old_buflen = buflen;
1329       char *newp = extend_alloca (buf, buflen, 2 * buflen);
1330       buf = memmove (newp, buf, old_buflen);
1331     }
1332
1333   close (fd);
1334
1335   /* Parse the command line.  Worst case scenario: every two
1336      characters form one parameter (one character plus NUL).  */
1337   char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1338   int argc = 0;
1339
1340   char *cp = buf;
1341   while (cp < buf + readlen)
1342     {
1343       argv[argc++] = cp;
1344       cp = (char *) rawmemchr (cp, '\0') + 1;
1345     }
1346   argv[argc] = NULL;
1347
1348   /* Second, change back to the old user if we changed it.  */
1349   if (server_user != NULL)
1350     {
1351       if (setresuid (old_uid, old_uid, old_uid) != 0)
1352         {
1353           dbg_log (_("\
1354 cannot change to old UID: %s; disabling paranoia mode"),
1355                    strerror (errno));
1356
1357           paranoia = 0;
1358           return;
1359         }
1360
1361       if (setresgid (old_gid, old_gid, old_gid) != 0)
1362         {
1363           dbg_log (_("\
1364 cannot change to old GID: %s; disabling paranoia mode"),
1365                    strerror (errno));
1366
1367           ignore_value (setuid (server_uid));
1368           paranoia = 0;
1369           return;
1370         }
1371     }
1372
1373   /* Next change back to the old working directory.  */
1374   if (chdir (oldcwd) == -1)
1375     {
1376       dbg_log (_("\
1377 cannot change to old working directory: %s; disabling paranoia mode"),
1378                strerror (errno));
1379
1380       if (server_user != NULL)
1381         {
1382           ignore_value (setuid (server_uid));
1383           ignore_value (setgid (server_gid));
1384         }
1385       paranoia = 0;
1386       return;
1387     }
1388
1389   /* Synchronize memory.  */
1390   int32_t certainly[lastdb];
1391   for (int cnt = 0; cnt < lastdb; ++cnt)
1392     if (dbs[cnt].enabled)
1393       {
1394         /* Make sure nobody keeps using the database.  */
1395         dbs[cnt].head->timestamp = 0;
1396         certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
1397         dbs[cnt].head->nscd_certainly_running = 0;
1398
1399         if (dbs[cnt].persistent)
1400           // XXX async OK?
1401           msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1402       }
1403
1404   /* The preparations are done.  */
1405 #ifdef PATH_MAX
1406   char pathbuf[PATH_MAX];
1407 #else
1408   char pathbuf[256];
1409 #endif
1410   /* Try to exec the real nscd program so the process name (as reported
1411      in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1412      if readlink or the exec with the result of the readlink call fails.  */
1413   ssize_t n = readlink ("/proc/self/exe", pathbuf, sizeof (pathbuf) - 1);
1414   if (n != -1)
1415     {
1416       pathbuf[n] = '\0';
1417       execv (pathbuf, argv);
1418     }
1419   execv ("/proc/self/exe", argv);
1420
1421   /* If we come here, we will never be able to re-exec.  */
1422   dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1423            strerror (errno));
1424
1425   if (server_user != NULL)
1426     {
1427       ignore_value (setuid (server_uid));
1428       ignore_value (setgid (server_gid));
1429     }
1430   if (chdir ("/") != 0)
1431     dbg_log (_("cannot change current working directory to \"/\": %s"),
1432              strerror (errno));
1433   paranoia = 0;
1434
1435   /* Reenable the databases.  */
1436   time_t now = time (NULL);
1437   for (int cnt = 0; cnt < lastdb; ++cnt)
1438     if (dbs[cnt].enabled)
1439       {
1440         dbs[cnt].head->timestamp = now;
1441         dbs[cnt].head->nscd_certainly_running = certainly[cnt];
1442       }
1443 }
1444
1445
/* List of file descriptors.  A record is unused while its NEXT member
   is NULL; records in use are linked into a circular list.  */
struct fdlist
{
  /* The descriptor with pending data.  */
  int fd;
  /* Successor in the circular list, or NULL if the record is free.  */
  struct fdlist *next;
};
/* Memory allocated for the list.  It is searched as an array of NCONNS
   records when a free record is needed.  */
static struct fdlist *fdlist;
/* List of currently ready-to-read file descriptors.  NULL if the list
   is empty.  Otherwise this points to the record added last, and its
   NEXT member points to the record which is handed out next.  */
static struct fdlist *readylist;

/* Condition variable and mutex to signal availability of entries in
   READYLIST.  The condvar is initialized dynamically since we might
   use a different clock depending on availability.
   NOTE(review): the definition below has a static initializer; the
   dynamic initialization mentioned here is not part of this section of
   the file -- confirm it still exists.  */
static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;

/* The clock to use with the condvar.  */
static clockid_t timeout_clock = CLOCK_REALTIME;

/* Number of threads ready to handle the READYLIST.  Modified with
   READYLIST_LOCK held.  */
static unsigned long int nready;
1468
1469
/* Function for the clean-up threads.

   P is the index of the database this thread is responsible for,
   passed as an integer cast to a pointer.  The database must be
   enabled.  The thread never returns: it sleeps on the database's
   prune condition variable until the computed wakeup time is reached,
   it is signalled, or a cache flush is requested through the
   clear_cache flag, and then runs prune_cache as needed.  */
static void *
__attribute__ ((__noreturn__))
nscd_run_prune (void *p)
{
  const long int my_number = (long int) p;
  assert (dbs[my_number].enabled);

  /* NOTE(review): the exact meaning of the value returned by
     setup_thread is not visible here.  When it is zero the database
     timestamp is refreshed at every pruning run below.  */
  int dont_need_update = setup_thread (&dbs[my_number]);

  time_t now = time (NULL);

  /* We are running.  */
  dbs[my_number].head->timestamp = now;

  struct timespec prune_ts;
  if (__glibc_unlikely (clock_gettime (timeout_clock, &prune_ts) == -1))
    /* Should never happen.  */
    abort ();

  /* Compute the initial timeout time.  Prevent all the timers to go
     off at the same time by adding a db-based value.  */
  prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
  dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;

  pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
  pthread_mutex_t *prune_run_lock = &dbs[my_number].prune_run_lock;
  pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;

  /* PRUNE_LOCK is held throughout the loop except while the prune
     function itself is running.  */
  pthread_mutex_lock (prune_lock);
  while (1)
    {
      /* Wait, but not forever.  If a flush has already been requested
         do not wait at all.  */
      int e = 0;
      if (! dbs[my_number].clear_cache)
        e = pthread_cond_timedwait (prune_cond, prune_lock, &prune_ts);
      assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));

      time_t next_wait;
      now = time (NULL);
      if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time
          || dbs[my_number].clear_cache)
        {
          /* We will determine the new timeout values based on the
             cache content.  Should there be concurrent additions to
             the cache which are not accounted for in the cache
             pruning we want to know about it.  Therefore set the
             timeout to the maximum.  It will be decreased when adding
             new entries to the cache, if necessary.  */
          dbs[my_number].wakeup_time = MAX_TIMEOUT_VALUE;

          /* Unconditionally reset the flag.  A requested flush is
             implemented by pruning with LONG_MAX as the current
             time.  */
          time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
          dbs[my_number].clear_cache = 0;

          pthread_mutex_unlock (prune_lock);

          /* We use a separate lock for running the prune function (instead
             of keeping prune_lock locked) because this enables concurrent
             invocations of cache_add which might modify the timeout value.  */
          pthread_mutex_lock (prune_run_lock);
          next_wait = prune_cache (&dbs[my_number], prune_now, -1);
          pthread_mutex_unlock (prune_run_lock);

          /* Never sleep for less than the prune interval.  */
          next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
          /* If clients cannot determine for sure whether nscd is running
             we need to wake up occasionally to update the timestamp.
             Wait 90% of the update period.  */
#define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
          if (__glibc_unlikely (! dont_need_update))
            {
              next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
              dbs[my_number].head->timestamp = now;
            }

          pthread_mutex_lock (prune_lock);

          /* Make it known when we will wake up again.  If the wakeup
             time was lowered while the prune function ran, the
             earlier time is used.  */
          if (now + next_wait < dbs[my_number].wakeup_time)
            dbs[my_number].wakeup_time = now + next_wait;
          else
            next_wait = dbs[my_number].wakeup_time - now;
        }
      else
        /* The cache was just pruned.  Do not do it again now.  Just
           use the new timeout value.  */
        next_wait = dbs[my_number].wakeup_time - now;

      if (clock_gettime (timeout_clock, &prune_ts) == -1)
        /* Should never happen.  */
        abort ();

      /* Compute next timeout time.  */
      prune_ts.tv_sec += next_wait;
    }
}
1566
1567
/* This is the main loop.  It is replicated in different threads but
   the use of the ready list makes sure only one thread handles an
   incoming connection.

   Each iteration takes one descriptor off READYLIST, reads the request
   header and the key from it, passes both to handle_request and closes
   the descriptor.  P is not used.  The function never returns.  */
static void *
__attribute__ ((__noreturn__))
nscd_run_worker (void *p)
{
  /* Buffer for strerror_r.  */
  char buf[256];

  /* Initial locking.  */
  pthread_mutex_lock (&readylist_lock);

  /* One more thread available.  */
  ++nready;

  while (1)
    {
      /* READYLIST_LOCK is held here.  */
      while (readylist == NULL)
        pthread_cond_wait (&readylist_cond, &readylist_lock);

      /* READYLIST points to the record added last, the record
         following it is the one which waited longest.  Unlink that
         one.  */
      struct fdlist *it = readylist->next;
      if (readylist->next == readylist)
        /* Just one entry on the list.  */
        readylist = NULL;
      else
        readylist->next = it->next;

      /* Extract the information and mark the record ready to be used
         again.  */
      int fd = it->fd;
      it->next = NULL;

      /* One thread less available.  */
      --nready;

      /* We are done with the list.  */
      pthread_mutex_unlock (&readylist_lock);

      /* Now read the request.  */
      request_header req;
      if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
                            != sizeof (req), 0))
        {
          /* We failed to read data.  Note that this also might mean we
             failed because we would have blocked.  */
          if (debug_level > 0)
            dbg_log (_("short read while reading request: %s"),
                     strerror_r (errno, buf, sizeof (buf)));
          goto close_and_out;
        }

      /* Check whether this is a valid request type.  */
      if (req.type < GETPWBYNAME || req.type >= LASTREQ)
        goto close_and_out;

      /* Some systems have no SO_PEERCRED implementation.  They don't
         care about security so we don't as well.  The uid is passed on
         as -1 in either case; only the PID is looked up here, and only
         when debugging output is enabled.  */
      uid_t uid = -1;
#ifdef SO_PEERCRED
      pid_t pid = 0;

      if (__glibc_unlikely (debug_level > 0))
        {
          struct ucred caller;
          socklen_t optlen = sizeof (caller);

          if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
            pid = caller.pid;
        }
#else
      const pid_t pid = 0;
#endif

      /* It should not be possible to crash the nscd with a silly
         request (i.e., a terribly large key).  We limit the size to
         MAXKEYLEN.  Negative lengths are rejected as well.  */
      if (__builtin_expect (req.key_len, 1) < 0
          || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
        {
          if (debug_level > 0)
            dbg_log (_("key length in request too long: %d"), req.key_len);
        }
      else
        {
          /* Get the key.  The buffer has room for an additional NUL
             byte.  */
          char keybuf[MAXKEYLEN + 1];

          if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
                                                          req.key_len))
                                != req.key_len, 0))
            {
              /* Again, this can also mean we would have blocked.  */
              if (debug_level > 0)
                dbg_log (_("short read while reading request key: %s"),
                         strerror_r (errno, buf, sizeof (buf)));
              goto close_and_out;
            }
          keybuf[req.key_len] = '\0';

          if (__builtin_expect (debug_level, 0) > 0)
            {
#ifdef SO_PEERCRED
              if (pid != 0)
                dbg_log (_("\
handle_request: request received (Version = %d) from PID %ld"),
                         req.version, (long int) pid);
              else
#endif
                dbg_log (_("\
handle_request: request received (Version = %d)"), req.version);
            }

          /* Phew, we got all the data, now process it.  */
          handle_request (fd, &req, keybuf, uid, pid);
        }

    close_and_out:
      /* We are done.  */
      close (fd);

      /* Re-locking.  */
      pthread_mutex_lock (&readylist_lock);

      /* One more thread available.  */
      ++nready;
    }
  /* NOTREACHED */
}
1695
1696
1697 static unsigned int nconns;
1698
1699 static void
1700 fd_ready (int fd)
1701 {
1702   pthread_mutex_lock (&readylist_lock);
1703
1704   /* Find an empty entry in FDLIST.  */
1705   size_t inner;
1706   for (inner = 0; inner < nconns; ++inner)
1707     if (fdlist[inner].next == NULL)
1708       break;
1709   assert (inner < nconns);
1710
1711   fdlist[inner].fd = fd;
1712
1713   if (readylist == NULL)
1714     readylist = fdlist[inner].next = &fdlist[inner];
1715   else
1716     {
1717       fdlist[inner].next = readylist->next;
1718       readylist = readylist->next = &fdlist[inner];
1719     }
1720
1721   bool do_signal = true;
1722   if (__glibc_unlikely (nready == 0))
1723     {
1724       ++client_queued;
1725       do_signal = false;
1726
1727       /* Try to start another thread to help out.  */
1728       pthread_t th;
1729       if (nthreads < max_nthreads
1730           && pthread_create (&th, &attr, nscd_run_worker,
1731                              (void *) (long int) nthreads) == 0)
1732         {
1733           /* We got another thread.  */
1734           ++nthreads;
1735           /* The new thread might need a kick.  */
1736           do_signal = true;
1737         }
1738
1739     }
1740
1741   pthread_mutex_unlock (&readylist_lock);
1742
1743   /* Tell one of the worker threads there is work to do.  */
1744   if (do_signal)
1745     pthread_cond_signal (&readylist_cond);
1746 }
1747
1748
1749 /* Check whether restarting should happen.  */
1750 static bool
1751 restart_p (time_t now)
1752 {
1753   return (paranoia && readylist == NULL && nready == nthreads
1754           && now >= restart_time);
1755 }
1756
1757
/* Array for times a connection was accepted.
   NOTE(review): allocation and indexing of the array are not part of
   this section of the file -- presumably there is one element per
   connection slot; confirm.  */
static time_t *starttime;
1760
1761 #ifdef HAVE_INOTIFY
/* Inotify event for changed file.  The union provides a buffer which
   is suitably aligned for struct inotify_event and has room for the
   event header followed by PATH_MAX bytes.  */
union __inev
{
  /* Typed view of the event at the start of the buffer.  */
  struct inotify_event i;
  /* Fallback if the system headers do not define PATH_MAX.  */
# ifndef PATH_MAX
#  define PATH_MAX 1024
# endif
  /* The raw storage.  */
  char buf[sizeof (struct inotify_event) + PATH_MAX];
};
1771
1772 /* Returns 0 if the file is there otherwise -1.  */
1773 int
1774 check_file (struct traced_file *finfo)
1775 {
1776   struct stat64 st;
1777   /* We could check mtime and if different re-add
1778      the watches, and invalidate the database, but we
1779      don't because we are called from inotify_check_files
1780      which should be doing that work.  If sufficient inotify
1781      events were lost then the next pruning or invalidation
1782      will do the stat and mtime check.  We don't do it here to
1783      keep the logic simple.  */
1784   if (stat64 (finfo->fname, &st) < 0)
1785     return -1;
1786   return 0;
1787 }
1788
1789 /* Process the inotify event in INEV. If the event matches any of the files
1790    registered with a database then mark that database as requiring its cache
1791    to be cleared. We indicate the cache needs clearing by setting
1792    TO_CLEAR[DBCNT] to true for the matching database.  */
1793 static void
1794 inotify_check_files (bool *to_clear, union __inev *inev)
1795 {
1796   /* Check which of the files changed.  */
1797   for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1798     {
1799       struct traced_file *finfo = dbs[dbcnt].traced_files;
1800
1801       while (finfo != NULL)
1802         {
1803           /* The configuration file was moved or deleted.
1804              We stop watching it at that point, and reinitialize.  */
1805           if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
1806               && ((inev->i.mask & IN_MOVE_SELF)
1807                   || (inev->i.mask & IN_DELETE_SELF)
1808                   || (inev->i.mask & IN_IGNORED)))
1809             {
1810               int ret;
1811               bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
1812
1813               if (check_file (finfo) == 0)
1814                 {
1815                   dbg_log (_("ignored inotify event for `%s` (file exists)"),
1816                            finfo->fname);
1817                   return;
1818                 }
1819
1820               dbg_log (_("monitored file `%s` was %s, removing watch"),
1821                        finfo->fname, moved ? "moved" : "deleted");
1822               /* File was moved out, remove the watch.  Watches are
1823                  automatically removed when the file is deleted.  */
1824               if (moved)
1825                 {
1826                   ret = inotify_rm_watch (inotify_fd, inev->i.wd);
1827                   if (ret < 0)
1828                     dbg_log (_("failed to remove file watch `%s`: %s"),
1829                              finfo->fname, strerror (errno));
1830                 }
1831               finfo->inotify_descr[TRACED_FILE] = -1;
1832               to_clear[dbcnt] = true;
1833               if (finfo->call_res_init)
1834                 res_init ();
1835               return;
1836             }
1837           /* The configuration file was open for writing and has just closed.
1838              We reset the cache and reinitialize.  */
1839           if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
1840               && inev->i.mask & IN_CLOSE_WRITE)
1841             {
1842               /* Mark cache as needing to be cleared and reinitialize.  */
1843               dbg_log (_("monitored file `%s` was written to"), finfo->fname);
1844               to_clear[dbcnt] = true;
1845               if (finfo->call_res_init)
1846                 res_init ();
1847               return;
1848             }
1849           /* The parent directory was moved or deleted.  We trigger one last
1850              invalidation.  At the next pruning or invalidation we may add
1851              this watch back if the file is present again.  */
1852           if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
1853               && ((inev->i.mask & IN_DELETE_SELF)
1854                   || (inev->i.mask & IN_MOVE_SELF)
1855                   || (inev->i.mask & IN_IGNORED)))
1856             {
1857               bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
1858               /* The directory watch may have already been removed
1859                  but we don't know so we just remove it again and
1860                  ignore the error.  Then we remove the file watch.
1861                  Note: watches are automatically removed for deleted
1862                  files.  */
1863               if (moved)
1864                 inotify_rm_watch (inotify_fd, inev->i.wd);
1865               if (finfo->inotify_descr[TRACED_FILE] != -1)
1866                 {
1867                   dbg_log (_("monitored parent directory `%s` was %s, removing watch on `%s`"),
1868                            finfo->dname, moved ? "moved" : "deleted", finfo->fname);
1869                   if (inotify_rm_watch (inotify_fd, finfo->inotify_descr[TRACED_FILE]) < 0)
1870                     dbg_log (_("failed to remove file watch `%s`: %s"),
1871                              finfo->dname, strerror (errno));
1872                 }
1873               finfo->inotify_descr[TRACED_FILE] = -1;
1874               finfo->inotify_descr[TRACED_DIR] = -1;
1875               to_clear[dbcnt] = true;
1876               if (finfo->call_res_init)
1877                 res_init ();
1878               /* Continue to the next entry since this might be the
1879                  parent directory for multiple registered files and
1880                  we want to remove watches for all registered files.  */
1881               continue;
1882             }
1883           /* The parent directory had a create or moved to event.  */
1884           if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
1885               && ((inev->i.mask & IN_MOVED_TO)
1886                   || (inev->i.mask & IN_CREATE))
1887               && strcmp (inev->i.name, finfo->sfname) == 0)
1888             {
1889               /* We detected a directory change.  We look for the creation
1890                  of the file we are tracking or the move of the same file
1891                  into the directory.  */
1892               int ret;
1893               dbg_log (_("monitored file `%s` was %s, adding watch"),
1894                        finfo->fname,
1895                        inev->i.mask & IN_CREATE ? "created" : "moved into place");
1896               /* File was moved in or created.  Regenerate the watch.  */
1897               if (finfo->inotify_descr[TRACED_FILE] != -1)
1898                 inotify_rm_watch (inotify_fd,
1899                                   finfo->inotify_descr[TRACED_FILE]);
1900
1901               ret = inotify_add_watch (inotify_fd,
1902                                        finfo->fname,
1903                                        TRACED_FILE_MASK);
1904               if (ret < 0)
1905                 dbg_log (_("failed to add file watch `%s`: %s"),
1906                          finfo->fname, strerror (errno));
1907
1908               finfo->inotify_descr[TRACED_FILE] = ret;
1909
1910               /* The file is new or moved so mark cache as needing to
1911                  be cleared and reinitialize.  */
1912               to_clear[dbcnt] = true;
1913               if (finfo->call_res_init)
1914                 res_init ();
1915
1916               /* Done re-adding the watch.  Don't return, we may still
1917                  have other files in this same directory, same watch
1918                  descriptor, and need to process them.  */
1919             }
1920           /* Other events are ignored, and we move on to the next file.  */
1921           finfo = finfo->next;
1922         }
1923     }
1924 }
1925
1926 /* If an entry in the array of booleans TO_CLEAR is TRUE then clear the cache
1927    for the associated database, otherwise do nothing. The TO_CLEAR array must
1928    have LASTDB entries.  */
1929 static inline void
1930 clear_db_cache (bool *to_clear)
1931 {
1932   for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1933     if (to_clear[dbcnt])
1934       {
1935         pthread_mutex_lock (&dbs[dbcnt].prune_lock);
1936         dbs[dbcnt].clear_cache = 1;
1937         pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
1938         pthread_cond_signal (&dbs[dbcnt].prune_cond);
1939       }
1940 }
1941
1942 int
1943 handle_inotify_events (void)
1944 {
1945   bool to_clear[lastdb] = { false, };
1946   union __inev inev;
1947
1948   /* Read all inotify events for files registered via
1949      register_traced_file().  */
1950   while (1)
1951     {
1952       /* Potentially read multiple events into buf.  */
1953       ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd,
1954                                              &inev.buf,
1955                                              sizeof (inev)));
1956       if (nb < (ssize_t) sizeof (struct inotify_event))
1957         {
1958           /* Not even 1 event.  */
1959           if (__glibc_unlikely (nb == -1 && errno != EAGAIN))
1960             return -1;
1961           /* Done reading events that are ready.  */
1962           break;
1963         }
1964       /* Process all events.  The normal inotify interface delivers
1965          complete events on a read and never a partial event.  */
1966       char *eptr = &inev.buf[0];
1967       ssize_t count;
1968       while (1)
1969         {
1970           /* Check which of the files changed.  */
1971           inotify_check_files (to_clear, &inev);
1972           count = sizeof (struct inotify_event) + inev.i.len;
1973           eptr += count;
1974           nb -= count;
1975           if (nb >= (ssize_t) sizeof (struct inotify_event))
1976             memcpy (&inev, eptr, nb);
1977           else
1978             break;
1979         }
1980       continue;
1981     }
1982   /* Actually perform the cache clearing.  */
1983   clear_db_cache (to_clear);
1984   return 0;
1985 }
1986
1987 #endif
1988
1989 static void
1990 __attribute__ ((__noreturn__))
1991 main_loop_poll (void)
1992 {
1993   struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1994                                                     * sizeof (conns[0]));
1995
1996   conns[0].fd = sock;
1997   conns[0].events = POLLRDNORM;
1998   size_t nused = 1;
1999   size_t firstfree = 1;
2000
2001 #ifdef HAVE_INOTIFY
2002   if (inotify_fd != -1)
2003     {
2004       conns[1].fd = inotify_fd;
2005       conns[1].events = POLLRDNORM;
2006       nused = 2;
2007       firstfree = 2;
2008     }
2009 #endif
2010
2011 #ifdef HAVE_NETLINK
2012   size_t idx_nl_status_fd = 0;
2013   if (nl_status_fd != -1)
2014     {
2015       idx_nl_status_fd = nused;
2016       conns[nused].fd = nl_status_fd;
2017       conns[nused].events = POLLRDNORM;
2018       ++nused;
2019       firstfree = nused;
2020     }
2021 #endif
2022
2023   while (1)
2024     {
2025       /* Wait for any event.  We wait at most a couple of seconds so
2026          that we can check whether we should close any of the accepted
2027          connections since we have not received a request.  */
2028 #define MAX_ACCEPT_TIMEOUT 30
2029 #define MIN_ACCEPT_TIMEOUT 5
2030 #define MAIN_THREAD_TIMEOUT \
2031   (MAX_ACCEPT_TIMEOUT * 1000                                                  \
2032    - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
2033
2034       int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
2035
2036       time_t now = time (NULL);
2037
2038       /* If there is a descriptor ready for reading or there is a new
2039          connection, process this now.  */
2040       if (n > 0)
2041         {
2042           if (conns[0].revents != 0)
2043             {
2044               /* We have a new incoming connection.  Accept the connection.  */
2045               int fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
2046                                                     SOCK_NONBLOCK));
2047
2048               /* Use the descriptor if we have not reached the limit.  */
2049               if (fd >= 0)
2050                 {
2051                   if (firstfree < nconns)
2052                     {
2053                       conns[firstfree].fd = fd;
2054                       conns[firstfree].events = POLLRDNORM;
2055                       starttime[firstfree] = now;
2056                       if (firstfree >= nused)
2057                         nused = firstfree + 1;
2058
2059                       do
2060                         ++firstfree;
2061                       while (firstfree < nused && conns[firstfree].fd != -1);
2062                     }
2063                   else
2064                     /* We cannot use the connection so close it.  */
2065                     close (fd);
2066                 }
2067
2068               --n;
2069             }
2070
2071           size_t first = 1;
2072 #ifdef HAVE_INOTIFY
2073           if (inotify_fd != -1 && conns[1].fd == inotify_fd)
2074             {
2075               if (conns[1].revents != 0)
2076                 {
2077                   int ret;
2078                   ret = handle_inotify_events ();
2079                   if (ret == -1)
2080                     {
2081                       /* Something went wrong when reading the inotify
2082                          data.  Better disable inotify.  */
2083                       dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
2084                       conns[1].fd = -1;
2085                       firstfree = 1;
2086                       if (nused == 2)
2087                         nused = 1;
2088                       close (inotify_fd);
2089                       inotify_fd = -1;
2090                     }
2091                   --n;
2092                 }
2093
2094               first = 2;
2095             }
2096 #endif
2097
2098 #ifdef HAVE_NETLINK
2099           if (idx_nl_status_fd != 0 && conns[idx_nl_status_fd].revents != 0)
2100             {
2101               char buf[4096];
2102               /* Read all the data.  We do not interpret it here.  */
2103               while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2104                                                sizeof (buf))) != -1)
2105                 ;
2106
2107               dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
2108                 = __bump_nl_timestamp ();
2109             }
2110 #endif
2111
2112           for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
2113             if (conns[cnt].revents != 0)
2114               {
2115                 fd_ready (conns[cnt].fd);
2116
2117                 /* Clean up the CONNS array.  */
2118                 conns[cnt].fd = -1;
2119                 if (cnt < firstfree)
2120                   firstfree = cnt;
2121                 if (cnt == nused - 1)
2122                   do
2123                     --nused;
2124                   while (conns[nused - 1].fd == -1);
2125
2126                 --n;
2127               }
2128         }
2129
2130       /* Now find entries which have timed out.  */
2131       assert (nused > 0);
2132
2133       /* We make the timeout length depend on the number of file
2134          descriptors currently used.  */
2135 #define ACCEPT_TIMEOUT \
2136   (MAX_ACCEPT_TIMEOUT                                                         \
2137    - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
2138       time_t laststart = now - ACCEPT_TIMEOUT;
2139
2140       for (size_t cnt = nused - 1; cnt > 0; --cnt)
2141         {
2142           if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
2143             {
2144               /* Remove the entry, it timed out.  */
2145               (void) close (conns[cnt].fd);
2146               conns[cnt].fd = -1;
2147
2148               if (cnt < firstfree)
2149                 firstfree = cnt;
2150               if (cnt == nused - 1)
2151                 do
2152                   --nused;
2153                 while (conns[nused - 1].fd == -1);
2154             }
2155         }
2156
2157       if (restart_p (now))
2158         restart ();
2159     }
2160 }
2161
2162
2163 #ifdef HAVE_EPOLL
2164 static void
2165 main_loop_epoll (int efd)
2166 {
2167   struct epoll_event ev = { 0, };
2168   int nused = 1;
2169   size_t highest = 0;
2170
2171   /* Add the socket.  */
2172   ev.events = EPOLLRDNORM;
2173   ev.data.fd = sock;
2174   if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
2175     /* We cannot use epoll.  */
2176     return;
2177
2178 # ifdef HAVE_INOTIFY
2179   if (inotify_fd != -1)
2180     {
2181       ev.events = EPOLLRDNORM;
2182       ev.data.fd = inotify_fd;
2183       if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
2184         /* We cannot use epoll.  */
2185         return;
2186       nused = 2;
2187     }
2188 # endif
2189
2190 # ifdef HAVE_NETLINK
2191   if (nl_status_fd != -1)
2192     {
2193       ev.events = EPOLLRDNORM;
2194       ev.data.fd = nl_status_fd;
2195       if (epoll_ctl (efd, EPOLL_CTL_ADD, nl_status_fd, &ev) == -1)
2196         /* We cannot use epoll.  */
2197         return;
2198     }
2199 # endif
2200
2201   while (1)
2202     {
2203       struct epoll_event revs[100];
2204 # define nrevs (sizeof (revs) / sizeof (revs[0]))
2205
2206       int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);
2207
2208       time_t now = time (NULL);
2209
2210       for (int cnt = 0; cnt < n; ++cnt)
2211         if (revs[cnt].data.fd == sock)
2212           {
2213             /* A new connection.  */
2214             int fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
2215                                                   SOCK_NONBLOCK));
2216
2217             /* Use the descriptor if we have not reached the limit.  */
2218             if (fd >= 0)
2219               {
2220                 /* Try to add the  new descriptor.  */
2221                 ev.data.fd = fd;
2222                 if (fd >= nconns
2223                     || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
2224                   /* The descriptor is too large or something went
2225                      wrong.  Close the descriptor.  */
2226                   close (fd);
2227                 else
2228                   {
2229                     /* Remember when we accepted the connection.  */
2230                     starttime[fd] = now;
2231
2232                     if (fd > highest)
2233                       highest = fd;
2234
2235                     ++nused;
2236                   }
2237               }
2238           }
2239 # ifdef HAVE_INOTIFY
2240         else if (revs[cnt].data.fd == inotify_fd)
2241           {
2242             int ret;
2243             ret = handle_inotify_events ();
2244             if (ret == -1)
2245               {
2246                 /* Something went wrong when reading the inotify
2247                    data.  Better disable inotify.  */
2248                 dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
2249                 (void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd, NULL);
2250                 close (inotify_fd);
2251                 inotify_fd = -1;
2252                 break;
2253               }
2254           }
2255 # endif
2256 # ifdef HAVE_NETLINK
2257         else if (revs[cnt].data.fd == nl_status_fd)
2258           {
2259             char buf[4096];
2260             /* Read all the data.  We do not interpret it here.  */
2261             while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2262                                              sizeof (buf))) != -1)
2263               ;
2264
2265             __bump_nl_timestamp ();
2266           }
2267 # endif
2268         else
2269           {
2270             /* Remove the descriptor from the epoll descriptor.  */
2271             (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);
2272
2273             /* Get a worker to handle the request.  */
2274             fd_ready (revs[cnt].data.fd);
2275
2276             /* Reset the time.  */
2277             starttime[revs[cnt].data.fd] = 0;
2278             if (revs[cnt].data.fd == highest)
2279               do
2280                 --highest;
2281               while (highest > 0 && starttime[highest] == 0);
2282
2283             --nused;
2284           }
2285
2286       /*  Now look for descriptors for accepted connections which have
2287           no reply in too long of a time.  */
2288       time_t laststart = now - ACCEPT_TIMEOUT;
2289       assert (starttime[sock] == 0);
2290 # ifdef HAVE_INOTIFY
2291       assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
2292 # endif
2293       assert (nl_status_fd == -1 || starttime[nl_status_fd] == 0);
2294       for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
2295         if (starttime[cnt] != 0 && starttime[cnt] < laststart)
2296           {
2297             /* We are waiting for this one for too long.  Close it.  */
2298             (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);
2299
2300             (void) close (cnt);
2301
2302             starttime[cnt] = 0;
2303             if (cnt == highest)
2304               --highest;
2305           }
2306         else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
2307           --highest;
2308
2309       if (restart_p (now))
2310         restart ();
2311     }
2312 }
2313 #endif
2314
2315
2316 /* Start all the threads we want.  The initial process is thread no. 1.  */
2317 void
2318 start_threads (void)
2319 {
2320   /* Initialize the conditional variable we will use.  The only
2321      non-standard attribute we might use is the clock selection.  */
2322   pthread_condattr_t condattr;
2323   pthread_condattr_init (&condattr);
2324
2325 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2326     && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2327   /* Determine whether the monotonous clock is available.  */
2328   struct timespec dummy;
2329 # if _POSIX_MONOTONIC_CLOCK == 0
2330   if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
2331 # endif
2332 # if _POSIX_CLOCK_SELECTION == 0
2333     if (sysconf (_SC_CLOCK_SELECTION) > 0)
2334 # endif
2335       if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
2336           && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
2337         timeout_clock = CLOCK_MONOTONIC;
2338 #endif
2339
2340   /* Create the attribute for the threads.  They are all created
2341      detached.  */
2342   pthread_attr_init (&attr);
2343   pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
2344   /* Use 1MB stacks, twice as much for 64-bit architectures.  */
2345   pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
2346
2347   /* We allow less than LASTDB threads only for debugging.  */
2348   if (debug_level == 0)
2349     nthreads = MAX (nthreads, lastdb);
2350
2351   /* Create the threads which prune the databases.  */
2352   // XXX Ideally this work would be done by some of the worker threads.
2353   // XXX But this is problematic since we would need to be able to wake
2354   // XXX them up explicitly as well as part of the group handling the
2355   // XXX ready-list.  This requires an operation where we can wait on
2356   // XXX two conditional variables at the same time.  This operation
2357   // XXX does not exist (yet).
2358   for (long int i = 0; i < lastdb; ++i)
2359     {
2360       /* Initialize the conditional variable.  */
2361       if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
2362         {
2363           dbg_log (_("could not initialize conditional variable"));
2364           do_exit (1, 0, NULL);
2365         }
2366
2367       pthread_t th;
2368       if (dbs[i].enabled
2369           && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
2370         {
2371           dbg_log (_("could not start clean-up thread; terminating"));
2372           do_exit (1, 0, NULL);
2373         }
2374     }
2375
2376   pthread_condattr_destroy (&condattr);
2377
2378   for (long int i = 0; i < nthreads; ++i)
2379     {
2380       pthread_t th;
2381       if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
2382         {
2383           if (i == 0)
2384             {
2385               dbg_log (_("could not start any worker thread; terminating"));
2386               do_exit (1, 0, NULL);
2387             }
2388
2389           break;
2390         }
2391     }
2392
2393   /* Now it is safe to let the parent know that we're doing fine and it can
2394      exit.  */
2395   notify_parent (0);
2396
2397   /* Determine how much room for descriptors we should initially
2398      allocate.  This might need to change later if we cap the number
2399      with MAXCONN.  */
2400   const long int nfds = sysconf (_SC_OPEN_MAX);
2401 #define MINCONN 32
2402 #define MAXCONN 16384
2403   if (nfds == -1 || nfds > MAXCONN)
2404     nconns = MAXCONN;
2405   else if (nfds < MINCONN)
2406     nconns = MINCONN;
2407   else
2408     nconns = nfds;
2409
2410   /* We need memory to pass descriptors on to the worker threads.  */
2411   fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
2412   /* Array to keep track when connection was accepted.  */
2413   starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
2414
2415   /* In the main thread we execute the loop which handles incoming
2416      connections.  */
2417 #ifdef HAVE_EPOLL
2418   int efd = epoll_create (100);
2419   if (efd != -1)
2420     {
2421       main_loop_epoll (efd);
2422       close (efd);
2423     }
2424 #endif
2425
2426   main_loop_poll ();
2427 }
2428
2429
2430 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2431    this function is called, we are not listening on the nscd socket yet so
2432    we can just use the ordinary lookup functions without causing a lockup  */
2433 static void
2434 begin_drop_privileges (void)
2435 {
2436   struct passwd *pwd = getpwnam (server_user);
2437
2438   if (pwd == NULL)
2439     {
2440       dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2441       do_exit (EXIT_FAILURE, 0,
2442                _("Failed to run nscd as user '%s'"), server_user);
2443     }
2444
2445   server_uid = pwd->pw_uid;
2446   server_gid = pwd->pw_gid;
2447
2448   /* Save the old UID/GID if we have to change back.  */
2449   if (paranoia)
2450     {
2451       old_uid = getuid ();
2452       old_gid = getgid ();
2453     }
2454
2455   if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
2456     {
2457       /* This really must never happen.  */
2458       dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2459       do_exit (EXIT_FAILURE, errno,
2460                _("initial getgrouplist failed"));
2461     }
2462
2463   server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
2464
2465   if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
2466       == -1)
2467     {
2468       dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2469       do_exit (EXIT_FAILURE, errno, _("getgrouplist failed"));
2470     }
2471 }
2472
2473
2474 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2475    run nscd as the user specified in the configuration file.  */
2476 static void
2477 finish_drop_privileges (void)
2478 {
2479 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2480   /* We need to preserve the capabilities to connect to the audit daemon.  */
2481   cap_t new_caps = preserve_capabilities ();
2482 #endif
2483
2484   if (setgroups (server_ngroups, server_groups) == -1)
2485     {
2486       dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2487       do_exit (EXIT_FAILURE, errno, _("setgroups failed"));
2488     }
2489
2490   int res;
2491   if (paranoia)
2492     res = setresgid (server_gid, server_gid, old_gid);
2493   else
2494     res = setgid (server_gid);
2495   if (res == -1)
2496     {
2497       dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2498       do_exit (4, errno, "setgid");
2499     }
2500
2501   if (paranoia)
2502     res = setresuid (server_uid, server_uid, old_uid);
2503   else
2504     res = setuid (server_uid);
2505   if (res == -1)
2506     {
2507       dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2508       do_exit (4, errno, "setuid");
2509     }
2510
2511 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2512   /* Remove the temporary capabilities.  */
2513   install_real_capabilities (new_caps);
2514 #endif
2515 }