Update copyright dates with scripts/update-copyrights.
[glibc.git] / nscd / connections.c
blob985eab6a30a39ce712b6d80b76dfd67acd0851ae
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2015 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>. */
19 #include <alloca.h>
20 #include <assert.h>
21 #include <atomic.h>
22 #include <error.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <grp.h>
26 #include <ifaddrs.h>
27 #include <libintl.h>
28 #include <pthread.h>
29 #include <pwd.h>
30 #include <resolv.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <stdint.h>
35 #include <arpa/inet.h>
36 #ifdef HAVE_NETLINK
37 # include <linux/netlink.h>
38 # include <linux/rtnetlink.h>
39 #endif
40 #ifdef HAVE_EPOLL
41 # include <sys/epoll.h>
42 #endif
43 #ifdef HAVE_INOTIFY
44 # include <sys/inotify.h>
45 #endif
46 #include <sys/mman.h>
47 #include <sys/param.h>
48 #include <sys/poll.h>
49 #ifdef HAVE_SENDFILE
50 # include <sys/sendfile.h>
51 #endif
52 #include <sys/socket.h>
53 #include <sys/stat.h>
54 #include <sys/un.h>
56 #include "nscd.h"
57 #include "dbg_log.h"
58 #include "selinux.h"
59 #include <resolv/resolv.h>
61 #include <kernel-features.h>
62 #include <libc-internal.h>
/* Support to run nscd as an unprivileged user.  */

/* Name of the user to run as.  nscd_init only drops privileges when this
   is non-NULL.  */
const char *server_user;
/* Credentials belonging to SERVER_USER; presumably filled in by
   begin_drop_privileges, which is defined outside this excerpt.  */
static uid_t server_uid;
static gid_t server_gid;
/* User name and uid used for statistics requests; their consumers are
   not visible in this part of the file.  */
const char *stat_user;
uid_t stat_uid;
/* Supplementary group list for SERVER_USER and its length.  */
static gid_t *server_groups;
#ifndef NGROUPS
# define NGROUPS 32
#endif
static int server_ngroups;

/* Thread attribute object; it is used outside this excerpt.  */
static pthread_attr_t attr;

/* Two-phase privilege drop: nscd_init calls the first function before
   the databases and sockets are set up and the second one afterwards.  */
static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
82 /* Map request type to a string. */
83 const char *const serv2str[LASTREQ] =
85 [GETPWBYNAME] = "GETPWBYNAME",
86 [GETPWBYUID] = "GETPWBYUID",
87 [GETGRBYNAME] = "GETGRBYNAME",
88 [GETGRBYGID] = "GETGRBYGID",
89 [GETHOSTBYNAME] = "GETHOSTBYNAME",
90 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
91 [GETHOSTBYADDR] = "GETHOSTBYADDR",
92 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
93 [SHUTDOWN] = "SHUTDOWN",
94 [GETSTAT] = "GETSTAT",
95 [INVALIDATE] = "INVALIDATE",
96 [GETFDPW] = "GETFDPW",
97 [GETFDGR] = "GETFDGR",
98 [GETFDHST] = "GETFDHST",
99 [GETAI] = "GETAI",
100 [INITGROUPS] = "INITGROUPS",
101 [GETSERVBYNAME] = "GETSERVBYNAME",
102 [GETSERVBYPORT] = "GETSERVBYPORT",
103 [GETFDSERV] = "GETFDSERV",
104 [GETNETGRENT] = "GETNETGRENT",
105 [INNETGR] = "INNETGR",
106 [GETFDNETGR] = "GETFDNETGR"
109 /* The control data structures for the services. */
110 struct database_dyn dbs[lastdb] =
112 [pwddb] = {
113 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
114 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
115 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
116 .enabled = 0,
117 .check_file = 1,
118 .persistent = 0,
119 .propagate = 1,
120 .shared = 0,
121 .max_db_size = DEFAULT_MAX_DB_SIZE,
122 .suggested_module = DEFAULT_SUGGESTED_MODULE,
123 .db_filename = _PATH_NSCD_PASSWD_DB,
124 .disabled_iov = &pwd_iov_disabled,
125 .postimeout = 3600,
126 .negtimeout = 20,
127 .wr_fd = -1,
128 .ro_fd = -1,
129 .mmap_used = false
131 [grpdb] = {
132 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
133 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
134 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
135 .enabled = 0,
136 .check_file = 1,
137 .persistent = 0,
138 .propagate = 1,
139 .shared = 0,
140 .max_db_size = DEFAULT_MAX_DB_SIZE,
141 .suggested_module = DEFAULT_SUGGESTED_MODULE,
142 .db_filename = _PATH_NSCD_GROUP_DB,
143 .disabled_iov = &grp_iov_disabled,
144 .postimeout = 3600,
145 .negtimeout = 60,
146 .wr_fd = -1,
147 .ro_fd = -1,
148 .mmap_used = false
150 [hstdb] = {
151 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
152 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
153 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
154 .enabled = 0,
155 .check_file = 1,
156 .persistent = 0,
157 .propagate = 0, /* Not used. */
158 .shared = 0,
159 .max_db_size = DEFAULT_MAX_DB_SIZE,
160 .suggested_module = DEFAULT_SUGGESTED_MODULE,
161 .db_filename = _PATH_NSCD_HOSTS_DB,
162 .disabled_iov = &hst_iov_disabled,
163 .postimeout = 3600,
164 .negtimeout = 20,
165 .wr_fd = -1,
166 .ro_fd = -1,
167 .mmap_used = false
169 [servdb] = {
170 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
171 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
172 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
173 .enabled = 0,
174 .check_file = 1,
175 .persistent = 0,
176 .propagate = 0, /* Not used. */
177 .shared = 0,
178 .max_db_size = DEFAULT_MAX_DB_SIZE,
179 .suggested_module = DEFAULT_SUGGESTED_MODULE,
180 .db_filename = _PATH_NSCD_SERVICES_DB,
181 .disabled_iov = &serv_iov_disabled,
182 .postimeout = 28800,
183 .negtimeout = 20,
184 .wr_fd = -1,
185 .ro_fd = -1,
186 .mmap_used = false
188 [netgrdb] = {
189 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
190 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
191 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
192 .enabled = 0,
193 .check_file = 1,
194 .persistent = 0,
195 .propagate = 0, /* Not used. */
196 .shared = 0,
197 .max_db_size = DEFAULT_MAX_DB_SIZE,
198 .suggested_module = DEFAULT_SUGGESTED_MODULE,
199 .db_filename = _PATH_NSCD_NETGROUP_DB,
200 .disabled_iov = &netgroup_iov_disabled,
201 .postimeout = 28800,
202 .negtimeout = 20,
203 .wr_fd = -1,
204 .ro_fd = -1,
205 .mmap_used = false
210 /* Mapping of request type to database. */
211 static struct
213 bool data_request;
214 struct database_dyn *db;
215 } const reqinfo[LASTREQ] =
217 [GETPWBYNAME] = { true, &dbs[pwddb] },
218 [GETPWBYUID] = { true, &dbs[pwddb] },
219 [GETGRBYNAME] = { true, &dbs[grpdb] },
220 [GETGRBYGID] = { true, &dbs[grpdb] },
221 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
222 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
223 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
224 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
225 [SHUTDOWN] = { false, NULL },
226 [GETSTAT] = { false, NULL },
227 [SHUTDOWN] = { false, NULL },
228 [GETFDPW] = { false, &dbs[pwddb] },
229 [GETFDGR] = { false, &dbs[grpdb] },
230 [GETFDHST] = { false, &dbs[hstdb] },
231 [GETAI] = { true, &dbs[hstdb] },
232 [INITGROUPS] = { true, &dbs[grpdb] },
233 [GETSERVBYNAME] = { true, &dbs[servdb] },
234 [GETSERVBYPORT] = { true, &dbs[servdb] },
235 [GETFDSERV] = { false, &dbs[servdb] },
236 [GETNETGRENT] = { true, &dbs[netgrdb] },
237 [INNETGR] = { true, &dbs[netgrdb] },
238 [GETFDNETGR] = { false, &dbs[netgrdb] }
/* Initial number of threads to use.  -1 means "not configured";
   nscd_init then substitutes its default.  */
int nthreads = -1;
/* Maximum number of threads to use.  */
int max_nthreads = 32;

/* Socket for incoming connections.  */
static int sock;

#ifdef HAVE_INOTIFY
/* Inotify descriptor.  */
int inotify_fd = -1;
#endif

#ifdef HAVE_NETLINK
/* Descriptor for netlink status updates.  */
static int nl_status_fd = -1;
#endif

#ifndef __ASSUME_SOCK_CLOEXEC
/* Negative if SOCK_CLOEXEC is not supported, positive if it is, zero
   before we know the result.  */
static int have_sock_cloexec;
#endif
#ifndef __ASSUME_ACCEPT4
/* Availability flag for accept4; it is used outside this excerpt.  */
static int have_accept4;
#endif

/* Number of times clients had to wait.  */
unsigned long int client_queued;
/* Send all LEN bytes of BUF over the socket FD, restarting after
   interruptions and short writes.  SIGPIPE is suppressed with
   MSG_NOSIGNAL.

   Returns the negative result of send if a send call failed, otherwise
   the number of bytes sent.  That number is smaller than LEN only if
   send returned zero before everything was written.  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  size_t n = len;
  ssize_t ret;

  do
    {
      ret = TEMP_FAILURE_RETRY (send (fd, buf, n, MSG_NOSIGNAL));
      if (ret <= 0)
        break;
      /* Skip over what has been sent already.  */
      buf = (const char *) buf + ret;
      n -= ret;
    }
  while (n > 0);

  return ret < 0 ? ret : len - n;
}
#ifdef HAVE_SENDFILE
/* Copy LEN bytes starting at offset OFF of the file FROMFD to TOFD using
   sendfile, restarting after interruptions and short transfers.

   Returns the negative result of sendfile if a call failed, otherwise
   the number of bytes transferred.  That number is smaller than LEN only
   if sendfile returned zero early.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t n = len;
  ssize_t ret;

  do
    {
      /* sendfile advances OFF itself, only the count needs updating.  */
      ret = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, n));
      if (ret <= 0)
        break;
      n -= ret;
    }
  while (n > 0);

  return ret < 0 ? ret : len - n;
}
#endif
/* Markers stored in the per-byte usage map which check_use maintains
   while a persistent database is verified.  The low bits give the kind
   of object occupying a byte, the high bits flag its first/last byte.  */
enum usekey
  {
    use_not = 0,
    /* The following three are not really used, they are symbolic constants.  */
    use_first = 16,
    use_begin = 32,
    use_end = 64,

    use_he = 1,
    use_he_begin = use_he | use_begin,
    use_he_end = use_he | use_end,
    use_data = 3,
    use_data_begin = use_data | use_begin,
    use_data_end = use_data | use_end,
    use_data_first = use_data_begin | use_first
  };
329 static int
330 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
331 enum usekey use, ref_t start, size_t len)
333 assert (len >= 2);
335 if (start > first_free || start + len > first_free
336 || (start & BLOCK_ALIGN_M1))
337 return 0;
339 if (usemap[start] == use_not)
341 /* Add the start marker. */
342 usemap[start] = use | use_begin;
343 use &= ~use_first;
345 while (--len > 0)
346 if (usemap[++start] != use_not)
347 return 0;
348 else
349 usemap[start] = use;
351 /* Add the end marker. */
352 usemap[start] = use | use_end;
354 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
356 /* Hash entries can't be shared. */
357 if (use == use_he)
358 return 0;
360 usemap[start] |= (use & use_first);
361 use &= ~use_first;
363 while (--len > 1)
364 if (usemap[++start] != use)
365 return 0;
367 if (usemap[++start] != (use | use_end))
368 return 0;
370 else
371 /* Points to a wrong object or somewhere in the middle. */
372 return 0;
374 return 1;
378 /* Verify data in persistent database. */
379 static int
380 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
382 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb
383 || dbnr == netgrdb);
385 time_t now = time (NULL);
387 struct database_pers_head *head = mem;
388 struct database_pers_head head_copy = *head;
390 /* Check that the header that was read matches the head in the database. */
391 if (memcmp (head, readhead, sizeof (*head)) != 0)
392 return 0;
394 /* First some easy tests: make sure the database header is sane. */
395 if (head->version != DB_VERSION
396 || head->header_size != sizeof (*head)
397 /* We allow a timestamp to be one hour ahead of the current time.
398 This should cover daylight saving time changes. */
399 || head->timestamp > now + 60 * 60 + 60
400 || (head->gc_cycle & 1)
401 || head->module == 0
402 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
403 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
404 || head->first_free < 0
405 || head->first_free > head->data_size
406 || (head->first_free & BLOCK_ALIGN_M1) != 0
407 || head->maxnentries < 0
408 || head->maxnsearched < 0)
409 return 0;
411 uint8_t *usemap = calloc (head->first_free, 1);
412 if (usemap == NULL)
413 return 0;
415 const char *data = (char *) &head->array[roundup (head->module,
416 ALIGN / sizeof (ref_t))];
418 nscd_ssize_t he_cnt = 0;
419 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
421 ref_t trail = head->array[cnt];
422 ref_t work = trail;
423 int tick = 0;
425 while (work != ENDREF)
427 if (! check_use (data, head->first_free, usemap, use_he, work,
428 sizeof (struct hashentry)))
429 goto fail;
431 /* Now we know we can dereference the record. */
432 struct hashentry *here = (struct hashentry *) (data + work);
434 ++he_cnt;
436 /* Make sure the record is for this type of service. */
437 if (here->type >= LASTREQ
438 || reqinfo[here->type].db != &dbs[dbnr])
439 goto fail;
441 /* Validate boolean field value. */
442 if (here->first != false && here->first != true)
443 goto fail;
445 if (here->len < 0)
446 goto fail;
448 /* Now the data. */
449 if (here->packet < 0
450 || here->packet > head->first_free
451 || here->packet + sizeof (struct datahead) > head->first_free)
452 goto fail;
454 struct datahead *dh = (struct datahead *) (data + here->packet);
456 if (! check_use (data, head->first_free, usemap,
457 use_data | (here->first ? use_first : 0),
458 here->packet, dh->allocsize))
459 goto fail;
461 if (dh->allocsize < sizeof (struct datahead)
462 || dh->recsize > dh->allocsize
463 || (dh->notfound != false && dh->notfound != true)
464 || (dh->usable != false && dh->usable != true))
465 goto fail;
467 if (here->key < here->packet + sizeof (struct datahead)
468 || here->key > here->packet + dh->allocsize
469 || here->key + here->len > here->packet + dh->allocsize)
470 goto fail;
472 work = here->next;
474 if (work == trail)
475 /* A circular list, this must not happen. */
476 goto fail;
477 if (tick)
478 trail = ((struct hashentry *) (data + trail))->next;
479 tick = 1 - tick;
483 if (he_cnt != head->nentries)
484 goto fail;
486 /* See if all data and keys had at least one reference from
487 he->first == true hashentry. */
488 for (ref_t idx = 0; idx < head->first_free; ++idx)
490 if (usemap[idx] == use_data_begin)
491 goto fail;
494 /* Finally, make sure the database hasn't changed since the first test. */
495 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
496 goto fail;
498 free (usemap);
499 return 1;
501 fail:
502 free (usemap);
503 return 0;
/* Flags added to every open call for the database files: request
   close-on-exec where the system headers provide O_CLOEXEC.  */
#ifdef O_CLOEXEC
# define EXTRA_O_FLAGS O_CLOEXEC
#else
# define EXTRA_O_FLAGS 0
#endif
514 /* Initialize database information structures. */
515 void
516 nscd_init (void)
518 /* Look up unprivileged uid/gid/groups before we start listening on the
519 socket */
520 if (server_user != NULL)
521 begin_drop_privileges ();
523 if (nthreads == -1)
524 /* No configuration for this value, assume a default. */
525 nthreads = 4;
527 for (size_t cnt = 0; cnt < lastdb; ++cnt)
528 if (dbs[cnt].enabled)
530 pthread_rwlock_init (&dbs[cnt].lock, NULL);
531 pthread_mutex_init (&dbs[cnt].memlock, NULL);
533 if (dbs[cnt].persistent)
535 /* Try to open the appropriate file on disk. */
536 int fd = open (dbs[cnt].db_filename, O_RDWR | EXTRA_O_FLAGS);
537 if (fd != -1)
539 char *msg = NULL;
540 struct stat64 st;
541 void *mem;
542 size_t total;
543 struct database_pers_head head;
544 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
545 sizeof (head)));
546 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
548 fail_db_errno:
549 /* The code is single-threaded at this point so
550 using strerror is just fine. */
551 msg = strerror (errno);
552 fail_db:
553 dbg_log (_("invalid persistent database file \"%s\": %s"),
554 dbs[cnt].db_filename, msg);
555 unlink (dbs[cnt].db_filename);
557 else if (head.module == 0 && head.data_size == 0)
559 /* The file has been created, but the head has not
560 been initialized yet. */
561 msg = _("uninitialized header");
562 goto fail_db;
564 else if (head.header_size != (int) sizeof (head))
566 msg = _("header size does not match");
567 goto fail_db;
569 else if ((total = (sizeof (head)
570 + roundup (head.module * sizeof (ref_t),
571 ALIGN)
572 + head.data_size))
573 > st.st_size
574 || total < sizeof (head))
576 msg = _("file size does not match");
577 goto fail_db;
579 /* Note we map with the maximum size allowed for the
580 database. This is likely much larger than the
581 actual file size. This is OK on most OSes since
582 extensions of the underlying file will
583 automatically translate more pages available for
584 memory access. */
585 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
586 PROT_READ | PROT_WRITE,
587 MAP_SHARED, fd, 0))
588 == MAP_FAILED)
589 goto fail_db_errno;
590 else if (!verify_persistent_db (mem, &head, cnt))
592 munmap (mem, total);
593 msg = _("verification failed");
594 goto fail_db;
596 else
598 /* Success. We have the database. */
599 dbs[cnt].head = mem;
600 dbs[cnt].memsize = total;
601 dbs[cnt].data = (char *)
602 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
603 ALIGN / sizeof (ref_t))];
604 dbs[cnt].mmap_used = true;
606 if (dbs[cnt].suggested_module > head.module)
607 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
608 dbnames[cnt]);
610 dbs[cnt].wr_fd = fd;
611 fd = -1;
612 /* We also need a read-only descriptor. */
613 if (dbs[cnt].shared)
615 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
616 O_RDONLY | EXTRA_O_FLAGS);
617 if (dbs[cnt].ro_fd == -1)
618 dbg_log (_("\
619 cannot create read-only descriptor for \"%s\"; no mmap"),
620 dbs[cnt].db_filename);
623 // XXX Shall we test whether the descriptors actually
624 // XXX point to the same file?
627 /* Close the file descriptors in case something went
628 wrong in which case the variable have not been
629 assigned -1. */
630 if (fd != -1)
631 close (fd);
633 else if (errno == EACCES)
634 do_exit (EXIT_FAILURE, 0, _("cannot access '%s'"),
635 dbs[cnt].db_filename);
638 if (dbs[cnt].head == NULL)
640 /* No database loaded. Allocate the data structure,
641 possibly on disk. */
642 struct database_pers_head head;
643 size_t total = (sizeof (head)
644 + roundup (dbs[cnt].suggested_module
645 * sizeof (ref_t), ALIGN)
646 + (dbs[cnt].suggested_module
647 * DEFAULT_DATASIZE_PER_BUCKET));
649 /* Try to create the database. If we do not need a
650 persistent database create a temporary file. */
651 int fd;
652 int ro_fd = -1;
653 if (dbs[cnt].persistent)
655 fd = open (dbs[cnt].db_filename,
656 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | EXTRA_O_FLAGS,
657 S_IRUSR | S_IWUSR);
658 if (fd != -1 && dbs[cnt].shared)
659 ro_fd = open (dbs[cnt].db_filename,
660 O_RDONLY | EXTRA_O_FLAGS);
662 else
664 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
665 fd = mkostemp (fname, EXTRA_O_FLAGS);
667 /* We do not need the file name anymore after we
668 opened another file descriptor in read-only mode. */
669 if (fd != -1)
671 if (dbs[cnt].shared)
672 ro_fd = open (fname, O_RDONLY | EXTRA_O_FLAGS);
674 unlink (fname);
678 if (fd == -1)
680 if (errno == EEXIST)
682 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
683 dbnames[cnt], dbs[cnt].db_filename);
684 do_exit (1, 0, NULL);
687 if (dbs[cnt].persistent)
688 dbg_log (_("cannot create %s; no persistent database used"),
689 dbs[cnt].db_filename);
690 else
691 dbg_log (_("cannot create %s; no sharing possible"),
692 dbs[cnt].db_filename);
694 dbs[cnt].persistent = 0;
695 // XXX remember: no mmap
697 else
699 /* Tell the user if we could not create the read-only
700 descriptor. */
701 if (ro_fd == -1 && dbs[cnt].shared)
702 dbg_log (_("\
703 cannot create read-only descriptor for \"%s\"; no mmap"),
704 dbs[cnt].db_filename);
706 /* Before we create the header, initialize the hash
707 table. That way if we get interrupted while writing
708 the header we can recognize a partially initialized
709 database. */
710 size_t ps = sysconf (_SC_PAGESIZE);
711 char tmpbuf[ps];
712 assert (~ENDREF == 0);
713 memset (tmpbuf, '\xff', ps);
715 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
716 off_t offset = sizeof (head);
718 size_t towrite;
719 if (offset % ps != 0)
721 towrite = MIN (remaining, ps - (offset % ps));
722 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
723 goto write_fail;
724 offset += towrite;
725 remaining -= towrite;
728 while (remaining > ps)
730 if (pwrite (fd, tmpbuf, ps, offset) == -1)
731 goto write_fail;
732 offset += ps;
733 remaining -= ps;
736 if (remaining > 0
737 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
738 goto write_fail;
740 /* Create the header of the file. */
741 struct database_pers_head head =
743 .version = DB_VERSION,
744 .header_size = sizeof (head),
745 .module = dbs[cnt].suggested_module,
746 .data_size = (dbs[cnt].suggested_module
747 * DEFAULT_DATASIZE_PER_BUCKET),
748 .first_free = 0
750 void *mem;
752 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
753 != sizeof (head))
754 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
755 != 0)
756 || (mem = mmap (NULL, dbs[cnt].max_db_size,
757 PROT_READ | PROT_WRITE,
758 MAP_SHARED, fd, 0)) == MAP_FAILED)
760 write_fail:
761 unlink (dbs[cnt].db_filename);
762 dbg_log (_("cannot write to database file %s: %s"),
763 dbs[cnt].db_filename, strerror (errno));
764 dbs[cnt].persistent = 0;
766 else
768 /* Success. */
769 dbs[cnt].head = mem;
770 dbs[cnt].data = (char *)
771 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
772 ALIGN / sizeof (ref_t))];
773 dbs[cnt].memsize = total;
774 dbs[cnt].mmap_used = true;
776 /* Remember the descriptors. */
777 dbs[cnt].wr_fd = fd;
778 dbs[cnt].ro_fd = ro_fd;
779 fd = -1;
780 ro_fd = -1;
783 if (fd != -1)
784 close (fd);
785 if (ro_fd != -1)
786 close (ro_fd);
790 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
791 /* We do not check here whether the O_CLOEXEC provided to the
792 open call was successful or not. The two fcntl calls are
793 only performed once each per process start-up and therefore
794 is not noticeable at all. */
795 if (paranoia
796 && ((dbs[cnt].wr_fd != -1
797 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
798 || (dbs[cnt].ro_fd != -1
799 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
801 dbg_log (_("\
802 cannot set socket to close on exec: %s; disabling paranoia mode"),
803 strerror (errno));
804 paranoia = 0;
806 #endif
808 if (dbs[cnt].head == NULL)
810 /* We do not use the persistent database. Just
811 create an in-memory data structure. */
812 assert (! dbs[cnt].persistent);
814 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
815 + (dbs[cnt].suggested_module
816 * sizeof (ref_t)));
817 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
818 assert (~ENDREF == 0);
819 memset (dbs[cnt].head->array, '\xff',
820 dbs[cnt].suggested_module * sizeof (ref_t));
821 dbs[cnt].head->module = dbs[cnt].suggested_module;
822 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
823 * dbs[cnt].head->module);
824 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
825 dbs[cnt].head->first_free = 0;
827 dbs[cnt].shared = 0;
828 assert (dbs[cnt].ro_fd == -1);
832 /* Create the socket. */
833 #ifndef __ASSUME_SOCK_CLOEXEC
834 sock = -1;
835 if (have_sock_cloexec >= 0)
836 #endif
838 sock = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
839 #ifndef __ASSUME_SOCK_CLOEXEC
840 if (have_sock_cloexec == 0)
841 have_sock_cloexec = sock != -1 || errno != EINVAL ? 1 : -1;
842 #endif
844 #ifndef __ASSUME_SOCK_CLOEXEC
845 if (have_sock_cloexec < 0)
846 sock = socket (AF_UNIX, SOCK_STREAM, 0);
847 #endif
848 if (sock < 0)
850 dbg_log (_("cannot open socket: %s"), strerror (errno));
851 do_exit (errno == EACCES ? 4 : 1, 0, NULL);
853 /* Bind a name to the socket. */
854 struct sockaddr_un sock_addr;
855 sock_addr.sun_family = AF_UNIX;
856 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
857 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
859 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
860 do_exit (errno == EACCES ? 4 : 1, 0, NULL);
863 #ifndef __ASSUME_SOCK_CLOEXEC
864 if (have_sock_cloexec < 0)
866 /* We don't want to get stuck on accept. */
867 int fl = fcntl (sock, F_GETFL);
868 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
870 dbg_log (_("cannot change socket to nonblocking mode: %s"),
871 strerror (errno));
872 do_exit (1, 0, NULL);
875 /* The descriptor needs to be closed on exec. */
876 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
878 dbg_log (_("cannot set socket to close on exec: %s"),
879 strerror (errno));
880 do_exit (1, 0, NULL);
883 #endif
885 /* Set permissions for the socket. */
886 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
888 /* Set the socket up to accept connections. */
889 if (listen (sock, SOMAXCONN) < 0)
891 dbg_log (_("cannot enable socket to accept connections: %s"),
892 strerror (errno));
893 do_exit (1, 0, NULL);
896 #ifdef HAVE_NETLINK
897 if (dbs[hstdb].enabled)
899 /* Try to open netlink socket to monitor network setting changes. */
900 nl_status_fd = socket (AF_NETLINK,
901 SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK,
902 NETLINK_ROUTE);
903 if (nl_status_fd != -1)
905 struct sockaddr_nl snl;
906 memset (&snl, '\0', sizeof (snl));
907 snl.nl_family = AF_NETLINK;
908 /* XXX Is this the best set to use? */
909 snl.nl_groups = (RTMGRP_IPV4_IFADDR | RTMGRP_TC | RTMGRP_IPV4_MROUTE
910 | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_RULE
911 | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_MROUTE
912 | RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFINFO
913 | RTMGRP_IPV6_PREFIX);
915 if (bind (nl_status_fd, (struct sockaddr *) &snl, sizeof (snl)) != 0)
917 close (nl_status_fd);
918 nl_status_fd = -1;
920 else
922 /* Start the timestamp process. */
923 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
924 = __bump_nl_timestamp ();
926 # ifndef __ASSUME_SOCK_CLOEXEC
927 if (have_sock_cloexec < 0)
929 /* We don't want to get stuck on accept. */
930 int fl = fcntl (nl_status_fd, F_GETFL);
931 if (fl == -1
932 || fcntl (nl_status_fd, F_SETFL, fl | O_NONBLOCK) == -1)
934 dbg_log (_("\
935 cannot change socket to nonblocking mode: %s"),
936 strerror (errno));
937 do_exit (1, 0, NULL);
940 /* The descriptor needs to be closed on exec. */
941 if (paranoia
942 && fcntl (nl_status_fd, F_SETFD, FD_CLOEXEC) == -1)
944 dbg_log (_("cannot set socket to close on exec: %s"),
945 strerror (errno));
946 do_exit (1, 0, NULL);
949 # endif
953 #endif
955 /* Change to unprivileged uid/gid/groups if specified in config file */
956 if (server_user != NULL)
957 finish_drop_privileges ();
961 /* Register the file in FINFO as a traced file for the database DBS[DBIX].
963 We support registering multiple files per database. Each call to
964 register_traced_file adds to the list of registered files.
966 When we prune the database, either through timeout or a request to
967 invalidate, we will check to see if any of the registered files has changed.
968 When we accept new connections to handle a cache request we will also
969 check to see if any of the registered files has changed.
971 If we have inotify support then we install an inotify fd to notify us of
972 file deletion or modification, both of which will require we invalidate
973 the cache for the database. Without inotify support we stat the file and
974 store st_mtime to determine if the file has been modified. */
975 void
976 register_traced_file (size_t dbidx, struct traced_file *finfo)
978 /* If the database is disabled or file checking is disabled
979 then ignore the registration. */
980 if (! dbs[dbidx].enabled || ! dbs[dbidx].check_file)
981 return;
983 if (__glibc_unlikely (debug_level > 0))
984 dbg_log (_("register trace file %s for database %s"),
985 finfo->fname, dbnames[dbidx]);
987 #ifdef HAVE_INOTIFY
988 if (inotify_fd < 0
989 || (finfo->inotify_descr = inotify_add_watch (inotify_fd, finfo->fname,
990 IN_DELETE_SELF
991 | IN_MODIFY)) < 0)
992 #endif
994 /* We need the modification date of the file. */
995 struct stat64 st;
997 if (stat64 (finfo->fname, &st) < 0)
999 /* We cannot stat() the file, disable file checking. */
1000 dbg_log (_("cannot stat() file `%s': %s"),
1001 finfo->fname, strerror (errno));
1002 return;
1005 finfo->inotify_descr = -1;
1006 finfo->mtime = st.st_mtime;
1009 /* Queue up the file name. */
1010 finfo->next = dbs[dbidx].traced_files;
1011 dbs[dbidx].traced_files = finfo;
1015 /* Close the connections. */
1016 void
1017 close_sockets (void)
1019 close (sock);
1023 static void
1024 invalidate_cache (char *key, int fd)
1026 dbtype number;
1027 int32_t resp;
1029 for (number = pwddb; number < lastdb; ++number)
1030 if (strcmp (key, dbnames[number]) == 0)
1032 if (number == hstdb)
1034 struct traced_file *runp = dbs[hstdb].traced_files;
1035 while (runp != NULL)
1036 if (runp->call_res_init)
1038 res_init ();
1039 break;
1041 else
1042 runp = runp->next;
1044 break;
1047 if (number == lastdb)
1049 resp = EINVAL;
1050 writeall (fd, &resp, sizeof (resp));
1051 return;
1054 if (dbs[number].enabled)
1056 pthread_mutex_lock (&dbs[number].prune_run_lock);
1057 prune_cache (&dbs[number], LONG_MAX, fd);
1058 pthread_mutex_unlock (&dbs[number].prune_run_lock);
1060 else
1062 resp = 0;
1063 writeall (fd, &resp, sizeof (resp));
1068 #ifdef SCM_RIGHTS
1069 static void
1070 send_ro_fd (struct database_dyn *db, char *key, int fd)
1072 /* If we do not have an read-only file descriptor do nothing. */
1073 if (db->ro_fd == -1)
1074 return;
1076 /* We need to send some data along with the descriptor. */
1077 uint64_t mapsize = (db->head->data_size
1078 + roundup (db->head->module * sizeof (ref_t), ALIGN)
1079 + sizeof (struct database_pers_head));
1080 struct iovec iov[2];
1081 iov[0].iov_base = key;
1082 iov[0].iov_len = strlen (key) + 1;
1083 iov[1].iov_base = &mapsize;
1084 iov[1].iov_len = sizeof (mapsize);
1086 /* Prepare the control message to transfer the descriptor. */
1087 union
1089 struct cmsghdr hdr;
1090 char bytes[CMSG_SPACE (sizeof (int))];
1091 } buf;
1092 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
1093 .msg_control = buf.bytes,
1094 .msg_controllen = sizeof (buf) };
1095 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
1097 cmsg->cmsg_level = SOL_SOCKET;
1098 cmsg->cmsg_type = SCM_RIGHTS;
1099 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
1101 int *ip = (int *) CMSG_DATA (cmsg);
1102 *ip = db->ro_fd;
1104 msg.msg_controllen = cmsg->cmsg_len;
1106 /* Send the control message. We repeat when we are interrupted but
1107 everything else is ignored. */
1108 #ifndef MSG_NOSIGNAL
1109 # define MSG_NOSIGNAL 0
1110 #endif
1111 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
1113 if (__glibc_unlikely (debug_level > 0))
1114 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
1116 #endif /* SCM_RIGHTS */
1119 /* Handle new request. */
1120 static void
1121 handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
1123 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
1125 if (debug_level > 0)
1126 dbg_log (_("\
1127 cannot handle old request version %d; current version is %d"),
1128 req->version, NSCD_VERSION);
1129 return;
1132 /* Perform the SELinux check before we go on to the standard checks. */
1133 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
1135 if (debug_level > 0)
1137 #ifdef SO_PEERCRED
1138 # ifdef PATH_MAX
1139 char buf[PATH_MAX];
1140 # else
1141 char buf[4096];
1142 # endif
1144 snprintf (buf, sizeof (buf), "/proc/%ld/exe", (long int) pid);
1145 ssize_t n = readlink (buf, buf, sizeof (buf) - 1);
1147 if (n <= 0)
1148 dbg_log (_("\
1149 request from %ld not handled due to missing permission"), (long int) pid);
1150 else
1152 buf[n] = '\0';
1153 dbg_log (_("\
1154 request from '%s' [%ld] not handled due to missing permission"),
1155 buf, (long int) pid);
1157 #else
1158 dbg_log (_("request not handled due to missing permission"));
1159 #endif
1161 return;
1164 struct database_dyn *db = reqinfo[req->type].db;
1166 /* See whether we can service the request from the cache. */
1167 if (__builtin_expect (reqinfo[req->type].data_request, true))
1169 if (__builtin_expect (debug_level, 0) > 0)
1171 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
1173 char buf[INET6_ADDRSTRLEN];
1175 dbg_log ("\t%s (%s)", serv2str[req->type],
1176 inet_ntop (req->type == GETHOSTBYADDR
1177 ? AF_INET : AF_INET6,
1178 key, buf, sizeof (buf)));
1180 else
1181 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1184 /* Is this service enabled? */
1185 if (__glibc_unlikely (!db->enabled))
1187 /* No, sent the prepared record. */
1188 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1189 db->disabled_iov->iov_len,
1190 MSG_NOSIGNAL))
1191 != (ssize_t) db->disabled_iov->iov_len
1192 && __builtin_expect (debug_level, 0) > 0)
1194 /* We have problems sending the result. */
1195 char buf[256];
1196 dbg_log (_("cannot write result: %s"),
1197 strerror_r (errno, buf, sizeof (buf)));
1200 return;
1203 /* Be sure we can read the data. */
1204 if (__glibc_unlikely (pthread_rwlock_tryrdlock (&db->lock) != 0))
1206 ++db->head->rdlockdelayed;
1207 pthread_rwlock_rdlock (&db->lock);
1210 /* See whether we can handle it from the cache. */
1211 struct datahead *cached;
1212 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1213 db, uid);
1214 if (cached != NULL)
1216 /* Hurray it's in the cache. */
1217 ssize_t nwritten;
1219 #ifdef HAVE_SENDFILE
1220 if (__glibc_likely (db->mmap_used))
1222 assert (db->wr_fd != -1);
1223 assert ((char *) cached->data > (char *) db->data);
1224 assert ((char *) cached->data - (char *) db->head
1225 + cached->recsize
1226 <= (sizeof (struct database_pers_head)
1227 + db->head->module * sizeof (ref_t)
1228 + db->head->data_size));
1229 nwritten = sendfileall (fd, db->wr_fd,
1230 (char *) cached->data
1231 - (char *) db->head, cached->recsize);
1232 # ifndef __ASSUME_SENDFILE
1233 if (nwritten == -1 && errno == ENOSYS)
1234 goto use_write;
1235 # endif
1237 else
1238 # ifndef __ASSUME_SENDFILE
1239 use_write:
1240 # endif
1241 #endif
1242 nwritten = writeall (fd, cached->data, cached->recsize);
1244 if (nwritten != cached->recsize
1245 && __builtin_expect (debug_level, 0) > 0)
1247 /* We have problems sending the result. */
1248 char buf[256];
1249 dbg_log (_("cannot write result: %s"),
1250 strerror_r (errno, buf, sizeof (buf)));
1253 pthread_rwlock_unlock (&db->lock);
1255 return;
1258 pthread_rwlock_unlock (&db->lock);
1260 else if (__builtin_expect (debug_level, 0) > 0)
1262 if (req->type == INVALIDATE)
1263 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1264 else
1265 dbg_log ("\t%s", serv2str[req->type]);
1268 /* Handle the request. */
1269 switch (req->type)
1271 case GETPWBYNAME:
1272 addpwbyname (db, fd, req, key, uid);
1273 break;
1275 case GETPWBYUID:
1276 addpwbyuid (db, fd, req, key, uid);
1277 break;
1279 case GETGRBYNAME:
1280 addgrbyname (db, fd, req, key, uid);
1281 break;
1283 case GETGRBYGID:
1284 addgrbygid (db, fd, req, key, uid);
1285 break;
1287 case GETHOSTBYNAME:
1288 addhstbyname (db, fd, req, key, uid);
1289 break;
1291 case GETHOSTBYNAMEv6:
1292 addhstbynamev6 (db, fd, req, key, uid);
1293 break;
1295 case GETHOSTBYADDR:
1296 addhstbyaddr (db, fd, req, key, uid);
1297 break;
1299 case GETHOSTBYADDRv6:
1300 addhstbyaddrv6 (db, fd, req, key, uid);
1301 break;
1303 case GETAI:
1304 addhstai (db, fd, req, key, uid);
1305 break;
1307 case INITGROUPS:
1308 addinitgroups (db, fd, req, key, uid);
1309 break;
1311 case GETSERVBYNAME:
1312 addservbyname (db, fd, req, key, uid);
1313 break;
1315 case GETSERVBYPORT:
1316 addservbyport (db, fd, req, key, uid);
1317 break;
1319 case GETNETGRENT:
1320 addgetnetgrent (db, fd, req, key, uid);
1321 break;
1323 case INNETGR:
1324 addinnetgr (db, fd, req, key, uid);
1325 break;
1327 case GETSTAT:
1328 case SHUTDOWN:
1329 case INVALIDATE:
1331 /* Get the callers credentials. */
1332 #ifdef SO_PEERCRED
1333 struct ucred caller;
1334 socklen_t optlen = sizeof (caller);
1336 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1338 char buf[256];
1340 dbg_log (_("error getting caller's id: %s"),
1341 strerror_r (errno, buf, sizeof (buf)));
1342 break;
1345 uid = caller.uid;
1346 #else
1347 /* Some systems have no SO_PEERCRED implementation. They don't
1348 care about security so we don't as well. */
1349 uid = 0;
1350 #endif
1353 /* Accept shutdown, getstat and invalidate only from root. For
1354 the stat call also allow the user specified in the config file. */
1355 if (req->type == GETSTAT)
1357 if (uid == 0 || uid == stat_uid)
1358 send_stats (fd, dbs);
1360 else if (uid == 0)
1362 if (req->type == INVALIDATE)
1363 invalidate_cache (key, fd);
1364 else
1365 termination_handler (0);
1367 break;
1369 case GETFDPW:
1370 case GETFDGR:
1371 case GETFDHST:
1372 case GETFDSERV:
1373 case GETFDNETGR:
1374 #ifdef SCM_RIGHTS
1375 send_ro_fd (reqinfo[req->type].db, key, fd);
1376 #endif
1377 break;
1379 default:
1380 /* Ignore the command, it's nothing we know. */
1381 break;
1386 /* Restart the process. */
1387 static void
1388 restart (void)
1390 /* First determine the parameters. We do not use the parameters
1391 passed to main() since in case nscd is started by running the
1392 dynamic linker this will not work. Yes, this is not the usual
1393 case but nscd is part of glibc and we occasionally do this. */
1394 size_t buflen = 1024;
1395 char *buf = alloca (buflen);
1396 size_t readlen = 0;
1397 int fd = open ("/proc/self/cmdline", O_RDONLY);
1398 if (fd == -1)
1400 dbg_log (_("\
1401 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1402 strerror (errno));
1404 paranoia = 0;
1405 return;
1408 while (1)
1410 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1411 buflen - readlen));
1412 if (n == -1)
1414 dbg_log (_("\
1415 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1416 strerror (errno));
1418 close (fd);
1419 paranoia = 0;
1420 return;
1423 readlen += n;
1425 if (readlen < buflen)
1426 break;
1428 /* We might have to extend the buffer. */
1429 size_t old_buflen = buflen;
1430 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1431 buf = memmove (newp, buf, old_buflen);
1434 close (fd);
1436 /* Parse the command line. Worst case scenario: every two
1437 characters form one parameter (one character plus NUL). */
1438 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1439 int argc = 0;
1441 char *cp = buf;
1442 while (cp < buf + readlen)
1444 argv[argc++] = cp;
1445 cp = (char *) rawmemchr (cp, '\0') + 1;
1447 argv[argc] = NULL;
1449 /* Second, change back to the old user if we changed it. */
1450 if (server_user != NULL)
1452 if (setresuid (old_uid, old_uid, old_uid) != 0)
1454 dbg_log (_("\
1455 cannot change to old UID: %s; disabling paranoia mode"),
1456 strerror (errno));
1458 paranoia = 0;
1459 return;
1462 if (setresgid (old_gid, old_gid, old_gid) != 0)
1464 dbg_log (_("\
1465 cannot change to old GID: %s; disabling paranoia mode"),
1466 strerror (errno));
1468 ignore_value (setuid (server_uid));
1469 paranoia = 0;
1470 return;
1474 /* Next change back to the old working directory. */
1475 if (chdir (oldcwd) == -1)
1477 dbg_log (_("\
1478 cannot change to old working directory: %s; disabling paranoia mode"),
1479 strerror (errno));
1481 if (server_user != NULL)
1483 ignore_value (setuid (server_uid));
1484 ignore_value (setgid (server_gid));
1486 paranoia = 0;
1487 return;
1490 /* Synchronize memory. */
1491 int32_t certainly[lastdb];
1492 for (int cnt = 0; cnt < lastdb; ++cnt)
1493 if (dbs[cnt].enabled)
1495 /* Make sure nobody keeps using the database. */
1496 dbs[cnt].head->timestamp = 0;
1497 certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
1498 dbs[cnt].head->nscd_certainly_running = 0;
1500 if (dbs[cnt].persistent)
1501 // XXX async OK?
1502 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1505 /* The preparations are done. */
1506 #ifdef PATH_MAX
1507 char pathbuf[PATH_MAX];
1508 #else
1509 char pathbuf[256];
1510 #endif
1511 /* Try to exec the real nscd program so the process name (as reported
1512 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1513 if readlink or the exec with the result of the readlink call fails. */
1514 ssize_t n = readlink ("/proc/self/exe", pathbuf, sizeof (pathbuf) - 1);
1515 if (n != -1)
1517 pathbuf[n] = '\0';
1518 execv (pathbuf, argv);
1520 execv ("/proc/self/exe", argv);
1522 /* If we come here, we will never be able to re-exec. */
1523 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1524 strerror (errno));
1526 if (server_user != NULL)
1528 ignore_value (setuid (server_uid));
1529 ignore_value (setgid (server_gid));
1531 if (chdir ("/") != 0)
1532 dbg_log (_("cannot change current working directory to \"/\": %s"),
1533 strerror (errno));
1534 paranoia = 0;
1536 /* Reenable the databases. */
1537 time_t now = time (NULL);
1538 for (int cnt = 0; cnt < lastdb; ++cnt)
1539 if (dbs[cnt].enabled)
1541 dbs[cnt].head->timestamp = now;
1542 dbs[cnt].head->nscd_certainly_running = certainly[cnt];
1547 /* List of file descriptors. */
1548 struct fdlist
1550 int fd;
1551 struct fdlist *next;
1553 /* Memory allocated for the list. */
1554 static struct fdlist *fdlist;
1555 /* List of currently ready-to-read file descriptors. */
1556 static struct fdlist *readylist;
1558 /* Conditional variable and mutex to signal availability of entries in
1559 READYLIST. The condvar is initialized dynamically since we might
1560 use a different clock depending on availability. */
1561 static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
1562 static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1564 /* The clock to use with the condvar. */
1565 static clockid_t timeout_clock = CLOCK_REALTIME;
1567 /* Number of threads ready to handle the READYLIST. */
1568 static unsigned long int nready;
1571 /* Function for the clean-up threads. */
1572 static void *
1573 __attribute__ ((__noreturn__))
1574 nscd_run_prune (void *p)
1576 const long int my_number = (long int) p;
1577 assert (dbs[my_number].enabled);
1579 int dont_need_update = setup_thread (&dbs[my_number]);
1581 time_t now = time (NULL);
1583 /* We are running. */
1584 dbs[my_number].head->timestamp = now;
1586 struct timespec prune_ts;
1587 if (__glibc_unlikely (clock_gettime (timeout_clock, &prune_ts) == -1))
1588 /* Should never happen. */
1589 abort ();
1591 /* Compute the initial timeout time. Prevent all the timers to go
1592 off at the same time by adding a db-based value. */
1593 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1594 dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;
1596 pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
1597 pthread_mutex_t *prune_run_lock = &dbs[my_number].prune_run_lock;
1598 pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;
1600 pthread_mutex_lock (prune_lock);
1601 while (1)
1603 /* Wait, but not forever. */
1604 int e = 0;
1605 if (! dbs[my_number].clear_cache)
1606 e = pthread_cond_timedwait (prune_cond, prune_lock, &prune_ts);
1607 assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));
1609 time_t next_wait;
1610 now = time (NULL);
1611 if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time
1612 || dbs[my_number].clear_cache)
1614 /* We will determine the new timout values based on the
1615 cache content. Should there be concurrent additions to
1616 the cache which are not accounted for in the cache
1617 pruning we want to know about it. Therefore set the
1618 timeout to the maximum. It will be descreased when adding
1619 new entries to the cache, if necessary. */
1620 dbs[my_number].wakeup_time = MAX_TIMEOUT_VALUE;
1622 /* Unconditionally reset the flag. */
1623 time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
1624 dbs[my_number].clear_cache = 0;
1626 pthread_mutex_unlock (prune_lock);
1628 /* We use a separate lock for running the prune function (instead
1629 of keeping prune_lock locked) because this enables concurrent
1630 invocations of cache_add which might modify the timeout value. */
1631 pthread_mutex_lock (prune_run_lock);
1632 next_wait = prune_cache (&dbs[my_number], prune_now, -1);
1633 pthread_mutex_unlock (prune_run_lock);
1635 next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1636 /* If clients cannot determine for sure whether nscd is running
1637 we need to wake up occasionally to update the timestamp.
1638 Wait 90% of the update period. */
1639 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1640 if (__glibc_unlikely (! dont_need_update))
1642 next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1643 dbs[my_number].head->timestamp = now;
1646 pthread_mutex_lock (prune_lock);
1648 /* Make it known when we will wake up again. */
1649 if (now + next_wait < dbs[my_number].wakeup_time)
1650 dbs[my_number].wakeup_time = now + next_wait;
1651 else
1652 next_wait = dbs[my_number].wakeup_time - now;
1654 else
1655 /* The cache was just pruned. Do not do it again now. Just
1656 use the new timeout value. */
1657 next_wait = dbs[my_number].wakeup_time - now;
1659 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1660 /* Should never happen. */
1661 abort ();
1663 /* Compute next timeout time. */
1664 prune_ts.tv_sec += next_wait;
1669 /* This is the main loop. It is replicated in different threads but
1670 the use of the ready list makes sure only one thread handles an
1671 incoming connection. */
1672 static void *
1673 __attribute__ ((__noreturn__))
1674 nscd_run_worker (void *p)
1676 char buf[256];
1678 /* Initial locking. */
1679 pthread_mutex_lock (&readylist_lock);
1681 /* One more thread available. */
1682 ++nready;
1684 while (1)
1686 while (readylist == NULL)
1687 pthread_cond_wait (&readylist_cond, &readylist_lock);
1689 struct fdlist *it = readylist->next;
1690 if (readylist->next == readylist)
1691 /* Just one entry on the list. */
1692 readylist = NULL;
1693 else
1694 readylist->next = it->next;
1696 /* Extract the information and mark the record ready to be used
1697 again. */
1698 int fd = it->fd;
1699 it->next = NULL;
1701 /* One more thread available. */
1702 --nready;
1704 /* We are done with the list. */
1705 pthread_mutex_unlock (&readylist_lock);
1707 #ifndef __ASSUME_ACCEPT4
1708 if (have_accept4 < 0)
1710 /* We do not want to block on a short read or so. */
1711 int fl = fcntl (fd, F_GETFL);
1712 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1713 goto close_and_out;
1715 #endif
1717 /* Now read the request. */
1718 request_header req;
1719 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1720 != sizeof (req), 0))
1722 /* We failed to read data. Note that this also might mean we
1723 failed because we would have blocked. */
1724 if (debug_level > 0)
1725 dbg_log (_("short read while reading request: %s"),
1726 strerror_r (errno, buf, sizeof (buf)));
1727 goto close_and_out;
1730 /* Check whether this is a valid request type. */
1731 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1732 goto close_and_out;
1734 /* Some systems have no SO_PEERCRED implementation. They don't
1735 care about security so we don't as well. */
1736 uid_t uid = -1;
1737 #ifdef SO_PEERCRED
1738 pid_t pid = 0;
1740 if (__glibc_unlikely (debug_level > 0))
1742 struct ucred caller;
1743 socklen_t optlen = sizeof (caller);
1745 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1746 pid = caller.pid;
1748 #else
1749 const pid_t pid = 0;
1750 #endif
1752 /* It should not be possible to crash the nscd with a silly
1753 request (i.e., a terribly large key). We limit the size to 1kb. */
1754 if (__builtin_expect (req.key_len, 1) < 0
1755 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1757 if (debug_level > 0)
1758 dbg_log (_("key length in request too long: %d"), req.key_len);
1760 else
1762 /* Get the key. */
1763 char keybuf[MAXKEYLEN + 1];
1765 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1766 req.key_len))
1767 != req.key_len, 0))
1769 /* Again, this can also mean we would have blocked. */
1770 if (debug_level > 0)
1771 dbg_log (_("short read while reading request key: %s"),
1772 strerror_r (errno, buf, sizeof (buf)));
1773 goto close_and_out;
1775 keybuf[req.key_len] = '\0';
1777 if (__builtin_expect (debug_level, 0) > 0)
1779 #ifdef SO_PEERCRED
1780 if (pid != 0)
1781 dbg_log (_("\
1782 handle_request: request received (Version = %d) from PID %ld"),
1783 req.version, (long int) pid);
1784 else
1785 #endif
1786 dbg_log (_("\
1787 handle_request: request received (Version = %d)"), req.version);
1790 /* Phew, we got all the data, now process it. */
1791 handle_request (fd, &req, keybuf, uid, pid);
1794 close_and_out:
1795 /* We are done. */
1796 close (fd);
1798 /* Re-locking. */
1799 pthread_mutex_lock (&readylist_lock);
1801 /* One more thread available. */
1802 ++nready;
1804 /* NOTREACHED */
1808 static unsigned int nconns;
1810 static void
1811 fd_ready (int fd)
1813 pthread_mutex_lock (&readylist_lock);
1815 /* Find an empty entry in FDLIST. */
1816 size_t inner;
1817 for (inner = 0; inner < nconns; ++inner)
1818 if (fdlist[inner].next == NULL)
1819 break;
1820 assert (inner < nconns);
1822 fdlist[inner].fd = fd;
1824 if (readylist == NULL)
1825 readylist = fdlist[inner].next = &fdlist[inner];
1826 else
1828 fdlist[inner].next = readylist->next;
1829 readylist = readylist->next = &fdlist[inner];
1832 bool do_signal = true;
1833 if (__glibc_unlikely (nready == 0))
1835 ++client_queued;
1836 do_signal = false;
1838 /* Try to start another thread to help out. */
1839 pthread_t th;
1840 if (nthreads < max_nthreads
1841 && pthread_create (&th, &attr, nscd_run_worker,
1842 (void *) (long int) nthreads) == 0)
1844 /* We got another thread. */
1845 ++nthreads;
1846 /* The new thread might need a kick. */
1847 do_signal = true;
1852 pthread_mutex_unlock (&readylist_lock);
1854 /* Tell one of the worker threads there is work to do. */
1855 if (do_signal)
1856 pthread_cond_signal (&readylist_cond);
1860 /* Check whether restarting should happen. */
1861 static bool
1862 restart_p (time_t now)
1864 return (paranoia && readylist == NULL && nready == nthreads
1865 && now >= restart_time);
1869 /* Array for times a connection was accepted. */
1870 static time_t *starttime;
1872 #ifdef HAVE_INOTIFY
1873 /* Inotify event for changed file. */
1874 union __inev
1876 struct inotify_event i;
1877 # ifndef PATH_MAX
1878 # define PATH_MAX 1024
1879 # endif
1880 char buf[sizeof (struct inotify_event) + PATH_MAX];
1883 /* Process the inotify event in INEV. If the event matches any of the files
1884 registered with a database then mark that database as requiring its cache
1885 to be cleared. We indicate the cache needs clearing by setting
1886 TO_CLEAR[DBCNT] to true for the matching database. */
1887 static inline void
1888 inotify_check_files (bool *to_clear, union __inev *inev)
1890 /* Check which of the files changed. */
1891 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1893 struct traced_file *finfo = dbs[dbcnt].traced_files;
1895 while (finfo != NULL)
1897 /* Inotify event watch descriptor matches. */
1898 if (finfo->inotify_descr == inev->i.wd)
1900 /* Mark cache as needing to be cleared and reinitialize. */
1901 to_clear[dbcnt] = true;
1902 if (finfo->call_res_init)
1903 res_init ();
1904 return;
1907 finfo = finfo->next;
1912 /* If an entry in the array of booleans TO_CLEAR is TRUE then clear the cache
1913 for the associated database, otherwise do nothing. The TO_CLEAR array must
1914 have LASTDB entries. */
1915 static inline void
1916 clear_db_cache (bool *to_clear)
1918 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1919 if (to_clear[dbcnt])
1921 pthread_mutex_lock (&dbs[dbcnt].prune_lock);
1922 dbs[dbcnt].clear_cache = 1;
1923 pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
1924 pthread_cond_signal (&dbs[dbcnt].prune_cond);
1928 #endif
1930 static void
1931 __attribute__ ((__noreturn__))
1932 main_loop_poll (void)
1934 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1935 * sizeof (conns[0]));
1937 conns[0].fd = sock;
1938 conns[0].events = POLLRDNORM;
1939 size_t nused = 1;
1940 size_t firstfree = 1;
1942 #ifdef HAVE_INOTIFY
1943 if (inotify_fd != -1)
1945 conns[1].fd = inotify_fd;
1946 conns[1].events = POLLRDNORM;
1947 nused = 2;
1948 firstfree = 2;
1950 #endif
1952 #ifdef HAVE_NETLINK
1953 size_t idx_nl_status_fd = 0;
1954 if (nl_status_fd != -1)
1956 idx_nl_status_fd = nused;
1957 conns[nused].fd = nl_status_fd;
1958 conns[nused].events = POLLRDNORM;
1959 ++nused;
1960 firstfree = nused;
1962 #endif
1964 while (1)
1966 /* Wait for any event. We wait at most a couple of seconds so
1967 that we can check whether we should close any of the accepted
1968 connections since we have not received a request. */
1969 #define MAX_ACCEPT_TIMEOUT 30
1970 #define MIN_ACCEPT_TIMEOUT 5
1971 #define MAIN_THREAD_TIMEOUT \
1972 (MAX_ACCEPT_TIMEOUT * 1000 \
1973 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1975 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1977 time_t now = time (NULL);
1979 /* If there is a descriptor ready for reading or there is a new
1980 connection, process this now. */
1981 if (n > 0)
1983 if (conns[0].revents != 0)
1985 /* We have a new incoming connection. Accept the connection. */
1986 int fd;
1988 #ifndef __ASSUME_ACCEPT4
1989 fd = -1;
1990 if (have_accept4 >= 0)
1991 #endif
1993 fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
1994 SOCK_NONBLOCK));
1995 #ifndef __ASSUME_ACCEPT4
1996 if (have_accept4 == 0)
1997 have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
1998 #endif
2000 #ifndef __ASSUME_ACCEPT4
2001 if (have_accept4 < 0)
2002 fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
2003 #endif
2005 /* Use the descriptor if we have not reached the limit. */
2006 if (fd >= 0)
2008 if (firstfree < nconns)
2010 conns[firstfree].fd = fd;
2011 conns[firstfree].events = POLLRDNORM;
2012 starttime[firstfree] = now;
2013 if (firstfree >= nused)
2014 nused = firstfree + 1;
2017 ++firstfree;
2018 while (firstfree < nused && conns[firstfree].fd != -1);
2020 else
2021 /* We cannot use the connection so close it. */
2022 close (fd);
2025 --n;
2028 size_t first = 1;
2029 #ifdef HAVE_INOTIFY
2030 if (inotify_fd != -1 && conns[1].fd == inotify_fd)
2032 if (conns[1].revents != 0)
2034 bool to_clear[lastdb] = { false, };
2035 union __inev inev;
2037 /* Read all inotify events for files registered via
2038 register_traced_file(). */
2039 while (1)
2041 ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
2042 sizeof (inev)));
2043 if (nb < (ssize_t) sizeof (struct inotify_event))
2045 if (__builtin_expect (nb == -1 && errno != EAGAIN,
2048 /* Something went wrong when reading the inotify
2049 data. Better disable inotify. */
2050 dbg_log (_("\
2051 disabled inotify after read error %d"),
2052 errno);
2053 conns[1].fd = -1;
2054 firstfree = 1;
2055 if (nused == 2)
2056 nused = 1;
2057 close (inotify_fd);
2058 inotify_fd = -1;
2060 break;
2063 /* Check which of the files changed. */
2064 inotify_check_files (to_clear, &inev);
2067 /* Actually perform the cache clearing. */
2068 clear_db_cache (to_clear);
2070 --n;
2073 first = 2;
2075 #endif
2077 #ifdef HAVE_NETLINK
2078 if (idx_nl_status_fd != 0 && conns[idx_nl_status_fd].revents != 0)
2080 char buf[4096];
2081 /* Read all the data. We do not interpret it here. */
2082 while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2083 sizeof (buf))) != -1)
2086 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
2087 = __bump_nl_timestamp ();
2089 #endif
2091 for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
2092 if (conns[cnt].revents != 0)
2094 fd_ready (conns[cnt].fd);
2096 /* Clean up the CONNS array. */
2097 conns[cnt].fd = -1;
2098 if (cnt < firstfree)
2099 firstfree = cnt;
2100 if (cnt == nused - 1)
2102 --nused;
2103 while (conns[nused - 1].fd == -1);
2105 --n;
2109 /* Now find entries which have timed out. */
2110 assert (nused > 0);
2112 /* We make the timeout length depend on the number of file
2113 descriptors currently used. */
2114 #define ACCEPT_TIMEOUT \
2115 (MAX_ACCEPT_TIMEOUT \
2116 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
2117 time_t laststart = now - ACCEPT_TIMEOUT;
2119 for (size_t cnt = nused - 1; cnt > 0; --cnt)
2121 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
2123 /* Remove the entry, it timed out. */
2124 (void) close (conns[cnt].fd);
2125 conns[cnt].fd = -1;
2127 if (cnt < firstfree)
2128 firstfree = cnt;
2129 if (cnt == nused - 1)
2131 --nused;
2132 while (conns[nused - 1].fd == -1);
2136 if (restart_p (now))
2137 restart ();
#ifdef HAVE_EPOLL
/* Main loop based on epoll, using the epoll descriptor EFD.  The
   listening socket and, if available, the inotify and netlink status
   descriptors are registered with EFD.  Accepted connections are added
   as well until their request arrives, at which point they are handed
   to the worker threads; connections which stay silent for too long
   are closed.  STARTTIME is indexed by descriptor number here, a value
   of zero marks a descriptor which is not a waiting connection.

   The function only returns if one of the initial descriptors cannot
   be registered, i.e., if epoll cannot be used.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  int nused = 1;
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

# ifdef HAVE_INOTIFY
  if (inotify_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = inotify_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
        /* We cannot use epoll.  */
        return;
      nused = 2;
    }
# endif

# ifdef HAVE_NETLINK
  if (nl_status_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = nl_status_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, nl_status_fd, &ev) == -1)
        /* We cannot use epoll.  */
        return;
    }
# endif

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd;

# ifndef __ASSUME_ACCEPT4
            fd = -1;
            if (have_accept4 >= 0)
# endif
              {
                fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
                                                  SOCK_NONBLOCK));
# ifndef __ASSUME_ACCEPT4
                if (have_accept4 == 0)
                  have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
# endif
              }
# ifndef __ASSUME_ACCEPT4
            if (have_accept4 < 0)
              fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
# endif

            /* Use the descriptor if we have not reached the limit.  */
            if (fd >= 0)
              {
                /* Try to add the new descriptor.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
# ifdef HAVE_INOTIFY
        else if (revs[cnt].data.fd == inotify_fd)
          {
            bool to_clear[lastdb] = { false, };
            union __inev inev;

            /* Read all inotify events for files registered via
               register_traced_file().  */
            while (1)
              {
                ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
                                                       sizeof (inev)));
                if (nb < (ssize_t) sizeof (struct inotify_event))
                  {
                    if (__glibc_unlikely (nb == -1 && errno != EAGAIN))
                      {
                        /* Something went wrong when reading the inotify
                           data.  Better disable inotify.  */
                        dbg_log (_("disabled inotify after read error %d"),
                                 errno);
                        (void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd,
                                          NULL);
                        close (inotify_fd);
                        inotify_fd = -1;
                      }
                    break;
                  }

                /* Check which of the files changed.  */
                inotify_check_files (to_clear, &inev);
              }

            /* Actually perform the cache clearing.  */
            clear_db_cache (to_clear);
          }
# endif
# ifdef HAVE_NETLINK
        else if (revs[cnt].data.fd == nl_status_fd)
          {
            char buf[4096];
            /* Read all the data.  We do not interpret it here.  */
            while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
                                             sizeof (buf))) != -1)
              ;

            /* Store the new timestamp in the hosts database header,
               exactly as the poll-based loop does.  Merely bumping the
               timestamp without recording it there loses the
               update.  */
            dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
              = __bump_nl_timestamp ();
          }
# endif
        else
          {
            /* Remove the descriptor from the epoll descriptor.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      assert (starttime[sock] == 0);
      assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
      assert (nl_status_fd == -1 || starttime[nl_status_fd] == 0);
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif


2328 /* Start all the threads we want. The initial process is thread no. 1. */
2329 void
2330 start_threads (void)
2332 /* Initialize the conditional variable we will use. The only
2333 non-standard attribute we might use is the clock selection. */
2334 pthread_condattr_t condattr;
2335 pthread_condattr_init (&condattr);
2337 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2338 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2339 /* Determine whether the monotonous clock is available. */
2340 struct timespec dummy;
2341 # if _POSIX_MONOTONIC_CLOCK == 0
2342 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
2343 # endif
2344 # if _POSIX_CLOCK_SELECTION == 0
2345 if (sysconf (_SC_CLOCK_SELECTION) > 0)
2346 # endif
2347 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
2348 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
2349 timeout_clock = CLOCK_MONOTONIC;
2350 #endif
2352 /* Create the attribute for the threads. They are all created
2353 detached. */
2354 pthread_attr_init (&attr);
2355 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
2356 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2357 pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
2359 /* We allow less than LASTDB threads only for debugging. */
2360 if (debug_level == 0)
2361 nthreads = MAX (nthreads, lastdb);
2363 /* Create the threads which prune the databases. */
2364 // XXX Ideally this work would be done by some of the worker threads.
2365 // XXX But this is problematic since we would need to be able to wake
2366 // XXX them up explicitly as well as part of the group handling the
2367 // XXX ready-list. This requires an operation where we can wait on
2368 // XXX two conditional variables at the same time. This operation
2369 // XXX does not exist (yet).
2370 for (long int i = 0; i < lastdb; ++i)
2372 /* Initialize the conditional variable. */
2373 if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
2375 dbg_log (_("could not initialize conditional variable"));
2376 do_exit (1, 0, NULL);
2379 pthread_t th;
2380 if (dbs[i].enabled
2381 && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
2383 dbg_log (_("could not start clean-up thread; terminating"));
2384 do_exit (1, 0, NULL);
2388 pthread_condattr_destroy (&condattr);
2390 for (long int i = 0; i < nthreads; ++i)
2392 pthread_t th;
2393 if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
2395 if (i == 0)
2397 dbg_log (_("could not start any worker thread; terminating"));
2398 do_exit (1, 0, NULL);
2401 break;
2405 /* Now it is safe to let the parent know that we're doing fine and it can
2406 exit. */
2407 notify_parent (0);
2409 /* Determine how much room for descriptors we should initially
2410 allocate. This might need to change later if we cap the number
2411 with MAXCONN. */
2412 const long int nfds = sysconf (_SC_OPEN_MAX);
2413 #define MINCONN 32
2414 #define MAXCONN 16384
2415 if (nfds == -1 || nfds > MAXCONN)
2416 nconns = MAXCONN;
2417 else if (nfds < MINCONN)
2418 nconns = MINCONN;
2419 else
2420 nconns = nfds;
2422 /* We need memory to pass descriptors on to the worker threads. */
2423 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
2424 /* Array to keep track when connection was accepted. */
2425 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
2427 /* In the main thread we execute the loop which handles incoming
2428 connections. */
2429 #ifdef HAVE_EPOLL
2430 int efd = epoll_create (100);
2431 if (efd != -1)
2433 main_loop_epoll (efd);
2434 close (efd);
2436 #endif
2438 main_loop_poll ();
2442 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2443 this function is called, we are not listening on the nscd socket yet so
2444 we can just use the ordinary lookup functions without causing a lockup */
2445 static void
2446 begin_drop_privileges (void)
2448 struct passwd *pwd = getpwnam (server_user);
2450 if (pwd == NULL)
2452 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2453 do_exit (EXIT_FAILURE, 0,
2454 _("Failed to run nscd as user '%s'"), server_user);
2457 server_uid = pwd->pw_uid;
2458 server_gid = pwd->pw_gid;
2460 /* Save the old UID/GID if we have to change back. */
2461 if (paranoia)
2463 old_uid = getuid ();
2464 old_gid = getgid ();
2467 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
2469 /* This really must never happen. */
2470 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2471 do_exit (EXIT_FAILURE, errno,
2472 _("initial getgrouplist failed"));
2475 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
2477 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
2478 == -1)
2480 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2481 do_exit (EXIT_FAILURE, errno, _("getgrouplist failed"));
2486 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2487 run nscd as the user specified in the configuration file. */
2488 static void
2489 finish_drop_privileges (void)
2491 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2492 /* We need to preserve the capabilities to connect to the audit daemon. */
2493 cap_t new_caps = preserve_capabilities ();
2494 #endif
2496 if (setgroups (server_ngroups, server_groups) == -1)
2498 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2499 do_exit (EXIT_FAILURE, errno, _("setgroups failed"));
2502 int res;
2503 if (paranoia)
2504 res = setresgid (server_gid, server_gid, old_gid);
2505 else
2506 res = setgid (server_gid);
2507 if (res == -1)
2509 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2510 do_exit (4, errno, "setgid");
2513 if (paranoia)
2514 res = setresuid (server_uid, server_uid, old_uid);
2515 else
2516 res = setuid (server_uid);
2517 if (res == -1)
2519 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2520 do_exit (4, errno, "setuid");
2523 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2524 /* Remove the temporary capabilities. */
2525 install_real_capabilities (new_caps);
2526 #endif