Remove powerpc, sparc fdim inlines (bug 22987).
[glibc.git] / nscd / connections.c
blob5f91985859b3026a1bda74e18dbaa60db0b8416a
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2018 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>. */
19 #include <alloca.h>
20 #include <assert.h>
21 #include <atomic.h>
22 #include <error.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <grp.h>
26 #include <ifaddrs.h>
27 #include <libintl.h>
28 #include <pthread.h>
29 #include <pwd.h>
30 #include <resolv.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <stdint.h>
35 #include <arpa/inet.h>
36 #ifdef HAVE_NETLINK
37 # include <linux/netlink.h>
38 # include <linux/rtnetlink.h>
39 #endif
40 #ifdef HAVE_EPOLL
41 # include <sys/epoll.h>
42 #endif
43 #ifdef HAVE_INOTIFY
44 # include <sys/inotify.h>
45 #endif
46 #include <sys/mman.h>
47 #include <sys/param.h>
48 #include <sys/poll.h>
49 #ifdef HAVE_SENDFILE
50 # include <sys/sendfile.h>
51 #endif
52 #include <sys/socket.h>
53 #include <sys/stat.h>
54 #include <sys/un.h>
56 #include "nscd.h"
57 #include "dbg_log.h"
58 #include "selinux.h"
59 #include <resolv/resolv.h>
61 #include <kernel-features.h>
62 #include <libc-diag.h>
/* Support to run nscd as an unprivileged user.  The identifiers below
   hold the configured user names and the resolved ids/groups.  */
const char *server_user;
static uid_t server_uid;
static gid_t server_gid;
const char *stat_user;
uid_t stat_uid;
static gid_t *server_groups;
/* Fallback for systems whose headers do not provide NGROUPS.  */
#ifndef NGROUPS
# define NGROUPS 32
#endif
static int server_ngroups;

static pthread_attr_t attr;

/* nscd_init calls the first before the listening socket is created and
   the second at its very end, both only when SERVER_USER is set.  */
static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
82 /* Map request type to a string. */
83 const char *const serv2str[LASTREQ] =
85 [GETPWBYNAME] = "GETPWBYNAME",
86 [GETPWBYUID] = "GETPWBYUID",
87 [GETGRBYNAME] = "GETGRBYNAME",
88 [GETGRBYGID] = "GETGRBYGID",
89 [GETHOSTBYNAME] = "GETHOSTBYNAME",
90 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
91 [GETHOSTBYADDR] = "GETHOSTBYADDR",
92 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
93 [SHUTDOWN] = "SHUTDOWN",
94 [GETSTAT] = "GETSTAT",
95 [INVALIDATE] = "INVALIDATE",
96 [GETFDPW] = "GETFDPW",
97 [GETFDGR] = "GETFDGR",
98 [GETFDHST] = "GETFDHST",
99 [GETAI] = "GETAI",
100 [INITGROUPS] = "INITGROUPS",
101 [GETSERVBYNAME] = "GETSERVBYNAME",
102 [GETSERVBYPORT] = "GETSERVBYPORT",
103 [GETFDSERV] = "GETFDSERV",
104 [GETNETGRENT] = "GETNETGRENT",
105 [INNETGR] = "INNETGR",
106 [GETFDNETGR] = "GETFDNETGR"
109 #ifdef PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
110 # define RWLOCK_INITIALIZER PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
111 #else
112 # define RWLOCK_INITIALIZER PTHREAD_RWLOCK_INITIALIZER
113 #endif
115 /* The control data structures for the services. */
116 struct database_dyn dbs[lastdb] =
118 [pwddb] = {
119 .lock = RWLOCK_INITIALIZER,
120 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
121 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
122 .enabled = 0,
123 .check_file = 1,
124 .persistent = 0,
125 .propagate = 1,
126 .shared = 0,
127 .max_db_size = DEFAULT_MAX_DB_SIZE,
128 .suggested_module = DEFAULT_SUGGESTED_MODULE,
129 .db_filename = _PATH_NSCD_PASSWD_DB,
130 .disabled_iov = &pwd_iov_disabled,
131 .postimeout = 3600,
132 .negtimeout = 20,
133 .wr_fd = -1,
134 .ro_fd = -1,
135 .mmap_used = false
137 [grpdb] = {
138 .lock = RWLOCK_INITIALIZER,
139 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
140 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
141 .enabled = 0,
142 .check_file = 1,
143 .persistent = 0,
144 .propagate = 1,
145 .shared = 0,
146 .max_db_size = DEFAULT_MAX_DB_SIZE,
147 .suggested_module = DEFAULT_SUGGESTED_MODULE,
148 .db_filename = _PATH_NSCD_GROUP_DB,
149 .disabled_iov = &grp_iov_disabled,
150 .postimeout = 3600,
151 .negtimeout = 60,
152 .wr_fd = -1,
153 .ro_fd = -1,
154 .mmap_used = false
156 [hstdb] = {
157 .lock = RWLOCK_INITIALIZER,
158 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
159 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
160 .enabled = 0,
161 .check_file = 1,
162 .persistent = 0,
163 .propagate = 0, /* Not used. */
164 .shared = 0,
165 .max_db_size = DEFAULT_MAX_DB_SIZE,
166 .suggested_module = DEFAULT_SUGGESTED_MODULE,
167 .db_filename = _PATH_NSCD_HOSTS_DB,
168 .disabled_iov = &hst_iov_disabled,
169 .postimeout = 3600,
170 .negtimeout = 20,
171 .wr_fd = -1,
172 .ro_fd = -1,
173 .mmap_used = false
175 [servdb] = {
176 .lock = RWLOCK_INITIALIZER,
177 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
178 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
179 .enabled = 0,
180 .check_file = 1,
181 .persistent = 0,
182 .propagate = 0, /* Not used. */
183 .shared = 0,
184 .max_db_size = DEFAULT_MAX_DB_SIZE,
185 .suggested_module = DEFAULT_SUGGESTED_MODULE,
186 .db_filename = _PATH_NSCD_SERVICES_DB,
187 .disabled_iov = &serv_iov_disabled,
188 .postimeout = 28800,
189 .negtimeout = 20,
190 .wr_fd = -1,
191 .ro_fd = -1,
192 .mmap_used = false
194 [netgrdb] = {
195 .lock = RWLOCK_INITIALIZER,
196 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
197 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
198 .enabled = 0,
199 .check_file = 1,
200 .persistent = 0,
201 .propagate = 0, /* Not used. */
202 .shared = 0,
203 .max_db_size = DEFAULT_MAX_DB_SIZE,
204 .suggested_module = DEFAULT_SUGGESTED_MODULE,
205 .db_filename = _PATH_NSCD_NETGROUP_DB,
206 .disabled_iov = &netgroup_iov_disabled,
207 .postimeout = 28800,
208 .negtimeout = 20,
209 .wr_fd = -1,
210 .ro_fd = -1,
211 .mmap_used = false
216 /* Mapping of request type to database. */
217 static struct
219 bool data_request;
220 struct database_dyn *db;
221 } const reqinfo[LASTREQ] =
223 [GETPWBYNAME] = { true, &dbs[pwddb] },
224 [GETPWBYUID] = { true, &dbs[pwddb] },
225 [GETGRBYNAME] = { true, &dbs[grpdb] },
226 [GETGRBYGID] = { true, &dbs[grpdb] },
227 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
228 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
229 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
230 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
231 [SHUTDOWN] = { false, NULL },
232 [GETSTAT] = { false, NULL },
233 [SHUTDOWN] = { false, NULL },
234 [GETFDPW] = { false, &dbs[pwddb] },
235 [GETFDGR] = { false, &dbs[grpdb] },
236 [GETFDHST] = { false, &dbs[hstdb] },
237 [GETAI] = { true, &dbs[hstdb] },
238 [INITGROUPS] = { true, &dbs[grpdb] },
239 [GETSERVBYNAME] = { true, &dbs[servdb] },
240 [GETSERVBYPORT] = { true, &dbs[servdb] },
241 [GETFDSERV] = { false, &dbs[servdb] },
242 [GETNETGRENT] = { true, &dbs[netgrdb] },
243 [INNETGR] = { true, &dbs[netgrdb] },
244 [GETFDNETGR] = { false, &dbs[netgrdb] }
/* Initial number of threads to use.  -1 means "not configured";
   nscd_init replaces it with a default.  */
int nthreads = -1;
/* Maximum number of threads to use.  */
int max_nthreads = 32;

/* Socket for incoming connections.  */
static int sock;

#ifdef HAVE_INOTIFY
/* Inotify descriptor.  */
int inotify_fd = -1;
#endif

#ifdef HAVE_NETLINK
/* Descriptor for netlink status updates.  */
static int nl_status_fd = -1;
#endif

/* Number of times clients had to wait.  */
unsigned long int client_queued;
/* Send all LEN bytes starting at BUF over the socket FD, calling send
   repeatedly until everything is written.  Interrupted calls are
   retried and MSG_NOSIGNAL is passed to every send.

   Returns the (negative) result of send if a call fails, even when
   earlier calls already transferred data.  Otherwise returns the
   number of bytes sent, which is smaller than LEN only if send
   returned zero.  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  size_t n = len;
  ssize_t ret;
  do
    {
      ret = TEMP_FAILURE_RETRY (send (fd, buf, n, MSG_NOSIGNAL));
      if (ret <= 0)
        break;
      /* Advance past the part that has been sent.  */
      buf = (const char *) buf + ret;
      n -= ret;
    }
  while (n > 0);
  return ret < 0 ? ret : len - n;
}
288 #ifdef HAVE_SENDFILE
/* Copy LEN bytes of the file FROMFD, starting at offset OFF, to TOFD
   with sendfile, repeating the call until everything is transferred.
   Interrupted calls are retried; sendfile advances OFF itself.

   Returns the (negative) result of sendfile if a call fails.
   Otherwise returns the number of bytes transferred, which is smaller
   than LEN only if sendfile returned zero.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t n = len;
  ssize_t ret;

  do
    {
      ret = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, n));
      if (ret <= 0)
        break;
      n -= ret;
    }
  while (n > 0);
  return ret < 0 ? ret : len - n;
}
305 #endif
/* Markers stored in the per-byte usage map built while verifying a
   persistent database (see check_use).  The low bits give the kind of
   object a byte belongs to, the high bits flag special positions.  */
enum usekey
  {
    use_not = 0,
    /* The following three are not really used, they are symbolic constants.  */
    use_first = 16,
    use_begin = 32,
    use_end = 64,

    use_he = 1,
    use_he_begin = use_he | use_begin,
    use_he_end = use_he | use_end,
    use_data = 3,
    use_data_begin = use_data | use_begin,
    use_data_end = use_data | use_end,
    use_data_first = use_data_begin | use_first
  };
326 static int
327 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
328 enum usekey use, ref_t start, size_t len)
330 assert (len >= 2);
332 if (start > first_free || start + len > first_free
333 || (start & BLOCK_ALIGN_M1))
334 return 0;
336 if (usemap[start] == use_not)
338 /* Add the start marker. */
339 usemap[start] = use | use_begin;
340 use &= ~use_first;
342 while (--len > 0)
343 if (usemap[++start] != use_not)
344 return 0;
345 else
346 usemap[start] = use;
348 /* Add the end marker. */
349 usemap[start] = use | use_end;
351 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
353 /* Hash entries can't be shared. */
354 if (use == use_he)
355 return 0;
357 usemap[start] |= (use & use_first);
358 use &= ~use_first;
360 while (--len > 1)
361 if (usemap[++start] != use)
362 return 0;
364 if (usemap[++start] != (use | use_end))
365 return 0;
367 else
368 /* Points to a wrong object or somewhere in the middle. */
369 return 0;
371 return 1;
375 /* Verify data in persistent database. */
376 static int
377 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
379 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb
380 || dbnr == netgrdb);
382 time_t now = time (NULL);
384 struct database_pers_head *head = mem;
385 struct database_pers_head head_copy = *head;
387 /* Check that the header that was read matches the head in the database. */
388 if (memcmp (head, readhead, sizeof (*head)) != 0)
389 return 0;
391 /* First some easy tests: make sure the database header is sane. */
392 if (head->version != DB_VERSION
393 || head->header_size != sizeof (*head)
394 /* We allow a timestamp to be one hour ahead of the current time.
395 This should cover daylight saving time changes. */
396 || head->timestamp > now + 60 * 60 + 60
397 || (head->gc_cycle & 1)
398 || head->module == 0
399 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
400 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
401 || head->first_free < 0
402 || head->first_free > head->data_size
403 || (head->first_free & BLOCK_ALIGN_M1) != 0
404 || head->maxnentries < 0
405 || head->maxnsearched < 0)
406 return 0;
408 uint8_t *usemap = calloc (head->first_free, 1);
409 if (usemap == NULL)
410 return 0;
412 const char *data = (char *) &head->array[roundup (head->module,
413 ALIGN / sizeof (ref_t))];
415 nscd_ssize_t he_cnt = 0;
416 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
418 ref_t trail = head->array[cnt];
419 ref_t work = trail;
420 int tick = 0;
422 while (work != ENDREF)
424 if (! check_use (data, head->first_free, usemap, use_he, work,
425 sizeof (struct hashentry)))
426 goto fail;
428 /* Now we know we can dereference the record. */
429 struct hashentry *here = (struct hashentry *) (data + work);
431 ++he_cnt;
433 /* Make sure the record is for this type of service. */
434 if (here->type >= LASTREQ
435 || reqinfo[here->type].db != &dbs[dbnr])
436 goto fail;
438 /* Validate boolean field value. */
439 if (here->first != false && here->first != true)
440 goto fail;
442 if (here->len < 0)
443 goto fail;
445 /* Now the data. */
446 if (here->packet < 0
447 || here->packet > head->first_free
448 || here->packet + sizeof (struct datahead) > head->first_free)
449 goto fail;
451 struct datahead *dh = (struct datahead *) (data + here->packet);
453 if (! check_use (data, head->first_free, usemap,
454 use_data | (here->first ? use_first : 0),
455 here->packet, dh->allocsize))
456 goto fail;
458 if (dh->allocsize < sizeof (struct datahead)
459 || dh->recsize > dh->allocsize
460 || (dh->notfound != false && dh->notfound != true)
461 || (dh->usable != false && dh->usable != true))
462 goto fail;
464 if (here->key < here->packet + sizeof (struct datahead)
465 || here->key > here->packet + dh->allocsize
466 || here->key + here->len > here->packet + dh->allocsize)
467 goto fail;
469 work = here->next;
471 if (work == trail)
472 /* A circular list, this must not happen. */
473 goto fail;
474 if (tick)
475 trail = ((struct hashentry *) (data + trail))->next;
476 tick = 1 - tick;
480 if (he_cnt != head->nentries)
481 goto fail;
483 /* See if all data and keys had at least one reference from
484 he->first == true hashentry. */
485 for (ref_t idx = 0; idx < head->first_free; ++idx)
487 if (usemap[idx] == use_data_begin)
488 goto fail;
491 /* Finally, make sure the database hasn't changed since the first test. */
492 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
493 goto fail;
495 free (usemap);
496 return 1;
498 fail:
499 free (usemap);
500 return 0;
504 /* Initialize database information structures. */
505 void
506 nscd_init (void)
508 /* Look up unprivileged uid/gid/groups before we start listening on the
509 socket */
510 if (server_user != NULL)
511 begin_drop_privileges ();
513 if (nthreads == -1)
514 /* No configuration for this value, assume a default. */
515 nthreads = 4;
517 for (size_t cnt = 0; cnt < lastdb; ++cnt)
518 if (dbs[cnt].enabled)
520 pthread_rwlock_init (&dbs[cnt].lock, NULL);
521 pthread_mutex_init (&dbs[cnt].memlock, NULL);
523 if (dbs[cnt].persistent)
525 /* Try to open the appropriate file on disk. */
526 int fd = open (dbs[cnt].db_filename, O_RDWR | O_CLOEXEC);
527 if (fd != -1)
529 char *msg = NULL;
530 struct stat64 st;
531 void *mem;
532 size_t total;
533 struct database_pers_head head;
534 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
535 sizeof (head)));
536 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
538 fail_db_errno:
539 /* The code is single-threaded at this point so
540 using strerror is just fine. */
541 msg = strerror (errno);
542 fail_db:
543 dbg_log (_("invalid persistent database file \"%s\": %s"),
544 dbs[cnt].db_filename, msg);
545 unlink (dbs[cnt].db_filename);
547 else if (head.module == 0 && head.data_size == 0)
549 /* The file has been created, but the head has not
550 been initialized yet. */
551 msg = _("uninitialized header");
552 goto fail_db;
554 else if (head.header_size != (int) sizeof (head))
556 msg = _("header size does not match");
557 goto fail_db;
559 else if ((total = (sizeof (head)
560 + roundup (head.module * sizeof (ref_t),
561 ALIGN)
562 + head.data_size))
563 > st.st_size
564 || total < sizeof (head))
566 msg = _("file size does not match");
567 goto fail_db;
569 /* Note we map with the maximum size allowed for the
570 database. This is likely much larger than the
571 actual file size. This is OK on most OSes since
572 extensions of the underlying file will
573 automatically translate more pages available for
574 memory access. */
575 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
576 PROT_READ | PROT_WRITE,
577 MAP_SHARED, fd, 0))
578 == MAP_FAILED)
579 goto fail_db_errno;
580 else if (!verify_persistent_db (mem, &head, cnt))
582 munmap (mem, total);
583 msg = _("verification failed");
584 goto fail_db;
586 else
588 /* Success. We have the database. */
589 dbs[cnt].head = mem;
590 dbs[cnt].memsize = total;
591 dbs[cnt].data = (char *)
592 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
593 ALIGN / sizeof (ref_t))];
594 dbs[cnt].mmap_used = true;
596 if (dbs[cnt].suggested_module > head.module)
597 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
598 dbnames[cnt]);
600 dbs[cnt].wr_fd = fd;
601 fd = -1;
602 /* We also need a read-only descriptor. */
603 if (dbs[cnt].shared)
605 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
606 O_RDONLY | O_CLOEXEC);
607 if (dbs[cnt].ro_fd == -1)
608 dbg_log (_("\
609 cannot create read-only descriptor for \"%s\"; no mmap"),
610 dbs[cnt].db_filename);
613 // XXX Shall we test whether the descriptors actually
614 // XXX point to the same file?
617 /* Close the file descriptors in case something went
618 wrong in which case the variable have not been
619 assigned -1. */
620 if (fd != -1)
621 close (fd);
623 else if (errno == EACCES)
624 do_exit (EXIT_FAILURE, 0, _("cannot access '%s'"),
625 dbs[cnt].db_filename);
628 if (dbs[cnt].head == NULL)
630 /* No database loaded. Allocate the data structure,
631 possibly on disk. */
632 struct database_pers_head head;
633 size_t total = (sizeof (head)
634 + roundup (dbs[cnt].suggested_module
635 * sizeof (ref_t), ALIGN)
636 + (dbs[cnt].suggested_module
637 * DEFAULT_DATASIZE_PER_BUCKET));
639 /* Try to create the database. If we do not need a
640 persistent database create a temporary file. */
641 int fd;
642 int ro_fd = -1;
643 if (dbs[cnt].persistent)
645 fd = open (dbs[cnt].db_filename,
646 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC,
647 S_IRUSR | S_IWUSR);
648 if (fd != -1 && dbs[cnt].shared)
649 ro_fd = open (dbs[cnt].db_filename,
650 O_RDONLY | O_CLOEXEC);
652 else
654 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
655 fd = mkostemp (fname, O_CLOEXEC);
657 /* We do not need the file name anymore after we
658 opened another file descriptor in read-only mode. */
659 if (fd != -1)
661 if (dbs[cnt].shared)
662 ro_fd = open (fname, O_RDONLY | O_CLOEXEC);
664 unlink (fname);
668 if (fd == -1)
670 if (errno == EEXIST)
672 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
673 dbnames[cnt], dbs[cnt].db_filename);
674 do_exit (1, 0, NULL);
677 if (dbs[cnt].persistent)
678 dbg_log (_("cannot create %s; no persistent database used"),
679 dbs[cnt].db_filename);
680 else
681 dbg_log (_("cannot create %s; no sharing possible"),
682 dbs[cnt].db_filename);
684 dbs[cnt].persistent = 0;
685 // XXX remember: no mmap
687 else
689 /* Tell the user if we could not create the read-only
690 descriptor. */
691 if (ro_fd == -1 && dbs[cnt].shared)
692 dbg_log (_("\
693 cannot create read-only descriptor for \"%s\"; no mmap"),
694 dbs[cnt].db_filename);
696 /* Before we create the header, initialize the hash
697 table. That way if we get interrupted while writing
698 the header we can recognize a partially initialized
699 database. */
700 size_t ps = sysconf (_SC_PAGESIZE);
701 char tmpbuf[ps];
702 assert (~ENDREF == 0);
703 memset (tmpbuf, '\xff', ps);
705 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
706 off_t offset = sizeof (head);
708 size_t towrite;
709 if (offset % ps != 0)
711 towrite = MIN (remaining, ps - (offset % ps));
712 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
713 goto write_fail;
714 offset += towrite;
715 remaining -= towrite;
718 while (remaining > ps)
720 if (pwrite (fd, tmpbuf, ps, offset) == -1)
721 goto write_fail;
722 offset += ps;
723 remaining -= ps;
726 if (remaining > 0
727 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
728 goto write_fail;
730 /* Create the header of the file. */
731 struct database_pers_head head =
733 .version = DB_VERSION,
734 .header_size = sizeof (head),
735 .module = dbs[cnt].suggested_module,
736 .data_size = (dbs[cnt].suggested_module
737 * DEFAULT_DATASIZE_PER_BUCKET),
738 .first_free = 0
740 void *mem;
742 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
743 != sizeof (head))
744 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
745 != 0)
746 || (mem = mmap (NULL, dbs[cnt].max_db_size,
747 PROT_READ | PROT_WRITE,
748 MAP_SHARED, fd, 0)) == MAP_FAILED)
750 write_fail:
751 unlink (dbs[cnt].db_filename);
752 dbg_log (_("cannot write to database file %s: %s"),
753 dbs[cnt].db_filename, strerror (errno));
754 dbs[cnt].persistent = 0;
756 else
758 /* Success. */
759 dbs[cnt].head = mem;
760 dbs[cnt].data = (char *)
761 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
762 ALIGN / sizeof (ref_t))];
763 dbs[cnt].memsize = total;
764 dbs[cnt].mmap_used = true;
766 /* Remember the descriptors. */
767 dbs[cnt].wr_fd = fd;
768 dbs[cnt].ro_fd = ro_fd;
769 fd = -1;
770 ro_fd = -1;
773 if (fd != -1)
774 close (fd);
775 if (ro_fd != -1)
776 close (ro_fd);
780 if (dbs[cnt].head == NULL)
782 /* We do not use the persistent database. Just
783 create an in-memory data structure. */
784 assert (! dbs[cnt].persistent);
786 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
787 + (dbs[cnt].suggested_module
788 * sizeof (ref_t)));
789 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
790 assert (~ENDREF == 0);
791 memset (dbs[cnt].head->array, '\xff',
792 dbs[cnt].suggested_module * sizeof (ref_t));
793 dbs[cnt].head->module = dbs[cnt].suggested_module;
794 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
795 * dbs[cnt].head->module);
796 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
797 dbs[cnt].head->first_free = 0;
799 dbs[cnt].shared = 0;
800 assert (dbs[cnt].ro_fd == -1);
804 /* Create the socket. */
805 sock = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
806 if (sock < 0)
808 dbg_log (_("cannot open socket: %s"), strerror (errno));
809 do_exit (errno == EACCES ? 4 : 1, 0, NULL);
811 /* Bind a name to the socket. */
812 struct sockaddr_un sock_addr;
813 sock_addr.sun_family = AF_UNIX;
814 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
815 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
817 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
818 do_exit (errno == EACCES ? 4 : 1, 0, NULL);
821 /* Set permissions for the socket. */
822 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
824 /* Set the socket up to accept connections. */
825 if (listen (sock, SOMAXCONN) < 0)
827 dbg_log (_("cannot enable socket to accept connections: %s"),
828 strerror (errno));
829 do_exit (1, 0, NULL);
832 #ifdef HAVE_NETLINK
833 if (dbs[hstdb].enabled)
835 /* Try to open netlink socket to monitor network setting changes. */
836 nl_status_fd = socket (AF_NETLINK,
837 SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK,
838 NETLINK_ROUTE);
839 if (nl_status_fd != -1)
841 struct sockaddr_nl snl;
842 memset (&snl, '\0', sizeof (snl));
843 snl.nl_family = AF_NETLINK;
844 /* XXX Is this the best set to use? */
845 snl.nl_groups = (RTMGRP_IPV4_IFADDR | RTMGRP_TC | RTMGRP_IPV4_MROUTE
846 | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_RULE
847 | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_MROUTE
848 | RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFINFO
849 | RTMGRP_IPV6_PREFIX);
851 if (bind (nl_status_fd, (struct sockaddr *) &snl, sizeof (snl)) != 0)
853 close (nl_status_fd);
854 nl_status_fd = -1;
856 else
858 /* Start the timestamp process. */
859 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
860 = __bump_nl_timestamp ();
864 #endif
866 /* Change to unprivileged uid/gid/groups if specified in config file */
867 if (server_user != NULL)
868 finish_drop_privileges ();
871 #ifdef HAVE_INOTIFY
872 #define TRACED_FILE_MASK (IN_DELETE_SELF | IN_CLOSE_WRITE | IN_MOVE_SELF)
873 #define TRACED_DIR_MASK (IN_DELETE_SELF | IN_CREATE | IN_MOVED_TO | IN_MOVE_SELF)
874 void
875 install_watches (struct traced_file *finfo)
877 /* Use inotify support if we have it. */
878 if (finfo->inotify_descr[TRACED_FILE] < 0)
879 finfo->inotify_descr[TRACED_FILE] = inotify_add_watch (inotify_fd,
880 finfo->fname,
881 TRACED_FILE_MASK);
882 if (finfo->inotify_descr[TRACED_FILE] < 0)
884 dbg_log (_("disabled inotify-based monitoring for file `%s': %s"),
885 finfo->fname, strerror (errno));
886 return;
888 dbg_log (_("monitoring file `%s` (%d)"),
889 finfo->fname, finfo->inotify_descr[TRACED_FILE]);
890 /* Additionally listen for events in the file's parent directory.
891 We do this because the file to be watched might be
892 deleted and then added back again. When it is added back again
893 we must re-add the watch. We must also cover IN_MOVED_TO to
894 detect a file being moved into the directory. */
895 if (finfo->inotify_descr[TRACED_DIR] < 0)
896 finfo->inotify_descr[TRACED_DIR] = inotify_add_watch (inotify_fd,
897 finfo->dname,
898 TRACED_DIR_MASK);
899 if (finfo->inotify_descr[TRACED_DIR] < 0)
901 dbg_log (_("disabled inotify-based monitoring for directory `%s': %s"),
902 finfo->fname, strerror (errno));
903 return;
905 dbg_log (_("monitoring directory `%s` (%d)"),
906 finfo->dname, finfo->inotify_descr[TRACED_DIR]);
908 #endif
910 /* Register the file in FINFO as a traced file for the database DBS[DBIX].
912 We support registering multiple files per database. Each call to
913 register_traced_file adds to the list of registered files.
915 When we prune the database, either through timeout or a request to
916 invalidate, we will check to see if any of the registered files has changed.
917 When we accept new connections to handle a cache request we will also
918 check to see if any of the registered files has changed.
920 If we have inotify support then we install an inotify fd to notify us of
921 file deletion or modification, both of which will require we invalidate
922 the cache for the database. Without inotify support we stat the file and
923 store st_mtime to determine if the file has been modified. */
924 void
925 register_traced_file (size_t dbidx, struct traced_file *finfo)
927 /* If the database is disabled or file checking is disabled
928 then ignore the registration. */
929 if (! dbs[dbidx].enabled || ! dbs[dbidx].check_file)
930 return;
932 if (__glibc_unlikely (debug_level > 0))
933 dbg_log (_("monitoring file %s for database %s"),
934 finfo->fname, dbnames[dbidx]);
936 #ifdef HAVE_INOTIFY
937 install_watches (finfo);
938 #endif
939 struct stat64 st;
940 if (stat64 (finfo->fname, &st) < 0)
942 /* We cannot stat() the file. Set mtime to zero and try again later. */
943 dbg_log (_("stat failed for file `%s'; will try again later: %s"),
944 finfo->fname, strerror (errno));
945 finfo->mtime = 0;
947 else
948 finfo->mtime = st.st_mtime;
950 /* Queue up the file name. */
951 finfo->next = dbs[dbidx].traced_files;
952 dbs[dbidx].traced_files = finfo;
956 /* Close the connections. */
957 void
958 close_sockets (void)
960 close (sock);
964 static void
965 invalidate_cache (char *key, int fd)
967 dbtype number;
968 int32_t resp;
970 for (number = pwddb; number < lastdb; ++number)
971 if (strcmp (key, dbnames[number]) == 0)
973 struct traced_file *runp = dbs[number].traced_files;
974 while (runp != NULL)
976 /* Make sure we reload from file when checking mtime. */
977 runp->mtime = 0;
978 #ifdef HAVE_INOTIFY
979 /* During an invalidation we try to reload the traced
980 file watches. This allows the user to re-sync if
981 inotify events were lost. Similar to what we do during
982 pruning. */
983 install_watches (runp);
984 #endif
985 if (runp->call_res_init)
987 res_init ();
988 break;
990 runp = runp->next;
992 break;
995 if (number == lastdb)
997 resp = EINVAL;
998 writeall (fd, &resp, sizeof (resp));
999 return;
1002 if (dbs[number].enabled)
1004 pthread_mutex_lock (&dbs[number].prune_run_lock);
1005 prune_cache (&dbs[number], LONG_MAX, fd);
1006 pthread_mutex_unlock (&dbs[number].prune_run_lock);
1008 else
1010 resp = 0;
1011 writeall (fd, &resp, sizeof (resp));
1016 #ifdef SCM_RIGHTS
1017 static void
1018 send_ro_fd (struct database_dyn *db, char *key, int fd)
1020 /* If we do not have an read-only file descriptor do nothing. */
1021 if (db->ro_fd == -1)
1022 return;
1024 /* We need to send some data along with the descriptor. */
1025 uint64_t mapsize = (db->head->data_size
1026 + roundup (db->head->module * sizeof (ref_t), ALIGN)
1027 + sizeof (struct database_pers_head));
1028 struct iovec iov[2];
1029 iov[0].iov_base = key;
1030 iov[0].iov_len = strlen (key) + 1;
1031 iov[1].iov_base = &mapsize;
1032 iov[1].iov_len = sizeof (mapsize);
1034 /* Prepare the control message to transfer the descriptor. */
1035 union
1037 struct cmsghdr hdr;
1038 char bytes[CMSG_SPACE (sizeof (int))];
1039 } buf;
1040 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
1041 .msg_control = buf.bytes,
1042 .msg_controllen = sizeof (buf) };
1043 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
1045 cmsg->cmsg_level = SOL_SOCKET;
1046 cmsg->cmsg_type = SCM_RIGHTS;
1047 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
1049 int *ip = (int *) CMSG_DATA (cmsg);
1050 *ip = db->ro_fd;
1052 msg.msg_controllen = cmsg->cmsg_len;
1054 /* Send the control message. We repeat when we are interrupted but
1055 everything else is ignored. */
1056 #ifndef MSG_NOSIGNAL
1057 # define MSG_NOSIGNAL 0
1058 #endif
1059 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
1061 if (__glibc_unlikely (debug_level > 0))
1062 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
1064 #endif /* SCM_RIGHTS */
1067 /* Handle new request. */
1068 static void
1069 handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
1071 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
1073 if (debug_level > 0)
1074 dbg_log (_("\
1075 cannot handle old request version %d; current version is %d"),
1076 req->version, NSCD_VERSION);
1077 return;
1080 /* Perform the SELinux check before we go on to the standard checks. */
1081 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
1083 if (debug_level > 0)
1085 #ifdef SO_PEERCRED
1086 char pbuf[sizeof ("/proc//exe") + 3 * sizeof (long int)];
1087 # ifdef PATH_MAX
1088 char buf[PATH_MAX];
1089 # else
1090 char buf[4096];
1091 # endif
1093 snprintf (pbuf, sizeof (pbuf), "/proc/%ld/exe", (long int) pid);
1094 ssize_t n = readlink (pbuf, buf, sizeof (buf) - 1);
1096 if (n <= 0)
1097 dbg_log (_("\
1098 request from %ld not handled due to missing permission"), (long int) pid);
1099 else
1101 buf[n] = '\0';
1102 dbg_log (_("\
1103 request from '%s' [%ld] not handled due to missing permission"),
1104 buf, (long int) pid);
1106 #else
1107 dbg_log (_("request not handled due to missing permission"));
1108 #endif
1110 return;
1113 struct database_dyn *db = reqinfo[req->type].db;
1115 /* See whether we can service the request from the cache. */
1116 if (__builtin_expect (reqinfo[req->type].data_request, true))
1118 if (__builtin_expect (debug_level, 0) > 0)
1120 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
1122 char buf[INET6_ADDRSTRLEN];
1124 dbg_log ("\t%s (%s)", serv2str[req->type],
1125 inet_ntop (req->type == GETHOSTBYADDR
1126 ? AF_INET : AF_INET6,
1127 key, buf, sizeof (buf)));
1129 else
1130 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1133 /* Is this service enabled? */
1134 if (__glibc_unlikely (!db->enabled))
1136 /* No, sent the prepared record. */
1137 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1138 db->disabled_iov->iov_len,
1139 MSG_NOSIGNAL))
1140 != (ssize_t) db->disabled_iov->iov_len
1141 && __builtin_expect (debug_level, 0) > 0)
1143 /* We have problems sending the result. */
1144 char buf[256];
1145 dbg_log (_("cannot write result: %s"),
1146 strerror_r (errno, buf, sizeof (buf)));
1149 return;
1152 /* Be sure we can read the data. */
1153 if (__glibc_unlikely (pthread_rwlock_tryrdlock (&db->lock) != 0))
1155 ++db->head->rdlockdelayed;
1156 pthread_rwlock_rdlock (&db->lock);
1159 /* See whether we can handle it from the cache. */
1160 struct datahead *cached;
1161 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1162 db, uid);
1163 if (cached != NULL)
1165 /* Hurray it's in the cache. */
1166 ssize_t nwritten;
1168 #ifdef HAVE_SENDFILE
1169 if (__glibc_likely (db->mmap_used))
1171 assert (db->wr_fd != -1);
1172 assert ((char *) cached->data > (char *) db->data);
1173 assert ((char *) cached->data - (char *) db->head
1174 + cached->recsize
1175 <= (sizeof (struct database_pers_head)
1176 + db->head->module * sizeof (ref_t)
1177 + db->head->data_size));
1178 nwritten = sendfileall (fd, db->wr_fd,
1179 (char *) cached->data
1180 - (char *) db->head, cached->recsize);
1181 # ifndef __ASSUME_SENDFILE
1182 if (nwritten == -1 && errno == ENOSYS)
1183 goto use_write;
1184 # endif
1186 else
1187 # ifndef __ASSUME_SENDFILE
1188 use_write:
1189 # endif
1190 #endif
1191 nwritten = writeall (fd, cached->data, cached->recsize);
1193 if (nwritten != cached->recsize
1194 && __builtin_expect (debug_level, 0) > 0)
1196 /* We have problems sending the result. */
1197 char buf[256];
1198 dbg_log (_("cannot write result: %s"),
1199 strerror_r (errno, buf, sizeof (buf)));
1202 pthread_rwlock_unlock (&db->lock);
1204 return;
1207 pthread_rwlock_unlock (&db->lock);
1209 else if (__builtin_expect (debug_level, 0) > 0)
1211 if (req->type == INVALIDATE)
1212 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1213 else
1214 dbg_log ("\t%s", serv2str[req->type]);
1217 /* Handle the request. */
1218 switch (req->type)
1220 case GETPWBYNAME:
1221 addpwbyname (db, fd, req, key, uid);
1222 break;
1224 case GETPWBYUID:
1225 addpwbyuid (db, fd, req, key, uid);
1226 break;
1228 case GETGRBYNAME:
1229 addgrbyname (db, fd, req, key, uid);
1230 break;
1232 case GETGRBYGID:
1233 addgrbygid (db, fd, req, key, uid);
1234 break;
1236 case GETHOSTBYNAME:
1237 addhstbyname (db, fd, req, key, uid);
1238 break;
1240 case GETHOSTBYNAMEv6:
1241 addhstbynamev6 (db, fd, req, key, uid);
1242 break;
1244 case GETHOSTBYADDR:
1245 addhstbyaddr (db, fd, req, key, uid);
1246 break;
1248 case GETHOSTBYADDRv6:
1249 addhstbyaddrv6 (db, fd, req, key, uid);
1250 break;
1252 case GETAI:
1253 addhstai (db, fd, req, key, uid);
1254 break;
1256 case INITGROUPS:
1257 addinitgroups (db, fd, req, key, uid);
1258 break;
1260 case GETSERVBYNAME:
1261 addservbyname (db, fd, req, key, uid);
1262 break;
1264 case GETSERVBYPORT:
1265 addservbyport (db, fd, req, key, uid);
1266 break;
1268 case GETNETGRENT:
1269 addgetnetgrent (db, fd, req, key, uid);
1270 break;
1272 case INNETGR:
1273 addinnetgr (db, fd, req, key, uid);
1274 break;
1276 case GETSTAT:
1277 case SHUTDOWN:
1278 case INVALIDATE:
1280 /* Get the callers credentials. */
1281 #ifdef SO_PEERCRED
1282 struct ucred caller;
1283 socklen_t optlen = sizeof (caller);
1285 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1287 char buf[256];
1289 dbg_log (_("error getting caller's id: %s"),
1290 strerror_r (errno, buf, sizeof (buf)));
1291 break;
1294 uid = caller.uid;
1295 #else
1296 /* Some systems have no SO_PEERCRED implementation. They don't
1297 care about security so we don't as well. */
1298 uid = 0;
1299 #endif
1302 /* Accept shutdown, getstat and invalidate only from root. For
1303 the stat call also allow the user specified in the config file. */
1304 if (req->type == GETSTAT)
1306 if (uid == 0 || uid == stat_uid)
1307 send_stats (fd, dbs);
1309 else if (uid == 0)
1311 if (req->type == INVALIDATE)
1312 invalidate_cache (key, fd);
1313 else
1314 termination_handler (0);
1316 break;
1318 case GETFDPW:
1319 case GETFDGR:
1320 case GETFDHST:
1321 case GETFDSERV:
1322 case GETFDNETGR:
1323 #ifdef SCM_RIGHTS
1324 send_ro_fd (reqinfo[req->type].db, key, fd);
1325 #endif
1326 break;
1328 default:
1329 /* Ignore the command, it's nothing we know. */
1330 break;
1335 /* Restart the process. */
1336 static void
1337 restart (void)
1339 /* First determine the parameters. We do not use the parameters
1340 passed to main() since in case nscd is started by running the
1341 dynamic linker this will not work. Yes, this is not the usual
1342 case but nscd is part of glibc and we occasionally do this. */
1343 size_t buflen = 1024;
1344 char *buf = alloca (buflen);
1345 size_t readlen = 0;
1346 int fd = open ("/proc/self/cmdline", O_RDONLY);
1347 if (fd == -1)
1349 dbg_log (_("\
1350 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1351 strerror (errno));
1353 paranoia = 0;
1354 return;
1357 while (1)
1359 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1360 buflen - readlen));
1361 if (n == -1)
1363 dbg_log (_("\
1364 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1365 strerror (errno));
1367 close (fd);
1368 paranoia = 0;
1369 return;
1372 readlen += n;
1374 if (readlen < buflen)
1375 break;
1377 /* We might have to extend the buffer. */
1378 size_t old_buflen = buflen;
1379 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1380 buf = memmove (newp, buf, old_buflen);
1383 close (fd);
1385 /* Parse the command line. Worst case scenario: every two
1386 characters form one parameter (one character plus NUL). */
1387 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1388 int argc = 0;
1390 char *cp = buf;
1391 while (cp < buf + readlen)
1393 argv[argc++] = cp;
1394 cp = (char *) rawmemchr (cp, '\0') + 1;
1396 argv[argc] = NULL;
1398 /* Second, change back to the old user if we changed it. */
1399 if (server_user != NULL)
1401 if (setresuid (old_uid, old_uid, old_uid) != 0)
1403 dbg_log (_("\
1404 cannot change to old UID: %s; disabling paranoia mode"),
1405 strerror (errno));
1407 paranoia = 0;
1408 return;
1411 if (setresgid (old_gid, old_gid, old_gid) != 0)
1413 dbg_log (_("\
1414 cannot change to old GID: %s; disabling paranoia mode"),
1415 strerror (errno));
1417 ignore_value (setuid (server_uid));
1418 paranoia = 0;
1419 return;
1423 /* Next change back to the old working directory. */
1424 if (chdir (oldcwd) == -1)
1426 dbg_log (_("\
1427 cannot change to old working directory: %s; disabling paranoia mode"),
1428 strerror (errno));
1430 if (server_user != NULL)
1432 ignore_value (setuid (server_uid));
1433 ignore_value (setgid (server_gid));
1435 paranoia = 0;
1436 return;
1439 /* Synchronize memory. */
1440 int32_t certainly[lastdb];
1441 for (int cnt = 0; cnt < lastdb; ++cnt)
1442 if (dbs[cnt].enabled)
1444 /* Make sure nobody keeps using the database. */
1445 dbs[cnt].head->timestamp = 0;
1446 certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
1447 dbs[cnt].head->nscd_certainly_running = 0;
1449 if (dbs[cnt].persistent)
1450 // XXX async OK?
1451 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1454 /* The preparations are done. */
1455 #ifdef PATH_MAX
1456 char pathbuf[PATH_MAX];
1457 #else
1458 char pathbuf[256];
1459 #endif
1460 /* Try to exec the real nscd program so the process name (as reported
1461 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1462 if readlink or the exec with the result of the readlink call fails. */
1463 ssize_t n = readlink ("/proc/self/exe", pathbuf, sizeof (pathbuf) - 1);
1464 if (n != -1)
1466 pathbuf[n] = '\0';
1467 execv (pathbuf, argv);
1469 execv ("/proc/self/exe", argv);
1471 /* If we come here, we will never be able to re-exec. */
1472 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1473 strerror (errno));
1475 if (server_user != NULL)
1477 ignore_value (setuid (server_uid));
1478 ignore_value (setgid (server_gid));
1480 if (chdir ("/") != 0)
1481 dbg_log (_("cannot change current working directory to \"/\": %s"),
1482 strerror (errno));
1483 paranoia = 0;
1485 /* Reenable the databases. */
1486 time_t now = time (NULL);
1487 for (int cnt = 0; cnt < lastdb; ++cnt)
1488 if (dbs[cnt].enabled)
1490 dbs[cnt].head->timestamp = now;
1491 dbs[cnt].head->nscd_certainly_running = certainly[cnt];
1496 /* List of file descriptors. */
1497 struct fdlist
1499 int fd;
1500 struct fdlist *next;
1502 /* Memory allocated for the list. */
1503 static struct fdlist *fdlist;
1504 /* List of currently ready-to-read file descriptors. */
1505 static struct fdlist *readylist;
1507 /* Conditional variable and mutex to signal availability of entries in
1508 READYLIST. The condvar is initialized dynamically since we might
1509 use a different clock depending on availability. */
1510 static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
1511 static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1513 /* The clock to use with the condvar. */
1514 static clockid_t timeout_clock = CLOCK_REALTIME;
1516 /* Number of threads ready to handle the READYLIST. */
1517 static unsigned long int nready;
1520 /* Function for the clean-up threads. */
1521 static void *
1522 __attribute__ ((__noreturn__))
1523 nscd_run_prune (void *p)
1525 const long int my_number = (long int) p;
1526 assert (dbs[my_number].enabled);
1528 int dont_need_update = setup_thread (&dbs[my_number]);
1530 time_t now = time (NULL);
1532 /* We are running. */
1533 dbs[my_number].head->timestamp = now;
1535 struct timespec prune_ts;
1536 if (__glibc_unlikely (clock_gettime (timeout_clock, &prune_ts) == -1))
1537 /* Should never happen. */
1538 abort ();
1540 /* Compute the initial timeout time. Prevent all the timers to go
1541 off at the same time by adding a db-based value. */
1542 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1543 dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;
1545 pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
1546 pthread_mutex_t *prune_run_lock = &dbs[my_number].prune_run_lock;
1547 pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;
1549 pthread_mutex_lock (prune_lock);
1550 while (1)
1552 /* Wait, but not forever. */
1553 int e = 0;
1554 if (! dbs[my_number].clear_cache)
1555 e = pthread_cond_timedwait (prune_cond, prune_lock, &prune_ts);
1556 assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));
1558 time_t next_wait;
1559 now = time (NULL);
1560 if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time
1561 || dbs[my_number].clear_cache)
1563 /* We will determine the new timout values based on the
1564 cache content. Should there be concurrent additions to
1565 the cache which are not accounted for in the cache
1566 pruning we want to know about it. Therefore set the
1567 timeout to the maximum. It will be descreased when adding
1568 new entries to the cache, if necessary. */
1569 dbs[my_number].wakeup_time = MAX_TIMEOUT_VALUE;
1571 /* Unconditionally reset the flag. */
1572 time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
1573 dbs[my_number].clear_cache = 0;
1575 pthread_mutex_unlock (prune_lock);
1577 /* We use a separate lock for running the prune function (instead
1578 of keeping prune_lock locked) because this enables concurrent
1579 invocations of cache_add which might modify the timeout value. */
1580 pthread_mutex_lock (prune_run_lock);
1581 next_wait = prune_cache (&dbs[my_number], prune_now, -1);
1582 pthread_mutex_unlock (prune_run_lock);
1584 next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1585 /* If clients cannot determine for sure whether nscd is running
1586 we need to wake up occasionally to update the timestamp.
1587 Wait 90% of the update period. */
1588 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1589 if (__glibc_unlikely (! dont_need_update))
1591 next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1592 dbs[my_number].head->timestamp = now;
1595 pthread_mutex_lock (prune_lock);
1597 /* Make it known when we will wake up again. */
1598 if (now + next_wait < dbs[my_number].wakeup_time)
1599 dbs[my_number].wakeup_time = now + next_wait;
1600 else
1601 next_wait = dbs[my_number].wakeup_time - now;
1603 else
1604 /* The cache was just pruned. Do not do it again now. Just
1605 use the new timeout value. */
1606 next_wait = dbs[my_number].wakeup_time - now;
1608 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1609 /* Should never happen. */
1610 abort ();
1612 /* Compute next timeout time. */
1613 prune_ts.tv_sec += next_wait;
1618 /* This is the main loop. It is replicated in different threads but
1619 the use of the ready list makes sure only one thread handles an
1620 incoming connection. */
1621 static void *
1622 __attribute__ ((__noreturn__))
1623 nscd_run_worker (void *p)
1625 char buf[256];
1627 /* Initial locking. */
1628 pthread_mutex_lock (&readylist_lock);
1630 /* One more thread available. */
1631 ++nready;
1633 while (1)
1635 while (readylist == NULL)
1636 pthread_cond_wait (&readylist_cond, &readylist_lock);
1638 struct fdlist *it = readylist->next;
1639 if (readylist->next == readylist)
1640 /* Just one entry on the list. */
1641 readylist = NULL;
1642 else
1643 readylist->next = it->next;
1645 /* Extract the information and mark the record ready to be used
1646 again. */
1647 int fd = it->fd;
1648 it->next = NULL;
1650 /* One more thread available. */
1651 --nready;
1653 /* We are done with the list. */
1654 pthread_mutex_unlock (&readylist_lock);
1656 /* Now read the request. */
1657 request_header req;
1658 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1659 != sizeof (req), 0))
1661 /* We failed to read data. Note that this also might mean we
1662 failed because we would have blocked. */
1663 if (debug_level > 0)
1664 dbg_log (_("short read while reading request: %s"),
1665 strerror_r (errno, buf, sizeof (buf)));
1666 goto close_and_out;
1669 /* Check whether this is a valid request type. */
1670 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1671 goto close_and_out;
1673 /* Some systems have no SO_PEERCRED implementation. They don't
1674 care about security so we don't as well. */
1675 uid_t uid = -1;
1676 #ifdef SO_PEERCRED
1677 pid_t pid = 0;
1679 if (__glibc_unlikely (debug_level > 0))
1681 struct ucred caller;
1682 socklen_t optlen = sizeof (caller);
1684 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1685 pid = caller.pid;
1687 #else
1688 const pid_t pid = 0;
1689 #endif
1691 /* It should not be possible to crash the nscd with a silly
1692 request (i.e., a terribly large key). We limit the size to 1kb. */
1693 if (__builtin_expect (req.key_len, 1) < 0
1694 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1696 if (debug_level > 0)
1697 dbg_log (_("key length in request too long: %d"), req.key_len);
1699 else
1701 /* Get the key. */
1702 char keybuf[MAXKEYLEN + 1];
1704 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1705 req.key_len))
1706 != req.key_len, 0))
1708 /* Again, this can also mean we would have blocked. */
1709 if (debug_level > 0)
1710 dbg_log (_("short read while reading request key: %s"),
1711 strerror_r (errno, buf, sizeof (buf)));
1712 goto close_and_out;
1714 keybuf[req.key_len] = '\0';
1716 if (__builtin_expect (debug_level, 0) > 0)
1718 #ifdef SO_PEERCRED
1719 if (pid != 0)
1720 dbg_log (_("\
1721 handle_request: request received (Version = %d) from PID %ld"),
1722 req.version, (long int) pid);
1723 else
1724 #endif
1725 dbg_log (_("\
1726 handle_request: request received (Version = %d)"), req.version);
1729 /* Phew, we got all the data, now process it. */
1730 handle_request (fd, &req, keybuf, uid, pid);
1733 close_and_out:
1734 /* We are done. */
1735 close (fd);
1737 /* Re-locking. */
1738 pthread_mutex_lock (&readylist_lock);
1740 /* One more thread available. */
1741 ++nready;
1743 /* NOTREACHED */
1747 static unsigned int nconns;
1749 static void
1750 fd_ready (int fd)
1752 pthread_mutex_lock (&readylist_lock);
1754 /* Find an empty entry in FDLIST. */
1755 size_t inner;
1756 for (inner = 0; inner < nconns; ++inner)
1757 if (fdlist[inner].next == NULL)
1758 break;
1759 assert (inner < nconns);
1761 fdlist[inner].fd = fd;
1763 if (readylist == NULL)
1764 readylist = fdlist[inner].next = &fdlist[inner];
1765 else
1767 fdlist[inner].next = readylist->next;
1768 readylist = readylist->next = &fdlist[inner];
1771 bool do_signal = true;
1772 if (__glibc_unlikely (nready == 0))
1774 ++client_queued;
1775 do_signal = false;
1777 /* Try to start another thread to help out. */
1778 pthread_t th;
1779 if (nthreads < max_nthreads
1780 && pthread_create (&th, &attr, nscd_run_worker,
1781 (void *) (long int) nthreads) == 0)
1783 /* We got another thread. */
1784 ++nthreads;
1785 /* The new thread might need a kick. */
1786 do_signal = true;
1791 pthread_mutex_unlock (&readylist_lock);
1793 /* Tell one of the worker threads there is work to do. */
1794 if (do_signal)
1795 pthread_cond_signal (&readylist_cond);
1799 /* Check whether restarting should happen. */
1800 static bool
1801 restart_p (time_t now)
1803 return (paranoia && readylist == NULL && nready == nthreads
1804 && now >= restart_time);
1808 /* Array for times a connection was accepted. */
1809 static time_t *starttime;
1811 #ifdef HAVE_INOTIFY
1812 /* Inotify event for changed file. */
1813 union __inev
1815 struct inotify_event i;
1816 # ifndef PATH_MAX
1817 # define PATH_MAX 1024
1818 # endif
1819 char buf[sizeof (struct inotify_event) + PATH_MAX];
1822 /* Returns 0 if the file is there otherwise -1. */
1824 check_file (struct traced_file *finfo)
1826 struct stat64 st;
1827 /* We could check mtime and if different re-add
1828 the watches, and invalidate the database, but we
1829 don't because we are called from inotify_check_files
1830 which should be doing that work. If sufficient inotify
1831 events were lost then the next pruning or invalidation
1832 will do the stat and mtime check. We don't do it here to
1833 keep the logic simple. */
1834 if (stat64 (finfo->fname, &st) < 0)
1835 return -1;
1836 return 0;
1839 /* Process the inotify event in INEV. If the event matches any of the files
1840 registered with a database then mark that database as requiring its cache
1841 to be cleared. We indicate the cache needs clearing by setting
1842 TO_CLEAR[DBCNT] to true for the matching database. */
1843 static void
1844 inotify_check_files (bool *to_clear, union __inev *inev)
1846 /* Check which of the files changed. */
1847 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1849 struct traced_file *finfo = dbs[dbcnt].traced_files;
1851 while (finfo != NULL)
1853 /* The configuration file was moved or deleted.
1854 We stop watching it at that point, and reinitialize. */
1855 if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
1856 && ((inev->i.mask & IN_MOVE_SELF)
1857 || (inev->i.mask & IN_DELETE_SELF)
1858 || (inev->i.mask & IN_IGNORED)))
1860 int ret;
1861 bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
1863 if (check_file (finfo) == 0)
1865 dbg_log (_("ignored inotify event for `%s` (file exists)"),
1866 finfo->fname);
1867 return;
1870 dbg_log (_("monitored file `%s` was %s, removing watch"),
1871 finfo->fname, moved ? "moved" : "deleted");
1872 /* File was moved out, remove the watch. Watches are
1873 automatically removed when the file is deleted. */
1874 if (moved)
1876 ret = inotify_rm_watch (inotify_fd, inev->i.wd);
1877 if (ret < 0)
1878 dbg_log (_("failed to remove file watch `%s`: %s"),
1879 finfo->fname, strerror (errno));
1881 finfo->inotify_descr[TRACED_FILE] = -1;
1882 to_clear[dbcnt] = true;
1883 if (finfo->call_res_init)
1884 res_init ();
1885 return;
1887 /* The configuration file was open for writing and has just closed.
1888 We reset the cache and reinitialize. */
1889 if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
1890 && inev->i.mask & IN_CLOSE_WRITE)
1892 /* Mark cache as needing to be cleared and reinitialize. */
1893 dbg_log (_("monitored file `%s` was written to"), finfo->fname);
1894 to_clear[dbcnt] = true;
1895 if (finfo->call_res_init)
1896 res_init ();
1897 return;
1899 /* The parent directory was moved or deleted. We trigger one last
1900 invalidation. At the next pruning or invalidation we may add
1901 this watch back if the file is present again. */
1902 if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
1903 && ((inev->i.mask & IN_DELETE_SELF)
1904 || (inev->i.mask & IN_MOVE_SELF)
1905 || (inev->i.mask & IN_IGNORED)))
1907 bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
1908 /* The directory watch may have already been removed
1909 but we don't know so we just remove it again and
1910 ignore the error. Then we remove the file watch.
1911 Note: watches are automatically removed for deleted
1912 files. */
1913 if (moved)
1914 inotify_rm_watch (inotify_fd, inev->i.wd);
1915 if (finfo->inotify_descr[TRACED_FILE] != -1)
1917 dbg_log (_("monitored parent directory `%s` was %s, removing watch on `%s`"),
1918 finfo->dname, moved ? "moved" : "deleted", finfo->fname);
1919 if (inotify_rm_watch (inotify_fd, finfo->inotify_descr[TRACED_FILE]) < 0)
1920 dbg_log (_("failed to remove file watch `%s`: %s"),
1921 finfo->dname, strerror (errno));
1923 finfo->inotify_descr[TRACED_FILE] = -1;
1924 finfo->inotify_descr[TRACED_DIR] = -1;
1925 to_clear[dbcnt] = true;
1926 if (finfo->call_res_init)
1927 res_init ();
1928 /* Continue to the next entry since this might be the
1929 parent directory for multiple registered files and
1930 we want to remove watches for all registered files. */
1931 continue;
1933 /* The parent directory had a create or moved to event. */
1934 if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
1935 && ((inev->i.mask & IN_MOVED_TO)
1936 || (inev->i.mask & IN_CREATE))
1937 && strcmp (inev->i.name, finfo->sfname) == 0)
1939 /* We detected a directory change. We look for the creation
1940 of the file we are tracking or the move of the same file
1941 into the directory. */
1942 int ret;
1943 dbg_log (_("monitored file `%s` was %s, adding watch"),
1944 finfo->fname,
1945 inev->i.mask & IN_CREATE ? "created" : "moved into place");
1946 /* File was moved in or created. Regenerate the watch. */
1947 if (finfo->inotify_descr[TRACED_FILE] != -1)
1948 inotify_rm_watch (inotify_fd,
1949 finfo->inotify_descr[TRACED_FILE]);
1951 ret = inotify_add_watch (inotify_fd,
1952 finfo->fname,
1953 TRACED_FILE_MASK);
1954 if (ret < 0)
1955 dbg_log (_("failed to add file watch `%s`: %s"),
1956 finfo->fname, strerror (errno));
1958 finfo->inotify_descr[TRACED_FILE] = ret;
1960 /* The file is new or moved so mark cache as needing to
1961 be cleared and reinitialize. */
1962 to_clear[dbcnt] = true;
1963 if (finfo->call_res_init)
1964 res_init ();
1966 /* Done re-adding the watch. Don't return, we may still
1967 have other files in this same directory, same watch
1968 descriptor, and need to process them. */
1970 /* Other events are ignored, and we move on to the next file. */
1971 finfo = finfo->next;
1976 /* If an entry in the array of booleans TO_CLEAR is TRUE then clear the cache
1977 for the associated database, otherwise do nothing. The TO_CLEAR array must
1978 have LASTDB entries. */
1979 static inline void
1980 clear_db_cache (bool *to_clear)
1982 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1983 if (to_clear[dbcnt])
1985 pthread_mutex_lock (&dbs[dbcnt].prune_lock);
1986 dbs[dbcnt].clear_cache = 1;
1987 pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
1988 pthread_cond_signal (&dbs[dbcnt].prune_cond);
1993 handle_inotify_events (void)
1995 bool to_clear[lastdb] = { false, };
1996 union __inev inev;
1998 /* Read all inotify events for files registered via
1999 register_traced_file(). */
2000 while (1)
2002 /* Potentially read multiple events into buf. */
2003 ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd,
2004 &inev.buf,
2005 sizeof (inev)));
2006 if (nb < (ssize_t) sizeof (struct inotify_event))
2008 /* Not even 1 event. */
2009 if (__glibc_unlikely (nb == -1 && errno != EAGAIN))
2010 return -1;
2011 /* Done reading events that are ready. */
2012 break;
2014 /* Process all events. The normal inotify interface delivers
2015 complete events on a read and never a partial event. */
2016 char *eptr = &inev.buf[0];
2017 ssize_t count;
2018 while (1)
2020 /* Check which of the files changed. */
2021 inotify_check_files (to_clear, &inev);
2022 count = sizeof (struct inotify_event) + inev.i.len;
2023 eptr += count;
2024 nb -= count;
2025 if (nb >= (ssize_t) sizeof (struct inotify_event))
2026 memcpy (&inev, eptr, nb);
2027 else
2028 break;
2030 continue;
2032 /* Actually perform the cache clearing. */
2033 clear_db_cache (to_clear);
2034 return 0;
2037 #endif
2039 static void
2040 __attribute__ ((__noreturn__))
2041 main_loop_poll (void)
2043 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
2044 * sizeof (conns[0]));
2046 conns[0].fd = sock;
2047 conns[0].events = POLLRDNORM;
2048 size_t nused = 1;
2049 size_t firstfree = 1;
2051 #ifdef HAVE_INOTIFY
2052 if (inotify_fd != -1)
2054 conns[1].fd = inotify_fd;
2055 conns[1].events = POLLRDNORM;
2056 nused = 2;
2057 firstfree = 2;
2059 #endif
2061 #ifdef HAVE_NETLINK
2062 size_t idx_nl_status_fd = 0;
2063 if (nl_status_fd != -1)
2065 idx_nl_status_fd = nused;
2066 conns[nused].fd = nl_status_fd;
2067 conns[nused].events = POLLRDNORM;
2068 ++nused;
2069 firstfree = nused;
2071 #endif
2073 while (1)
2075 /* Wait for any event. We wait at most a couple of seconds so
2076 that we can check whether we should close any of the accepted
2077 connections since we have not received a request. */
2078 #define MAX_ACCEPT_TIMEOUT 30
2079 #define MIN_ACCEPT_TIMEOUT 5
2080 #define MAIN_THREAD_TIMEOUT \
2081 (MAX_ACCEPT_TIMEOUT * 1000 \
2082 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
2084 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
2086 time_t now = time (NULL);
2088 /* If there is a descriptor ready for reading or there is a new
2089 connection, process this now. */
2090 if (n > 0)
2092 if (conns[0].revents != 0)
2094 /* We have a new incoming connection. Accept the connection. */
2095 int fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
2096 SOCK_NONBLOCK));
2098 /* Use the descriptor if we have not reached the limit. */
2099 if (fd >= 0)
2101 if (firstfree < nconns)
2103 conns[firstfree].fd = fd;
2104 conns[firstfree].events = POLLRDNORM;
2105 starttime[firstfree] = now;
2106 if (firstfree >= nused)
2107 nused = firstfree + 1;
2110 ++firstfree;
2111 while (firstfree < nused && conns[firstfree].fd != -1);
2113 else
2114 /* We cannot use the connection so close it. */
2115 close (fd);
2118 --n;
2121 size_t first = 1;
2122 #ifdef HAVE_INOTIFY
2123 if (inotify_fd != -1 && conns[1].fd == inotify_fd)
2125 if (conns[1].revents != 0)
2127 int ret;
2128 ret = handle_inotify_events ();
2129 if (ret == -1)
2131 /* Something went wrong when reading the inotify
2132 data. Better disable inotify. */
2133 dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
2134 conns[1].fd = -1;
2135 firstfree = 1;
2136 if (nused == 2)
2137 nused = 1;
2138 close (inotify_fd);
2139 inotify_fd = -1;
2141 --n;
2144 first = 2;
2146 #endif
2148 #ifdef HAVE_NETLINK
2149 if (idx_nl_status_fd != 0 && conns[idx_nl_status_fd].revents != 0)
2151 char buf[4096];
2152 /* Read all the data. We do not interpret it here. */
2153 while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2154 sizeof (buf))) != -1)
2157 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
2158 = __bump_nl_timestamp ();
2160 #endif
2162 for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
2163 if (conns[cnt].revents != 0)
2165 fd_ready (conns[cnt].fd);
2167 /* Clean up the CONNS array. */
2168 conns[cnt].fd = -1;
2169 if (cnt < firstfree)
2170 firstfree = cnt;
2171 if (cnt == nused - 1)
2173 --nused;
2174 while (conns[nused - 1].fd == -1);
2176 --n;
2180 /* Now find entries which have timed out. */
2181 assert (nused > 0);
2183 /* We make the timeout length depend on the number of file
2184 descriptors currently used. */
2185 #define ACCEPT_TIMEOUT \
2186 (MAX_ACCEPT_TIMEOUT \
2187 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
2188 time_t laststart = now - ACCEPT_TIMEOUT;
2190 for (size_t cnt = nused - 1; cnt > 0; --cnt)
2192 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
2194 /* Remove the entry, it timed out. */
2195 (void) close (conns[cnt].fd);
2196 conns[cnt].fd = -1;
2198 if (cnt < firstfree)
2199 firstfree = cnt;
2200 if (cnt == nused - 1)
2202 --nused;
2203 while (conns[nused - 1].fd == -1);
2207 if (restart_p (now))
2208 restart ();
#ifdef HAVE_EPOLL
/* Main loop of the main thread, based on the epoll descriptor EFD.
   The function only returns if one of the initial descriptors cannot
   be registered with EFD, i.e., if epoll cannot be used.  Accepted
   connections are tracked through STARTTIME, which is indexed by the
   descriptor number here.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  int nused = 1;
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    {
      /* We cannot use epoll.  */
      return;
    }

# ifdef HAVE_INOTIFY
  if (inotify_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = inotify_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
        {
          /* We cannot use epoll.  */
          return;
        }
      nused = 2;
    }
# endif

# ifdef HAVE_NETLINK
  if (nl_status_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = nl_status_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, nl_status_fd, &ev) == -1)
        {
          /* We cannot use epoll.  */
          return;
        }
    }
# endif

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        {
          /* The descriptor this event is about.  */
          const int evfd = revs[cnt].data.fd;

          if (evfd == sock)
            {
              /* A new connection.  */
              int fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
                                                    SOCK_NONBLOCK));

              /* Use the descriptor if we have not reached the limit.  */
              if (fd >= 0)
                {
                  /* Try to add the new descriptor.  */
                  ev.data.fd = fd;
                  if (fd >= nconns
                      || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                    {
                      /* The descriptor is too large or something went
                         wrong.  Close the descriptor.  */
                      close (fd);
                    }
                  else
                    {
                      /* Remember when we accepted the connection.  */
                      starttime[fd] = now;

                      if (fd > highest)
                        highest = fd;

                      ++nused;
                    }
                }
            }
# ifdef HAVE_INOTIFY
          else if (evfd == inotify_fd)
            {
              int ret;
              ret = handle_inotify_events ();
              if (ret == -1)
                {
                  /* Something went wrong when reading the inotify
                     data.  Better disable inotify.  */
                  dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
                  (void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd, NULL);
                  close (inotify_fd);
                  inotify_fd = -1;
                  break;
                }
            }
# endif
# ifdef HAVE_NETLINK
          else if (evfd == nl_status_fd)
            {
              char buf[4096];
              /* Read all the data.  We do not interpret it here.  */
              while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
                                               sizeof (buf))) != -1)
                ;

              __bump_nl_timestamp ();
            }
# endif
          else
            {
              /* Remove the descriptor from the epoll descriptor.  */
              (void) epoll_ctl (efd, EPOLL_CTL_DEL, evfd, NULL);

              /* Get a worker to handle the request.  */
              fd_ready (evfd);

              /* Reset the time.  */
              starttime[evfd] = 0;
              if (evfd == highest)
                do
                  --highest;
                while (highest > 0 && starttime[highest] == 0);

              --nused;
            }
        }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      assert (starttime[sock] == 0);
# ifdef HAVE_INOTIFY
      assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
# endif
      assert (nl_status_fd == -1 || starttime[nl_status_fd] == 0);
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        {
          if (starttime[cnt] != 0 && starttime[cnt] < laststart)
            {
              /* We are waiting for this one for too long.  Close it.  */
              (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

              (void) close (cnt);

              starttime[cnt] = 0;
              if (cnt == highest)
                --highest;
            }
          else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
            --highest;
        }

      if (restart_p (now))
        restart ();
    }
}
#endif
2366 /* Start all the threads we want. The initial process is thread no. 1. */
2367 void
2368 start_threads (void)
2370 /* Initialize the conditional variable we will use. The only
2371 non-standard attribute we might use is the clock selection. */
2372 pthread_condattr_t condattr;
2373 pthread_condattr_init (&condattr);
2375 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2376 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2377 /* Determine whether the monotonous clock is available. */
2378 struct timespec dummy;
2379 # if _POSIX_MONOTONIC_CLOCK == 0
2380 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
2381 # endif
2382 # if _POSIX_CLOCK_SELECTION == 0
2383 if (sysconf (_SC_CLOCK_SELECTION) > 0)
2384 # endif
2385 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
2386 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
2387 timeout_clock = CLOCK_MONOTONIC;
2388 #endif
2390 /* Create the attribute for the threads. They are all created
2391 detached. */
2392 pthread_attr_init (&attr);
2393 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
2394 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2395 pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
2397 /* We allow less than LASTDB threads only for debugging. */
2398 if (debug_level == 0)
2399 nthreads = MAX (nthreads, lastdb);
2401 /* Create the threads which prune the databases. */
2402 // XXX Ideally this work would be done by some of the worker threads.
2403 // XXX But this is problematic since we would need to be able to wake
2404 // XXX them up explicitly as well as part of the group handling the
2405 // XXX ready-list. This requires an operation where we can wait on
2406 // XXX two conditional variables at the same time. This operation
2407 // XXX does not exist (yet).
2408 for (long int i = 0; i < lastdb; ++i)
2410 /* Initialize the conditional variable. */
2411 if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
2413 dbg_log (_("could not initialize conditional variable"));
2414 do_exit (1, 0, NULL);
2417 pthread_t th;
2418 if (dbs[i].enabled
2419 && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
2421 dbg_log (_("could not start clean-up thread; terminating"));
2422 do_exit (1, 0, NULL);
2426 pthread_condattr_destroy (&condattr);
2428 for (long int i = 0; i < nthreads; ++i)
2430 pthread_t th;
2431 if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
2433 if (i == 0)
2435 dbg_log (_("could not start any worker thread; terminating"));
2436 do_exit (1, 0, NULL);
2439 break;
2443 /* Now it is safe to let the parent know that we're doing fine and it can
2444 exit. */
2445 notify_parent (0);
2447 /* Determine how much room for descriptors we should initially
2448 allocate. This might need to change later if we cap the number
2449 with MAXCONN. */
2450 const long int nfds = sysconf (_SC_OPEN_MAX);
2451 #define MINCONN 32
2452 #define MAXCONN 16384
2453 if (nfds == -1 || nfds > MAXCONN)
2454 nconns = MAXCONN;
2455 else if (nfds < MINCONN)
2456 nconns = MINCONN;
2457 else
2458 nconns = nfds;
2460 /* We need memory to pass descriptors on to the worker threads. */
2461 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
2462 /* Array to keep track when connection was accepted. */
2463 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
2465 /* In the main thread we execute the loop which handles incoming
2466 connections. */
2467 #ifdef HAVE_EPOLL
2468 int efd = epoll_create (100);
2469 if (efd != -1)
2471 main_loop_epoll (efd);
2472 close (efd);
2474 #endif
2476 main_loop_poll ();
2480 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2481 this function is called, we are not listening on the nscd socket yet so
2482 we can just use the ordinary lookup functions without causing a lockup */
2483 static void
2484 begin_drop_privileges (void)
2486 struct passwd *pwd = getpwnam (server_user);
2488 if (pwd == NULL)
2490 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2491 do_exit (EXIT_FAILURE, 0,
2492 _("Failed to run nscd as user '%s'"), server_user);
2495 server_uid = pwd->pw_uid;
2496 server_gid = pwd->pw_gid;
2498 /* Save the old UID/GID if we have to change back. */
2499 if (paranoia)
2501 old_uid = getuid ();
2502 old_gid = getgid ();
2505 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
2507 /* This really must never happen. */
2508 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2509 do_exit (EXIT_FAILURE, errno,
2510 _("initial getgrouplist failed"));
2513 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
2515 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
2516 == -1)
2518 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2519 do_exit (EXIT_FAILURE, errno, _("getgrouplist failed"));
2524 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2525 run nscd as the user specified in the configuration file. */
2526 static void
2527 finish_drop_privileges (void)
2529 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2530 /* We need to preserve the capabilities to connect to the audit daemon. */
2531 cap_t new_caps = preserve_capabilities ();
2532 #endif
2534 if (setgroups (server_ngroups, server_groups) == -1)
2536 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2537 do_exit (EXIT_FAILURE, errno, _("setgroups failed"));
2540 int res;
2541 if (paranoia)
2542 res = setresgid (server_gid, server_gid, old_gid);
2543 else
2544 res = setgid (server_gid);
2545 if (res == -1)
2547 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2548 do_exit (4, errno, "setgid");
2551 if (paranoia)
2552 res = setresuid (server_uid, server_uid, old_uid);
2553 else
2554 res = setuid (server_uid);
2555 if (res == -1)
2557 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2558 do_exit (4, errno, "setuid");
2561 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2562 /* Remove the temporary capabilities. */
2563 install_real_capabilities (new_caps);
2564 #endif