aarch64: Use an ifunc/VDSO to implement gettimeofday in shared glibc.
[glibc.git] / nscd / connections.c
blob1b3bae4eebdafa1bbc44d267f9652ff722962e40
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2018 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>. */
19 #include <alloca.h>
20 #include <assert.h>
21 #include <atomic.h>
22 #include <error.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <grp.h>
26 #include <ifaddrs.h>
27 #include <libintl.h>
28 #include <pthread.h>
29 #include <pwd.h>
30 #include <resolv.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <stdint.h>
35 #include <arpa/inet.h>
36 #ifdef HAVE_NETLINK
37 # include <linux/netlink.h>
38 # include <linux/rtnetlink.h>
39 #endif
40 #ifdef HAVE_EPOLL
41 # include <sys/epoll.h>
42 #endif
43 #ifdef HAVE_INOTIFY
44 # include <sys/inotify.h>
45 #endif
46 #include <sys/mman.h>
47 #include <sys/param.h>
48 #include <sys/poll.h>
49 #include <sys/socket.h>
50 #include <sys/stat.h>
51 #include <sys/un.h>
53 #include "nscd.h"
54 #include "dbg_log.h"
55 #include "selinux.h"
56 #include <resolv/resolv.h>
58 #include <kernel-features.h>
59 #include <libc-diag.h>
/* Support to run nscd as an unprivileged user.  */

/* User name to switch to.  nscd_init only runs the privilege-drop
   helpers declared below when this is non-NULL.  */
const char *server_user;
static uid_t server_uid;
static gid_t server_gid;
/* STAT_UID is compared against the caller's UID when a GETSTAT request
   is authorized; STAT_USER is presumably the matching user name from
   the configuration -- confirm against the configuration parser.  */
const char *stat_user;
uid_t stat_uid;
static gid_t *server_groups;
#ifndef NGROUPS
# define NGROUPS 32
#endif
static int server_ngroups;

static pthread_attr_t attr;

static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
79 /* Map request type to a string. */
80 const char *const serv2str[LASTREQ] =
82 [GETPWBYNAME] = "GETPWBYNAME",
83 [GETPWBYUID] = "GETPWBYUID",
84 [GETGRBYNAME] = "GETGRBYNAME",
85 [GETGRBYGID] = "GETGRBYGID",
86 [GETHOSTBYNAME] = "GETHOSTBYNAME",
87 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
88 [GETHOSTBYADDR] = "GETHOSTBYADDR",
89 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
90 [SHUTDOWN] = "SHUTDOWN",
91 [GETSTAT] = "GETSTAT",
92 [INVALIDATE] = "INVALIDATE",
93 [GETFDPW] = "GETFDPW",
94 [GETFDGR] = "GETFDGR",
95 [GETFDHST] = "GETFDHST",
96 [GETAI] = "GETAI",
97 [INITGROUPS] = "INITGROUPS",
98 [GETSERVBYNAME] = "GETSERVBYNAME",
99 [GETSERVBYPORT] = "GETSERVBYPORT",
100 [GETFDSERV] = "GETFDSERV",
101 [GETNETGRENT] = "GETNETGRENT",
102 [INNETGR] = "INNETGR",
103 [GETFDNETGR] = "GETFDNETGR"
106 #ifdef PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
107 # define RWLOCK_INITIALIZER PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
108 #else
109 # define RWLOCK_INITIALIZER PTHREAD_RWLOCK_INITIALIZER
110 #endif
112 /* The control data structures for the services. */
113 struct database_dyn dbs[lastdb] =
115 [pwddb] = {
116 .lock = RWLOCK_INITIALIZER,
117 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
118 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
119 .enabled = 0,
120 .check_file = 1,
121 .persistent = 0,
122 .propagate = 1,
123 .shared = 0,
124 .max_db_size = DEFAULT_MAX_DB_SIZE,
125 .suggested_module = DEFAULT_SUGGESTED_MODULE,
126 .db_filename = _PATH_NSCD_PASSWD_DB,
127 .disabled_iov = &pwd_iov_disabled,
128 .postimeout = 3600,
129 .negtimeout = 20,
130 .wr_fd = -1,
131 .ro_fd = -1,
132 .mmap_used = false
134 [grpdb] = {
135 .lock = RWLOCK_INITIALIZER,
136 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
137 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
138 .enabled = 0,
139 .check_file = 1,
140 .persistent = 0,
141 .propagate = 1,
142 .shared = 0,
143 .max_db_size = DEFAULT_MAX_DB_SIZE,
144 .suggested_module = DEFAULT_SUGGESTED_MODULE,
145 .db_filename = _PATH_NSCD_GROUP_DB,
146 .disabled_iov = &grp_iov_disabled,
147 .postimeout = 3600,
148 .negtimeout = 60,
149 .wr_fd = -1,
150 .ro_fd = -1,
151 .mmap_used = false
153 [hstdb] = {
154 .lock = RWLOCK_INITIALIZER,
155 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
156 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
157 .enabled = 0,
158 .check_file = 1,
159 .persistent = 0,
160 .propagate = 0, /* Not used. */
161 .shared = 0,
162 .max_db_size = DEFAULT_MAX_DB_SIZE,
163 .suggested_module = DEFAULT_SUGGESTED_MODULE,
164 .db_filename = _PATH_NSCD_HOSTS_DB,
165 .disabled_iov = &hst_iov_disabled,
166 .postimeout = 3600,
167 .negtimeout = 20,
168 .wr_fd = -1,
169 .ro_fd = -1,
170 .mmap_used = false
172 [servdb] = {
173 .lock = RWLOCK_INITIALIZER,
174 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
175 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
176 .enabled = 0,
177 .check_file = 1,
178 .persistent = 0,
179 .propagate = 0, /* Not used. */
180 .shared = 0,
181 .max_db_size = DEFAULT_MAX_DB_SIZE,
182 .suggested_module = DEFAULT_SUGGESTED_MODULE,
183 .db_filename = _PATH_NSCD_SERVICES_DB,
184 .disabled_iov = &serv_iov_disabled,
185 .postimeout = 28800,
186 .negtimeout = 20,
187 .wr_fd = -1,
188 .ro_fd = -1,
189 .mmap_used = false
191 [netgrdb] = {
192 .lock = RWLOCK_INITIALIZER,
193 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
194 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
195 .enabled = 0,
196 .check_file = 1,
197 .persistent = 0,
198 .propagate = 0, /* Not used. */
199 .shared = 0,
200 .max_db_size = DEFAULT_MAX_DB_SIZE,
201 .suggested_module = DEFAULT_SUGGESTED_MODULE,
202 .db_filename = _PATH_NSCD_NETGROUP_DB,
203 .disabled_iov = &netgroup_iov_disabled,
204 .postimeout = 28800,
205 .negtimeout = 20,
206 .wr_fd = -1,
207 .ro_fd = -1,
208 .mmap_used = false
213 /* Mapping of request type to database. */
214 static struct
216 bool data_request;
217 struct database_dyn *db;
218 } const reqinfo[LASTREQ] =
220 [GETPWBYNAME] = { true, &dbs[pwddb] },
221 [GETPWBYUID] = { true, &dbs[pwddb] },
222 [GETGRBYNAME] = { true, &dbs[grpdb] },
223 [GETGRBYGID] = { true, &dbs[grpdb] },
224 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
225 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
226 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
227 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
228 [SHUTDOWN] = { false, NULL },
229 [GETSTAT] = { false, NULL },
230 [SHUTDOWN] = { false, NULL },
231 [GETFDPW] = { false, &dbs[pwddb] },
232 [GETFDGR] = { false, &dbs[grpdb] },
233 [GETFDHST] = { false, &dbs[hstdb] },
234 [GETAI] = { true, &dbs[hstdb] },
235 [INITGROUPS] = { true, &dbs[grpdb] },
236 [GETSERVBYNAME] = { true, &dbs[servdb] },
237 [GETSERVBYPORT] = { true, &dbs[servdb] },
238 [GETFDSERV] = { false, &dbs[servdb] },
239 [GETNETGRENT] = { true, &dbs[netgrdb] },
240 [INNETGR] = { true, &dbs[netgrdb] },
241 [GETFDNETGR] = { false, &dbs[netgrdb] }
/* Initial number of threads to use.  -1 means "not configured";
   nscd_init replaces it with a default.  */
int nthreads = -1;
/* Maximum number of threads to use.  */
int max_nthreads = 32;

/* Socket for incoming connections.  */
static int sock;

#ifdef HAVE_INOTIFY
/* Inotify descriptor.  */
int inotify_fd = -1;
#endif

#ifdef HAVE_NETLINK
/* Descriptor for netlink status updates.  */
static int nl_status_fd = -1;
#endif

/* Number of times clients had to wait.  */
unsigned long int client_queued;
/* Send all LEN bytes starting at BUF over the socket FD.

   The data is sent with MSG_NOSIGNAL and the call is retried when it
   is interrupted by a signal.  Sending continues until everything has
   been written or send reports an error or makes no progress.

   Returns the negative result of send if the last call failed,
   otherwise the number of bytes actually written, which is smaller
   than LEN only when send returned zero before the end.  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  size_t n = len;
  ssize_t ret;

  do
    {
      ret = TEMP_FAILURE_RETRY (send (fd, buf, n, MSG_NOSIGNAL));
      if (ret <= 0)
        break;
      /* Advance past the part that has been sent.  */
      buf = (const char *) buf + ret;
      n -= ret;
    }
  while (n > 0);

  return ret < 0 ? ret : len - n;
}
/* Per-byte markers stored in the usage map that check_use maintains
   while a persistent database is verified.  The low bits give the kind
   of object occupying the byte; the flag bits mark the first and last
   byte of an object and whether a data record was referenced by a
   hash entry with FIRST set.  */
enum usekey
{
  use_not = 0,
  /* The following three are not really used, they are symbolic constants.  */
  use_first = 16,
  use_begin = 32,
  use_end = 64,

  use_he = 1,
  use_he_begin = use_he | use_begin,
  use_he_end = use_he | use_end,
  use_data = 3,
  use_data_begin = use_data | use_begin,
  use_data_end = use_data | use_end,
  use_data_first = use_data_begin | use_first
};
303 static int
304 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
305 enum usekey use, ref_t start, size_t len)
307 assert (len >= 2);
309 if (start > first_free || start + len > first_free
310 || (start & BLOCK_ALIGN_M1))
311 return 0;
313 if (usemap[start] == use_not)
315 /* Add the start marker. */
316 usemap[start] = use | use_begin;
317 use &= ~use_first;
319 while (--len > 0)
320 if (usemap[++start] != use_not)
321 return 0;
322 else
323 usemap[start] = use;
325 /* Add the end marker. */
326 usemap[start] = use | use_end;
328 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
330 /* Hash entries can't be shared. */
331 if (use == use_he)
332 return 0;
334 usemap[start] |= (use & use_first);
335 use &= ~use_first;
337 while (--len > 1)
338 if (usemap[++start] != use)
339 return 0;
341 if (usemap[++start] != (use | use_end))
342 return 0;
344 else
345 /* Points to a wrong object or somewhere in the middle. */
346 return 0;
348 return 1;
352 /* Verify data in persistent database. */
353 static int
354 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
356 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb
357 || dbnr == netgrdb);
359 time_t now = time (NULL);
361 struct database_pers_head *head = mem;
362 struct database_pers_head head_copy = *head;
364 /* Check that the header that was read matches the head in the database. */
365 if (memcmp (head, readhead, sizeof (*head)) != 0)
366 return 0;
368 /* First some easy tests: make sure the database header is sane. */
369 if (head->version != DB_VERSION
370 || head->header_size != sizeof (*head)
371 /* We allow a timestamp to be one hour ahead of the current time.
372 This should cover daylight saving time changes. */
373 || head->timestamp > now + 60 * 60 + 60
374 || (head->gc_cycle & 1)
375 || head->module == 0
376 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
377 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
378 || head->first_free < 0
379 || head->first_free > head->data_size
380 || (head->first_free & BLOCK_ALIGN_M1) != 0
381 || head->maxnentries < 0
382 || head->maxnsearched < 0)
383 return 0;
385 uint8_t *usemap = calloc (head->first_free, 1);
386 if (usemap == NULL)
387 return 0;
389 const char *data = (char *) &head->array[roundup (head->module,
390 ALIGN / sizeof (ref_t))];
392 nscd_ssize_t he_cnt = 0;
393 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
395 ref_t trail = head->array[cnt];
396 ref_t work = trail;
397 int tick = 0;
399 while (work != ENDREF)
401 if (! check_use (data, head->first_free, usemap, use_he, work,
402 sizeof (struct hashentry)))
403 goto fail;
405 /* Now we know we can dereference the record. */
406 struct hashentry *here = (struct hashentry *) (data + work);
408 ++he_cnt;
410 /* Make sure the record is for this type of service. */
411 if (here->type >= LASTREQ
412 || reqinfo[here->type].db != &dbs[dbnr])
413 goto fail;
415 /* Validate boolean field value. */
416 if (here->first != false && here->first != true)
417 goto fail;
419 if (here->len < 0)
420 goto fail;
422 /* Now the data. */
423 if (here->packet < 0
424 || here->packet > head->first_free
425 || here->packet + sizeof (struct datahead) > head->first_free)
426 goto fail;
428 struct datahead *dh = (struct datahead *) (data + here->packet);
430 if (! check_use (data, head->first_free, usemap,
431 use_data | (here->first ? use_first : 0),
432 here->packet, dh->allocsize))
433 goto fail;
435 if (dh->allocsize < sizeof (struct datahead)
436 || dh->recsize > dh->allocsize
437 || (dh->notfound != false && dh->notfound != true)
438 || (dh->usable != false && dh->usable != true))
439 goto fail;
441 if (here->key < here->packet + sizeof (struct datahead)
442 || here->key > here->packet + dh->allocsize
443 || here->key + here->len > here->packet + dh->allocsize)
444 goto fail;
446 work = here->next;
448 if (work == trail)
449 /* A circular list, this must not happen. */
450 goto fail;
451 if (tick)
452 trail = ((struct hashentry *) (data + trail))->next;
453 tick = 1 - tick;
457 if (he_cnt != head->nentries)
458 goto fail;
460 /* See if all data and keys had at least one reference from
461 he->first == true hashentry. */
462 for (ref_t idx = 0; idx < head->first_free; ++idx)
464 if (usemap[idx] == use_data_begin)
465 goto fail;
468 /* Finally, make sure the database hasn't changed since the first test. */
469 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
470 goto fail;
472 free (usemap);
473 return 1;
475 fail:
476 free (usemap);
477 return 0;
481 /* Initialize database information structures. */
482 void
483 nscd_init (void)
485 /* Look up unprivileged uid/gid/groups before we start listening on the
486 socket */
487 if (server_user != NULL)
488 begin_drop_privileges ();
490 if (nthreads == -1)
491 /* No configuration for this value, assume a default. */
492 nthreads = 4;
494 for (size_t cnt = 0; cnt < lastdb; ++cnt)
495 if (dbs[cnt].enabled)
497 pthread_rwlock_init (&dbs[cnt].lock, NULL);
498 pthread_mutex_init (&dbs[cnt].memlock, NULL);
500 if (dbs[cnt].persistent)
502 /* Try to open the appropriate file on disk. */
503 int fd = open (dbs[cnt].db_filename, O_RDWR | O_CLOEXEC);
504 if (fd != -1)
506 char *msg = NULL;
507 struct stat64 st;
508 void *mem;
509 size_t total;
510 struct database_pers_head head;
511 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
512 sizeof (head)));
513 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
515 fail_db_errno:
516 /* The code is single-threaded at this point so
517 using strerror is just fine. */
518 msg = strerror (errno);
519 fail_db:
520 dbg_log (_("invalid persistent database file \"%s\": %s"),
521 dbs[cnt].db_filename, msg);
522 unlink (dbs[cnt].db_filename);
524 else if (head.module == 0 && head.data_size == 0)
526 /* The file has been created, but the head has not
527 been initialized yet. */
528 msg = _("uninitialized header");
529 goto fail_db;
531 else if (head.header_size != (int) sizeof (head))
533 msg = _("header size does not match");
534 goto fail_db;
536 else if ((total = (sizeof (head)
537 + roundup (head.module * sizeof (ref_t),
538 ALIGN)
539 + head.data_size))
540 > st.st_size
541 || total < sizeof (head))
543 msg = _("file size does not match");
544 goto fail_db;
546 /* Note we map with the maximum size allowed for the
547 database. This is likely much larger than the
548 actual file size. This is OK on most OSes since
549 extensions of the underlying file will
550 automatically translate more pages available for
551 memory access. */
552 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
553 PROT_READ | PROT_WRITE,
554 MAP_SHARED, fd, 0))
555 == MAP_FAILED)
556 goto fail_db_errno;
557 else if (!verify_persistent_db (mem, &head, cnt))
559 munmap (mem, total);
560 msg = _("verification failed");
561 goto fail_db;
563 else
565 /* Success. We have the database. */
566 dbs[cnt].head = mem;
567 dbs[cnt].memsize = total;
568 dbs[cnt].data = (char *)
569 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
570 ALIGN / sizeof (ref_t))];
571 dbs[cnt].mmap_used = true;
573 if (dbs[cnt].suggested_module > head.module)
574 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
575 dbnames[cnt]);
577 dbs[cnt].wr_fd = fd;
578 fd = -1;
579 /* We also need a read-only descriptor. */
580 if (dbs[cnt].shared)
582 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
583 O_RDONLY | O_CLOEXEC);
584 if (dbs[cnt].ro_fd == -1)
585 dbg_log (_("\
586 cannot create read-only descriptor for \"%s\"; no mmap"),
587 dbs[cnt].db_filename);
590 // XXX Shall we test whether the descriptors actually
591 // XXX point to the same file?
594 /* Close the file descriptors in case something went
595 wrong in which case the variable have not been
596 assigned -1. */
597 if (fd != -1)
598 close (fd);
600 else if (errno == EACCES)
601 do_exit (EXIT_FAILURE, 0, _("cannot access '%s'"),
602 dbs[cnt].db_filename);
605 if (dbs[cnt].head == NULL)
607 /* No database loaded. Allocate the data structure,
608 possibly on disk. */
609 struct database_pers_head head;
610 size_t total = (sizeof (head)
611 + roundup (dbs[cnt].suggested_module
612 * sizeof (ref_t), ALIGN)
613 + (dbs[cnt].suggested_module
614 * DEFAULT_DATASIZE_PER_BUCKET));
616 /* Try to create the database. If we do not need a
617 persistent database create a temporary file. */
618 int fd;
619 int ro_fd = -1;
620 if (dbs[cnt].persistent)
622 fd = open (dbs[cnt].db_filename,
623 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC,
624 S_IRUSR | S_IWUSR);
625 if (fd != -1 && dbs[cnt].shared)
626 ro_fd = open (dbs[cnt].db_filename,
627 O_RDONLY | O_CLOEXEC);
629 else
631 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
632 fd = mkostemp (fname, O_CLOEXEC);
634 /* We do not need the file name anymore after we
635 opened another file descriptor in read-only mode. */
636 if (fd != -1)
638 if (dbs[cnt].shared)
639 ro_fd = open (fname, O_RDONLY | O_CLOEXEC);
641 unlink (fname);
645 if (fd == -1)
647 if (errno == EEXIST)
649 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
650 dbnames[cnt], dbs[cnt].db_filename);
651 do_exit (1, 0, NULL);
654 if (dbs[cnt].persistent)
655 dbg_log (_("cannot create %s; no persistent database used"),
656 dbs[cnt].db_filename);
657 else
658 dbg_log (_("cannot create %s; no sharing possible"),
659 dbs[cnt].db_filename);
661 dbs[cnt].persistent = 0;
662 // XXX remember: no mmap
664 else
666 /* Tell the user if we could not create the read-only
667 descriptor. */
668 if (ro_fd == -1 && dbs[cnt].shared)
669 dbg_log (_("\
670 cannot create read-only descriptor for \"%s\"; no mmap"),
671 dbs[cnt].db_filename);
673 /* Before we create the header, initialize the hash
674 table. That way if we get interrupted while writing
675 the header we can recognize a partially initialized
676 database. */
677 size_t ps = sysconf (_SC_PAGESIZE);
678 char tmpbuf[ps];
679 assert (~ENDREF == 0);
680 memset (tmpbuf, '\xff', ps);
682 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
683 off_t offset = sizeof (head);
685 size_t towrite;
686 if (offset % ps != 0)
688 towrite = MIN (remaining, ps - (offset % ps));
689 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
690 goto write_fail;
691 offset += towrite;
692 remaining -= towrite;
695 while (remaining > ps)
697 if (pwrite (fd, tmpbuf, ps, offset) == -1)
698 goto write_fail;
699 offset += ps;
700 remaining -= ps;
703 if (remaining > 0
704 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
705 goto write_fail;
707 /* Create the header of the file. */
708 struct database_pers_head head =
710 .version = DB_VERSION,
711 .header_size = sizeof (head),
712 .module = dbs[cnt].suggested_module,
713 .data_size = (dbs[cnt].suggested_module
714 * DEFAULT_DATASIZE_PER_BUCKET),
715 .first_free = 0
717 void *mem;
719 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
720 != sizeof (head))
721 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
722 != 0)
723 || (mem = mmap (NULL, dbs[cnt].max_db_size,
724 PROT_READ | PROT_WRITE,
725 MAP_SHARED, fd, 0)) == MAP_FAILED)
727 write_fail:
728 unlink (dbs[cnt].db_filename);
729 dbg_log (_("cannot write to database file %s: %s"),
730 dbs[cnt].db_filename, strerror (errno));
731 dbs[cnt].persistent = 0;
733 else
735 /* Success. */
736 dbs[cnt].head = mem;
737 dbs[cnt].data = (char *)
738 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
739 ALIGN / sizeof (ref_t))];
740 dbs[cnt].memsize = total;
741 dbs[cnt].mmap_used = true;
743 /* Remember the descriptors. */
744 dbs[cnt].wr_fd = fd;
745 dbs[cnt].ro_fd = ro_fd;
746 fd = -1;
747 ro_fd = -1;
750 if (fd != -1)
751 close (fd);
752 if (ro_fd != -1)
753 close (ro_fd);
757 if (dbs[cnt].head == NULL)
759 /* We do not use the persistent database. Just
760 create an in-memory data structure. */
761 assert (! dbs[cnt].persistent);
763 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
764 + (dbs[cnt].suggested_module
765 * sizeof (ref_t)));
766 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
767 assert (~ENDREF == 0);
768 memset (dbs[cnt].head->array, '\xff',
769 dbs[cnt].suggested_module * sizeof (ref_t));
770 dbs[cnt].head->module = dbs[cnt].suggested_module;
771 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
772 * dbs[cnt].head->module);
773 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
774 dbs[cnt].head->first_free = 0;
776 dbs[cnt].shared = 0;
777 assert (dbs[cnt].ro_fd == -1);
781 /* Create the socket. */
782 sock = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
783 if (sock < 0)
785 dbg_log (_("cannot open socket: %s"), strerror (errno));
786 do_exit (errno == EACCES ? 4 : 1, 0, NULL);
788 /* Bind a name to the socket. */
789 struct sockaddr_un sock_addr;
790 sock_addr.sun_family = AF_UNIX;
791 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
792 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
794 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
795 do_exit (errno == EACCES ? 4 : 1, 0, NULL);
798 /* Set permissions for the socket. */
799 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
801 /* Set the socket up to accept connections. */
802 if (listen (sock, SOMAXCONN) < 0)
804 dbg_log (_("cannot enable socket to accept connections: %s"),
805 strerror (errno));
806 do_exit (1, 0, NULL);
809 #ifdef HAVE_NETLINK
810 if (dbs[hstdb].enabled)
812 /* Try to open netlink socket to monitor network setting changes. */
813 nl_status_fd = socket (AF_NETLINK,
814 SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK,
815 NETLINK_ROUTE);
816 if (nl_status_fd != -1)
818 struct sockaddr_nl snl;
819 memset (&snl, '\0', sizeof (snl));
820 snl.nl_family = AF_NETLINK;
821 /* XXX Is this the best set to use? */
822 snl.nl_groups = (RTMGRP_IPV4_IFADDR | RTMGRP_TC | RTMGRP_IPV4_MROUTE
823 | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_RULE
824 | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_MROUTE
825 | RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFINFO
826 | RTMGRP_IPV6_PREFIX);
828 if (bind (nl_status_fd, (struct sockaddr *) &snl, sizeof (snl)) != 0)
830 close (nl_status_fd);
831 nl_status_fd = -1;
833 else
835 /* Start the timestamp process. */
836 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
837 = __bump_nl_timestamp ();
841 #endif
843 /* Change to unprivileged uid/gid/groups if specified in config file */
844 if (server_user != NULL)
845 finish_drop_privileges ();
#ifdef HAVE_INOTIFY
#define TRACED_FILE_MASK (IN_DELETE_SELF | IN_CLOSE_WRITE | IN_MOVE_SELF)
#define TRACED_DIR_MASK (IN_DELETE_SELF | IN_CREATE | IN_MOVED_TO | IN_MOVE_SELF)
/* Install the inotify watches for the traced file FINFO on INOTIFY_FD.

   A watch is added for the file itself and one for its parent
   directory, each only if the corresponding descriptor in
   FINFO->inotify_descr is still negative.  The outcome is logged.  If
   the file watch cannot be installed the function returns without
   trying the directory watch.  */
void
install_watches (struct traced_file *finfo)
{
  /* Use inotify support if we have it.  */
  if (finfo->inotify_descr[TRACED_FILE] < 0)
    finfo->inotify_descr[TRACED_FILE] = inotify_add_watch (inotify_fd,
                                                           finfo->fname,
                                                           TRACED_FILE_MASK);
  if (finfo->inotify_descr[TRACED_FILE] < 0)
    {
      dbg_log (_("disabled inotify-based monitoring for file `%s': %s"),
               finfo->fname, strerror (errno));
      return;
    }
  dbg_log (_("monitoring file `%s` (%d)"),
           finfo->fname, finfo->inotify_descr[TRACED_FILE]);
  /* Additionally listen for events in the file's parent directory.
     We do this because the file to be watched might be
     deleted and then added back again.  When it is added back again
     we must re-add the watch.  We must also cover IN_MOVED_TO to
     detect a file being moved into the directory.  */
  if (finfo->inotify_descr[TRACED_DIR] < 0)
    finfo->inotify_descr[TRACED_DIR] = inotify_add_watch (inotify_fd,
                                                          finfo->dname,
                                                          TRACED_DIR_MASK);
  if (finfo->inotify_descr[TRACED_DIR] < 0)
    {
      /* Report the directory that could not be watched, not the file.  */
      dbg_log (_("disabled inotify-based monitoring for directory `%s': %s"),
               finfo->dname, strerror (errno));
      return;
    }
  dbg_log (_("monitoring directory `%s` (%d)"),
           finfo->dname, finfo->inotify_descr[TRACED_DIR]);
}
#endif
887 /* Register the file in FINFO as a traced file for the database DBS[DBIX].
889 We support registering multiple files per database. Each call to
890 register_traced_file adds to the list of registered files.
892 When we prune the database, either through timeout or a request to
893 invalidate, we will check to see if any of the registered files has changed.
894 When we accept new connections to handle a cache request we will also
895 check to see if any of the registered files has changed.
897 If we have inotify support then we install an inotify fd to notify us of
898 file deletion or modification, both of which will require we invalidate
899 the cache for the database. Without inotify support we stat the file and
900 store st_mtime to determine if the file has been modified. */
901 void
902 register_traced_file (size_t dbidx, struct traced_file *finfo)
904 /* If the database is disabled or file checking is disabled
905 then ignore the registration. */
906 if (! dbs[dbidx].enabled || ! dbs[dbidx].check_file)
907 return;
909 if (__glibc_unlikely (debug_level > 0))
910 dbg_log (_("monitoring file %s for database %s"),
911 finfo->fname, dbnames[dbidx]);
913 #ifdef HAVE_INOTIFY
914 install_watches (finfo);
915 #endif
916 struct stat64 st;
917 if (stat64 (finfo->fname, &st) < 0)
919 /* We cannot stat() the file. Set mtime to zero and try again later. */
920 dbg_log (_("stat failed for file `%s'; will try again later: %s"),
921 finfo->fname, strerror (errno));
922 finfo->mtime = 0;
924 else
925 finfo->mtime = st.st_mtime;
927 /* Queue up the file name. */
928 finfo->next = dbs[dbidx].traced_files;
929 dbs[dbidx].traced_files = finfo;
933 /* Close the connections. */
934 void
935 close_sockets (void)
937 close (sock);
941 static void
942 invalidate_cache (char *key, int fd)
944 dbtype number;
945 int32_t resp;
947 for (number = pwddb; number < lastdb; ++number)
948 if (strcmp (key, dbnames[number]) == 0)
950 struct traced_file *runp = dbs[number].traced_files;
951 while (runp != NULL)
953 /* Make sure we reload from file when checking mtime. */
954 runp->mtime = 0;
955 #ifdef HAVE_INOTIFY
956 /* During an invalidation we try to reload the traced
957 file watches. This allows the user to re-sync if
958 inotify events were lost. Similar to what we do during
959 pruning. */
960 install_watches (runp);
961 #endif
962 if (runp->call_res_init)
964 res_init ();
965 break;
967 runp = runp->next;
969 break;
972 if (number == lastdb)
974 resp = EINVAL;
975 writeall (fd, &resp, sizeof (resp));
976 return;
979 if (dbs[number].enabled)
981 pthread_mutex_lock (&dbs[number].prune_run_lock);
982 prune_cache (&dbs[number], LONG_MAX, fd);
983 pthread_mutex_unlock (&dbs[number].prune_run_lock);
985 else
987 resp = 0;
988 writeall (fd, &resp, sizeof (resp));
993 #ifdef SCM_RIGHTS
994 static void
995 send_ro_fd (struct database_dyn *db, char *key, int fd)
997 /* If we do not have an read-only file descriptor do nothing. */
998 if (db->ro_fd == -1)
999 return;
1001 /* We need to send some data along with the descriptor. */
1002 uint64_t mapsize = (db->head->data_size
1003 + roundup (db->head->module * sizeof (ref_t), ALIGN)
1004 + sizeof (struct database_pers_head));
1005 struct iovec iov[2];
1006 iov[0].iov_base = key;
1007 iov[0].iov_len = strlen (key) + 1;
1008 iov[1].iov_base = &mapsize;
1009 iov[1].iov_len = sizeof (mapsize);
1011 /* Prepare the control message to transfer the descriptor. */
1012 union
1014 struct cmsghdr hdr;
1015 char bytes[CMSG_SPACE (sizeof (int))];
1016 } buf;
1017 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
1018 .msg_control = buf.bytes,
1019 .msg_controllen = sizeof (buf) };
1020 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
1022 cmsg->cmsg_level = SOL_SOCKET;
1023 cmsg->cmsg_type = SCM_RIGHTS;
1024 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
1026 int *ip = (int *) CMSG_DATA (cmsg);
1027 *ip = db->ro_fd;
1029 msg.msg_controllen = cmsg->cmsg_len;
1031 /* Send the control message. We repeat when we are interrupted but
1032 everything else is ignored. */
1033 #ifndef MSG_NOSIGNAL
1034 # define MSG_NOSIGNAL 0
1035 #endif
1036 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
1038 if (__glibc_unlikely (debug_level > 0))
1039 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
1041 #endif /* SCM_RIGHTS */
1044 /* Handle new request. */
1045 static void
1046 handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
1048 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
1050 if (debug_level > 0)
1051 dbg_log (_("\
1052 cannot handle old request version %d; current version is %d"),
1053 req->version, NSCD_VERSION);
1054 return;
1057 /* Perform the SELinux check before we go on to the standard checks. */
1058 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
1060 if (debug_level > 0)
1062 #ifdef SO_PEERCRED
1063 char pbuf[sizeof ("/proc//exe") + 3 * sizeof (long int)];
1064 # ifdef PATH_MAX
1065 char buf[PATH_MAX];
1066 # else
1067 char buf[4096];
1068 # endif
1070 snprintf (pbuf, sizeof (pbuf), "/proc/%ld/exe", (long int) pid);
1071 ssize_t n = readlink (pbuf, buf, sizeof (buf) - 1);
1073 if (n <= 0)
1074 dbg_log (_("\
1075 request from %ld not handled due to missing permission"), (long int) pid);
1076 else
1078 buf[n] = '\0';
1079 dbg_log (_("\
1080 request from '%s' [%ld] not handled due to missing permission"),
1081 buf, (long int) pid);
1083 #else
1084 dbg_log (_("request not handled due to missing permission"));
1085 #endif
1087 return;
1090 struct database_dyn *db = reqinfo[req->type].db;
1092 /* See whether we can service the request from the cache. */
1093 if (__builtin_expect (reqinfo[req->type].data_request, true))
1095 if (__builtin_expect (debug_level, 0) > 0)
1097 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
1099 char buf[INET6_ADDRSTRLEN];
1101 dbg_log ("\t%s (%s)", serv2str[req->type],
1102 inet_ntop (req->type == GETHOSTBYADDR
1103 ? AF_INET : AF_INET6,
1104 key, buf, sizeof (buf)));
1106 else
1107 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1110 /* Is this service enabled? */
1111 if (__glibc_unlikely (!db->enabled))
1113 /* No, sent the prepared record. */
1114 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1115 db->disabled_iov->iov_len,
1116 MSG_NOSIGNAL))
1117 != (ssize_t) db->disabled_iov->iov_len
1118 && __builtin_expect (debug_level, 0) > 0)
1120 /* We have problems sending the result. */
1121 char buf[256];
1122 dbg_log (_("cannot write result: %s"),
1123 strerror_r (errno, buf, sizeof (buf)));
1126 return;
1129 /* Be sure we can read the data. */
1130 if (__glibc_unlikely (pthread_rwlock_tryrdlock (&db->lock) != 0))
1132 ++db->head->rdlockdelayed;
1133 pthread_rwlock_rdlock (&db->lock);
1136 /* See whether we can handle it from the cache. */
1137 struct datahead *cached;
1138 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1139 db, uid);
1140 if (cached != NULL)
1142 /* Hurray it's in the cache. */
1143 if (writeall (fd, cached->data, cached->recsize) != cached->recsize
1144 && __glibc_unlikely (debug_level > 0))
1146 /* We have problems sending the result. */
1147 char buf[256];
1148 dbg_log (_("cannot write result: %s"),
1149 strerror_r (errno, buf, sizeof (buf)));
1152 pthread_rwlock_unlock (&db->lock);
1154 return;
1157 pthread_rwlock_unlock (&db->lock);
1159 else if (__builtin_expect (debug_level, 0) > 0)
1161 if (req->type == INVALIDATE)
1162 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1163 else
1164 dbg_log ("\t%s", serv2str[req->type]);
1167 /* Handle the request. */
1168 switch (req->type)
1170 case GETPWBYNAME:
1171 addpwbyname (db, fd, req, key, uid);
1172 break;
1174 case GETPWBYUID:
1175 addpwbyuid (db, fd, req, key, uid);
1176 break;
1178 case GETGRBYNAME:
1179 addgrbyname (db, fd, req, key, uid);
1180 break;
1182 case GETGRBYGID:
1183 addgrbygid (db, fd, req, key, uid);
1184 break;
1186 case GETHOSTBYNAME:
1187 addhstbyname (db, fd, req, key, uid);
1188 break;
1190 case GETHOSTBYNAMEv6:
1191 addhstbynamev6 (db, fd, req, key, uid);
1192 break;
1194 case GETHOSTBYADDR:
1195 addhstbyaddr (db, fd, req, key, uid);
1196 break;
1198 case GETHOSTBYADDRv6:
1199 addhstbyaddrv6 (db, fd, req, key, uid);
1200 break;
1202 case GETAI:
1203 addhstai (db, fd, req, key, uid);
1204 break;
1206 case INITGROUPS:
1207 addinitgroups (db, fd, req, key, uid);
1208 break;
1210 case GETSERVBYNAME:
1211 addservbyname (db, fd, req, key, uid);
1212 break;
1214 case GETSERVBYPORT:
1215 addservbyport (db, fd, req, key, uid);
1216 break;
1218 case GETNETGRENT:
1219 addgetnetgrent (db, fd, req, key, uid);
1220 break;
1222 case INNETGR:
1223 addinnetgr (db, fd, req, key, uid);
1224 break;
1226 case GETSTAT:
1227 case SHUTDOWN:
1228 case INVALIDATE:
1230 /* Get the callers credentials. */
1231 #ifdef SO_PEERCRED
1232 struct ucred caller;
1233 socklen_t optlen = sizeof (caller);
1235 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1237 char buf[256];
1239 dbg_log (_("error getting caller's id: %s"),
1240 strerror_r (errno, buf, sizeof (buf)));
1241 break;
1244 uid = caller.uid;
1245 #else
1246 /* Some systems have no SO_PEERCRED implementation. They don't
1247 care about security so we don't as well. */
1248 uid = 0;
1249 #endif
1252 /* Accept shutdown, getstat and invalidate only from root. For
1253 the stat call also allow the user specified in the config file. */
1254 if (req->type == GETSTAT)
1256 if (uid == 0 || uid == stat_uid)
1257 send_stats (fd, dbs);
1259 else if (uid == 0)
1261 if (req->type == INVALIDATE)
1262 invalidate_cache (key, fd);
1263 else
1264 termination_handler (0);
1266 break;
1268 case GETFDPW:
1269 case GETFDGR:
1270 case GETFDHST:
1271 case GETFDSERV:
1272 case GETFDNETGR:
1273 #ifdef SCM_RIGHTS
1274 send_ro_fd (reqinfo[req->type].db, key, fd);
1275 #endif
1276 break;
1278 default:
1279 /* Ignore the command, it's nothing we know. */
1280 break;
1285 /* Restart the process. */
1286 static void
1287 restart (void)
1289 /* First determine the parameters. We do not use the parameters
1290 passed to main() since in case nscd is started by running the
1291 dynamic linker this will not work. Yes, this is not the usual
1292 case but nscd is part of glibc and we occasionally do this. */
1293 size_t buflen = 1024;
1294 char *buf = alloca (buflen);
1295 size_t readlen = 0;
1296 int fd = open ("/proc/self/cmdline", O_RDONLY);
1297 if (fd == -1)
1299 dbg_log (_("\
1300 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1301 strerror (errno));
1303 paranoia = 0;
1304 return;
1307 while (1)
1309 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1310 buflen - readlen));
1311 if (n == -1)
1313 dbg_log (_("\
1314 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1315 strerror (errno));
1317 close (fd);
1318 paranoia = 0;
1319 return;
1322 readlen += n;
1324 if (readlen < buflen)
1325 break;
1327 /* We might have to extend the buffer. */
1328 size_t old_buflen = buflen;
1329 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1330 buf = memmove (newp, buf, old_buflen);
1333 close (fd);
1335 /* Parse the command line. Worst case scenario: every two
1336 characters form one parameter (one character plus NUL). */
1337 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1338 int argc = 0;
1340 char *cp = buf;
1341 while (cp < buf + readlen)
1343 argv[argc++] = cp;
1344 cp = (char *) rawmemchr (cp, '\0') + 1;
1346 argv[argc] = NULL;
1348 /* Second, change back to the old user if we changed it. */
1349 if (server_user != NULL)
1351 if (setresuid (old_uid, old_uid, old_uid) != 0)
1353 dbg_log (_("\
1354 cannot change to old UID: %s; disabling paranoia mode"),
1355 strerror (errno));
1357 paranoia = 0;
1358 return;
1361 if (setresgid (old_gid, old_gid, old_gid) != 0)
1363 dbg_log (_("\
1364 cannot change to old GID: %s; disabling paranoia mode"),
1365 strerror (errno));
1367 ignore_value (setuid (server_uid));
1368 paranoia = 0;
1369 return;
1373 /* Next change back to the old working directory. */
1374 if (chdir (oldcwd) == -1)
1376 dbg_log (_("\
1377 cannot change to old working directory: %s; disabling paranoia mode"),
1378 strerror (errno));
1380 if (server_user != NULL)
1382 ignore_value (setuid (server_uid));
1383 ignore_value (setgid (server_gid));
1385 paranoia = 0;
1386 return;
1389 /* Synchronize memory. */
1390 int32_t certainly[lastdb];
1391 for (int cnt = 0; cnt < lastdb; ++cnt)
1392 if (dbs[cnt].enabled)
1394 /* Make sure nobody keeps using the database. */
1395 dbs[cnt].head->timestamp = 0;
1396 certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
1397 dbs[cnt].head->nscd_certainly_running = 0;
1399 if (dbs[cnt].persistent)
1400 // XXX async OK?
1401 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1404 /* The preparations are done. */
1405 #ifdef PATH_MAX
1406 char pathbuf[PATH_MAX];
1407 #else
1408 char pathbuf[256];
1409 #endif
1410 /* Try to exec the real nscd program so the process name (as reported
1411 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1412 if readlink or the exec with the result of the readlink call fails. */
1413 ssize_t n = readlink ("/proc/self/exe", pathbuf, sizeof (pathbuf) - 1);
1414 if (n != -1)
1416 pathbuf[n] = '\0';
1417 execv (pathbuf, argv);
1419 execv ("/proc/self/exe", argv);
1421 /* If we come here, we will never be able to re-exec. */
1422 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1423 strerror (errno));
1425 if (server_user != NULL)
1427 ignore_value (setuid (server_uid));
1428 ignore_value (setgid (server_gid));
1430 if (chdir ("/") != 0)
1431 dbg_log (_("cannot change current working directory to \"/\": %s"),
1432 strerror (errno));
1433 paranoia = 0;
1435 /* Reenable the databases. */
1436 time_t now = time (NULL);
1437 for (int cnt = 0; cnt < lastdb; ++cnt)
1438 if (dbs[cnt].enabled)
1440 dbs[cnt].head->timestamp = now;
1441 dbs[cnt].head->nscd_certainly_running = certainly[cnt];
1446 /* List of file descriptors. */
1447 struct fdlist
1449 int fd;
1450 struct fdlist *next;
1452 /* Memory allocated for the list. */
1453 static struct fdlist *fdlist;
1454 /* List of currently ready-to-read file descriptors. */
1455 static struct fdlist *readylist;
1457 /* Conditional variable and mutex to signal availability of entries in
1458 READYLIST. The condvar is initialized dynamically since we might
1459 use a different clock depending on availability. */
1460 static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
1461 static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1463 /* The clock to use with the condvar. */
1464 static clockid_t timeout_clock = CLOCK_REALTIME;
1466 /* Number of threads ready to handle the READYLIST. */
1467 static unsigned long int nready;
1470 /* Function for the clean-up threads. */
1471 static void *
1472 __attribute__ ((__noreturn__))
1473 nscd_run_prune (void *p)
1475 const long int my_number = (long int) p;
1476 assert (dbs[my_number].enabled);
1478 int dont_need_update = setup_thread (&dbs[my_number]);
1480 time_t now = time (NULL);
1482 /* We are running. */
1483 dbs[my_number].head->timestamp = now;
1485 struct timespec prune_ts;
1486 if (__glibc_unlikely (clock_gettime (timeout_clock, &prune_ts) == -1))
1487 /* Should never happen. */
1488 abort ();
1490 /* Compute the initial timeout time. Prevent all the timers to go
1491 off at the same time by adding a db-based value. */
1492 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1493 dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;
1495 pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
1496 pthread_mutex_t *prune_run_lock = &dbs[my_number].prune_run_lock;
1497 pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;
1499 pthread_mutex_lock (prune_lock);
1500 while (1)
1502 /* Wait, but not forever. */
1503 int e = 0;
1504 if (! dbs[my_number].clear_cache)
1505 e = pthread_cond_timedwait (prune_cond, prune_lock, &prune_ts);
1506 assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));
1508 time_t next_wait;
1509 now = time (NULL);
1510 if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time
1511 || dbs[my_number].clear_cache)
1513 /* We will determine the new timout values based on the
1514 cache content. Should there be concurrent additions to
1515 the cache which are not accounted for in the cache
1516 pruning we want to know about it. Therefore set the
1517 timeout to the maximum. It will be descreased when adding
1518 new entries to the cache, if necessary. */
1519 dbs[my_number].wakeup_time = MAX_TIMEOUT_VALUE;
1521 /* Unconditionally reset the flag. */
1522 time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
1523 dbs[my_number].clear_cache = 0;
1525 pthread_mutex_unlock (prune_lock);
1527 /* We use a separate lock for running the prune function (instead
1528 of keeping prune_lock locked) because this enables concurrent
1529 invocations of cache_add which might modify the timeout value. */
1530 pthread_mutex_lock (prune_run_lock);
1531 next_wait = prune_cache (&dbs[my_number], prune_now, -1);
1532 pthread_mutex_unlock (prune_run_lock);
1534 next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1535 /* If clients cannot determine for sure whether nscd is running
1536 we need to wake up occasionally to update the timestamp.
1537 Wait 90% of the update period. */
1538 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1539 if (__glibc_unlikely (! dont_need_update))
1541 next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1542 dbs[my_number].head->timestamp = now;
1545 pthread_mutex_lock (prune_lock);
1547 /* Make it known when we will wake up again. */
1548 if (now + next_wait < dbs[my_number].wakeup_time)
1549 dbs[my_number].wakeup_time = now + next_wait;
1550 else
1551 next_wait = dbs[my_number].wakeup_time - now;
1553 else
1554 /* The cache was just pruned. Do not do it again now. Just
1555 use the new timeout value. */
1556 next_wait = dbs[my_number].wakeup_time - now;
1558 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1559 /* Should never happen. */
1560 abort ();
1562 /* Compute next timeout time. */
1563 prune_ts.tv_sec += next_wait;
1568 /* This is the main loop. It is replicated in different threads but
1569 the use of the ready list makes sure only one thread handles an
1570 incoming connection. */
1571 static void *
1572 __attribute__ ((__noreturn__))
1573 nscd_run_worker (void *p)
1575 char buf[256];
1577 /* Initial locking. */
1578 pthread_mutex_lock (&readylist_lock);
1580 /* One more thread available. */
1581 ++nready;
1583 while (1)
1585 while (readylist == NULL)
1586 pthread_cond_wait (&readylist_cond, &readylist_lock);
1588 struct fdlist *it = readylist->next;
1589 if (readylist->next == readylist)
1590 /* Just one entry on the list. */
1591 readylist = NULL;
1592 else
1593 readylist->next = it->next;
1595 /* Extract the information and mark the record ready to be used
1596 again. */
1597 int fd = it->fd;
1598 it->next = NULL;
1600 /* One more thread available. */
1601 --nready;
1603 /* We are done with the list. */
1604 pthread_mutex_unlock (&readylist_lock);
1606 /* Now read the request. */
1607 request_header req;
1608 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1609 != sizeof (req), 0))
1611 /* We failed to read data. Note that this also might mean we
1612 failed because we would have blocked. */
1613 if (debug_level > 0)
1614 dbg_log (_("short read while reading request: %s"),
1615 strerror_r (errno, buf, sizeof (buf)));
1616 goto close_and_out;
1619 /* Check whether this is a valid request type. */
1620 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1621 goto close_and_out;
1623 /* Some systems have no SO_PEERCRED implementation. They don't
1624 care about security so we don't as well. */
1625 uid_t uid = -1;
1626 #ifdef SO_PEERCRED
1627 pid_t pid = 0;
1629 if (__glibc_unlikely (debug_level > 0))
1631 struct ucred caller;
1632 socklen_t optlen = sizeof (caller);
1634 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1635 pid = caller.pid;
1637 #else
1638 const pid_t pid = 0;
1639 #endif
1641 /* It should not be possible to crash the nscd with a silly
1642 request (i.e., a terribly large key). We limit the size to 1kb. */
1643 if (__builtin_expect (req.key_len, 1) < 0
1644 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1646 if (debug_level > 0)
1647 dbg_log (_("key length in request too long: %d"), req.key_len);
1649 else
1651 /* Get the key. */
1652 char keybuf[MAXKEYLEN + 1];
1654 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1655 req.key_len))
1656 != req.key_len, 0))
1658 /* Again, this can also mean we would have blocked. */
1659 if (debug_level > 0)
1660 dbg_log (_("short read while reading request key: %s"),
1661 strerror_r (errno, buf, sizeof (buf)));
1662 goto close_and_out;
1664 keybuf[req.key_len] = '\0';
1666 if (__builtin_expect (debug_level, 0) > 0)
1668 #ifdef SO_PEERCRED
1669 if (pid != 0)
1670 dbg_log (_("\
1671 handle_request: request received (Version = %d) from PID %ld"),
1672 req.version, (long int) pid);
1673 else
1674 #endif
1675 dbg_log (_("\
1676 handle_request: request received (Version = %d)"), req.version);
1679 /* Phew, we got all the data, now process it. */
1680 handle_request (fd, &req, keybuf, uid, pid);
1683 close_and_out:
1684 /* We are done. */
1685 close (fd);
1687 /* Re-locking. */
1688 pthread_mutex_lock (&readylist_lock);
1690 /* One more thread available. */
1691 ++nready;
1693 /* NOTREACHED */
1697 static unsigned int nconns;
1699 static void
1700 fd_ready (int fd)
1702 pthread_mutex_lock (&readylist_lock);
1704 /* Find an empty entry in FDLIST. */
1705 size_t inner;
1706 for (inner = 0; inner < nconns; ++inner)
1707 if (fdlist[inner].next == NULL)
1708 break;
1709 assert (inner < nconns);
1711 fdlist[inner].fd = fd;
1713 if (readylist == NULL)
1714 readylist = fdlist[inner].next = &fdlist[inner];
1715 else
1717 fdlist[inner].next = readylist->next;
1718 readylist = readylist->next = &fdlist[inner];
1721 bool do_signal = true;
1722 if (__glibc_unlikely (nready == 0))
1724 ++client_queued;
1725 do_signal = false;
1727 /* Try to start another thread to help out. */
1728 pthread_t th;
1729 if (nthreads < max_nthreads
1730 && pthread_create (&th, &attr, nscd_run_worker,
1731 (void *) (long int) nthreads) == 0)
1733 /* We got another thread. */
1734 ++nthreads;
1735 /* The new thread might need a kick. */
1736 do_signal = true;
1741 pthread_mutex_unlock (&readylist_lock);
1743 /* Tell one of the worker threads there is work to do. */
1744 if (do_signal)
1745 pthread_cond_signal (&readylist_cond);
1749 /* Check whether restarting should happen. */
1750 static bool
1751 restart_p (time_t now)
1753 return (paranoia && readylist == NULL && nready == nthreads
1754 && now >= restart_time);
1758 /* Array for times a connection was accepted. */
1759 static time_t *starttime;
1761 #ifdef HAVE_INOTIFY
/* Fallback for systems which do not define PATH_MAX.  */
# ifndef PATH_MAX
#  define PATH_MAX 1024
# endif

/* Buffer for inotify events of a changed file.  BUF provides room for
   one event header followed by a name of up to PATH_MAX bytes; I gives
   access to the event stored at the beginning of the buffer.  */
union __inev
{
  struct inotify_event i;
  char buf[sizeof (struct inotify_event) + PATH_MAX];
};
1772 /* Returns 0 if the file is there otherwise -1. */
1774 check_file (struct traced_file *finfo)
1776 struct stat64 st;
1777 /* We could check mtime and if different re-add
1778 the watches, and invalidate the database, but we
1779 don't because we are called from inotify_check_files
1780 which should be doing that work. If sufficient inotify
1781 events were lost then the next pruning or invalidation
1782 will do the stat and mtime check. We don't do it here to
1783 keep the logic simple. */
1784 if (stat64 (finfo->fname, &st) < 0)
1785 return -1;
1786 return 0;
1789 /* Process the inotify event in INEV. If the event matches any of the files
1790 registered with a database then mark that database as requiring its cache
1791 to be cleared. We indicate the cache needs clearing by setting
1792 TO_CLEAR[DBCNT] to true for the matching database. */
1793 static void
1794 inotify_check_files (bool *to_clear, union __inev *inev)
1796 /* Check which of the files changed. */
1797 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1799 struct traced_file *finfo = dbs[dbcnt].traced_files;
1801 while (finfo != NULL)
1803 /* The configuration file was moved or deleted.
1804 We stop watching it at that point, and reinitialize. */
1805 if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
1806 && ((inev->i.mask & IN_MOVE_SELF)
1807 || (inev->i.mask & IN_DELETE_SELF)
1808 || (inev->i.mask & IN_IGNORED)))
1810 int ret;
1811 bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
1813 if (check_file (finfo) == 0)
1815 dbg_log (_("ignored inotify event for `%s` (file exists)"),
1816 finfo->fname);
1817 return;
1820 dbg_log (_("monitored file `%s` was %s, removing watch"),
1821 finfo->fname, moved ? "moved" : "deleted");
1822 /* File was moved out, remove the watch. Watches are
1823 automatically removed when the file is deleted. */
1824 if (moved)
1826 ret = inotify_rm_watch (inotify_fd, inev->i.wd);
1827 if (ret < 0)
1828 dbg_log (_("failed to remove file watch `%s`: %s"),
1829 finfo->fname, strerror (errno));
1831 finfo->inotify_descr[TRACED_FILE] = -1;
1832 to_clear[dbcnt] = true;
1833 if (finfo->call_res_init)
1834 res_init ();
1835 return;
1837 /* The configuration file was open for writing and has just closed.
1838 We reset the cache and reinitialize. */
1839 if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
1840 && inev->i.mask & IN_CLOSE_WRITE)
1842 /* Mark cache as needing to be cleared and reinitialize. */
1843 dbg_log (_("monitored file `%s` was written to"), finfo->fname);
1844 to_clear[dbcnt] = true;
1845 if (finfo->call_res_init)
1846 res_init ();
1847 return;
1849 /* The parent directory was moved or deleted. We trigger one last
1850 invalidation. At the next pruning or invalidation we may add
1851 this watch back if the file is present again. */
1852 if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
1853 && ((inev->i.mask & IN_DELETE_SELF)
1854 || (inev->i.mask & IN_MOVE_SELF)
1855 || (inev->i.mask & IN_IGNORED)))
1857 bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
1858 /* The directory watch may have already been removed
1859 but we don't know so we just remove it again and
1860 ignore the error. Then we remove the file watch.
1861 Note: watches are automatically removed for deleted
1862 files. */
1863 if (moved)
1864 inotify_rm_watch (inotify_fd, inev->i.wd);
1865 if (finfo->inotify_descr[TRACED_FILE] != -1)
1867 dbg_log (_("monitored parent directory `%s` was %s, removing watch on `%s`"),
1868 finfo->dname, moved ? "moved" : "deleted", finfo->fname);
1869 if (inotify_rm_watch (inotify_fd, finfo->inotify_descr[TRACED_FILE]) < 0)
1870 dbg_log (_("failed to remove file watch `%s`: %s"),
1871 finfo->dname, strerror (errno));
1873 finfo->inotify_descr[TRACED_FILE] = -1;
1874 finfo->inotify_descr[TRACED_DIR] = -1;
1875 to_clear[dbcnt] = true;
1876 if (finfo->call_res_init)
1877 res_init ();
1878 /* Continue to the next entry since this might be the
1879 parent directory for multiple registered files and
1880 we want to remove watches for all registered files. */
1881 continue;
1883 /* The parent directory had a create or moved to event. */
1884 if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
1885 && ((inev->i.mask & IN_MOVED_TO)
1886 || (inev->i.mask & IN_CREATE))
1887 && strcmp (inev->i.name, finfo->sfname) == 0)
1889 /* We detected a directory change. We look for the creation
1890 of the file we are tracking or the move of the same file
1891 into the directory. */
1892 int ret;
1893 dbg_log (_("monitored file `%s` was %s, adding watch"),
1894 finfo->fname,
1895 inev->i.mask & IN_CREATE ? "created" : "moved into place");
1896 /* File was moved in or created. Regenerate the watch. */
1897 if (finfo->inotify_descr[TRACED_FILE] != -1)
1898 inotify_rm_watch (inotify_fd,
1899 finfo->inotify_descr[TRACED_FILE]);
1901 ret = inotify_add_watch (inotify_fd,
1902 finfo->fname,
1903 TRACED_FILE_MASK);
1904 if (ret < 0)
1905 dbg_log (_("failed to add file watch `%s`: %s"),
1906 finfo->fname, strerror (errno));
1908 finfo->inotify_descr[TRACED_FILE] = ret;
1910 /* The file is new or moved so mark cache as needing to
1911 be cleared and reinitialize. */
1912 to_clear[dbcnt] = true;
1913 if (finfo->call_res_init)
1914 res_init ();
1916 /* Done re-adding the watch. Don't return, we may still
1917 have other files in this same directory, same watch
1918 descriptor, and need to process them. */
1920 /* Other events are ignored, and we move on to the next file. */
1921 finfo = finfo->next;
1926 /* If an entry in the array of booleans TO_CLEAR is TRUE then clear the cache
1927 for the associated database, otherwise do nothing. The TO_CLEAR array must
1928 have LASTDB entries. */
1929 static inline void
1930 clear_db_cache (bool *to_clear)
1932 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1933 if (to_clear[dbcnt])
1935 pthread_mutex_lock (&dbs[dbcnt].prune_lock);
1936 dbs[dbcnt].clear_cache = 1;
1937 pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
1938 pthread_cond_signal (&dbs[dbcnt].prune_cond);
1943 handle_inotify_events (void)
1945 bool to_clear[lastdb] = { false, };
1946 union __inev inev;
1948 /* Read all inotify events for files registered via
1949 register_traced_file(). */
1950 while (1)
1952 /* Potentially read multiple events into buf. */
1953 ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd,
1954 &inev.buf,
1955 sizeof (inev)));
1956 if (nb < (ssize_t) sizeof (struct inotify_event))
1958 /* Not even 1 event. */
1959 if (__glibc_unlikely (nb == -1 && errno != EAGAIN))
1960 return -1;
1961 /* Done reading events that are ready. */
1962 break;
1964 /* Process all events. The normal inotify interface delivers
1965 complete events on a read and never a partial event. */
1966 char *eptr = &inev.buf[0];
1967 ssize_t count;
1968 while (1)
1970 /* Check which of the files changed. */
1971 inotify_check_files (to_clear, &inev);
1972 count = sizeof (struct inotify_event) + inev.i.len;
1973 eptr += count;
1974 nb -= count;
1975 if (nb >= (ssize_t) sizeof (struct inotify_event))
1976 memcpy (&inev, eptr, nb);
1977 else
1978 break;
1980 continue;
1982 /* Actually perform the cache clearing. */
1983 clear_db_cache (to_clear);
1984 return 0;
1987 #endif
1989 static void
1990 __attribute__ ((__noreturn__))
1991 main_loop_poll (void)
1993 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1994 * sizeof (conns[0]));
1996 conns[0].fd = sock;
1997 conns[0].events = POLLRDNORM;
1998 size_t nused = 1;
1999 size_t firstfree = 1;
2001 #ifdef HAVE_INOTIFY
2002 if (inotify_fd != -1)
2004 conns[1].fd = inotify_fd;
2005 conns[1].events = POLLRDNORM;
2006 nused = 2;
2007 firstfree = 2;
2009 #endif
2011 #ifdef HAVE_NETLINK
2012 size_t idx_nl_status_fd = 0;
2013 if (nl_status_fd != -1)
2015 idx_nl_status_fd = nused;
2016 conns[nused].fd = nl_status_fd;
2017 conns[nused].events = POLLRDNORM;
2018 ++nused;
2019 firstfree = nused;
2021 #endif
2023 while (1)
2025 /* Wait for any event. We wait at most a couple of seconds so
2026 that we can check whether we should close any of the accepted
2027 connections since we have not received a request. */
2028 #define MAX_ACCEPT_TIMEOUT 30
2029 #define MIN_ACCEPT_TIMEOUT 5
2030 #define MAIN_THREAD_TIMEOUT \
2031 (MAX_ACCEPT_TIMEOUT * 1000 \
2032 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
2034 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
2036 time_t now = time (NULL);
2038 /* If there is a descriptor ready for reading or there is a new
2039 connection, process this now. */
2040 if (n > 0)
2042 if (conns[0].revents != 0)
2044 /* We have a new incoming connection. Accept the connection. */
2045 int fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
2046 SOCK_NONBLOCK));
2048 /* Use the descriptor if we have not reached the limit. */
2049 if (fd >= 0)
2051 if (firstfree < nconns)
2053 conns[firstfree].fd = fd;
2054 conns[firstfree].events = POLLRDNORM;
2055 starttime[firstfree] = now;
2056 if (firstfree >= nused)
2057 nused = firstfree + 1;
2060 ++firstfree;
2061 while (firstfree < nused && conns[firstfree].fd != -1);
2063 else
2064 /* We cannot use the connection so close it. */
2065 close (fd);
2068 --n;
2071 size_t first = 1;
2072 #ifdef HAVE_INOTIFY
2073 if (inotify_fd != -1 && conns[1].fd == inotify_fd)
2075 if (conns[1].revents != 0)
2077 int ret;
2078 ret = handle_inotify_events ();
2079 if (ret == -1)
2081 /* Something went wrong when reading the inotify
2082 data. Better disable inotify. */
2083 dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
2084 conns[1].fd = -1;
2085 firstfree = 1;
2086 if (nused == 2)
2087 nused = 1;
2088 close (inotify_fd);
2089 inotify_fd = -1;
2091 --n;
2094 first = 2;
2096 #endif
2098 #ifdef HAVE_NETLINK
2099 if (idx_nl_status_fd != 0 && conns[idx_nl_status_fd].revents != 0)
2101 char buf[4096];
2102 /* Read all the data. We do not interpret it here. */
2103 while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2104 sizeof (buf))) != -1)
2107 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
2108 = __bump_nl_timestamp ();
2110 #endif
2112 for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
2113 if (conns[cnt].revents != 0)
2115 fd_ready (conns[cnt].fd);
2117 /* Clean up the CONNS array. */
2118 conns[cnt].fd = -1;
2119 if (cnt < firstfree)
2120 firstfree = cnt;
2121 if (cnt == nused - 1)
2123 --nused;
2124 while (conns[nused - 1].fd == -1);
2126 --n;
2130 /* Now find entries which have timed out. */
2131 assert (nused > 0);
2133 /* We make the timeout length depend on the number of file
2134 descriptors currently used. */
2135 #define ACCEPT_TIMEOUT \
2136 (MAX_ACCEPT_TIMEOUT \
2137 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
2138 time_t laststart = now - ACCEPT_TIMEOUT;
2140 for (size_t cnt = nused - 1; cnt > 0; --cnt)
2142 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
2144 /* Remove the entry, it timed out. */
2145 (void) close (conns[cnt].fd);
2146 conns[cnt].fd = -1;
2148 if (cnt < firstfree)
2149 firstfree = cnt;
2150 if (cnt == nused - 1)
2152 --nused;
2153 while (conns[nused - 1].fd == -1);
2157 if (restart_p (now))
2158 restart ();
#ifdef HAVE_EPOLL
/* Main loop of the main thread based on the epoll descriptor EFD.
   Accepts new connections on SOCK, handles inotify and netlink
   notifications, hands client descriptors which became readable to the
   worker threads, closes connections on which no request arrived in
   time and restarts the process when restart_p says so.

   Returns only if one of the daemon's own descriptors cannot be
   registered with EFD; otherwise it loops forever.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  /* Number of descriptors in use; MAIN_THREAD_TIMEOUT and
     ACCEPT_TIMEOUT are computed from it.  */
  int nused = 1;
  /* Highest descriptor number seen for an accepted connection.  */
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

# ifdef HAVE_INOTIFY
  if (inotify_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = inotify_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
        /* We cannot use epoll.  */
        return;
      nused = 2;
    }
# endif

# ifdef HAVE_NETLINK
  if (nl_status_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = nl_status_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, nl_status_fd, &ev) == -1)
        /* We cannot use epoll.  */
        return;
    }
# endif

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
                                                  SOCK_NONBLOCK));

            /* Use the descriptor if we have not reached the limit.  */
            if (fd >= 0)
              {
                /* Try to add the new descriptor.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
# ifdef HAVE_INOTIFY
        else if (revs[cnt].data.fd == inotify_fd)
          {
            int ret;
            ret = handle_inotify_events ();
            if (ret == -1)
              {
                /* Something went wrong when reading the inotify
                   data.  Better disable inotify.  */
                dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
                (void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd, NULL);
                close (inotify_fd);
                inotify_fd = -1;
                break;
              }
          }
# endif
# ifdef HAVE_NETLINK
        else if (revs[cnt].data.fd == nl_status_fd)
          {
            char buf[4096];
            /* Read all the data.  We do not interpret it here.  */
            while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
                                             sizeof (buf))) != -1)
              ;

            /* Store the new timestamp in the header of the hosts
               database, exactly as main_loop_poll does.  */
            dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
              = __bump_nl_timestamp ();
          }
# endif
        else
          {
            /* Remove the descriptor from the epoll descriptor.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      assert (starttime[sock] == 0);
# ifdef HAVE_INOTIFY
      assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
# endif
# ifdef HAVE_NETLINK
      assert (nl_status_fd == -1 || starttime[nl_status_fd] == 0);
# endif
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;

            /* The connection is gone, so it no longer counts as used.
               Without this NUSED would only ever grow for timed-out
               connections and distort the timeout computations.  */
            --nused;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
2316 /* Start all the threads we want. The initial process is thread no. 1. */
2317 void
2318 start_threads (void)
2320 /* Initialize the conditional variable we will use. The only
2321 non-standard attribute we might use is the clock selection. */
2322 pthread_condattr_t condattr;
2323 pthread_condattr_init (&condattr);
2325 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2326 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2327 /* Determine whether the monotonous clock is available. */
2328 struct timespec dummy;
2329 # if _POSIX_MONOTONIC_CLOCK == 0
2330 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
2331 # endif
2332 # if _POSIX_CLOCK_SELECTION == 0
2333 if (sysconf (_SC_CLOCK_SELECTION) > 0)
2334 # endif
2335 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
2336 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
2337 timeout_clock = CLOCK_MONOTONIC;
2338 #endif
2340 /* Create the attribute for the threads. They are all created
2341 detached. */
2342 pthread_attr_init (&attr);
2343 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
2344 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2345 pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
2347 /* We allow less than LASTDB threads only for debugging. */
2348 if (debug_level == 0)
2349 nthreads = MAX (nthreads, lastdb);
2351 /* Create the threads which prune the databases. */
2352 // XXX Ideally this work would be done by some of the worker threads.
2353 // XXX But this is problematic since we would need to be able to wake
2354 // XXX them up explicitly as well as part of the group handling the
2355 // XXX ready-list. This requires an operation where we can wait on
2356 // XXX two conditional variables at the same time. This operation
2357 // XXX does not exist (yet).
2358 for (long int i = 0; i < lastdb; ++i)
2360 /* Initialize the conditional variable. */
2361 if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
2363 dbg_log (_("could not initialize conditional variable"));
2364 do_exit (1, 0, NULL);
2367 pthread_t th;
2368 if (dbs[i].enabled
2369 && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
2371 dbg_log (_("could not start clean-up thread; terminating"));
2372 do_exit (1, 0, NULL);
2376 pthread_condattr_destroy (&condattr);
2378 for (long int i = 0; i < nthreads; ++i)
2380 pthread_t th;
2381 if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
2383 if (i == 0)
2385 dbg_log (_("could not start any worker thread; terminating"));
2386 do_exit (1, 0, NULL);
2389 break;
2393 /* Now it is safe to let the parent know that we're doing fine and it can
2394 exit. */
2395 notify_parent (0);
2397 /* Determine how much room for descriptors we should initially
2398 allocate. This might need to change later if we cap the number
2399 with MAXCONN. */
2400 const long int nfds = sysconf (_SC_OPEN_MAX);
2401 #define MINCONN 32
2402 #define MAXCONN 16384
2403 if (nfds == -1 || nfds > MAXCONN)
2404 nconns = MAXCONN;
2405 else if (nfds < MINCONN)
2406 nconns = MINCONN;
2407 else
2408 nconns = nfds;
2410 /* We need memory to pass descriptors on to the worker threads. */
2411 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
2412 /* Array to keep track when connection was accepted. */
2413 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
2415 /* In the main thread we execute the loop which handles incoming
2416 connections. */
2417 #ifdef HAVE_EPOLL
2418 int efd = epoll_create (100);
2419 if (efd != -1)
2421 main_loop_epoll (efd);
2422 close (efd);
2424 #endif
2426 main_loop_poll ();
2430 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2431 this function is called, we are not listening on the nscd socket yet so
2432 we can just use the ordinary lookup functions without causing a lockup */
2433 static void
2434 begin_drop_privileges (void)
2436 struct passwd *pwd = getpwnam (server_user);
2438 if (pwd == NULL)
2440 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2441 do_exit (EXIT_FAILURE, 0,
2442 _("Failed to run nscd as user '%s'"), server_user);
2445 server_uid = pwd->pw_uid;
2446 server_gid = pwd->pw_gid;
2448 /* Save the old UID/GID if we have to change back. */
2449 if (paranoia)
2451 old_uid = getuid ();
2452 old_gid = getgid ();
2455 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
2457 /* This really must never happen. */
2458 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2459 do_exit (EXIT_FAILURE, errno,
2460 _("initial getgrouplist failed"));
2463 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
2465 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
2466 == -1)
2468 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2469 do_exit (EXIT_FAILURE, errno, _("getgrouplist failed"));
2474 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2475 run nscd as the user specified in the configuration file. */
2476 static void
2477 finish_drop_privileges (void)
2479 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2480 /* We need to preserve the capabilities to connect to the audit daemon. */
2481 cap_t new_caps = preserve_capabilities ();
2482 #endif
2484 if (setgroups (server_ngroups, server_groups) == -1)
2486 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2487 do_exit (EXIT_FAILURE, errno, _("setgroups failed"));
2490 int res;
2491 if (paranoia)
2492 res = setresgid (server_gid, server_gid, old_gid);
2493 else
2494 res = setgid (server_gid);
2495 if (res == -1)
2497 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2498 do_exit (4, errno, "setgid");
2501 if (paranoia)
2502 res = setresuid (server_uid, server_uid, old_uid);
2503 else
2504 res = setuid (server_uid);
2505 if (res == -1)
2507 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2508 do_exit (4, errno, "setuid");
2511 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2512 /* Remove the temporary capabilities. */
2513 install_real_capabilities (new_caps);
2514 #endif