S390: Move utf8-utf32-z9.c to multiarch folder and use s390_libc_ifunc_expr macro.
[glibc.git] / nscd / connections.c
blobcc1ed72077640a8b45b8d70f3abc13814a0e8faf
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2017 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>. */
19 #include <alloca.h>
20 #include <assert.h>
21 #include <atomic.h>
22 #include <error.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <grp.h>
26 #include <ifaddrs.h>
27 #include <libintl.h>
28 #include <pthread.h>
29 #include <pwd.h>
30 #include <resolv.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <stdint.h>
35 #include <arpa/inet.h>
36 #ifdef HAVE_NETLINK
37 # include <linux/netlink.h>
38 # include <linux/rtnetlink.h>
39 #endif
40 #ifdef HAVE_EPOLL
41 # include <sys/epoll.h>
42 #endif
43 #ifdef HAVE_INOTIFY
44 # include <sys/inotify.h>
45 #endif
46 #include <sys/mman.h>
47 #include <sys/param.h>
48 #include <sys/poll.h>
49 #ifdef HAVE_SENDFILE
50 # include <sys/sendfile.h>
51 #endif
52 #include <sys/socket.h>
53 #include <sys/stat.h>
54 #include <sys/un.h>
56 #include "nscd.h"
57 #include "dbg_log.h"
58 #include "selinux.h"
59 #include <resolv/resolv.h>
61 #include <kernel-features.h>
62 #include <libc-diag.h>
/* State used to run nscd as an unprivileged user.  When SERVER_USER is
   non-NULL, nscd_init calls begin_drop_privileges before the sockets are
   set up and finish_drop_privileges once they are ready.  */
const char *server_user;
static uid_t server_uid;
static gid_t server_gid;
static gid_t *server_groups;
static int server_ngroups;
#ifndef NGROUPS
# define NGROUPS 32
#endif

/* NOTE(review): presumably the account allowed to query statistics;
   neither variable is referenced in this part of the file -- confirm
   against the users elsewhere.  */
const char *stat_user;
uid_t stat_uid;

static pthread_attr_t attr;

static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
82 /* Map request type to a string. */
83 const char *const serv2str[LASTREQ] =
85 [GETPWBYNAME] = "GETPWBYNAME",
86 [GETPWBYUID] = "GETPWBYUID",
87 [GETGRBYNAME] = "GETGRBYNAME",
88 [GETGRBYGID] = "GETGRBYGID",
89 [GETHOSTBYNAME] = "GETHOSTBYNAME",
90 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
91 [GETHOSTBYADDR] = "GETHOSTBYADDR",
92 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
93 [SHUTDOWN] = "SHUTDOWN",
94 [GETSTAT] = "GETSTAT",
95 [INVALIDATE] = "INVALIDATE",
96 [GETFDPW] = "GETFDPW",
97 [GETFDGR] = "GETFDGR",
98 [GETFDHST] = "GETFDHST",
99 [GETAI] = "GETAI",
100 [INITGROUPS] = "INITGROUPS",
101 [GETSERVBYNAME] = "GETSERVBYNAME",
102 [GETSERVBYPORT] = "GETSERVBYPORT",
103 [GETFDSERV] = "GETFDSERV",
104 [GETNETGRENT] = "GETNETGRENT",
105 [INNETGR] = "INNETGR",
106 [GETFDNETGR] = "GETFDNETGR"
109 /* The control data structures for the services. */
110 struct database_dyn dbs[lastdb] =
112 [pwddb] = {
113 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
114 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
115 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
116 .enabled = 0,
117 .check_file = 1,
118 .persistent = 0,
119 .propagate = 1,
120 .shared = 0,
121 .max_db_size = DEFAULT_MAX_DB_SIZE,
122 .suggested_module = DEFAULT_SUGGESTED_MODULE,
123 .db_filename = _PATH_NSCD_PASSWD_DB,
124 .disabled_iov = &pwd_iov_disabled,
125 .postimeout = 3600,
126 .negtimeout = 20,
127 .wr_fd = -1,
128 .ro_fd = -1,
129 .mmap_used = false
131 [grpdb] = {
132 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
133 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
134 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
135 .enabled = 0,
136 .check_file = 1,
137 .persistent = 0,
138 .propagate = 1,
139 .shared = 0,
140 .max_db_size = DEFAULT_MAX_DB_SIZE,
141 .suggested_module = DEFAULT_SUGGESTED_MODULE,
142 .db_filename = _PATH_NSCD_GROUP_DB,
143 .disabled_iov = &grp_iov_disabled,
144 .postimeout = 3600,
145 .negtimeout = 60,
146 .wr_fd = -1,
147 .ro_fd = -1,
148 .mmap_used = false
150 [hstdb] = {
151 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
152 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
153 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
154 .enabled = 0,
155 .check_file = 1,
156 .persistent = 0,
157 .propagate = 0, /* Not used. */
158 .shared = 0,
159 .max_db_size = DEFAULT_MAX_DB_SIZE,
160 .suggested_module = DEFAULT_SUGGESTED_MODULE,
161 .db_filename = _PATH_NSCD_HOSTS_DB,
162 .disabled_iov = &hst_iov_disabled,
163 .postimeout = 3600,
164 .negtimeout = 20,
165 .wr_fd = -1,
166 .ro_fd = -1,
167 .mmap_used = false
169 [servdb] = {
170 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
171 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
172 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
173 .enabled = 0,
174 .check_file = 1,
175 .persistent = 0,
176 .propagate = 0, /* Not used. */
177 .shared = 0,
178 .max_db_size = DEFAULT_MAX_DB_SIZE,
179 .suggested_module = DEFAULT_SUGGESTED_MODULE,
180 .db_filename = _PATH_NSCD_SERVICES_DB,
181 .disabled_iov = &serv_iov_disabled,
182 .postimeout = 28800,
183 .negtimeout = 20,
184 .wr_fd = -1,
185 .ro_fd = -1,
186 .mmap_used = false
188 [netgrdb] = {
189 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
190 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
191 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
192 .enabled = 0,
193 .check_file = 1,
194 .persistent = 0,
195 .propagate = 0, /* Not used. */
196 .shared = 0,
197 .max_db_size = DEFAULT_MAX_DB_SIZE,
198 .suggested_module = DEFAULT_SUGGESTED_MODULE,
199 .db_filename = _PATH_NSCD_NETGROUP_DB,
200 .disabled_iov = &netgroup_iov_disabled,
201 .postimeout = 28800,
202 .negtimeout = 20,
203 .wr_fd = -1,
204 .ro_fd = -1,
205 .mmap_used = false
210 /* Mapping of request type to database. */
211 static struct
213 bool data_request;
214 struct database_dyn *db;
215 } const reqinfo[LASTREQ] =
217 [GETPWBYNAME] = { true, &dbs[pwddb] },
218 [GETPWBYUID] = { true, &dbs[pwddb] },
219 [GETGRBYNAME] = { true, &dbs[grpdb] },
220 [GETGRBYGID] = { true, &dbs[grpdb] },
221 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
222 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
223 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
224 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
225 [SHUTDOWN] = { false, NULL },
226 [GETSTAT] = { false, NULL },
227 [SHUTDOWN] = { false, NULL },
228 [GETFDPW] = { false, &dbs[pwddb] },
229 [GETFDGR] = { false, &dbs[grpdb] },
230 [GETFDHST] = { false, &dbs[hstdb] },
231 [GETAI] = { true, &dbs[hstdb] },
232 [INITGROUPS] = { true, &dbs[grpdb] },
233 [GETSERVBYNAME] = { true, &dbs[servdb] },
234 [GETSERVBYPORT] = { true, &dbs[servdb] },
235 [GETFDSERV] = { false, &dbs[servdb] },
236 [GETNETGRENT] = { true, &dbs[netgrdb] },
237 [INNETGR] = { true, &dbs[netgrdb] },
238 [GETFDNETGR] = { false, &dbs[netgrdb] }
/* Initial number of threads to use.  The value -1 means "not
   configured"; nscd_init replaces it with the default of 4.  */
int nthreads = -1;

/* Maximum number of threads to use.  */
int max_nthreads = 32;

/* Number of times clients had to wait.  */
unsigned long int client_queued;

/* Socket for incoming connections.  */
static int sock;

#ifdef HAVE_INOTIFY
/* Inotify descriptor; -1 while no inotify instance is available.  */
int inotify_fd = -1;
#endif

#ifdef HAVE_NETLINK
/* Descriptor for netlink status updates; -1 while not open.  */
static int nl_status_fd = -1;
#endif
/* Send the LEN bytes starting at BUF over socket FD, continuing after
   short writes and retrying when a send is interrupted.  SIGPIPE is
   suppressed with MSG_NOSIGNAL.

   Returns the number of bytes actually sent (less than LEN only if a
   send reported zero bytes), or the negative result of the failing
   send.  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cursor = buf;
  size_t left = len;
  ssize_t sent;

  for (;;)
    {
      sent = TEMP_FAILURE_RETRY (send (fd, cursor, left, MSG_NOSIGNAL));
      if (sent <= 0)
        break;

      cursor += sent;
      left -= sent;
      if (left == 0)
        break;
    }

  if (sent < 0)
    return sent;
  return len - left;
}
#ifdef HAVE_SENDFILE
/* Copy LEN bytes from descriptor FROMFD, starting at offset OFF, to
   TOFD with sendfile, continuing after short transfers and retrying
   interrupted calls.  sendfile advances OFF itself.

   Returns the number of bytes transferred (less than LEN only if
   sendfile reported zero bytes), or the negative result of the failing
   call.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t left = len;
  ssize_t sent;

  for (;;)
    {
      sent = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, left));
      if (sent <= 0)
        break;

      left -= sent;
      if (left <= 0)
        break;
    }

  if (sent < 0)
    return sent;
  return len - left;
}
#endif
/* Markers stored in the per-byte usage map while a persistent database
   is verified.  The low bits give the kind of object occupying a byte,
   the high bits flag its position inside that object.  */
enum usekey
  {
    /* Byte not claimed by any object yet.  */
    use_not = 0,

    /* Kinds of objects.  */
    use_he = 1,
    use_data = 3,

    /* Position flags.  These three are not really used on their own,
       they are symbolic constants combined with a kind below.  */
    use_first = 1 << 4,
    use_begin = 1 << 5,
    use_end = 1 << 6,

    /* First and last byte of a hash entry.  */
    use_he_begin = use_he | use_begin,
    use_he_end = use_he | use_end,

    /* First and last byte of a data record; the _first variant also
       carries the use_first flag.  */
    use_data_begin = use_data | use_begin,
    use_data_end = use_data | use_end,
    use_data_first = use_data_begin | use_first
  };
320 static int
321 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
322 enum usekey use, ref_t start, size_t len)
324 assert (len >= 2);
326 if (start > first_free || start + len > first_free
327 || (start & BLOCK_ALIGN_M1))
328 return 0;
330 if (usemap[start] == use_not)
332 /* Add the start marker. */
333 usemap[start] = use | use_begin;
334 use &= ~use_first;
336 while (--len > 0)
337 if (usemap[++start] != use_not)
338 return 0;
339 else
340 usemap[start] = use;
342 /* Add the end marker. */
343 usemap[start] = use | use_end;
345 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
347 /* Hash entries can't be shared. */
348 if (use == use_he)
349 return 0;
351 usemap[start] |= (use & use_first);
352 use &= ~use_first;
354 while (--len > 1)
355 if (usemap[++start] != use)
356 return 0;
358 if (usemap[++start] != (use | use_end))
359 return 0;
361 else
362 /* Points to a wrong object or somewhere in the middle. */
363 return 0;
365 return 1;
369 /* Verify data in persistent database. */
370 static int
371 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
373 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb
374 || dbnr == netgrdb);
376 time_t now = time (NULL);
378 struct database_pers_head *head = mem;
379 struct database_pers_head head_copy = *head;
381 /* Check that the header that was read matches the head in the database. */
382 if (memcmp (head, readhead, sizeof (*head)) != 0)
383 return 0;
385 /* First some easy tests: make sure the database header is sane. */
386 if (head->version != DB_VERSION
387 || head->header_size != sizeof (*head)
388 /* We allow a timestamp to be one hour ahead of the current time.
389 This should cover daylight saving time changes. */
390 || head->timestamp > now + 60 * 60 + 60
391 || (head->gc_cycle & 1)
392 || head->module == 0
393 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
394 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
395 || head->first_free < 0
396 || head->first_free > head->data_size
397 || (head->first_free & BLOCK_ALIGN_M1) != 0
398 || head->maxnentries < 0
399 || head->maxnsearched < 0)
400 return 0;
402 uint8_t *usemap = calloc (head->first_free, 1);
403 if (usemap == NULL)
404 return 0;
406 const char *data = (char *) &head->array[roundup (head->module,
407 ALIGN / sizeof (ref_t))];
409 nscd_ssize_t he_cnt = 0;
410 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
412 ref_t trail = head->array[cnt];
413 ref_t work = trail;
414 int tick = 0;
416 while (work != ENDREF)
418 if (! check_use (data, head->first_free, usemap, use_he, work,
419 sizeof (struct hashentry)))
420 goto fail;
422 /* Now we know we can dereference the record. */
423 struct hashentry *here = (struct hashentry *) (data + work);
425 ++he_cnt;
427 /* Make sure the record is for this type of service. */
428 if (here->type >= LASTREQ
429 || reqinfo[here->type].db != &dbs[dbnr])
430 goto fail;
432 /* Validate boolean field value. */
433 if (here->first != false && here->first != true)
434 goto fail;
436 if (here->len < 0)
437 goto fail;
439 /* Now the data. */
440 if (here->packet < 0
441 || here->packet > head->first_free
442 || here->packet + sizeof (struct datahead) > head->first_free)
443 goto fail;
445 struct datahead *dh = (struct datahead *) (data + here->packet);
447 if (! check_use (data, head->first_free, usemap,
448 use_data | (here->first ? use_first : 0),
449 here->packet, dh->allocsize))
450 goto fail;
452 if (dh->allocsize < sizeof (struct datahead)
453 || dh->recsize > dh->allocsize
454 || (dh->notfound != false && dh->notfound != true)
455 || (dh->usable != false && dh->usable != true))
456 goto fail;
458 if (here->key < here->packet + sizeof (struct datahead)
459 || here->key > here->packet + dh->allocsize
460 || here->key + here->len > here->packet + dh->allocsize)
461 goto fail;
463 work = here->next;
465 if (work == trail)
466 /* A circular list, this must not happen. */
467 goto fail;
468 if (tick)
469 trail = ((struct hashentry *) (data + trail))->next;
470 tick = 1 - tick;
474 if (he_cnt != head->nentries)
475 goto fail;
477 /* See if all data and keys had at least one reference from
478 he->first == true hashentry. */
479 for (ref_t idx = 0; idx < head->first_free; ++idx)
481 if (usemap[idx] == use_data_begin)
482 goto fail;
485 /* Finally, make sure the database hasn't changed since the first test. */
486 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
487 goto fail;
489 free (usemap);
490 return 1;
492 fail:
493 free (usemap);
494 return 0;
498 /* Initialize database information structures. */
499 void
500 nscd_init (void)
502 /* Look up unprivileged uid/gid/groups before we start listening on the
503 socket */
504 if (server_user != NULL)
505 begin_drop_privileges ();
507 if (nthreads == -1)
508 /* No configuration for this value, assume a default. */
509 nthreads = 4;
511 for (size_t cnt = 0; cnt < lastdb; ++cnt)
512 if (dbs[cnt].enabled)
514 pthread_rwlock_init (&dbs[cnt].lock, NULL);
515 pthread_mutex_init (&dbs[cnt].memlock, NULL);
517 if (dbs[cnt].persistent)
519 /* Try to open the appropriate file on disk. */
520 int fd = open (dbs[cnt].db_filename, O_RDWR | O_CLOEXEC);
521 if (fd != -1)
523 char *msg = NULL;
524 struct stat64 st;
525 void *mem;
526 size_t total;
527 struct database_pers_head head;
528 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
529 sizeof (head)));
530 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
532 fail_db_errno:
533 /* The code is single-threaded at this point so
534 using strerror is just fine. */
535 msg = strerror (errno);
536 fail_db:
537 dbg_log (_("invalid persistent database file \"%s\": %s"),
538 dbs[cnt].db_filename, msg);
539 unlink (dbs[cnt].db_filename);
541 else if (head.module == 0 && head.data_size == 0)
543 /* The file has been created, but the head has not
544 been initialized yet. */
545 msg = _("uninitialized header");
546 goto fail_db;
548 else if (head.header_size != (int) sizeof (head))
550 msg = _("header size does not match");
551 goto fail_db;
553 else if ((total = (sizeof (head)
554 + roundup (head.module * sizeof (ref_t),
555 ALIGN)
556 + head.data_size))
557 > st.st_size
558 || total < sizeof (head))
560 msg = _("file size does not match");
561 goto fail_db;
563 /* Note we map with the maximum size allowed for the
564 database. This is likely much larger than the
565 actual file size. This is OK on most OSes since
566 extensions of the underlying file will
567 automatically translate more pages available for
568 memory access. */
569 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
570 PROT_READ | PROT_WRITE,
571 MAP_SHARED, fd, 0))
572 == MAP_FAILED)
573 goto fail_db_errno;
574 else if (!verify_persistent_db (mem, &head, cnt))
576 munmap (mem, total);
577 msg = _("verification failed");
578 goto fail_db;
580 else
582 /* Success. We have the database. */
583 dbs[cnt].head = mem;
584 dbs[cnt].memsize = total;
585 dbs[cnt].data = (char *)
586 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
587 ALIGN / sizeof (ref_t))];
588 dbs[cnt].mmap_used = true;
590 if (dbs[cnt].suggested_module > head.module)
591 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
592 dbnames[cnt]);
594 dbs[cnt].wr_fd = fd;
595 fd = -1;
596 /* We also need a read-only descriptor. */
597 if (dbs[cnt].shared)
599 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
600 O_RDONLY | O_CLOEXEC);
601 if (dbs[cnt].ro_fd == -1)
602 dbg_log (_("\
603 cannot create read-only descriptor for \"%s\"; no mmap"),
604 dbs[cnt].db_filename);
607 // XXX Shall we test whether the descriptors actually
608 // XXX point to the same file?
611 /* Close the file descriptors in case something went
612 wrong in which case the variable have not been
613 assigned -1. */
614 if (fd != -1)
615 close (fd);
617 else if (errno == EACCES)
618 do_exit (EXIT_FAILURE, 0, _("cannot access '%s'"),
619 dbs[cnt].db_filename);
622 if (dbs[cnt].head == NULL)
624 /* No database loaded. Allocate the data structure,
625 possibly on disk. */
626 struct database_pers_head head;
627 size_t total = (sizeof (head)
628 + roundup (dbs[cnt].suggested_module
629 * sizeof (ref_t), ALIGN)
630 + (dbs[cnt].suggested_module
631 * DEFAULT_DATASIZE_PER_BUCKET));
633 /* Try to create the database. If we do not need a
634 persistent database create a temporary file. */
635 int fd;
636 int ro_fd = -1;
637 if (dbs[cnt].persistent)
639 fd = open (dbs[cnt].db_filename,
640 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC,
641 S_IRUSR | S_IWUSR);
642 if (fd != -1 && dbs[cnt].shared)
643 ro_fd = open (dbs[cnt].db_filename,
644 O_RDONLY | O_CLOEXEC);
646 else
648 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
649 fd = mkostemp (fname, O_CLOEXEC);
651 /* We do not need the file name anymore after we
652 opened another file descriptor in read-only mode. */
653 if (fd != -1)
655 if (dbs[cnt].shared)
656 ro_fd = open (fname, O_RDONLY | O_CLOEXEC);
658 unlink (fname);
662 if (fd == -1)
664 if (errno == EEXIST)
666 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
667 dbnames[cnt], dbs[cnt].db_filename);
668 do_exit (1, 0, NULL);
671 if (dbs[cnt].persistent)
672 dbg_log (_("cannot create %s; no persistent database used"),
673 dbs[cnt].db_filename);
674 else
675 dbg_log (_("cannot create %s; no sharing possible"),
676 dbs[cnt].db_filename);
678 dbs[cnt].persistent = 0;
679 // XXX remember: no mmap
681 else
683 /* Tell the user if we could not create the read-only
684 descriptor. */
685 if (ro_fd == -1 && dbs[cnt].shared)
686 dbg_log (_("\
687 cannot create read-only descriptor for \"%s\"; no mmap"),
688 dbs[cnt].db_filename);
690 /* Before we create the header, initialize the hash
691 table. That way if we get interrupted while writing
692 the header we can recognize a partially initialized
693 database. */
694 size_t ps = sysconf (_SC_PAGESIZE);
695 char tmpbuf[ps];
696 assert (~ENDREF == 0);
697 memset (tmpbuf, '\xff', ps);
699 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
700 off_t offset = sizeof (head);
702 size_t towrite;
703 if (offset % ps != 0)
705 towrite = MIN (remaining, ps - (offset % ps));
706 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
707 goto write_fail;
708 offset += towrite;
709 remaining -= towrite;
712 while (remaining > ps)
714 if (pwrite (fd, tmpbuf, ps, offset) == -1)
715 goto write_fail;
716 offset += ps;
717 remaining -= ps;
720 if (remaining > 0
721 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
722 goto write_fail;
724 /* Create the header of the file. */
725 struct database_pers_head head =
727 .version = DB_VERSION,
728 .header_size = sizeof (head),
729 .module = dbs[cnt].suggested_module,
730 .data_size = (dbs[cnt].suggested_module
731 * DEFAULT_DATASIZE_PER_BUCKET),
732 .first_free = 0
734 void *mem;
736 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
737 != sizeof (head))
738 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
739 != 0)
740 || (mem = mmap (NULL, dbs[cnt].max_db_size,
741 PROT_READ | PROT_WRITE,
742 MAP_SHARED, fd, 0)) == MAP_FAILED)
744 write_fail:
745 unlink (dbs[cnt].db_filename);
746 dbg_log (_("cannot write to database file %s: %s"),
747 dbs[cnt].db_filename, strerror (errno));
748 dbs[cnt].persistent = 0;
750 else
752 /* Success. */
753 dbs[cnt].head = mem;
754 dbs[cnt].data = (char *)
755 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
756 ALIGN / sizeof (ref_t))];
757 dbs[cnt].memsize = total;
758 dbs[cnt].mmap_used = true;
760 /* Remember the descriptors. */
761 dbs[cnt].wr_fd = fd;
762 dbs[cnt].ro_fd = ro_fd;
763 fd = -1;
764 ro_fd = -1;
767 if (fd != -1)
768 close (fd);
769 if (ro_fd != -1)
770 close (ro_fd);
774 if (dbs[cnt].head == NULL)
776 /* We do not use the persistent database. Just
777 create an in-memory data structure. */
778 assert (! dbs[cnt].persistent);
780 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
781 + (dbs[cnt].suggested_module
782 * sizeof (ref_t)));
783 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
784 assert (~ENDREF == 0);
785 memset (dbs[cnt].head->array, '\xff',
786 dbs[cnt].suggested_module * sizeof (ref_t));
787 dbs[cnt].head->module = dbs[cnt].suggested_module;
788 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
789 * dbs[cnt].head->module);
790 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
791 dbs[cnt].head->first_free = 0;
793 dbs[cnt].shared = 0;
794 assert (dbs[cnt].ro_fd == -1);
798 /* Create the socket. */
799 sock = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
800 if (sock < 0)
802 dbg_log (_("cannot open socket: %s"), strerror (errno));
803 do_exit (errno == EACCES ? 4 : 1, 0, NULL);
805 /* Bind a name to the socket. */
806 struct sockaddr_un sock_addr;
807 sock_addr.sun_family = AF_UNIX;
808 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
809 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
811 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
812 do_exit (errno == EACCES ? 4 : 1, 0, NULL);
815 /* Set permissions for the socket. */
816 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
818 /* Set the socket up to accept connections. */
819 if (listen (sock, SOMAXCONN) < 0)
821 dbg_log (_("cannot enable socket to accept connections: %s"),
822 strerror (errno));
823 do_exit (1, 0, NULL);
826 #ifdef HAVE_NETLINK
827 if (dbs[hstdb].enabled)
829 /* Try to open netlink socket to monitor network setting changes. */
830 nl_status_fd = socket (AF_NETLINK,
831 SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK,
832 NETLINK_ROUTE);
833 if (nl_status_fd != -1)
835 struct sockaddr_nl snl;
836 memset (&snl, '\0', sizeof (snl));
837 snl.nl_family = AF_NETLINK;
838 /* XXX Is this the best set to use? */
839 snl.nl_groups = (RTMGRP_IPV4_IFADDR | RTMGRP_TC | RTMGRP_IPV4_MROUTE
840 | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_RULE
841 | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_MROUTE
842 | RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFINFO
843 | RTMGRP_IPV6_PREFIX);
845 if (bind (nl_status_fd, (struct sockaddr *) &snl, sizeof (snl)) != 0)
847 close (nl_status_fd);
848 nl_status_fd = -1;
850 else
852 /* Start the timestamp process. */
853 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
854 = __bump_nl_timestamp ();
858 #endif
860 /* Change to unprivileged uid/gid/groups if specified in config file */
861 if (server_user != NULL)
862 finish_drop_privileges ();
#ifdef HAVE_INOTIFY
#define TRACED_FILE_MASK (IN_DELETE_SELF | IN_CLOSE_WRITE | IN_MOVE_SELF)
#define TRACED_DIR_MASK (IN_DELETE_SELF | IN_CREATE | IN_MOVED_TO | IN_MOVE_SELF)
/* Install the inotify watches for the traced file FINFO: one on the
   file itself and one on its parent directory.  A watch whose
   descriptor in FINFO->inotify_descr is already non-negative is left
   alone, so the function can be called repeatedly to re-establish
   lost watches.  Failures are logged; if the file watch cannot be
   installed the directory watch is not attempted.  */
void
install_watches (struct traced_file *finfo)
{
  /* Use inotify support if we have it.  */
  if (finfo->inotify_descr[TRACED_FILE] < 0)
    finfo->inotify_descr[TRACED_FILE] = inotify_add_watch (inotify_fd,
                                                           finfo->fname,
                                                           TRACED_FILE_MASK);
  if (finfo->inotify_descr[TRACED_FILE] < 0)
    {
      dbg_log (_("disabled inotify-based monitoring for file `%s': %s"),
               finfo->fname, strerror (errno));
      return;
    }
  dbg_log (_("monitoring file `%s` (%d)"),
           finfo->fname, finfo->inotify_descr[TRACED_FILE]);
  /* Additionally listen for events in the file's parent directory.
     We do this because the file to be watched might be
     deleted and then added back again.  When it is added back again
     we must re-add the watch.  We must also cover IN_MOVED_TO to
     detect a file being moved into the directory.  */
  if (finfo->inotify_descr[TRACED_DIR] < 0)
    finfo->inotify_descr[TRACED_DIR] = inotify_add_watch (inotify_fd,
                                                          finfo->dname,
                                                          TRACED_DIR_MASK);
  if (finfo->inotify_descr[TRACED_DIR] < 0)
    {
      /* Report the directory that could not be watched, not the file.  */
      dbg_log (_("disabled inotify-based monitoring for directory `%s': %s"),
               finfo->dname, strerror (errno));
      return;
    }
  dbg_log (_("monitoring directory `%s` (%d)"),
           finfo->dname, finfo->inotify_descr[TRACED_DIR]);
}
#endif
904 /* Register the file in FINFO as a traced file for the database DBS[DBIX].
906 We support registering multiple files per database. Each call to
907 register_traced_file adds to the list of registered files.
909 When we prune the database, either through timeout or a request to
910 invalidate, we will check to see if any of the registered files has changed.
911 When we accept new connections to handle a cache request we will also
912 check to see if any of the registered files has changed.
914 If we have inotify support then we install an inotify fd to notify us of
915 file deletion or modification, both of which will require we invalidate
916 the cache for the database. Without inotify support we stat the file and
917 store st_mtime to determine if the file has been modified. */
918 void
919 register_traced_file (size_t dbidx, struct traced_file *finfo)
921 /* If the database is disabled or file checking is disabled
922 then ignore the registration. */
923 if (! dbs[dbidx].enabled || ! dbs[dbidx].check_file)
924 return;
926 if (__glibc_unlikely (debug_level > 0))
927 dbg_log (_("monitoring file %s for database %s"),
928 finfo->fname, dbnames[dbidx]);
930 #ifdef HAVE_INOTIFY
931 install_watches (finfo);
932 #endif
933 struct stat64 st;
934 if (stat64 (finfo->fname, &st) < 0)
936 /* We cannot stat() the file. Set mtime to zero and try again later. */
937 dbg_log (_("stat failed for file `%s'; will try again later: %s"),
938 finfo->fname, strerror (errno));
939 finfo->mtime = 0;
941 else
942 finfo->mtime = st.st_mtime;
944 /* Queue up the file name. */
945 finfo->next = dbs[dbidx].traced_files;
946 dbs[dbidx].traced_files = finfo;
950 /* Close the connections. */
951 void
952 close_sockets (void)
954 close (sock);
958 static void
959 invalidate_cache (char *key, int fd)
961 dbtype number;
962 int32_t resp;
964 for (number = pwddb; number < lastdb; ++number)
965 if (strcmp (key, dbnames[number]) == 0)
967 struct traced_file *runp = dbs[number].traced_files;
968 while (runp != NULL)
970 /* Make sure we reload from file when checking mtime. */
971 runp->mtime = 0;
972 #ifdef HAVE_INOTIFY
973 /* During an invalidation we try to reload the traced
974 file watches. This allows the user to re-sync if
975 inotify events were lost. Similar to what we do during
976 pruning. */
977 install_watches (runp);
978 #endif
979 if (runp->call_res_init)
981 res_init ();
982 break;
984 runp = runp->next;
986 break;
989 if (number == lastdb)
991 resp = EINVAL;
992 writeall (fd, &resp, sizeof (resp));
993 return;
996 if (dbs[number].enabled)
998 pthread_mutex_lock (&dbs[number].prune_run_lock);
999 prune_cache (&dbs[number], LONG_MAX, fd);
1000 pthread_mutex_unlock (&dbs[number].prune_run_lock);
1002 else
1004 resp = 0;
1005 writeall (fd, &resp, sizeof (resp));
1010 #ifdef SCM_RIGHTS
1011 static void
1012 send_ro_fd (struct database_dyn *db, char *key, int fd)
1014 /* If we do not have an read-only file descriptor do nothing. */
1015 if (db->ro_fd == -1)
1016 return;
1018 /* We need to send some data along with the descriptor. */
1019 uint64_t mapsize = (db->head->data_size
1020 + roundup (db->head->module * sizeof (ref_t), ALIGN)
1021 + sizeof (struct database_pers_head));
1022 struct iovec iov[2];
1023 iov[0].iov_base = key;
1024 iov[0].iov_len = strlen (key) + 1;
1025 iov[1].iov_base = &mapsize;
1026 iov[1].iov_len = sizeof (mapsize);
1028 /* Prepare the control message to transfer the descriptor. */
1029 union
1031 struct cmsghdr hdr;
1032 char bytes[CMSG_SPACE (sizeof (int))];
1033 } buf;
1034 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
1035 .msg_control = buf.bytes,
1036 .msg_controllen = sizeof (buf) };
1037 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
1039 cmsg->cmsg_level = SOL_SOCKET;
1040 cmsg->cmsg_type = SCM_RIGHTS;
1041 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
1043 int *ip = (int *) CMSG_DATA (cmsg);
1044 *ip = db->ro_fd;
1046 msg.msg_controllen = cmsg->cmsg_len;
1048 /* Send the control message. We repeat when we are interrupted but
1049 everything else is ignored. */
1050 #ifndef MSG_NOSIGNAL
1051 # define MSG_NOSIGNAL 0
1052 #endif
1053 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
1055 if (__glibc_unlikely (debug_level > 0))
1056 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
1058 #endif /* SCM_RIGHTS */
1061 /* Handle new request. */
1062 static void
1063 handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
1065 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
1067 if (debug_level > 0)
1068 dbg_log (_("\
1069 cannot handle old request version %d; current version is %d"),
1070 req->version, NSCD_VERSION);
1071 return;
1074 /* Perform the SELinux check before we go on to the standard checks. */
1075 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
1077 if (debug_level > 0)
1079 #ifdef SO_PEERCRED
1080 # ifdef PATH_MAX
1081 char buf[PATH_MAX];
1082 # else
1083 char buf[4096];
1084 # endif
1086 snprintf (buf, sizeof (buf), "/proc/%ld/exe", (long int) pid);
1087 ssize_t n = readlink (buf, buf, sizeof (buf) - 1);
1089 if (n <= 0)
1090 dbg_log (_("\
1091 request from %ld not handled due to missing permission"), (long int) pid);
1092 else
1094 buf[n] = '\0';
1095 dbg_log (_("\
1096 request from '%s' [%ld] not handled due to missing permission"),
1097 buf, (long int) pid);
1099 #else
1100 dbg_log (_("request not handled due to missing permission"));
1101 #endif
1103 return;
1106 struct database_dyn *db = reqinfo[req->type].db;
1108 /* See whether we can service the request from the cache. */
1109 if (__builtin_expect (reqinfo[req->type].data_request, true))
1111 if (__builtin_expect (debug_level, 0) > 0)
1113 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
1115 char buf[INET6_ADDRSTRLEN];
1117 dbg_log ("\t%s (%s)", serv2str[req->type],
1118 inet_ntop (req->type == GETHOSTBYADDR
1119 ? AF_INET : AF_INET6,
1120 key, buf, sizeof (buf)));
1122 else
1123 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1126 /* Is this service enabled? */
1127 if (__glibc_unlikely (!db->enabled))
1129 /* No, sent the prepared record. */
1130 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1131 db->disabled_iov->iov_len,
1132 MSG_NOSIGNAL))
1133 != (ssize_t) db->disabled_iov->iov_len
1134 && __builtin_expect (debug_level, 0) > 0)
1136 /* We have problems sending the result. */
1137 char buf[256];
1138 dbg_log (_("cannot write result: %s"),
1139 strerror_r (errno, buf, sizeof (buf)));
1142 return;
1145 /* Be sure we can read the data. */
1146 if (__glibc_unlikely (pthread_rwlock_tryrdlock (&db->lock) != 0))
1148 ++db->head->rdlockdelayed;
1149 pthread_rwlock_rdlock (&db->lock);
1152 /* See whether we can handle it from the cache. */
1153 struct datahead *cached;
1154 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1155 db, uid);
1156 if (cached != NULL)
1158 /* Hurray it's in the cache. */
1159 ssize_t nwritten;
1161 #ifdef HAVE_SENDFILE
1162 if (__glibc_likely (db->mmap_used))
1164 assert (db->wr_fd != -1);
1165 assert ((char *) cached->data > (char *) db->data);
1166 assert ((char *) cached->data - (char *) db->head
1167 + cached->recsize
1168 <= (sizeof (struct database_pers_head)
1169 + db->head->module * sizeof (ref_t)
1170 + db->head->data_size));
1171 nwritten = sendfileall (fd, db->wr_fd,
1172 (char *) cached->data
1173 - (char *) db->head, cached->recsize);
1174 # ifndef __ASSUME_SENDFILE
1175 if (nwritten == -1 && errno == ENOSYS)
1176 goto use_write;
1177 # endif
1179 else
1180 # ifndef __ASSUME_SENDFILE
1181 use_write:
1182 # endif
1183 #endif
1184 nwritten = writeall (fd, cached->data, cached->recsize);
1186 if (nwritten != cached->recsize
1187 && __builtin_expect (debug_level, 0) > 0)
1189 /* We have problems sending the result. */
1190 char buf[256];
1191 dbg_log (_("cannot write result: %s"),
1192 strerror_r (errno, buf, sizeof (buf)));
1195 pthread_rwlock_unlock (&db->lock);
1197 return;
1200 pthread_rwlock_unlock (&db->lock);
1202 else if (__builtin_expect (debug_level, 0) > 0)
1204 if (req->type == INVALIDATE)
1205 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1206 else
1207 dbg_log ("\t%s", serv2str[req->type]);
1210 /* Handle the request. */
1211 switch (req->type)
1213 case GETPWBYNAME:
1214 addpwbyname (db, fd, req, key, uid);
1215 break;
1217 case GETPWBYUID:
1218 addpwbyuid (db, fd, req, key, uid);
1219 break;
1221 case GETGRBYNAME:
1222 addgrbyname (db, fd, req, key, uid);
1223 break;
1225 case GETGRBYGID:
1226 addgrbygid (db, fd, req, key, uid);
1227 break;
1229 case GETHOSTBYNAME:
1230 addhstbyname (db, fd, req, key, uid);
1231 break;
1233 case GETHOSTBYNAMEv6:
1234 addhstbynamev6 (db, fd, req, key, uid);
1235 break;
1237 case GETHOSTBYADDR:
1238 addhstbyaddr (db, fd, req, key, uid);
1239 break;
1241 case GETHOSTBYADDRv6:
1242 addhstbyaddrv6 (db, fd, req, key, uid);
1243 break;
1245 case GETAI:
1246 addhstai (db, fd, req, key, uid);
1247 break;
1249 case INITGROUPS:
1250 addinitgroups (db, fd, req, key, uid);
1251 break;
1253 case GETSERVBYNAME:
1254 addservbyname (db, fd, req, key, uid);
1255 break;
1257 case GETSERVBYPORT:
1258 addservbyport (db, fd, req, key, uid);
1259 break;
1261 case GETNETGRENT:
1262 addgetnetgrent (db, fd, req, key, uid);
1263 break;
1265 case INNETGR:
1266 addinnetgr (db, fd, req, key, uid);
1267 break;
1269 case GETSTAT:
1270 case SHUTDOWN:
1271 case INVALIDATE:
1273 /* Get the callers credentials. */
1274 #ifdef SO_PEERCRED
1275 struct ucred caller;
1276 socklen_t optlen = sizeof (caller);
1278 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1280 char buf[256];
1282 dbg_log (_("error getting caller's id: %s"),
1283 strerror_r (errno, buf, sizeof (buf)));
1284 break;
1287 uid = caller.uid;
1288 #else
1289 /* Some systems have no SO_PEERCRED implementation. They don't
1290 care about security so we don't as well. */
1291 uid = 0;
1292 #endif
1295 /* Accept shutdown, getstat and invalidate only from root. For
1296 the stat call also allow the user specified in the config file. */
1297 if (req->type == GETSTAT)
1299 if (uid == 0 || uid == stat_uid)
1300 send_stats (fd, dbs);
1302 else if (uid == 0)
1304 if (req->type == INVALIDATE)
1305 invalidate_cache (key, fd);
1306 else
1307 termination_handler (0);
1309 break;
1311 case GETFDPW:
1312 case GETFDGR:
1313 case GETFDHST:
1314 case GETFDSERV:
1315 case GETFDNETGR:
1316 #ifdef SCM_RIGHTS
1317 send_ro_fd (reqinfo[req->type].db, key, fd);
1318 #endif
1319 break;
1321 default:
1322 /* Ignore the command, it's nothing we know. */
1323 break;
1328 /* Restart the process. */
1329 static void
1330 restart (void)
1332 /* First determine the parameters. We do not use the parameters
1333 passed to main() since in case nscd is started by running the
1334 dynamic linker this will not work. Yes, this is not the usual
1335 case but nscd is part of glibc and we occasionally do this. */
1336 size_t buflen = 1024;
1337 char *buf = alloca (buflen);
1338 size_t readlen = 0;
1339 int fd = open ("/proc/self/cmdline", O_RDONLY);
1340 if (fd == -1)
1342 dbg_log (_("\
1343 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1344 strerror (errno));
1346 paranoia = 0;
1347 return;
1350 while (1)
1352 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1353 buflen - readlen));
1354 if (n == -1)
1356 dbg_log (_("\
1357 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1358 strerror (errno));
1360 close (fd);
1361 paranoia = 0;
1362 return;
1365 readlen += n;
1367 if (readlen < buflen)
1368 break;
1370 /* We might have to extend the buffer. */
1371 size_t old_buflen = buflen;
1372 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1373 buf = memmove (newp, buf, old_buflen);
1376 close (fd);
1378 /* Parse the command line. Worst case scenario: every two
1379 characters form one parameter (one character plus NUL). */
1380 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1381 int argc = 0;
1383 char *cp = buf;
1384 while (cp < buf + readlen)
1386 argv[argc++] = cp;
1387 cp = (char *) rawmemchr (cp, '\0') + 1;
1389 argv[argc] = NULL;
1391 /* Second, change back to the old user if we changed it. */
1392 if (server_user != NULL)
1394 if (setresuid (old_uid, old_uid, old_uid) != 0)
1396 dbg_log (_("\
1397 cannot change to old UID: %s; disabling paranoia mode"),
1398 strerror (errno));
1400 paranoia = 0;
1401 return;
1404 if (setresgid (old_gid, old_gid, old_gid) != 0)
1406 dbg_log (_("\
1407 cannot change to old GID: %s; disabling paranoia mode"),
1408 strerror (errno));
1410 ignore_value (setuid (server_uid));
1411 paranoia = 0;
1412 return;
1416 /* Next change back to the old working directory. */
1417 if (chdir (oldcwd) == -1)
1419 dbg_log (_("\
1420 cannot change to old working directory: %s; disabling paranoia mode"),
1421 strerror (errno));
1423 if (server_user != NULL)
1425 ignore_value (setuid (server_uid));
1426 ignore_value (setgid (server_gid));
1428 paranoia = 0;
1429 return;
1432 /* Synchronize memory. */
1433 int32_t certainly[lastdb];
1434 for (int cnt = 0; cnt < lastdb; ++cnt)
1435 if (dbs[cnt].enabled)
1437 /* Make sure nobody keeps using the database. */
1438 dbs[cnt].head->timestamp = 0;
1439 certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
1440 dbs[cnt].head->nscd_certainly_running = 0;
1442 if (dbs[cnt].persistent)
1443 // XXX async OK?
1444 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1447 /* The preparations are done. */
1448 #ifdef PATH_MAX
1449 char pathbuf[PATH_MAX];
1450 #else
1451 char pathbuf[256];
1452 #endif
1453 /* Try to exec the real nscd program so the process name (as reported
1454 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1455 if readlink or the exec with the result of the readlink call fails. */
1456 ssize_t n = readlink ("/proc/self/exe", pathbuf, sizeof (pathbuf) - 1);
1457 if (n != -1)
1459 pathbuf[n] = '\0';
1460 execv (pathbuf, argv);
1462 execv ("/proc/self/exe", argv);
1464 /* If we come here, we will never be able to re-exec. */
1465 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1466 strerror (errno));
1468 if (server_user != NULL)
1470 ignore_value (setuid (server_uid));
1471 ignore_value (setgid (server_gid));
1473 if (chdir ("/") != 0)
1474 dbg_log (_("cannot change current working directory to \"/\": %s"),
1475 strerror (errno));
1476 paranoia = 0;
1478 /* Reenable the databases. */
1479 time_t now = time (NULL);
1480 for (int cnt = 0; cnt < lastdb; ++cnt)
1481 if (dbs[cnt].enabled)
1483 dbs[cnt].head->timestamp = now;
1484 dbs[cnt].head->nscd_certainly_running = certainly[cnt];
1489 /* List of file descriptors. An entry whose NEXT member is NULL is
not queued and may be reused for a newly ready descriptor. */
1490 struct fdlist
1492 int fd;
1493 struct fdlist *next;
1495 /* Memory allocated for the list. */
1496 static struct fdlist *fdlist;
1497 /* List of currently ready-to-read file descriptors. The entries form
a ring: READYLIST points to the entry queued last and READYLIST->next
is the entry a worker thread dequeues next. */
1498 static struct fdlist *readylist;
1500 /* Condition variable and mutex to signal availability of entries in
1501 READYLIST. Both are initialized statically; the worker threads
1502 block on the condition variable without a timeout. */
1503 static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
1504 static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1506 /* The clock used to compute the deadlines for the timed waits of the
clean-up threads. */
1507 static clockid_t timeout_clock = CLOCK_REALTIME;
1509 /* Number of threads ready to handle the READYLIST. */
1510 static unsigned long int nready;
1513 /* Function for the clean-up threads. P carries the index of the
database in DBS this thread is responsible for, cast to a pointer.
The function never returns: it loops forever, waiting on the
database's PRUNE_COND and pruning the cache when the wait timed out,
the recorded wake-up time has been reached or a full clearing of the
cache was requested through the CLEAR_CACHE flag. */
1514 static void *
1515 __attribute__ ((__noreturn__))
1516 nscd_run_prune (void *p)
1518 const long int my_number = (long int) p;
1519 assert (dbs[my_number].enabled);
1521 int dont_need_update = setup_thread (&dbs[my_number]);
1523 time_t now = time (NULL);
1525 /* We are running. */
1526 dbs[my_number].head->timestamp = now;
1528 struct timespec prune_ts;
1529 if (__glibc_unlikely (clock_gettime (timeout_clock, &prune_ts) == -1))
1530 /* Should never happen. */
1531 abort ();
1533 /* Compute the initial timeout time. Prevent all the timers from going
1534 off at the same time by adding a db-based value. */
1535 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1536 dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;
1538 pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
1539 pthread_mutex_t *prune_run_lock = &dbs[my_number].prune_run_lock;
1540 pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;
/* PRUNE_LOCK is held at the top of every loop iteration. */
1542 pthread_mutex_lock (prune_lock);
1543 while (1)
1545 /* Wait, but not forever. The wait is skipped entirely when a
clearing of the cache is already pending. */
1546 int e = 0;
1547 if (! dbs[my_number].clear_cache)
1548 e = pthread_cond_timedwait (prune_cond, prune_lock, &prune_ts);
1549 assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));
1551 time_t next_wait;
1552 now = time (NULL);
1553 if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time
1554 || dbs[my_number].clear_cache)
1556 /* We will determine the new timeout values based on the
1557 cache content. Should there be concurrent additions to
1558 the cache which are not accounted for in the cache
1559 pruning we want to know about it. Therefore set the
1560 timeout to the maximum. It will be decreased when adding
1561 new entries to the cache, if necessary. */
1562 dbs[my_number].wakeup_time = MAX_TIMEOUT_VALUE;
1564 /* Unconditionally reset the flag. When it was set, prune with
LONG_MAX as the current time instead of NOW. */
1565 time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
1566 dbs[my_number].clear_cache = 0;
1568 pthread_mutex_unlock (prune_lock);
1570 /* We use a separate lock for running the prune function (instead
1571 of keeping prune_lock locked) because this enables concurrent
1572 invocations of cache_add which might modify the timeout value. */
1573 pthread_mutex_lock (prune_run_lock);
1574 next_wait = prune_cache (&dbs[my_number], prune_now, -1);
1575 pthread_mutex_unlock (prune_run_lock);
/* Never wait less than CACHE_PRUNE_INTERVAL. */
1577 next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1578 /* If clients cannot determine for sure whether nscd is running
1579 we need to wake up occasionally to update the timestamp.
1580 Wait 90% of the update period. */
1581 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1582 if (__glibc_unlikely (! dont_need_update))
1584 next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1585 dbs[my_number].head->timestamp = now;
1588 pthread_mutex_lock (prune_lock);
1590 /* Make it known when we will wake up again. If the recorded wake-up
time is earlier than what we computed, use the recorded time. */
1591 if (now + next_wait < dbs[my_number].wakeup_time)
1592 dbs[my_number].wakeup_time = now + next_wait;
1593 else
1594 next_wait = dbs[my_number].wakeup_time - now;
1596 else
1597 /* The cache was just pruned. Do not do it again now. Just
1598 use the new timeout value. */
1599 next_wait = dbs[my_number].wakeup_time - now;
1601 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1602 /* Should never happen. */
1603 abort ();
1605 /* Compute next timeout time. */
1606 prune_ts.tv_sec += next_wait;
1611 /* This is the main loop. It is replicated in different threads but
1612 the use of the ready list makes sure only one thread handles an
1613 incoming connection. The argument P is not used. Each iteration
dequeues one descriptor from READYLIST, reads the request header and
the key from it, passes them to handle_request and closes the
descriptor. */
1614 static void *
1615 __attribute__ ((__noreturn__))
1616 nscd_run_worker (void *p)
/* Scratch buffer for strerror_r. */
1618 char buf[256];
1620 /* Initial locking. */
1621 pthread_mutex_lock (&readylist_lock);
1623 /* One more thread available. */
1624 ++nready;
1626 while (1)
/* READYLIST_LOCK is held here. Sleep until a descriptor is queued. */
1628 while (readylist == NULL)
1629 pthread_cond_wait (&readylist_cond, &readylist_lock);
/* Dequeue the entry following READYLIST in the ring. */
1631 struct fdlist *it = readylist->next;
1632 if (readylist->next == readylist)
1633 /* Just one entry on the list. */
1634 readylist = NULL;
1635 else
1636 readylist->next = it->next;
1638 /* Extract the information and mark the record ready to be used
1639 again. */
1640 int fd = it->fd;
1641 it->next = NULL;
1643 /* One thread fewer available: this one is busy now. */
1644 --nready;
1646 /* We are done with the list. */
1647 pthread_mutex_unlock (&readylist_lock);
1649 /* Now read the request. */
1650 request_header req;
1651 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1652 != sizeof (req), 0))
1654 /* We failed to read data. Note that this also might mean we
1655 failed because we would have blocked. */
1656 if (debug_level > 0)
1657 dbg_log (_("short read while reading request: %s"),
1658 strerror_r (errno, buf, sizeof (buf)));
1659 goto close_and_out;
1662 /* Check whether this is a valid request type. */
1663 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1664 goto close_and_out;
1666 /* The caller's UID is not determined here; -1 is handed to
1667 handle_request. */
1668 uid_t uid = -1;
1669 #ifdef SO_PEERCRED
1670 pid_t pid = 0;
/* The peer's PID is only looked up for the debug message below. */
1672 if (__glibc_unlikely (debug_level > 0))
1674 struct ucred caller;
1675 socklen_t optlen = sizeof (caller);
1677 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1678 pid = caller.pid;
1680 #else
1681 const pid_t pid = 0;
1682 #endif
1684 /* It should not be possible to crash the nscd with a silly
1685 request (i.e., a terribly large key). The key length must not be
negative or larger than MAXKEYLEN. */
1686 if (__builtin_expect (req.key_len, 1) < 0
1687 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1689 if (debug_level > 0)
1690 dbg_log (_("key length in request too long: %d"), req.key_len);
1692 else
1694 /* Get the key. One extra byte is reserved for the terminating NUL
stored below. */
1695 char keybuf[MAXKEYLEN + 1];
1697 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1698 req.key_len))
1699 != req.key_len, 0))
1701 /* Again, this can also mean we would have blocked. */
1702 if (debug_level > 0)
1703 dbg_log (_("short read while reading request key: %s"),
1704 strerror_r (errno, buf, sizeof (buf)));
1705 goto close_and_out;
1707 keybuf[req.key_len] = '\0';
1709 if (__builtin_expect (debug_level, 0) > 0)
1711 #ifdef SO_PEERCRED
1712 if (pid != 0)
1713 dbg_log (_("\
1714 handle_request: request received (Version = %d) from PID %ld"),
1715 req.version, (long int) pid);
1716 else
1717 #endif
1718 dbg_log (_("\
1719 handle_request: request received (Version = %d)"), req.version);
1722 /* Phew, we got all the data, now process it. */
1723 handle_request (fd, &req, keybuf, uid, pid);
1726 close_and_out:
1727 /* We are done. */
1728 close (fd);
1730 /* Re-locking. */
1731 pthread_mutex_lock (&readylist_lock);
1733 /* One more thread available. */
1734 ++nready;
1736 /* NOTREACHED */
1740 static unsigned int nconns;
1742 static void
1743 fd_ready (int fd)
1745 pthread_mutex_lock (&readylist_lock);
1747 /* Find an empty entry in FDLIST. */
1748 size_t inner;
1749 for (inner = 0; inner < nconns; ++inner)
1750 if (fdlist[inner].next == NULL)
1751 break;
1752 assert (inner < nconns);
1754 fdlist[inner].fd = fd;
1756 if (readylist == NULL)
1757 readylist = fdlist[inner].next = &fdlist[inner];
1758 else
1760 fdlist[inner].next = readylist->next;
1761 readylist = readylist->next = &fdlist[inner];
1764 bool do_signal = true;
1765 if (__glibc_unlikely (nready == 0))
1767 ++client_queued;
1768 do_signal = false;
1770 /* Try to start another thread to help out. */
1771 pthread_t th;
1772 if (nthreads < max_nthreads
1773 && pthread_create (&th, &attr, nscd_run_worker,
1774 (void *) (long int) nthreads) == 0)
1776 /* We got another thread. */
1777 ++nthreads;
1778 /* The new thread might need a kick. */
1779 do_signal = true;
1784 pthread_mutex_unlock (&readylist_lock);
1786 /* Tell one of the worker threads there is work to do. */
1787 if (do_signal)
1788 pthread_cond_signal (&readylist_cond);
1792 /* Check whether restarting should happen. */
1793 static bool
1794 restart_p (time_t now)
1796 return (paranoia && readylist == NULL && nready == nthreads
1797 && now >= restart_time);
1801 /* Array for times a connection was accepted. The poll-based main loop
indexes it by slot in its descriptor array, the epoll-based main loop
by descriptor number; the latter uses zero to mean "not waiting". */
1802 static time_t *starttime;
1804 #ifdef HAVE_INOTIFY
1805 /* Inotify event for changed file. The BUF member provides room for
the fixed-size event header followed by a name of up to PATH_MAX
bytes; I gives access to the header of the event stored at the start
of the buffer. */
1806 union __inev
1808 struct inotify_event i;
/* Fall back to 1024 when the system headers do not define PATH_MAX. */
1809 # ifndef PATH_MAX
1810 # define PATH_MAX 1024
1811 # endif
1812 char buf[sizeof (struct inotify_event) + PATH_MAX];
1815 /* Returns 0 if the file is there otherwise -1. */
1817 check_file (struct traced_file *finfo)
1819 struct stat64 st;
1820 /* We could check mtime and if different re-add
1821 the watches, and invalidate the database, but we
1822 don't because we are called from inotify_check_files
1823 which should be doing that work. If sufficient inotify
1824 events were lost then the next pruning or invalidation
1825 will do the stat and mtime check. We don't do it here to
1826 keep the logic simple. */
1827 if (stat64 (finfo->fname, &st) < 0)
1828 return -1;
1829 return 0;
1832 /* Process the inotify event in INEV. If the event matches any of the files
1833 registered with a database then mark that database as requiring its cache
1834 to be cleared. We indicate the cache needs clearing by setting
1835 TO_CLEAR[DBCNT] to true for the matching database. */
1836 static void
1837 inotify_check_files (bool *to_clear, union __inev *inev)
1839 /* Check which of the files changed. */
1840 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1842 struct traced_file *finfo = dbs[dbcnt].traced_files;
1844 while (finfo != NULL)
1846 /* The configuration file was moved or deleted.
1847 We stop watching it at that point, and reinitialize. */
1848 if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
1849 && ((inev->i.mask & IN_MOVE_SELF)
1850 || (inev->i.mask & IN_DELETE_SELF)
1851 || (inev->i.mask & IN_IGNORED)))
1853 int ret;
1854 bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
1856 if (check_file (finfo) == 0)
1858 dbg_log (_("ignored inotify event for `%s` (file exists)"),
1859 finfo->fname);
1860 return;
1863 dbg_log (_("monitored file `%s` was %s, removing watch"),
1864 finfo->fname, moved ? "moved" : "deleted");
1865 /* File was moved out, remove the watch. Watches are
1866 automatically removed when the file is deleted. */
1867 if (moved)
1869 ret = inotify_rm_watch (inotify_fd, inev->i.wd);
1870 if (ret < 0)
1871 dbg_log (_("failed to remove file watch `%s`: %s"),
1872 finfo->fname, strerror (errno));
1874 finfo->inotify_descr[TRACED_FILE] = -1;
1875 to_clear[dbcnt] = true;
1876 if (finfo->call_res_init)
1877 res_init ();
1878 return;
1880 /* The configuration file was open for writing and has just closed.
1881 We reset the cache and reinitialize. */
1882 if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
1883 && inev->i.mask & IN_CLOSE_WRITE)
1885 /* Mark cache as needing to be cleared and reinitialize. */
1886 dbg_log (_("monitored file `%s` was written to"), finfo->fname);
1887 to_clear[dbcnt] = true;
1888 if (finfo->call_res_init)
1889 res_init ();
1890 return;
1892 /* The parent directory was moved or deleted. We trigger one last
1893 invalidation. At the next pruning or invalidation we may add
1894 this watch back if the file is present again. */
1895 if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
1896 && ((inev->i.mask & IN_DELETE_SELF)
1897 || (inev->i.mask & IN_MOVE_SELF)
1898 || (inev->i.mask & IN_IGNORED)))
1900 bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
1901 /* The directory watch may have already been removed
1902 but we don't know so we just remove it again and
1903 ignore the error. Then we remove the file watch.
1904 Note: watches are automatically removed for deleted
1905 files. */
1906 if (moved)
1907 inotify_rm_watch (inotify_fd, inev->i.wd);
1908 if (finfo->inotify_descr[TRACED_FILE] != -1)
1910 dbg_log (_("monitored parent directory `%s` was %s, removing watch on `%s`"),
1911 finfo->dname, moved ? "moved" : "deleted", finfo->fname);
1912 if (inotify_rm_watch (inotify_fd, finfo->inotify_descr[TRACED_FILE]) < 0)
1913 dbg_log (_("failed to remove file watch `%s`: %s"),
1914 finfo->dname, strerror (errno));
1916 finfo->inotify_descr[TRACED_FILE] = -1;
1917 finfo->inotify_descr[TRACED_DIR] = -1;
1918 to_clear[dbcnt] = true;
1919 if (finfo->call_res_init)
1920 res_init ();
1921 /* Continue to the next entry since this might be the
1922 parent directory for multiple registered files and
1923 we want to remove watches for all registered files. */
1924 continue;
1926 /* The parent directory had a create or moved to event. */
1927 if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
1928 && ((inev->i.mask & IN_MOVED_TO)
1929 || (inev->i.mask & IN_CREATE))
1930 && strcmp (inev->i.name, finfo->sfname) == 0)
1932 /* We detected a directory change. We look for the creation
1933 of the file we are tracking or the move of the same file
1934 into the directory. */
1935 int ret;
1936 dbg_log (_("monitored file `%s` was %s, adding watch"),
1937 finfo->fname,
1938 inev->i.mask & IN_CREATE ? "created" : "moved into place");
1939 /* File was moved in or created. Regenerate the watch. */
1940 if (finfo->inotify_descr[TRACED_FILE] != -1)
1941 inotify_rm_watch (inotify_fd,
1942 finfo->inotify_descr[TRACED_FILE]);
1944 ret = inotify_add_watch (inotify_fd,
1945 finfo->fname,
1946 TRACED_FILE_MASK);
1947 if (ret < 0)
1948 dbg_log (_("failed to add file watch `%s`: %s"),
1949 finfo->fname, strerror (errno));
1951 finfo->inotify_descr[TRACED_FILE] = ret;
1953 /* The file is new or moved so mark cache as needing to
1954 be cleared and reinitialize. */
1955 to_clear[dbcnt] = true;
1956 if (finfo->call_res_init)
1957 res_init ();
1959 /* Done re-adding the watch. Don't return, we may still
1960 have other files in this same directory, same watch
1961 descriptor, and need to process them. */
1963 /* Other events are ignored, and we move on to the next file. */
1964 finfo = finfo->next;
1969 /* If an entry in the array of booleans TO_CLEAR is TRUE then clear the cache
1970 for the associated database, otherwise do nothing. The TO_CLEAR array must
1971 have LASTDB entries. */
1972 static inline void
1973 clear_db_cache (bool *to_clear)
1975 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1976 if (to_clear[dbcnt])
1978 pthread_mutex_lock (&dbs[dbcnt].prune_lock);
1979 dbs[dbcnt].clear_cache = 1;
1980 pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
1981 pthread_cond_signal (&dbs[dbcnt].prune_cond);
1986 handle_inotify_events (void)
1988 bool to_clear[lastdb] = { false, };
1989 union __inev inev;
1991 /* Read all inotify events for files registered via
1992 register_traced_file(). */
1993 while (1)
1995 /* Potentially read multiple events into buf. */
1996 ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd,
1997 &inev.buf,
1998 sizeof (inev)));
1999 if (nb < (ssize_t) sizeof (struct inotify_event))
2001 /* Not even 1 event. */
2002 if (__glibc_unlikely (nb == -1 && errno != EAGAIN))
2003 return -1;
2004 /* Done reading events that are ready. */
2005 break;
2007 /* Process all events. The normal inotify interface delivers
2008 complete events on a read and never a partial event. */
2009 char *eptr = &inev.buf[0];
2010 ssize_t count;
2011 while (1)
2013 /* Check which of the files changed. */
2014 inotify_check_files (to_clear, &inev);
2015 count = sizeof (struct inotify_event) + inev.i.len;
2016 eptr += count;
2017 nb -= count;
2018 if (nb >= (ssize_t) sizeof (struct inotify_event))
2019 memcpy (&inev, eptr, nb);
2020 else
2021 break;
2023 continue;
2025 /* Actually perform the cache clearing. */
2026 clear_db_cache (to_clear);
2027 return 0;
2030 #endif
/* Main loop of the initial thread based on poll. It never returns.
Slot 0 of the CONNS array is the listening socket; the inotify and
netlink descriptors, when in use, occupy the slots directly after
it, and the remaining slots hold accepted connections which have not
sent a request yet. NUSED is one more than the highest slot in use
and FIRSTFREE the lowest slot a new connection is stored in. */
2032 static void
2033 __attribute__ ((__noreturn__))
2034 main_loop_poll (void)
2036 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
2037 * sizeof (conns[0]));
2039 conns[0].fd = sock;
2040 conns[0].events = POLLRDNORM;
2041 size_t nused = 1;
2042 size_t firstfree = 1;
2044 #ifdef HAVE_INOTIFY
2045 if (inotify_fd != -1)
2047 conns[1].fd = inotify_fd;
2048 conns[1].events = POLLRDNORM;
2049 nused = 2;
2050 firstfree = 2;
2052 #endif
2054 #ifdef HAVE_NETLINK
/* Zero means that no slot is used for the netlink descriptor. */
2055 size_t idx_nl_status_fd = 0;
2056 if (nl_status_fd != -1)
2058 idx_nl_status_fd = nused;
2059 conns[nused].fd = nl_status_fd;
2060 conns[nused].events = POLLRDNORM;
2061 ++nused;
2062 firstfree = nused;
2064 #endif
2066 while (1)
2068 /* Wait for any event. We wait at most a couple of seconds so
2069 that we can check whether we should close any of the accepted
2070 connections since we have not received a request. The timeout
shrinks from MAX_ACCEPT_TIMEOUT seconds as more slots are in use. */
2071 #define MAX_ACCEPT_TIMEOUT 30
2072 #define MIN_ACCEPT_TIMEOUT 5
2073 #define MAIN_THREAD_TIMEOUT \
2074 (MAX_ACCEPT_TIMEOUT * 1000 \
2075 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
2077 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
2079 time_t now = time (NULL);
2081 /* If there is a descriptor ready for reading or there is a new
2082 connection, process this now. */
2083 if (n > 0)
2085 if (conns[0].revents != 0)
2087 /* We have a new incoming connection. Accept the connection. */
2088 int fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
2089 SOCK_NONBLOCK));
2091 /* Use the descriptor if we have not reached the limit. */
2092 if (fd >= 0)
2094 if (firstfree < nconns)
2096 conns[firstfree].fd = fd;
2097 conns[firstfree].events = POLLRDNORM;
2098 starttime[firstfree] = now;
2099 if (firstfree >= nused)
2100 nused = firstfree + 1;
/* Advance FIRSTFREE to the next slot not holding a descriptor. */
2103 ++firstfree;
2104 while (firstfree < nused && conns[firstfree].fd != -1);
2106 else
2107 /* We cannot use the connection so close it. */
2108 close (fd);
2111 --n;
/* FIRST is the first slot examined for client requests below. */
2114 size_t first = 1;
2115 #ifdef HAVE_INOTIFY
2116 if (inotify_fd != -1 && conns[1].fd == inotify_fd)
2118 if (conns[1].revents != 0)
2120 int ret;
2121 ret = handle_inotify_events ();
2122 if (ret == -1)
2124 /* Something went wrong when reading the inotify
2125 data. Better disable inotify. Slot 1 becomes available
for client connections. */
2126 dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
2127 conns[1].fd = -1;
2128 firstfree = 1;
2129 if (nused == 2)
2130 nused = 1;
2131 close (inotify_fd);
2132 inotify_fd = -1;
2134 --n;
2137 first = 2;
2139 #endif
2141 #ifdef HAVE_NETLINK
2142 if (idx_nl_status_fd != 0 && conns[idx_nl_status_fd].revents != 0)
2144 char buf[4096];
2145 /* Read all the data. We do not interpret it here. */
2146 while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2147 sizeof (buf))) != -1)
2150 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
2151 = __bump_nl_timestamp ();
2153 #endif
/* Hand every remaining ready descriptor to the worker threads.
NOTE(review): FIRST does not skip the slot recorded in
idx_nl_status_fd, so it looks as if a ready netlink descriptor would
also be passed to fd_ready and have its slot cleared here -- confirm. */
2155 for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
2156 if (conns[cnt].revents != 0)
2158 fd_ready (conns[cnt].fd);
2160 /* Clean up the CONNS array. */
2161 conns[cnt].fd = -1;
2162 if (cnt < firstfree)
2163 firstfree = cnt;
2164 if (cnt == nused - 1)
/* The last slot was freed: shrink NUSED past all trailing free slots. */
2166 --nused;
2167 while (conns[nused - 1].fd == -1);
2169 --n;
2173 /* Now find entries which have timed out. */
2174 assert (nused > 0);
2176 /* We make the timeout length depend on the number of file
2177 descriptors currently used. */
2178 #define ACCEPT_TIMEOUT \
2179 (MAX_ACCEPT_TIMEOUT \
2180 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
2181 time_t laststart = now - ACCEPT_TIMEOUT;
/* Walk the slots from the end; slot 0 is never examined. */
2183 for (size_t cnt = nused - 1; cnt > 0; --cnt)
2185 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
2187 /* Remove the entry, it timed out. */
2188 (void) close (conns[cnt].fd);
2189 conns[cnt].fd = -1;
2191 if (cnt < firstfree)
2192 firstfree = cnt;
2193 if (cnt == nused - 1)
2195 --nused;
2196 while (conns[nused - 1].fd == -1);
2200 if (restart_p (now))
2201 restart ();
2206 #ifdef HAVE_EPOLL
/* Main loop of the initial thread based on epoll, using the epoll
descriptor EFD. The function returns only if one of the initial
descriptors cannot be registered with EFD; otherwise it loops
forever. The STARTTIME array is indexed by descriptor number here
and HIGHEST tracks the largest descriptor of an accepted connection
still waiting for its request. The timeout macros used below are
the ones defined in main_loop_poll. */
2207 static void
2208 main_loop_epoll (int efd)
2210 struct epoll_event ev = { 0, };
2211 int nused = 1;
2212 size_t highest = 0;
2214 /* Add the socket. */
2215 ev.events = EPOLLRDNORM;
2216 ev.data.fd = sock;
2217 if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
2218 /* We cannot use epoll. */
2219 return;
2221 # ifdef HAVE_INOTIFY
2222 if (inotify_fd != -1)
2224 ev.events = EPOLLRDNORM;
2225 ev.data.fd = inotify_fd;
2226 if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
2227 /* We cannot use epoll. */
2228 return;
2229 nused = 2;
2231 # endif
2233 # ifdef HAVE_NETLINK
2234 if (nl_status_fd != -1)
2236 ev.events = EPOLLRDNORM;
2237 ev.data.fd = nl_status_fd;
2238 if (epoll_ctl (efd, EPOLL_CTL_ADD, nl_status_fd, &ev) == -1)
2239 /* We cannot use epoll. */
2240 return;
2242 # endif
2244 while (1)
2246 struct epoll_event revs[100];
2247 # define nrevs (sizeof (revs) / sizeof (revs[0]))
2249 int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);
2251 time_t now = time (NULL);
2253 for (int cnt = 0; cnt < n; ++cnt)
2254 if (revs[cnt].data.fd == sock)
2256 /* A new connection. */
2257 int fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
2258 SOCK_NONBLOCK));
2260 /* Use the descriptor if we have not reached the limit. */
2261 if (fd >= 0)
2263 /* Try to add the new descriptor. EV.events still holds
EPOLLRDNORM from the setup above. */
2264 ev.data.fd = fd;
2265 if (fd >= nconns
2266 || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
2267 /* The descriptor is too large or something went
2268 wrong. Close the descriptor. */
2269 close (fd);
2270 else
2272 /* Remember when we accepted the connection. */
2273 starttime[fd] = now;
2275 if (fd > highest)
2276 highest = fd;
2278 ++nused;
2282 # ifdef HAVE_INOTIFY
2283 else if (revs[cnt].data.fd == inotify_fd)
2285 int ret;
2286 ret = handle_inotify_events ();
2287 if (ret == -1)
2289 /* Something went wrong when reading the inotify
2290 data. Better disable inotify. Note that the break below
leaves the loop over the events returned by epoll_wait. */
2291 dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
2292 (void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd, NULL);
2293 close (inotify_fd);
2294 inotify_fd = -1;
2295 break;
2298 # endif
2299 # ifdef HAVE_NETLINK
2300 else if (revs[cnt].data.fd == nl_status_fd)
2302 char buf[4096];
2303 /* Read all the data. We do not interpret it here. */
2304 while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2305 sizeof (buf))) != -1)
2308 __bump_nl_timestamp ();
2310 # endif
2311 else
2313 /* Remove the descriptor from the epoll descriptor. */
2314 (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);
2316 /* Get a worker to handle the request. */
2317 fd_ready (revs[cnt].data.fd);
2319 /* Reset the time. Zero marks the descriptor as no longer waiting. */
2320 starttime[revs[cnt].data.fd] = 0;
2321 if (revs[cnt].data.fd == highest)
/* Lower HIGHEST to the next descriptor which is still waiting. */
2323 --highest;
2324 while (highest > 0 && starttime[highest] == 0);
2326 --nused;
2329 /* Now look for descriptors for accepted connections which have
2330 no reply in too long of a time. */
2331 time_t laststart = now - ACCEPT_TIMEOUT;
2332 assert (starttime[sock] == 0);
2333 # ifdef HAVE_INOTIFY
2334 assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
2335 # endif
/* NOTE(review): unlike the other uses of nl_status_fd in this
function, this one is not guarded by HAVE_NETLINK -- confirm the
variable is declared unconditionally. */
2336 assert (nl_status_fd == -1 || starttime[nl_status_fd] == 0);
/* The standard descriptors (up to STDERR_FILENO) are never examined. */
2337 for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
2338 if (starttime[cnt] != 0 && starttime[cnt] < laststart)
2340 /* We are waiting for this one for too long. Close it. */
2341 (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);
2343 (void) close (cnt);
2345 starttime[cnt] = 0;
2346 if (cnt == highest)
2347 --highest;
2349 else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
2350 --highest;
2352 if (restart_p (now))
2353 restart ();
2356 #endif
2359 /* Start all the threads we want. The initial process is thread no. 1. */
2360 void
2361 start_threads (void)
2363 /* Initialize the conditional variable we will use. The only
2364 non-standard attribute we might use is the clock selection. */
2365 pthread_condattr_t condattr;
2366 pthread_condattr_init (&condattr);
2368 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2369 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2370 /* Determine whether the monotonic clock is available. */
2371 struct timespec dummy;
2372 # if _POSIX_MONOTONIC_CLOCK == 0
2373 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
2374 # endif
2375 # if _POSIX_CLOCK_SELECTION == 0
2376 if (sysconf (_SC_CLOCK_SELECTION) > 0)
2377 # endif
2378 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
2379 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
2380 timeout_clock = CLOCK_MONOTONIC;
2381 #endif
2383 /* Create the attribute for the threads. They are all created
2384 detached. */
2385 pthread_attr_init (&attr);
2386 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
2387 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2388 pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
2390 /* We allow less than LASTDB threads only for debugging. */
2391 if (debug_level == 0)
2392 nthreads = MAX (nthreads, lastdb);
2394 /* Create the threads which prune the databases. */
2395 // XXX Ideally this work would be done by some of the worker threads.
2396 // XXX But this is problematic since we would need to be able to wake
2397 // XXX them up explicitly as well as part of the group handling the
2398 // XXX ready-list. This requires an operation where we can wait on
2399 // XXX two conditional variables at the same time. This operation
2400 // XXX does not exist (yet).
2401 for (long int i = 0; i < lastdb; ++i)
2403 /* Initialize the conditional variable. */
2404 if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
2406 dbg_log (_("could not initialize conditional variable"));
2407 do_exit (1, 0, NULL);
2410 pthread_t th;
2411 if (dbs[i].enabled
2412 && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
2414 dbg_log (_("could not start clean-up thread; terminating"));
2415 do_exit (1, 0, NULL);
2419 pthread_condattr_destroy (&condattr);
2421 for (long int i = 0; i < nthreads; ++i)
2423 pthread_t th;
2424 if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
2426 if (i == 0)
2428 dbg_log (_("could not start any worker thread; terminating"));
2429 do_exit (1, 0, NULL);
2432 break;
2436 /* Now it is safe to let the parent know that we're doing fine and it can
2437 exit. */
2438 notify_parent (0);
2440 /* Determine how much room for descriptors we should initially
2441 allocate. This might need to change later if we cap the number
2442 with MAXCONN. */
2443 const long int nfds = sysconf (_SC_OPEN_MAX);
2444 #define MINCONN 32
2445 #define MAXCONN 16384
2446 if (nfds == -1 || nfds > MAXCONN)
2447 nconns = MAXCONN;
2448 else if (nfds < MINCONN)
2449 nconns = MINCONN;
2450 else
2451 nconns = nfds;
2453 /* We need memory to pass descriptors on to the worker threads. */
2454 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
2455 /* Array to keep track when connection was accepted. */
2456 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
2458 /* In the main thread we execute the loop which handles incoming
2459 connections. */
2460 #ifdef HAVE_EPOLL
2461 int efd = epoll_create (100);
2462 if (efd != -1)
2464 main_loop_epoll (efd);
2465 close (efd);
2467 #endif
2469 main_loop_poll ();
2473 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2474 this function is called, we are not listening on the nscd socket yet so
2475 we can just use the ordinary lookup functions without causing a lockup */
2476 static void
2477 begin_drop_privileges (void)
2479 struct passwd *pwd = getpwnam (server_user);
2481 if (pwd == NULL)
2483 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2484 do_exit (EXIT_FAILURE, 0,
2485 _("Failed to run nscd as user '%s'"), server_user);
2488 server_uid = pwd->pw_uid;
2489 server_gid = pwd->pw_gid;
2491 /* Save the old UID/GID if we have to change back. */
2492 if (paranoia)
2494 old_uid = getuid ();
2495 old_gid = getgid ();
2498 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
2500 /* This really must never happen. */
2501 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2502 do_exit (EXIT_FAILURE, errno,
2503 _("initial getgrouplist failed"));
2506 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
2508 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
2509 == -1)
2511 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2512 do_exit (EXIT_FAILURE, errno, _("getgrouplist failed"));
2517 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2518 run nscd as the user specified in the configuration file. */
2519 static void
2520 finish_drop_privileges (void)
2522 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2523 /* We need to preserve the capabilities to connect to the audit daemon. */
2524 cap_t new_caps = preserve_capabilities ();
2525 #endif
2527 if (setgroups (server_ngroups, server_groups) == -1)
2529 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2530 do_exit (EXIT_FAILURE, errno, _("setgroups failed"));
2533 int res;
2534 if (paranoia)
2535 res = setresgid (server_gid, server_gid, old_gid);
2536 else
2537 res = setgid (server_gid);
2538 if (res == -1)
2540 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2541 do_exit (4, errno, "setgid");
2544 if (paranoia)
2545 res = setresuid (server_uid, server_uid, old_uid);
2546 else
2547 res = setuid (server_uid);
2548 if (res == -1)
2550 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2551 do_exit (4, errno, "setuid");
2554 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2555 /* Remove the temporary capabilities. */
2556 install_real_capabilities (new_caps);
2557 #endif