Avoid deadlock in malloc on backtrace (BZ #16159)
[glibc.git] / nscd / connections.c
blobcba5e6ad9d19ee68de7618154a2a08689fd2a50f
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2015 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>. */
19 #include <alloca.h>
20 #include <assert.h>
21 #include <atomic.h>
22 #include <error.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <grp.h>
26 #include <ifaddrs.h>
27 #include <libintl.h>
28 #include <pthread.h>
29 #include <pwd.h>
30 #include <resolv.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <stdint.h>
35 #include <arpa/inet.h>
36 #ifdef HAVE_NETLINK
37 # include <linux/netlink.h>
38 # include <linux/rtnetlink.h>
39 #endif
40 #ifdef HAVE_EPOLL
41 # include <sys/epoll.h>
42 #endif
43 #ifdef HAVE_INOTIFY
44 # include <sys/inotify.h>
45 #endif
46 #include <sys/mman.h>
47 #include <sys/param.h>
48 #include <sys/poll.h>
49 #ifdef HAVE_SENDFILE
50 # include <sys/sendfile.h>
51 #endif
52 #include <sys/socket.h>
53 #include <sys/stat.h>
54 #include <sys/un.h>
56 #include "nscd.h"
57 #include "dbg_log.h"
58 #include "selinux.h"
59 #include <resolv/resolv.h>
61 #include <kernel-features.h>
62 #include <libc-internal.h>
/* Support to run nscd as an unprivileged user.  */

/* Account names.  Both are NULL until something assigns them; nscd_init
   only performs the privilege drop when server_user is non-NULL.  */
const char *server_user;
const char *stat_user;

/* Numeric IDs and supplementary groups that go with the names above.
   NOTE(review): presumably filled in by begin_drop_privileges — confirm.  */
static uid_t server_uid;
static gid_t server_gid;
uid_t stat_uid;
static gid_t *server_groups;
static int server_ngroups;

/* Fallback for systems whose headers do not provide NGROUPS.  */
#ifndef NGROUPS
# define NGROUPS 32
#endif

/* NOTE(review): thread attributes, presumably shared by the worker
   threads created later in this file — confirm.  */
static pthread_attr_t attr;

/* The privilege drop is split in two halves; nscd_init calls the first
   before it sets up the databases and socket and the second afterwards.  */
static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
82 /* Map request type to a string. */
83 const char *const serv2str[LASTREQ] =
85 [GETPWBYNAME] = "GETPWBYNAME",
86 [GETPWBYUID] = "GETPWBYUID",
87 [GETGRBYNAME] = "GETGRBYNAME",
88 [GETGRBYGID] = "GETGRBYGID",
89 [GETHOSTBYNAME] = "GETHOSTBYNAME",
90 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
91 [GETHOSTBYADDR] = "GETHOSTBYADDR",
92 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
93 [SHUTDOWN] = "SHUTDOWN",
94 [GETSTAT] = "GETSTAT",
95 [INVALIDATE] = "INVALIDATE",
96 [GETFDPW] = "GETFDPW",
97 [GETFDGR] = "GETFDGR",
98 [GETFDHST] = "GETFDHST",
99 [GETAI] = "GETAI",
100 [INITGROUPS] = "INITGROUPS",
101 [GETSERVBYNAME] = "GETSERVBYNAME",
102 [GETSERVBYPORT] = "GETSERVBYPORT",
103 [GETFDSERV] = "GETFDSERV",
104 [GETNETGRENT] = "GETNETGRENT",
105 [INNETGR] = "INNETGR",
106 [GETFDNETGR] = "GETFDNETGR"
109 /* The control data structures for the services. */
110 struct database_dyn dbs[lastdb] =
112 [pwddb] = {
113 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
114 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
115 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
116 .enabled = 0,
117 .check_file = 1,
118 .persistent = 0,
119 .propagate = 1,
120 .shared = 0,
121 .max_db_size = DEFAULT_MAX_DB_SIZE,
122 .suggested_module = DEFAULT_SUGGESTED_MODULE,
123 .db_filename = _PATH_NSCD_PASSWD_DB,
124 .disabled_iov = &pwd_iov_disabled,
125 .postimeout = 3600,
126 .negtimeout = 20,
127 .wr_fd = -1,
128 .ro_fd = -1,
129 .mmap_used = false
131 [grpdb] = {
132 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
133 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
134 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
135 .enabled = 0,
136 .check_file = 1,
137 .persistent = 0,
138 .propagate = 1,
139 .shared = 0,
140 .max_db_size = DEFAULT_MAX_DB_SIZE,
141 .suggested_module = DEFAULT_SUGGESTED_MODULE,
142 .db_filename = _PATH_NSCD_GROUP_DB,
143 .disabled_iov = &grp_iov_disabled,
144 .postimeout = 3600,
145 .negtimeout = 60,
146 .wr_fd = -1,
147 .ro_fd = -1,
148 .mmap_used = false
150 [hstdb] = {
151 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
152 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
153 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
154 .enabled = 0,
155 .check_file = 1,
156 .persistent = 0,
157 .propagate = 0, /* Not used. */
158 .shared = 0,
159 .max_db_size = DEFAULT_MAX_DB_SIZE,
160 .suggested_module = DEFAULT_SUGGESTED_MODULE,
161 .db_filename = _PATH_NSCD_HOSTS_DB,
162 .disabled_iov = &hst_iov_disabled,
163 .postimeout = 3600,
164 .negtimeout = 20,
165 .wr_fd = -1,
166 .ro_fd = -1,
167 .mmap_used = false
169 [servdb] = {
170 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
171 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
172 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
173 .enabled = 0,
174 .check_file = 1,
175 .persistent = 0,
176 .propagate = 0, /* Not used. */
177 .shared = 0,
178 .max_db_size = DEFAULT_MAX_DB_SIZE,
179 .suggested_module = DEFAULT_SUGGESTED_MODULE,
180 .db_filename = _PATH_NSCD_SERVICES_DB,
181 .disabled_iov = &serv_iov_disabled,
182 .postimeout = 28800,
183 .negtimeout = 20,
184 .wr_fd = -1,
185 .ro_fd = -1,
186 .mmap_used = false
188 [netgrdb] = {
189 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
190 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
191 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
192 .enabled = 0,
193 .check_file = 1,
194 .persistent = 0,
195 .propagate = 0, /* Not used. */
196 .shared = 0,
197 .max_db_size = DEFAULT_MAX_DB_SIZE,
198 .suggested_module = DEFAULT_SUGGESTED_MODULE,
199 .db_filename = _PATH_NSCD_NETGROUP_DB,
200 .disabled_iov = &netgroup_iov_disabled,
201 .postimeout = 28800,
202 .negtimeout = 20,
203 .wr_fd = -1,
204 .ro_fd = -1,
205 .mmap_used = false
210 /* Mapping of request type to database. */
211 static struct
213 bool data_request;
214 struct database_dyn *db;
215 } const reqinfo[LASTREQ] =
217 [GETPWBYNAME] = { true, &dbs[pwddb] },
218 [GETPWBYUID] = { true, &dbs[pwddb] },
219 [GETGRBYNAME] = { true, &dbs[grpdb] },
220 [GETGRBYGID] = { true, &dbs[grpdb] },
221 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
222 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
223 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
224 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
225 [SHUTDOWN] = { false, NULL },
226 [GETSTAT] = { false, NULL },
227 [SHUTDOWN] = { false, NULL },
228 [GETFDPW] = { false, &dbs[pwddb] },
229 [GETFDGR] = { false, &dbs[grpdb] },
230 [GETFDHST] = { false, &dbs[hstdb] },
231 [GETAI] = { true, &dbs[hstdb] },
232 [INITGROUPS] = { true, &dbs[grpdb] },
233 [GETSERVBYNAME] = { true, &dbs[servdb] },
234 [GETSERVBYPORT] = { true, &dbs[servdb] },
235 [GETFDSERV] = { false, &dbs[servdb] },
236 [GETNETGRENT] = { true, &dbs[netgrdb] },
237 [INNETGR] = { true, &dbs[netgrdb] },
238 [GETFDNETGR] = { false, &dbs[netgrdb] }
/* Initial number of threads to use.  -1 means "not configured";
   nscd_init then substitutes a default.  */
int nthreads = -1;

/* Maximum number of threads to use.  */
int max_nthreads = 32;

/* Socket for incoming connections.  */
static int sock;

#ifdef HAVE_INOTIFY
/* Inotify descriptor.  */
int inotify_fd = -1;
#endif

#ifdef HAVE_NETLINK
/* Descriptor for netlink status updates.  */
static int nl_status_fd = -1;
#endif

#ifndef __ASSUME_SOCK_CLOEXEC
/* Negative if SOCK_CLOEXEC is not supported, positive if it is, zero
   before we know the result.  */
static int have_sock_cloexec;
#endif

#ifndef __ASSUME_ACCEPT4
/* NOTE(review): presumably the same tri-state convention as
   have_sock_cloexec, for accept4 — confirm against its users.  */
static int have_accept4;
#endif

/* Number of times clients had to wait.  */
unsigned long int client_queued;
/* Send all LEN bytes of BUF on socket FD, continuing after partial sends
   and restarting sends that were interrupted by a signal (EINTR).
   MSG_NOSIGNAL is passed to every send call.

   Returns the negative result of send if a send fails, otherwise the
   number of bytes actually sent; that is less than LEN only if send
   returned zero before everything was written.  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cursor = buf;
  size_t pending = len;
  ssize_t sent;

  for (;;)
    {
      /* Retry for as long as the call is merely interrupted.  */
      do
        sent = send (fd, cursor, pending, MSG_NOSIGNAL);
      while (sent == -1 && errno == EINTR);

      if (sent <= 0)
        break;

      cursor += sent;
      pending -= sent;
      if (pending == 0)
        break;
    }

  if (sent < 0)
    return sent;
  return len - pending;
}
#ifdef HAVE_SENDFILE
/* Copy LEN bytes from FROMFD, starting at offset OFF, to TOFD with
   sendfile.  Partial transfers are continued (sendfile advances OFF
   itself) and calls interrupted by a signal are restarted.

   Returns the negative result of sendfile on failure, otherwise the
   number of bytes transferred, which is short only if sendfile
   returned zero early.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t left = len;
  ssize_t rc;

  for (;;)
    {
      /* Retry for as long as the call is merely interrupted.  */
      do
        rc = sendfile (tofd, fromfd, &off, left);
      while (rc == -1 && errno == EINTR);

      if (rc <= 0)
        break;

      left -= rc;
      if (left <= 0)
        break;
    }

  if (rc < 0)
    return rc;
  return len - left;
}
#endif
/* Markers stored in the per-byte usage map built while a persistent
   database is verified.  The low bits name the kind of object occupying
   a byte; the higher bits flag the first and last byte of an object.  */
enum usekey
  {
    use_not = 0,

    /* The following three are not really used, they are symbolic
       constants that get OR-ed into the kinds below.  */
    use_first = 1 << 4,
    use_begin = 1 << 5,
    use_end = 1 << 6,

    /* Hash entry and its begin/end variants.  */
    use_he = 1,
    use_he_begin = use_he | use_begin,
    use_he_end = use_he | use_end,

    /* Data record and its begin/end variants.  */
    use_data = 3,
    use_data_begin = use_data | use_begin,
    use_data_end = use_data | use_end,
    use_data_first = use_data_begin | use_first
  };
329 static int
330 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
331 enum usekey use, ref_t start, size_t len)
333 assert (len >= 2);
335 if (start > first_free || start + len > first_free
336 || (start & BLOCK_ALIGN_M1))
337 return 0;
339 if (usemap[start] == use_not)
341 /* Add the start marker. */
342 usemap[start] = use | use_begin;
343 use &= ~use_first;
345 while (--len > 0)
346 if (usemap[++start] != use_not)
347 return 0;
348 else
349 usemap[start] = use;
351 /* Add the end marker. */
352 usemap[start] = use | use_end;
354 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
356 /* Hash entries can't be shared. */
357 if (use == use_he)
358 return 0;
360 usemap[start] |= (use & use_first);
361 use &= ~use_first;
363 while (--len > 1)
364 if (usemap[++start] != use)
365 return 0;
367 if (usemap[++start] != (use | use_end))
368 return 0;
370 else
371 /* Points to a wrong object or somewhere in the middle. */
372 return 0;
374 return 1;
378 /* Verify data in persistent database. */
379 static int
380 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
382 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb
383 || dbnr == netgrdb);
385 time_t now = time (NULL);
387 struct database_pers_head *head = mem;
388 struct database_pers_head head_copy = *head;
390 /* Check that the header that was read matches the head in the database. */
391 if (memcmp (head, readhead, sizeof (*head)) != 0)
392 return 0;
394 /* First some easy tests: make sure the database header is sane. */
395 if (head->version != DB_VERSION
396 || head->header_size != sizeof (*head)
397 /* We allow a timestamp to be one hour ahead of the current time.
398 This should cover daylight saving time changes. */
399 || head->timestamp > now + 60 * 60 + 60
400 || (head->gc_cycle & 1)
401 || head->module == 0
402 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
403 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
404 || head->first_free < 0
405 || head->first_free > head->data_size
406 || (head->first_free & BLOCK_ALIGN_M1) != 0
407 || head->maxnentries < 0
408 || head->maxnsearched < 0)
409 return 0;
411 uint8_t *usemap = calloc (head->first_free, 1);
412 if (usemap == NULL)
413 return 0;
415 const char *data = (char *) &head->array[roundup (head->module,
416 ALIGN / sizeof (ref_t))];
418 nscd_ssize_t he_cnt = 0;
419 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
421 ref_t trail = head->array[cnt];
422 ref_t work = trail;
423 int tick = 0;
425 while (work != ENDREF)
427 if (! check_use (data, head->first_free, usemap, use_he, work,
428 sizeof (struct hashentry)))
429 goto fail;
431 /* Now we know we can dereference the record. */
432 struct hashentry *here = (struct hashentry *) (data + work);
434 ++he_cnt;
436 /* Make sure the record is for this type of service. */
437 if (here->type >= LASTREQ
438 || reqinfo[here->type].db != &dbs[dbnr])
439 goto fail;
441 /* Validate boolean field value. */
442 if (here->first != false && here->first != true)
443 goto fail;
445 if (here->len < 0)
446 goto fail;
448 /* Now the data. */
449 if (here->packet < 0
450 || here->packet > head->first_free
451 || here->packet + sizeof (struct datahead) > head->first_free)
452 goto fail;
454 struct datahead *dh = (struct datahead *) (data + here->packet);
456 if (! check_use (data, head->first_free, usemap,
457 use_data | (here->first ? use_first : 0),
458 here->packet, dh->allocsize))
459 goto fail;
461 if (dh->allocsize < sizeof (struct datahead)
462 || dh->recsize > dh->allocsize
463 || (dh->notfound != false && dh->notfound != true)
464 || (dh->usable != false && dh->usable != true))
465 goto fail;
467 if (here->key < here->packet + sizeof (struct datahead)
468 || here->key > here->packet + dh->allocsize
469 || here->key + here->len > here->packet + dh->allocsize)
470 goto fail;
472 work = here->next;
474 if (work == trail)
475 /* A circular list, this must not happen. */
476 goto fail;
477 if (tick)
478 trail = ((struct hashentry *) (data + trail))->next;
479 tick = 1 - tick;
483 if (he_cnt != head->nentries)
484 goto fail;
486 /* See if all data and keys had at least one reference from
487 he->first == true hashentry. */
488 for (ref_t idx = 0; idx < head->first_free; ++idx)
490 if (usemap[idx] == use_data_begin)
491 goto fail;
494 /* Finally, make sure the database hasn't changed since the first test. */
495 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
496 goto fail;
498 free (usemap);
499 return 1;
501 fail:
502 free (usemap);
503 return 0;
/* Extra open flags: request close-on-exec at open time where the
   headers offer O_CLOEXEC, nothing otherwise.  */
#ifndef O_CLOEXEC
# define EXTRA_O_FLAGS 0
#else
# define EXTRA_O_FLAGS O_CLOEXEC
#endif
514 /* Initialize database information structures. */
515 void
516 nscd_init (void)
518 /* Look up unprivileged uid/gid/groups before we start listening on the
519 socket */
520 if (server_user != NULL)
521 begin_drop_privileges ();
523 if (nthreads == -1)
524 /* No configuration for this value, assume a default. */
525 nthreads = 4;
527 for (size_t cnt = 0; cnt < lastdb; ++cnt)
528 if (dbs[cnt].enabled)
530 pthread_rwlock_init (&dbs[cnt].lock, NULL);
531 pthread_mutex_init (&dbs[cnt].memlock, NULL);
533 if (dbs[cnt].persistent)
535 /* Try to open the appropriate file on disk. */
536 int fd = open (dbs[cnt].db_filename, O_RDWR | EXTRA_O_FLAGS);
537 if (fd != -1)
539 char *msg = NULL;
540 struct stat64 st;
541 void *mem;
542 size_t total;
543 struct database_pers_head head;
544 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
545 sizeof (head)));
546 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
548 fail_db_errno:
549 /* The code is single-threaded at this point so
550 using strerror is just fine. */
551 msg = strerror (errno);
552 fail_db:
553 dbg_log (_("invalid persistent database file \"%s\": %s"),
554 dbs[cnt].db_filename, msg);
555 unlink (dbs[cnt].db_filename);
557 else if (head.module == 0 && head.data_size == 0)
559 /* The file has been created, but the head has not
560 been initialized yet. */
561 msg = _("uninitialized header");
562 goto fail_db;
564 else if (head.header_size != (int) sizeof (head))
566 msg = _("header size does not match");
567 goto fail_db;
569 else if ((total = (sizeof (head)
570 + roundup (head.module * sizeof (ref_t),
571 ALIGN)
572 + head.data_size))
573 > st.st_size
574 || total < sizeof (head))
576 msg = _("file size does not match");
577 goto fail_db;
579 /* Note we map with the maximum size allowed for the
580 database. This is likely much larger than the
581 actual file size. This is OK on most OSes since
582 extensions of the underlying file will
583 automatically translate more pages available for
584 memory access. */
585 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
586 PROT_READ | PROT_WRITE,
587 MAP_SHARED, fd, 0))
588 == MAP_FAILED)
589 goto fail_db_errno;
590 else if (!verify_persistent_db (mem, &head, cnt))
592 munmap (mem, total);
593 msg = _("verification failed");
594 goto fail_db;
596 else
598 /* Success. We have the database. */
599 dbs[cnt].head = mem;
600 dbs[cnt].memsize = total;
601 dbs[cnt].data = (char *)
602 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
603 ALIGN / sizeof (ref_t))];
604 dbs[cnt].mmap_used = true;
606 if (dbs[cnt].suggested_module > head.module)
607 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
608 dbnames[cnt]);
610 dbs[cnt].wr_fd = fd;
611 fd = -1;
612 /* We also need a read-only descriptor. */
613 if (dbs[cnt].shared)
615 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
616 O_RDONLY | EXTRA_O_FLAGS);
617 if (dbs[cnt].ro_fd == -1)
618 dbg_log (_("\
619 cannot create read-only descriptor for \"%s\"; no mmap"),
620 dbs[cnt].db_filename);
623 // XXX Shall we test whether the descriptors actually
624 // XXX point to the same file?
627 /* Close the file descriptors in case something went
628 wrong in which case the variable have not been
629 assigned -1. */
630 if (fd != -1)
631 close (fd);
633 else if (errno == EACCES)
634 do_exit (EXIT_FAILURE, 0, _("cannot access '%s'"),
635 dbs[cnt].db_filename);
638 if (dbs[cnt].head == NULL)
640 /* No database loaded. Allocate the data structure,
641 possibly on disk. */
642 struct database_pers_head head;
643 size_t total = (sizeof (head)
644 + roundup (dbs[cnt].suggested_module
645 * sizeof (ref_t), ALIGN)
646 + (dbs[cnt].suggested_module
647 * DEFAULT_DATASIZE_PER_BUCKET));
649 /* Try to create the database. If we do not need a
650 persistent database create a temporary file. */
651 int fd;
652 int ro_fd = -1;
653 if (dbs[cnt].persistent)
655 fd = open (dbs[cnt].db_filename,
656 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | EXTRA_O_FLAGS,
657 S_IRUSR | S_IWUSR);
658 if (fd != -1 && dbs[cnt].shared)
659 ro_fd = open (dbs[cnt].db_filename,
660 O_RDONLY | EXTRA_O_FLAGS);
662 else
664 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
665 fd = mkostemp (fname, EXTRA_O_FLAGS);
667 /* We do not need the file name anymore after we
668 opened another file descriptor in read-only mode. */
669 if (fd != -1)
671 if (dbs[cnt].shared)
672 ro_fd = open (fname, O_RDONLY | EXTRA_O_FLAGS);
674 unlink (fname);
678 if (fd == -1)
680 if (errno == EEXIST)
682 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
683 dbnames[cnt], dbs[cnt].db_filename);
684 do_exit (1, 0, NULL);
687 if (dbs[cnt].persistent)
688 dbg_log (_("cannot create %s; no persistent database used"),
689 dbs[cnt].db_filename);
690 else
691 dbg_log (_("cannot create %s; no sharing possible"),
692 dbs[cnt].db_filename);
694 dbs[cnt].persistent = 0;
695 // XXX remember: no mmap
697 else
699 /* Tell the user if we could not create the read-only
700 descriptor. */
701 if (ro_fd == -1 && dbs[cnt].shared)
702 dbg_log (_("\
703 cannot create read-only descriptor for \"%s\"; no mmap"),
704 dbs[cnt].db_filename);
706 /* Before we create the header, initialize the hash
707 table. That way if we get interrupted while writing
708 the header we can recognize a partially initialized
709 database. */
710 size_t ps = sysconf (_SC_PAGESIZE);
711 char tmpbuf[ps];
712 assert (~ENDREF == 0);
713 memset (tmpbuf, '\xff', ps);
715 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
716 off_t offset = sizeof (head);
718 size_t towrite;
719 if (offset % ps != 0)
721 towrite = MIN (remaining, ps - (offset % ps));
722 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
723 goto write_fail;
724 offset += towrite;
725 remaining -= towrite;
728 while (remaining > ps)
730 if (pwrite (fd, tmpbuf, ps, offset) == -1)
731 goto write_fail;
732 offset += ps;
733 remaining -= ps;
736 if (remaining > 0
737 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
738 goto write_fail;
740 /* Create the header of the file. */
741 struct database_pers_head head =
743 .version = DB_VERSION,
744 .header_size = sizeof (head),
745 .module = dbs[cnt].suggested_module,
746 .data_size = (dbs[cnt].suggested_module
747 * DEFAULT_DATASIZE_PER_BUCKET),
748 .first_free = 0
750 void *mem;
752 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
753 != sizeof (head))
754 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
755 != 0)
756 || (mem = mmap (NULL, dbs[cnt].max_db_size,
757 PROT_READ | PROT_WRITE,
758 MAP_SHARED, fd, 0)) == MAP_FAILED)
760 write_fail:
761 unlink (dbs[cnt].db_filename);
762 dbg_log (_("cannot write to database file %s: %s"),
763 dbs[cnt].db_filename, strerror (errno));
764 dbs[cnt].persistent = 0;
766 else
768 /* Success. */
769 dbs[cnt].head = mem;
770 dbs[cnt].data = (char *)
771 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
772 ALIGN / sizeof (ref_t))];
773 dbs[cnt].memsize = total;
774 dbs[cnt].mmap_used = true;
776 /* Remember the descriptors. */
777 dbs[cnt].wr_fd = fd;
778 dbs[cnt].ro_fd = ro_fd;
779 fd = -1;
780 ro_fd = -1;
783 if (fd != -1)
784 close (fd);
785 if (ro_fd != -1)
786 close (ro_fd);
790 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
791 /* We do not check here whether the O_CLOEXEC provided to the
792 open call was successful or not. The two fcntl calls are
793 only performed once each per process start-up and therefore
794 is not noticeable at all. */
795 if (paranoia
796 && ((dbs[cnt].wr_fd != -1
797 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
798 || (dbs[cnt].ro_fd != -1
799 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
801 dbg_log (_("\
802 cannot set socket to close on exec: %s; disabling paranoia mode"),
803 strerror (errno));
804 paranoia = 0;
806 #endif
808 if (dbs[cnt].head == NULL)
810 /* We do not use the persistent database. Just
811 create an in-memory data structure. */
812 assert (! dbs[cnt].persistent);
814 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
815 + (dbs[cnt].suggested_module
816 * sizeof (ref_t)));
817 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
818 assert (~ENDREF == 0);
819 memset (dbs[cnt].head->array, '\xff',
820 dbs[cnt].suggested_module * sizeof (ref_t));
821 dbs[cnt].head->module = dbs[cnt].suggested_module;
822 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
823 * dbs[cnt].head->module);
824 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
825 dbs[cnt].head->first_free = 0;
827 dbs[cnt].shared = 0;
828 assert (dbs[cnt].ro_fd == -1);
832 /* Create the socket. */
833 #ifndef __ASSUME_SOCK_CLOEXEC
834 sock = -1;
835 if (have_sock_cloexec >= 0)
836 #endif
838 sock = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
839 #ifndef __ASSUME_SOCK_CLOEXEC
840 if (have_sock_cloexec == 0)
841 have_sock_cloexec = sock != -1 || errno != EINVAL ? 1 : -1;
842 #endif
844 #ifndef __ASSUME_SOCK_CLOEXEC
845 if (have_sock_cloexec < 0)
846 sock = socket (AF_UNIX, SOCK_STREAM, 0);
847 #endif
848 if (sock < 0)
850 dbg_log (_("cannot open socket: %s"), strerror (errno));
851 do_exit (errno == EACCES ? 4 : 1, 0, NULL);
853 /* Bind a name to the socket. */
854 struct sockaddr_un sock_addr;
855 sock_addr.sun_family = AF_UNIX;
856 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
857 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
859 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
860 do_exit (errno == EACCES ? 4 : 1, 0, NULL);
863 #ifndef __ASSUME_SOCK_CLOEXEC
864 if (have_sock_cloexec < 0)
866 /* We don't want to get stuck on accept. */
867 int fl = fcntl (sock, F_GETFL);
868 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
870 dbg_log (_("cannot change socket to nonblocking mode: %s"),
871 strerror (errno));
872 do_exit (1, 0, NULL);
875 /* The descriptor needs to be closed on exec. */
876 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
878 dbg_log (_("cannot set socket to close on exec: %s"),
879 strerror (errno));
880 do_exit (1, 0, NULL);
883 #endif
885 /* Set permissions for the socket. */
886 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
888 /* Set the socket up to accept connections. */
889 if (listen (sock, SOMAXCONN) < 0)
891 dbg_log (_("cannot enable socket to accept connections: %s"),
892 strerror (errno));
893 do_exit (1, 0, NULL);
896 #ifdef HAVE_NETLINK
897 if (dbs[hstdb].enabled)
899 /* Try to open netlink socket to monitor network setting changes. */
900 nl_status_fd = socket (AF_NETLINK,
901 SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK,
902 NETLINK_ROUTE);
903 if (nl_status_fd != -1)
905 struct sockaddr_nl snl;
906 memset (&snl, '\0', sizeof (snl));
907 snl.nl_family = AF_NETLINK;
908 /* XXX Is this the best set to use? */
909 snl.nl_groups = (RTMGRP_IPV4_IFADDR | RTMGRP_TC | RTMGRP_IPV4_MROUTE
910 | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_RULE
911 | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_MROUTE
912 | RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFINFO
913 | RTMGRP_IPV6_PREFIX);
915 if (bind (nl_status_fd, (struct sockaddr *) &snl, sizeof (snl)) != 0)
917 close (nl_status_fd);
918 nl_status_fd = -1;
920 else
922 /* Start the timestamp process. */
923 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
924 = __bump_nl_timestamp ();
926 # ifndef __ASSUME_SOCK_CLOEXEC
927 if (have_sock_cloexec < 0)
929 /* We don't want to get stuck on accept. */
930 int fl = fcntl (nl_status_fd, F_GETFL);
931 if (fl == -1
932 || fcntl (nl_status_fd, F_SETFL, fl | O_NONBLOCK) == -1)
934 dbg_log (_("\
935 cannot change socket to nonblocking mode: %s"),
936 strerror (errno));
937 do_exit (1, 0, NULL);
940 /* The descriptor needs to be closed on exec. */
941 if (paranoia
942 && fcntl (nl_status_fd, F_SETFD, FD_CLOEXEC) == -1)
944 dbg_log (_("cannot set socket to close on exec: %s"),
945 strerror (errno));
946 do_exit (1, 0, NULL);
949 # endif
953 #endif
955 /* Change to unprivileged uid/gid/groups if specified in config file */
956 if (server_user != NULL)
957 finish_drop_privileges ();
#ifdef HAVE_INOTIFY
/* Events watched on a traced file and on its parent directory.  */
#define TRACED_FILE_MASK (IN_DELETE_SELF | IN_CLOSE_WRITE | IN_MOVE_SELF)
#define TRACED_DIR_MASK (IN_DELETE_SELF | IN_CREATE | IN_MOVED_TO | IN_MOVE_SELF)

/* Install the inotify watches for the traced file FINFO: one on the
   file itself and one on its parent directory.  A watch whose
   descriptor is already non-negative is left alone, so the function
   can be called again to (re-)install only what is missing.  If the
   file watch cannot be installed the directory watch is not attempted.
   Every outcome is logged.  */
void
install_watches (struct traced_file *finfo)
{
  /* Use inotify support if we have it.  */
  if (finfo->inotify_descr[TRACED_FILE] < 0)
    finfo->inotify_descr[TRACED_FILE] = inotify_add_watch (inotify_fd,
                                                           finfo->fname,
                                                           TRACED_FILE_MASK);
  if (finfo->inotify_descr[TRACED_FILE] < 0)
    {
      dbg_log (_("disabled inotify-based monitoring for file `%s': %s"),
               finfo->fname, strerror (errno));
      return;
    }
  dbg_log (_("monitoring file `%s` (%d)"),
           finfo->fname, finfo->inotify_descr[TRACED_FILE]);

  /* Additionally listen for events in the file's parent directory.
     We do this because the file to be watched might be
     deleted and then added back again.  When it is added back again
     we must re-add the watch.  We must also cover IN_MOVED_TO to
     detect a file being moved into the directory.  */
  if (finfo->inotify_descr[TRACED_DIR] < 0)
    finfo->inotify_descr[TRACED_DIR] = inotify_add_watch (inotify_fd,
                                                          finfo->dname,
                                                          TRACED_DIR_MASK);
  if (finfo->inotify_descr[TRACED_DIR] < 0)
    {
      /* Name the directory whose watch failed, not the file.  */
      dbg_log (_("disabled inotify-based monitoring for directory `%s': %s"),
               finfo->dname, strerror (errno));
      return;
    }
  dbg_log (_("monitoring directory `%s` (%d)"),
           finfo->dname, finfo->inotify_descr[TRACED_DIR]);
}
#endif
999 /* Register the file in FINFO as a traced file for the database DBS[DBIX].
1001 We support registering multiple files per database. Each call to
1002 register_traced_file adds to the list of registered files.
1004 When we prune the database, either through timeout or a request to
1005 invalidate, we will check to see if any of the registered files has changed.
1006 When we accept new connections to handle a cache request we will also
1007 check to see if any of the registered files has changed.
1009 If we have inotify support then we install an inotify fd to notify us of
1010 file deletion or modification, both of which will require we invalidate
1011 the cache for the database. Without inotify support we stat the file and
1012 store st_mtime to determine if the file has been modified. */
1013 void
1014 register_traced_file (size_t dbidx, struct traced_file *finfo)
1016 /* If the database is disabled or file checking is disabled
1017 then ignore the registration. */
1018 if (! dbs[dbidx].enabled || ! dbs[dbidx].check_file)
1019 return;
1021 if (__glibc_unlikely (debug_level > 0))
1022 dbg_log (_("monitoring file %s for database %s"),
1023 finfo->fname, dbnames[dbidx]);
1025 #ifdef HAVE_INOTIFY
1026 install_watches (finfo);
1027 #endif
1028 struct stat64 st;
1029 if (stat64 (finfo->fname, &st) < 0)
1031 /* We cannot stat() the file. Set mtime to zero and try again later. */
1032 dbg_log (_("stat failed for file `%s'; will try again later: %s"),
1033 finfo->fname, strerror (errno));
1034 finfo->mtime = 0;
1036 else
1037 finfo->mtime = st.st_mtime;
1039 /* Queue up the file name. */
1040 finfo->next = dbs[dbidx].traced_files;
1041 dbs[dbidx].traced_files = finfo;
1045 /* Close the connections. */
1046 void
1047 close_sockets (void)
1049 close (sock);
1053 static void
1054 invalidate_cache (char *key, int fd)
1056 dbtype number;
1057 int32_t resp;
1059 for (number = pwddb; number < lastdb; ++number)
1060 if (strcmp (key, dbnames[number]) == 0)
1062 struct traced_file *runp = dbs[number].traced_files;
1063 while (runp != NULL)
1065 /* Make sure we reload from file when checking mtime. */
1066 runp->mtime = 0;
1067 #ifdef HAVE_INOTIFY
1068 /* During an invalidation we try to reload the traced
1069 file watches. This allows the user to re-sync if
1070 inotify events were lost. Similar to what we do during
1071 pruning. */
1072 install_watches (runp);
1073 #endif
1074 if (runp->call_res_init)
1076 res_init ();
1077 break;
1079 runp = runp->next;
1081 break;
1084 if (number == lastdb)
1086 resp = EINVAL;
1087 writeall (fd, &resp, sizeof (resp));
1088 return;
1091 if (dbs[number].enabled)
1093 pthread_mutex_lock (&dbs[number].prune_run_lock);
1094 prune_cache (&dbs[number], LONG_MAX, fd);
1095 pthread_mutex_unlock (&dbs[number].prune_run_lock);
1097 else
1099 resp = 0;
1100 writeall (fd, &resp, sizeof (resp));
1105 #ifdef SCM_RIGHTS
1106 static void
1107 send_ro_fd (struct database_dyn *db, char *key, int fd)
1109 /* If we do not have an read-only file descriptor do nothing. */
1110 if (db->ro_fd == -1)
1111 return;
1113 /* We need to send some data along with the descriptor. */
1114 uint64_t mapsize = (db->head->data_size
1115 + roundup (db->head->module * sizeof (ref_t), ALIGN)
1116 + sizeof (struct database_pers_head));
1117 struct iovec iov[2];
1118 iov[0].iov_base = key;
1119 iov[0].iov_len = strlen (key) + 1;
1120 iov[1].iov_base = &mapsize;
1121 iov[1].iov_len = sizeof (mapsize);
1123 /* Prepare the control message to transfer the descriptor. */
1124 union
1126 struct cmsghdr hdr;
1127 char bytes[CMSG_SPACE (sizeof (int))];
1128 } buf;
1129 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
1130 .msg_control = buf.bytes,
1131 .msg_controllen = sizeof (buf) };
1132 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
1134 cmsg->cmsg_level = SOL_SOCKET;
1135 cmsg->cmsg_type = SCM_RIGHTS;
1136 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
1138 int *ip = (int *) CMSG_DATA (cmsg);
1139 *ip = db->ro_fd;
1141 msg.msg_controllen = cmsg->cmsg_len;
1143 /* Send the control message. We repeat when we are interrupted but
1144 everything else is ignored. */
1145 #ifndef MSG_NOSIGNAL
1146 # define MSG_NOSIGNAL 0
1147 #endif
1148 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
1150 if (__glibc_unlikely (debug_level > 0))
1151 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
1153 #endif /* SCM_RIGHTS */
1156 /* Handle new request.

   FD is the connected client socket, REQ the request header already
   read from it and KEY the key bytes that followed the header.  UID
   and PID are supplied by the caller; for GETSTAT, SHUTDOWN and
   INVALIDATE the UID is replaced below by the peer's uid (or by 0
   when SO_PEERCRED is not available).

   Processing order:
     1. reject requests whose protocol version differs from NSCD_VERSION;
     2. apply the SELinux permission check when SELinux is enabled;
     3. for data requests, answer from the cache while holding the
        database read lock, or send the prepared "disabled" record;
     4. otherwise dispatch on the request type.

   Nothing is returned.  Failures while sending are only logged, and
   only when debugging is enabled.  */
1157 static void
1158 handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
1160 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
1162 if (debug_level > 0)
1163 dbg_log (_("\
1164 cannot handle old request version %d; current version is %d"),
1165 req->version, NSCD_VERSION);
1166 return;
1169 /* Perform the SELinux check before we go on to the standard checks. */
1170 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
1172 if (debug_level > 0)
1174 #ifdef SO_PEERCRED
1175 # ifdef PATH_MAX
1176 char buf[PATH_MAX];
1177 # else
1178 char buf[4096];
1179 # endif
/* Resolve the executable of the requesting process purely for the log
   message.  NOTE(review): BUF is passed to readlink both as the path
   and as the result buffer -- confirm this aliasing is intended.  */
1181 snprintf (buf, sizeof (buf), "/proc/%ld/exe", (long int) pid);
1182 ssize_t n = readlink (buf, buf, sizeof (buf) - 1);
1184 if (n <= 0)
1185 dbg_log (_("\
1186 request from %ld not handled due to missing permission"), (long int) pid);
1187 else
/* readlink does not NUL-terminate its result.  */
1189 buf[n] = '\0';
1190 dbg_log (_("\
1191 request from '%s' [%ld] not handled due to missing permission"),
1192 buf, (long int) pid);
1194 #else
1195 dbg_log (_("request not handled due to missing permission"));
1196 #endif
1198 return;
1201 struct database_dyn *db = reqinfo[req->type].db;
1203 /* See whether we can service the request from the cache. */
1204 if (__builtin_expect (reqinfo[req->type].data_request, true))
1206 if (__builtin_expect (debug_level, 0) > 0)
/* Address lookups carry a binary address as key; everything else is
   logged as a string.  */
1208 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
1210 char buf[INET6_ADDRSTRLEN];
1212 dbg_log ("\t%s (%s)", serv2str[req->type],
1213 inet_ntop (req->type == GETHOSTBYADDR
1214 ? AF_INET : AF_INET6,
1215 key, buf, sizeof (buf)));
1217 else
1218 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1221 /* Is this service enabled? */
1222 if (__glibc_unlikely (!db->enabled))
1224 /* No, send the prepared record. */
1225 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1226 db->disabled_iov->iov_len,
1227 MSG_NOSIGNAL))
1228 != (ssize_t) db->disabled_iov->iov_len
1229 && __builtin_expect (debug_level, 0) > 0)
1231 /* We have problems sending the result. */
1232 char buf[256];
1233 dbg_log (_("cannot write result: %s"),
1234 strerror_r (errno, buf, sizeof (buf)));
1237 return;
1240 /* Be sure we can read the data. */
1241 if (__glibc_unlikely (pthread_rwlock_tryrdlock (&db->lock) != 0))
/* The non-blocking attempt failed: record the contention in the
   database statistics, then block until the lock is available.  */
1243 ++db->head->rdlockdelayed;
1244 pthread_rwlock_rdlock (&db->lock);
1247 /* See whether we can handle it from the cache. */
1248 struct datahead *cached;
1249 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1250 db, uid);
1251 if (cached != NULL)
1253 /* Hurray it's in the cache. */
1254 ssize_t nwritten;
1256 #ifdef HAVE_SENDFILE
1257 if (__glibc_likely (db->mmap_used))
/* The record lives in the mapped database file, so it can be sent
   straight from the file descriptor.  The asserts check that the
   record lies inside the mapped region.  */
1259 assert (db->wr_fd != -1);
1260 assert ((char *) cached->data > (char *) db->data);
1261 assert ((char *) cached->data - (char *) db->head
1262 + cached->recsize
1263 <= (sizeof (struct database_pers_head)
1264 + db->head->module * sizeof (ref_t)
1265 + db->head->data_size));
1266 nwritten = sendfileall (fd, db->wr_fd,
1267 (char *) cached->data
1268 - (char *) db->head, cached->recsize);
1269 # ifndef __ASSUME_SENDFILE
/* sendfile is not implemented: fall back to the plain write below.  */
1270 if (nwritten == -1 && errno == ENOSYS)
1271 goto use_write;
1272 # endif
1274 else
1275 # ifndef __ASSUME_SENDFILE
1276 use_write:
1277 # endif
1278 #endif
1279 nwritten = writeall (fd, cached->data, cached->recsize);
1281 if (nwritten != cached->recsize
1282 && __builtin_expect (debug_level, 0) > 0)
1284 /* We have problems sending the result. */
1285 char buf[256];
1286 dbg_log (_("cannot write result: %s"),
1287 strerror_r (errno, buf, sizeof (buf)));
1290 pthread_rwlock_unlock (&db->lock);
1292 return;
/* Cache miss: release the read lock before the request is handed to
   the per-type handler below.  */
1295 pthread_rwlock_unlock (&db->lock);
1297 else if (__builtin_expect (debug_level, 0) > 0)
1299 if (req->type == INVALIDATE)
1300 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1301 else
1302 dbg_log ("\t%s", serv2str[req->type]);
1305 /* Handle the request. */
1306 switch (req->type)
1308 case GETPWBYNAME:
1309 addpwbyname (db, fd, req, key, uid);
1310 break;
1312 case GETPWBYUID:
1313 addpwbyuid (db, fd, req, key, uid);
1314 break;
1316 case GETGRBYNAME:
1317 addgrbyname (db, fd, req, key, uid);
1318 break;
1320 case GETGRBYGID:
1321 addgrbygid (db, fd, req, key, uid);
1322 break;
1324 case GETHOSTBYNAME:
1325 addhstbyname (db, fd, req, key, uid);
1326 break;
1328 case GETHOSTBYNAMEv6:
1329 addhstbynamev6 (db, fd, req, key, uid);
1330 break;
1332 case GETHOSTBYADDR:
1333 addhstbyaddr (db, fd, req, key, uid);
1334 break;
1336 case GETHOSTBYADDRv6:
1337 addhstbyaddrv6 (db, fd, req, key, uid);
1338 break;
1340 case GETAI:
1341 addhstai (db, fd, req, key, uid);
1342 break;
1344 case INITGROUPS:
1345 addinitgroups (db, fd, req, key, uid);
1346 break;
1348 case GETSERVBYNAME:
1349 addservbyname (db, fd, req, key, uid);
1350 break;
1352 case GETSERVBYPORT:
1353 addservbyport (db, fd, req, key, uid);
1354 break;
1356 case GETNETGRENT:
1357 addgetnetgrent (db, fd, req, key, uid);
1358 break;
1360 case INNETGR:
1361 addinnetgr (db, fd, req, key, uid);
1362 break;
/* Administrative requests: these are authorized by the uid of the
   peer rather than by the UID argument.  */
1364 case GETSTAT:
1365 case SHUTDOWN:
1366 case INVALIDATE:
1368 /* Get the callers credentials. */
1369 #ifdef SO_PEERCRED
1370 struct ucred caller;
1371 socklen_t optlen = sizeof (caller);
1373 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1375 char buf[256];
1377 dbg_log (_("error getting caller's id: %s"),
1378 strerror_r (errno, buf, sizeof (buf)));
1379 break;
1382 uid = caller.uid;
1383 #else
1384 /* Some systems have no SO_PEERCRED implementation. They don't
1385 care about security so we don't as well. */
1386 uid = 0;
1387 #endif
1390 /* Accept shutdown, getstat and invalidate only from root. For
1391 the stat call also allow the user specified in the config file.
   Requests from any other uid are dropped without a reply. */
1392 if (req->type == GETSTAT)
1394 if (uid == 0 || uid == stat_uid)
1395 send_stats (fd, dbs);
1397 else if (uid == 0)
1399 if (req->type == INVALIDATE)
1400 invalidate_cache (key, fd);
1401 else
1402 termination_handler (0);
1404 break;
/* Requests for the read-only descriptor of a database mapping.  When
   SCM_RIGHTS is unavailable they are accepted but nothing is sent.  */
1406 case GETFDPW:
1407 case GETFDGR:
1408 case GETFDHST:
1409 case GETFDSERV:
1410 case GETFDNETGR:
1411 #ifdef SCM_RIGHTS
1412 send_ro_fd (reqinfo[req->type].db, key, fd);
1413 #endif
1414 break;
1416 default:
1417 /* Ignore the command, it's nothing we know. */
1418 break;
1423 /* Restart the process. */
1424 static void
1425 restart (void)
1427 /* First determine the parameters. We do not use the parameters
1428 passed to main() since in case nscd is started by running the
1429 dynamic linker this will not work. Yes, this is not the usual
1430 case but nscd is part of glibc and we occasionally do this. */
1431 size_t buflen = 1024;
1432 char *buf = alloca (buflen);
1433 size_t readlen = 0;
1434 int fd = open ("/proc/self/cmdline", O_RDONLY);
1435 if (fd == -1)
1437 dbg_log (_("\
1438 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1439 strerror (errno));
1441 paranoia = 0;
1442 return;
1445 while (1)
1447 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1448 buflen - readlen));
1449 if (n == -1)
1451 dbg_log (_("\
1452 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1453 strerror (errno));
1455 close (fd);
1456 paranoia = 0;
1457 return;
1460 readlen += n;
1462 if (readlen < buflen)
1463 break;
1465 /* We might have to extend the buffer. */
1466 size_t old_buflen = buflen;
1467 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1468 buf = memmove (newp, buf, old_buflen);
1471 close (fd);
1473 /* Parse the command line. Worst case scenario: every two
1474 characters form one parameter (one character plus NUL). */
1475 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1476 int argc = 0;
1478 char *cp = buf;
1479 while (cp < buf + readlen)
1481 argv[argc++] = cp;
1482 cp = (char *) rawmemchr (cp, '\0') + 1;
1484 argv[argc] = NULL;
1486 /* Second, change back to the old user if we changed it. */
1487 if (server_user != NULL)
1489 if (setresuid (old_uid, old_uid, old_uid) != 0)
1491 dbg_log (_("\
1492 cannot change to old UID: %s; disabling paranoia mode"),
1493 strerror (errno));
1495 paranoia = 0;
1496 return;
1499 if (setresgid (old_gid, old_gid, old_gid) != 0)
1501 dbg_log (_("\
1502 cannot change to old GID: %s; disabling paranoia mode"),
1503 strerror (errno));
1505 ignore_value (setuid (server_uid));
1506 paranoia = 0;
1507 return;
1511 /* Next change back to the old working directory. */
1512 if (chdir (oldcwd) == -1)
1514 dbg_log (_("\
1515 cannot change to old working directory: %s; disabling paranoia mode"),
1516 strerror (errno));
1518 if (server_user != NULL)
1520 ignore_value (setuid (server_uid));
1521 ignore_value (setgid (server_gid));
1523 paranoia = 0;
1524 return;
1527 /* Synchronize memory. */
1528 int32_t certainly[lastdb];
1529 for (int cnt = 0; cnt < lastdb; ++cnt)
1530 if (dbs[cnt].enabled)
1532 /* Make sure nobody keeps using the database. */
1533 dbs[cnt].head->timestamp = 0;
1534 certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
1535 dbs[cnt].head->nscd_certainly_running = 0;
1537 if (dbs[cnt].persistent)
1538 // XXX async OK?
1539 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1542 /* The preparations are done. */
1543 #ifdef PATH_MAX
1544 char pathbuf[PATH_MAX];
1545 #else
1546 char pathbuf[256];
1547 #endif
1548 /* Try to exec the real nscd program so the process name (as reported
1549 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1550 if readlink or the exec with the result of the readlink call fails. */
1551 ssize_t n = readlink ("/proc/self/exe", pathbuf, sizeof (pathbuf) - 1);
1552 if (n != -1)
1554 pathbuf[n] = '\0';
1555 execv (pathbuf, argv);
1557 execv ("/proc/self/exe", argv);
1559 /* If we come here, we will never be able to re-exec. */
1560 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1561 strerror (errno));
1563 if (server_user != NULL)
1565 ignore_value (setuid (server_uid));
1566 ignore_value (setgid (server_gid));
1568 if (chdir ("/") != 0)
1569 dbg_log (_("cannot change current working directory to \"/\": %s"),
1570 strerror (errno));
1571 paranoia = 0;
1573 /* Reenable the databases. */
1574 time_t now = time (NULL);
1575 for (int cnt = 0; cnt < lastdb; ++cnt)
1576 if (dbs[cnt].enabled)
1578 dbs[cnt].head->timestamp = now;
1579 dbs[cnt].head->nscd_certainly_running = certainly[cnt];
1584 /* List of file descriptors. */
1585 struct fdlist
1587 int fd;
1588 struct fdlist *next;
1590 /* Memory allocated for the list. */
1591 static struct fdlist *fdlist;
1592 /* List of currently ready-to-read file descriptors. */
1593 static struct fdlist *readylist;
1595 /* Conditional variable and mutex to signal availability of entries in
1596 READYLIST. The condvar is initialized dynamically since we might
1597 use a different clock depending on availability. */
1598 static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
1599 static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1601 /* The clock to use with the condvar. */
1602 static clockid_t timeout_clock = CLOCK_REALTIME;
1604 /* Number of threads ready to handle the READYLIST. */
1605 static unsigned long int nready;
1608 /* Function for the clean-up threads.

   P carries the index of the database this thread is responsible for,
   cast to a pointer.  The thread never returns: it sleeps on the
   database's prune condition variable until the computed wakeup time
   passes or a cache clear is requested, then runs prune_cache and
   computes the next wakeup time.

   prune_lock protects wakeup_time and clear_cache.  It is released
   while prune_cache runs (under prune_run_lock) and taken again
   afterwards. */
1609 static void *
1610 __attribute__ ((__noreturn__))
1611 nscd_run_prune (void *p)
1613 const long int my_number = (long int) p;
1614 assert (dbs[my_number].enabled);
1616 int dont_need_update = setup_thread (&dbs[my_number]);
1618 time_t now = time (NULL);
1620 /* We are running. */
1621 dbs[my_number].head->timestamp = now;
1623 struct timespec prune_ts;
1624 if (__glibc_unlikely (clock_gettime (timeout_clock, &prune_ts) == -1))
1625 /* Should never happen. */
1626 abort ();
1628 /* Compute the initial timeout time. Prevent all the timers from
1629 going off at the same time by adding a db-based value.
   PRUNE_TS is the absolute deadline on TIMEOUT_CLOCK used for the
   timed wait; wakeup_time is the same moment expressed via time(). */
1630 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1631 dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;
1633 pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
1634 pthread_mutex_t *prune_run_lock = &dbs[my_number].prune_run_lock;
1635 pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;
1637 pthread_mutex_lock (prune_lock);
1638 while (1)
1640 /* Wait, but not forever.  The wait is skipped entirely when a
   cache clear has already been requested. */
1641 int e = 0;
1642 if (! dbs[my_number].clear_cache)
1643 e = pthread_cond_timedwait (prune_cond, prune_lock, &prune_ts);
1644 assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));
1646 time_t next_wait;
1647 now = time (NULL);
1648 if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time
1649 || dbs[my_number].clear_cache)
1651 /* We will determine the new timeout values based on the
1652 cache content. Should there be concurrent additions to
1653 the cache which are not accounted for in the cache
1654 pruning we want to know about it. Therefore set the
1655 timeout to the maximum. It will be decreased when adding
1656 new entries to the cache, if necessary. */
1657 dbs[my_number].wakeup_time = MAX_TIMEOUT_VALUE;
1659 /* Unconditionally reset the flag.  A requested clear is passed to
   prune_cache as the time LONG_MAX instead of the current time. */
1660 time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
1661 dbs[my_number].clear_cache = 0;
1663 pthread_mutex_unlock (prune_lock);
1665 /* We use a separate lock for running the prune function (instead
1666 of keeping prune_lock locked) because this enables concurrent
1667 invocations of cache_add which might modify the timeout value. */
1668 pthread_mutex_lock (prune_run_lock);
1669 next_wait = prune_cache (&dbs[my_number], prune_now, -1);
1670 pthread_mutex_unlock (prune_run_lock);
/* Never wake up more often than once per CACHE_PRUNE_INTERVAL.  */
1672 next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1673 /* If clients cannot determine for sure whether nscd is running
1674 we need to wake up occasionally to update the timestamp.
1675 Wait 90% of the update period. */
1676 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1677 if (__glibc_unlikely (! dont_need_update))
1679 next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1680 dbs[my_number].head->timestamp = now;
1683 pthread_mutex_lock (prune_lock);
1685 /* Make it known when we will wake up again.  If wakeup_time was
   lowered while the lock was released, the earlier time wins. */
1686 if (now + next_wait < dbs[my_number].wakeup_time)
1687 dbs[my_number].wakeup_time = now + next_wait;
1688 else
1689 next_wait = dbs[my_number].wakeup_time - now;
1691 else
1692 /* The cache was just pruned. Do not do it again now. Just
1693 use the new timeout value. */
1694 next_wait = dbs[my_number].wakeup_time - now;
1696 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1697 /* Should never happen. */
1698 abort ();
1700 /* Compute next timeout time. */
1701 prune_ts.tv_sec += next_wait;
1706 /* This is the main loop. It is replicated in different threads but
1707 the use of the ready list makes sure only one thread handles an
1708 incoming connection.

   Each iteration takes one descriptor from READYLIST, reads the
   request header and the key from it, passes both to handle_request
   and closes the descriptor.  The argument P is not used.  The
   function never returns.

   readylist_lock is held whenever READYLIST or NREADY is accessed
   and is released while a request is being processed. */
1709 static void *
1710 __attribute__ ((__noreturn__))
1711 nscd_run_worker (void *p)
/* Scratch buffer for strerror_r in the log messages below.  */
1713 char buf[256];
1715 /* Initial locking. */
1716 pthread_mutex_lock (&readylist_lock);
1718 /* One more thread available. */
1719 ++nready;
1721 while (1)
1723 while (readylist == NULL)
1724 pthread_cond_wait (&readylist_cond, &readylist_lock);
/* READYLIST points to the most recently added entry of a circular
   list; its successor is the entry which is taken off here.  */
1726 struct fdlist *it = readylist->next;
1727 if (readylist->next == readylist)
1728 /* Just one entry on the list. */
1729 readylist = NULL;
1730 else
1731 readylist->next = it->next;
1733 /* Extract the information and mark the record ready to be used
1734 again. */
1735 int fd = it->fd;
1736 it->next = NULL;
1738 /* One fewer thread available: this one is busy now. */
1739 --nready;
1741 /* We are done with the list. */
1742 pthread_mutex_unlock (&readylist_lock);
1744 #ifndef __ASSUME_ACCEPT4
1745 if (have_accept4 < 0)
1747 /* We do not want to block on a short read or so. */
1748 int fl = fcntl (fd, F_GETFL);
1749 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1750 goto close_and_out;
1752 #endif
1754 /* Now read the request. */
1755 request_header req;
1756 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1757 != sizeof (req), 0))
1759 /* We failed to read data. Note that this also might mean we
1760 failed because we would have blocked. */
1761 if (debug_level > 0)
1762 dbg_log (_("short read while reading request: %s"),
1763 strerror_r (errno, buf, sizeof (buf)));
1764 goto close_and_out;
1767 /* Check whether this is a valid request type.  Invalid types are
   dropped without a log message. */
1768 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1769 goto close_and_out;
1771 /* Some systems have no SO_PEERCRED implementation. They don't
1772 care about security so we don't as well. */
1773 uid_t uid = -1;
1774 #ifdef SO_PEERCRED
1775 pid_t pid = 0;
/* The peer's PID is only looked up for the debug messages; the UID
   passed to handle_request stays -1.  */
1777 if (__glibc_unlikely (debug_level > 0))
1779 struct ucred caller;
1780 socklen_t optlen = sizeof (caller);
1782 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1783 pid = caller.pid;
1785 #else
1786 const pid_t pid = 0;
1787 #endif
1789 /* It should not be possible to crash the nscd with a silly
1790 request (i.e., a terribly large key). We limit the size to
   MAXKEYLEN.  A request with an invalid key length is logged (when
   debugging) and then dropped. */
1791 if (__builtin_expect (req.key_len, 1) < 0
1792 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1794 if (debug_level > 0)
1795 dbg_log (_("key length in request too long: %d"), req.key_len);
1797 else
1799 /* Get the key.  The extra byte is for the terminator added below. */
1800 char keybuf[MAXKEYLEN + 1];
1802 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1803 req.key_len))
1804 != req.key_len, 0))
1806 /* Again, this can also mean we would have blocked. */
1807 if (debug_level > 0)
1808 dbg_log (_("short read while reading request key: %s"),
1809 strerror_r (errno, buf, sizeof (buf)));
1810 goto close_and_out;
1812 keybuf[req.key_len] = '\0';
1814 if (__builtin_expect (debug_level, 0) > 0)
1816 #ifdef SO_PEERCRED
1817 if (pid != 0)
1818 dbg_log (_("\
1819 handle_request: request received (Version = %d) from PID %ld"),
1820 req.version, (long int) pid);
1821 else
1822 #endif
1823 dbg_log (_("\
1824 handle_request: request received (Version = %d)"), req.version);
1827 /* Phew, we got all the data, now process it. */
1828 handle_request (fd, &req, keybuf, uid, pid);
1831 close_and_out:
1832 /* We are done. */
1833 close (fd);
1835 /* Re-locking. */
1836 pthread_mutex_lock (&readylist_lock);
1838 /* One more thread available. */
1839 ++nready;
1841 /* NOTREACHED */
1845 static unsigned int nconns;
1847 static void
1848 fd_ready (int fd)
1850 pthread_mutex_lock (&readylist_lock);
1852 /* Find an empty entry in FDLIST. */
1853 size_t inner;
1854 for (inner = 0; inner < nconns; ++inner)
1855 if (fdlist[inner].next == NULL)
1856 break;
1857 assert (inner < nconns);
1859 fdlist[inner].fd = fd;
1861 if (readylist == NULL)
1862 readylist = fdlist[inner].next = &fdlist[inner];
1863 else
1865 fdlist[inner].next = readylist->next;
1866 readylist = readylist->next = &fdlist[inner];
1869 bool do_signal = true;
1870 if (__glibc_unlikely (nready == 0))
1872 ++client_queued;
1873 do_signal = false;
1875 /* Try to start another thread to help out. */
1876 pthread_t th;
1877 if (nthreads < max_nthreads
1878 && pthread_create (&th, &attr, nscd_run_worker,
1879 (void *) (long int) nthreads) == 0)
1881 /* We got another thread. */
1882 ++nthreads;
1883 /* The new thread might need a kick. */
1884 do_signal = true;
1889 pthread_mutex_unlock (&readylist_lock);
1891 /* Tell one of the worker threads there is work to do. */
1892 if (do_signal)
1893 pthread_cond_signal (&readylist_cond);
1897 /* Check whether restarting should happen. */
1898 static bool
1899 restart_p (time_t now)
1901 return (paranoia && readylist == NULL && nready == nthreads
1902 && now >= restart_time);
1906 /* Array for times a connection was accepted.  The poll-based main
   loop indexes it by position in its CONNS array, while the
   epoll-based main loop indexes it by file descriptor and uses the
   value 0 to mark an entry as unused. */
1907 static time_t *starttime;
1909 #ifdef HAVE_INOTIFY
/* Fallback for systems which do not define PATH_MAX.  */
# ifndef PATH_MAX
#  define PATH_MAX 1024
# endif

/* Buffer for reading inotify events.  The member I gives access to
   the event header at the start of the buffer, BUF provides room for
   the header plus PATH_MAX bytes of name data.  */
union __inev
{
  struct inotify_event i;
  char buf[sizeof (struct inotify_event) + PATH_MAX];
};
1920 /* Returns 0 if the file is there otherwise -1. */
1922 check_file (struct traced_file *finfo)
1924 struct stat64 st;
1925 /* We could check mtime and if different re-add
1926 the watches, and invalidate the database, but we
1927 don't because we are called from inotify_check_files
1928 which should be doing that work. If sufficient inotify
1929 events were lost then the next pruning or invalidation
1930 will do the stat and mtime check. We don't do it here to
1931 keep the logic simple. */
1932 if (stat64 (finfo->fname, &st) < 0)
1933 return -1;
1934 return 0;
1937 /* Process the inotify event in INEV. If the event matches any of the files
1938 registered with a database then mark that database as requiring its cache
1939 to be cleared. We indicate the cache needs clearing by setting
1940 TO_CLEAR[DBCNT] to true for the matching database.

   TO_CLEAR is indexed by database number and is only ever set to
   true here, never reset.  Events on a watched file end the scan
   with a return as soon as they are handled, whereas events on a
   watched parent directory are checked against all registered files
   because several files can share one directory watch. */
1941 static void
1942 inotify_check_files (bool *to_clear, union __inev *inev)
1944 /* Check which of the files changed. */
1945 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1947 struct traced_file *finfo = dbs[dbcnt].traced_files;
1949 while (finfo != NULL)
1951 /* The configuration file was moved or deleted.
1952 We stop watching it at that point, and reinitialize. */
1953 if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
1954 && ((inev->i.mask & IN_MOVE_SELF)
1955 || (inev->i.mask & IN_DELETE_SELF)
1956 || (inev->i.mask & IN_IGNORED)))
1958 int ret;
1959 bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
/* If the path can still be stat'ed the event is ignored and the
   watch is left in place.  */
1961 if (check_file (finfo) == 0)
1963 dbg_log (_("ignored inotify event for `%s` (file exists)"),
1964 finfo->fname);
1965 return;
1968 dbg_log (_("monitored file `%s` was %s, removing watch"),
1969 finfo->fname, moved ? "moved" : "deleted");
1970 /* File was moved out, remove the watch. Watches are
1971 automatically removed when the file is deleted. */
1972 if (moved)
1974 ret = inotify_rm_watch (inotify_fd, inev->i.wd);
1975 if (ret < 0)
1976 dbg_log (_("failed to remove file watch `%s`: %s"),
1977 finfo->fname, strerror (errno));
1979 finfo->inotify_descr[TRACED_FILE] = -1;
1980 to_clear[dbcnt] = true;
1981 if (finfo->call_res_init)
1982 res_init ();
1983 return;
1985 /* The configuration file was open for writing and has just closed.
1986 We reset the cache and reinitialize. */
1987 if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
1988 && inev->i.mask & IN_CLOSE_WRITE)
1990 /* Mark cache as needing to be cleared and reinitialize. */
1991 dbg_log (_("monitored file `%s` was written to"), finfo->fname);
1992 to_clear[dbcnt] = true;
1993 if (finfo->call_res_init)
1994 res_init ();
1995 return;
1997 /* The parent directory was moved or deleted. We trigger one last
1998 invalidation. At the next pruning or invalidation we may add
1999 this watch back if the file is present again. */
2000 if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
2001 && ((inev->i.mask & IN_DELETE_SELF)
2002 || (inev->i.mask & IN_MOVE_SELF)
2003 || (inev->i.mask & IN_IGNORED)))
2005 bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
2006 /* The directory watch may have already been removed
2007 but we don't know so we just remove it again and
2008 ignore the error. Then we remove the file watch.
2009 Note: watches are automatically removed for deleted
2010 files. */
2011 if (moved)
2012 inotify_rm_watch (inotify_fd, inev->i.wd);
2013 if (finfo->inotify_descr[TRACED_FILE] != -1)
2015 dbg_log (_("monitored parent directory `%s` was %s, removing watch on `%s`"),
2016 finfo->dname, moved ? "moved" : "deleted", finfo->fname);
/* NOTE(review): the message below speaks of the file watch but is
   given finfo->dname, unlike the other "failed to remove file watch"
   message in this function -- confirm whether finfo->fname was
   meant.  */
2017 if (inotify_rm_watch (inotify_fd, finfo->inotify_descr[TRACED_FILE]) < 0)
2018 dbg_log (_("failed to remove file watch `%s`: %s"),
2019 finfo->dname, strerror (errno));
2021 finfo->inotify_descr[TRACED_FILE] = -1;
2022 finfo->inotify_descr[TRACED_DIR] = -1;
2023 to_clear[dbcnt] = true;
2024 if (finfo->call_res_init)
2025 res_init ();
2026 /* Continue to the next entry since this might be the
2027 parent directory for multiple registered files and
2028 we want to remove watches for all registered files.
   Note that FINFO is not advanced before the `continue', so the
   same entry is examined once more.  Both of its descriptors are -1
   by then, which presumably never equals a watch descriptor
   reported in an event, so the entry falls through to the advance
   at the end of the loop -- TODO confirm. */
2029 continue;
2031 /* The parent directory had a create or moved to event. */
2032 if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
2033 && ((inev->i.mask & IN_MOVED_TO)
2034 || (inev->i.mask & IN_CREATE))
2035 && strcmp (inev->i.name, finfo->sfname) == 0)
2037 /* We detected a directory change. We look for the creation
2038 of the file we are tracking or the move of the same file
2039 into the directory. */
2040 int ret;
2041 dbg_log (_("monitored file `%s` was %s, adding watch"),
2042 finfo->fname,
2043 inev->i.mask & IN_CREATE ? "created" : "moved into place");
2044 /* File was moved in or created. Regenerate the watch. */
2045 if (finfo->inotify_descr[TRACED_FILE] != -1)
2046 inotify_rm_watch (inotify_fd,
2047 finfo->inotify_descr[TRACED_FILE]);
2049 ret = inotify_add_watch (inotify_fd,
2050 finfo->fname,
2051 TRACED_FILE_MASK);
2052 if (ret < 0)
2053 dbg_log (_("failed to add file watch `%s`: %s"),
2054 finfo->fname, strerror (errno));
/* The result is stored even if adding the watch failed.  */
2056 finfo->inotify_descr[TRACED_FILE] = ret;
2058 /* The file is new or moved so mark cache as needing to
2059 be cleared and reinitialize. */
2060 to_clear[dbcnt] = true;
2061 if (finfo->call_res_init)
2062 res_init ();
2064 /* Done re-adding the watch. Don't return, we may still
2065 have other files in this same directory, same watch
2066 descriptor, and need to process them. */
2068 /* Other events are ignored, and we move on to the next file. */
2069 finfo = finfo->next;
2074 /* If an entry in the array of booleans TO_CLEAR is TRUE then clear the cache
2075 for the associated database, otherwise do nothing. The TO_CLEAR array must
2076 have LASTDB entries. */
2077 static inline void
2078 clear_db_cache (bool *to_clear)
2080 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
2081 if (to_clear[dbcnt])
2083 pthread_mutex_lock (&dbs[dbcnt].prune_lock);
2084 dbs[dbcnt].clear_cache = 1;
2085 pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
2086 pthread_cond_signal (&dbs[dbcnt].prune_cond);
2091 handle_inotify_events (void)
2093 bool to_clear[lastdb] = { false, };
2094 union __inev inev;
2096 /* Read all inotify events for files registered via
2097 register_traced_file(). */
2098 while (1)
2100 /* Potentially read multiple events into buf. */
2101 ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd,
2102 &inev.buf,
2103 sizeof (inev)));
2104 if (nb < (ssize_t) sizeof (struct inotify_event))
2106 /* Not even 1 event. */
2107 if (__glibc_unlikely (nb == -1 && errno != EAGAIN))
2108 return -1;
2109 /* Done reading events that are ready. */
2110 break;
2112 /* Process all events. The normal inotify interface delivers
2113 complete events on a read and never a partial event. */
2114 char *eptr = &inev.buf[0];
2115 ssize_t count;
2116 while (1)
2118 /* Check which of the files changed. */
2119 inotify_check_files (to_clear, &inev);
2120 count = sizeof (struct inotify_event) + inev.i.len;
2121 eptr += count;
2122 nb -= count;
2123 if (nb >= (ssize_t) sizeof (struct inotify_event))
2124 memcpy (&inev, eptr, nb);
2125 else
2126 break;
2128 continue;
2130 /* Actually perform the cache clearing. */
2131 clear_db_cache (to_clear);
2132 return 0;
2135 #endif
/* Main loop of the main thread, based on poll.  It never returns.

   CONNS holds NCONNS entries.  Slot 0 is the listening socket SOCK;
   the inotify descriptor (slot 1) and the netlink status descriptor
   follow when they are in use, the remaining slots hold accepted
   connections which have not sent a request yet.

     NUSED      one more than the index of the highest slot in use;
                this is the count passed to poll.
     FIRSTFREE  index of the lowest slot which may be free (fd == -1).

   Per iteration: wait in poll, accept a new connection if there is
   one, handle inotify and netlink activity, pass connections with
   pending data to fd_ready, close connections which stayed silent
   for too long, and finally restart the process if restart_p says
   so.  */
2137 static void
2138 __attribute__ ((__noreturn__))
2139 main_loop_poll (void)
2141 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
2142 * sizeof (conns[0]));
2144 conns[0].fd = sock;
2145 conns[0].events = POLLRDNORM;
2146 size_t nused = 1;
2147 size_t firstfree = 1;
2149 #ifdef HAVE_INOTIFY
2150 if (inotify_fd != -1)
2152 conns[1].fd = inotify_fd;
2153 conns[1].events = POLLRDNORM;
2154 nused = 2;
2155 firstfree = 2;
2157 #endif
2159 #ifdef HAVE_NETLINK
/* Zero means "no netlink slot"; slot 0 is always the listening
   socket.  */
2160 size_t idx_nl_status_fd = 0;
2161 if (nl_status_fd != -1)
2163 idx_nl_status_fd = nused;
2164 conns[nused].fd = nl_status_fd;
2165 conns[nused].events = POLLRDNORM;
2166 ++nused;
2167 firstfree = nused;
2169 #endif
2171 while (1)
2173 /* Wait for any event. We wait at most a couple of seconds so
2174 that we can check whether we should close any of the accepted
2175 connections since we have not received a request.  The timeout
   (in milliseconds) shrinks as more slots are in use. */
2176 #define MAX_ACCEPT_TIMEOUT 30
2177 #define MIN_ACCEPT_TIMEOUT 5
2178 #define MAIN_THREAD_TIMEOUT \
2179 (MAX_ACCEPT_TIMEOUT * 1000 \
2180 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
2182 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
2184 time_t now = time (NULL);
2186 /* If there is a descriptor ready for reading or there is a new
2187 connection, process this now.  N counts the ready descriptors
   which have not been dealt with yet. */
2188 if (n > 0)
2190 if (conns[0].revents != 0)
2192 /* We have a new incoming connection. Accept the connection. */
2193 int fd;
/* Without __ASSUME_ACCEPT4, have_accept4 records whether accept4
   works: 0 = not yet known, positive = usable, negative = fall back
   to plain accept.  */
2195 #ifndef __ASSUME_ACCEPT4
2196 fd = -1;
2197 if (have_accept4 >= 0)
2198 #endif
2200 fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
2201 SOCK_NONBLOCK));
2202 #ifndef __ASSUME_ACCEPT4
2203 if (have_accept4 == 0)
2204 have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
2205 #endif
2207 #ifndef __ASSUME_ACCEPT4
2208 if (have_accept4 < 0)
2209 fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
2210 #endif
2212 /* Use the descriptor if we have not reached the limit. */
2213 if (fd >= 0)
2215 if (firstfree < nconns)
2217 conns[firstfree].fd = fd;
2218 conns[firstfree].events = POLLRDNORM;
2219 starttime[firstfree] = now;
2220 if (firstfree >= nused)
2221 nused = firstfree + 1;
/* Advance FIRSTFREE to the next unused slot, or to NUSED if there
   is none below it.  */
2224 ++firstfree;
2225 while (firstfree < nused && conns[firstfree].fd != -1);
2227 else
2228 /* We cannot use the connection so close it. */
2229 close (fd);
2232 --n;
/* FIRST is the index of the first slot which can hold an accepted
   connection.  */
2235 size_t first = 1;
2236 #ifdef HAVE_INOTIFY
2237 if (inotify_fd != -1 && conns[1].fd == inotify_fd)
2239 if (conns[1].revents != 0)
2241 int ret;
2242 ret = handle_inotify_events ();
2243 if (ret == -1)
2245 /* Something went wrong when reading the inotify
2246 data. Better disable inotify. */
2247 dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
2248 conns[1].fd = -1;
2249 firstfree = 1;
2250 if (nused == 2)
2251 nused = 1;
2252 close (inotify_fd);
2253 inotify_fd = -1;
2255 --n;
2258 first = 2;
2260 #endif
2262 #ifdef HAVE_NETLINK
2263 if (idx_nl_status_fd != 0 && conns[idx_nl_status_fd].revents != 0)
2265 char buf[4096];
2266 /* Read all the data. We do not interpret it here. */
2267 while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2268 sizeof (buf))) != -1)
2271 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
2272 = __bump_nl_timestamp ();
/* NOTE(review): FIRST is not moved past the netlink slot and N is
   not decremented here, so the loop below appears to look at
   conns[idx_nl_status_fd] as well -- confirm whether that is
   intended.  */
2274 #endif
2276 for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
2277 if (conns[cnt].revents != 0)
/* Hand the descriptor over to the worker threads.  From here on
   the main loop no longer watches it.  */
2279 fd_ready (conns[cnt].fd);
2281 /* Clean up the CONNS array. */
2282 conns[cnt].fd = -1;
2283 if (cnt < firstfree)
2284 firstfree = cnt;
2285 if (cnt == nused - 1)
/* The highest slot became free: shrink NUSED down to the highest
   slot still in use.  */
2287 --nused;
2288 while (conns[nused - 1].fd == -1);
2290 --n;
2294 /* Now find entries which have timed out. */
2295 assert (nused > 0);
2297 /* We make the timeout length depend on the number of file
2298 descriptors currently used. */
2299 #define ACCEPT_TIMEOUT \
2300 (MAX_ACCEPT_TIMEOUT \
2301 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
2302 time_t laststart = now - ACCEPT_TIMEOUT;
/* NOTE(review): this scan covers every slot above 0, including the
   inotify and netlink slots if present.  Their starttime entries
   are not set anywhere in this function -- confirm they cannot be
   treated as timed out.  */
2304 for (size_t cnt = nused - 1; cnt > 0; --cnt)
2306 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
2308 /* Remove the entry, it timed out. */
2309 (void) close (conns[cnt].fd);
2310 conns[cnt].fd = -1;
2312 if (cnt < firstfree)
2313 firstfree = cnt;
2314 if (cnt == nused - 1)
2316 --nused;
2317 while (conns[nused - 1].fd == -1);
2321 if (restart_p (now))
2322 restart ();
#ifdef HAVE_EPOLL
/* Connection-handling loop of the main thread, implemented with epoll.

   EFD is the epoll descriptor to use.  The listening socket SOCK and,
   when they are open, the inotify and netlink status descriptors are
   registered with EFD.  If any of these registrations fails the
   function returns so that the caller can use a different loop.
   Otherwise it loops without end:

   - new connections on SOCK are accepted and registered with EFD, and
     the time of the accept is stored in STARTTIME;
   - inotify and netlink events are processed in place;
   - any other ready descriptor is removed from EFD and passed on via
     fd_ready;
   - accepted connections for which no request arrived within
     ACCEPT_TIMEOUT seconds are closed.

   NUSED counts the descriptors currently in use; ACCEPT_TIMEOUT is
   derived from it.  HIGHEST is an upper bound for the descriptors which
   have a non-zero STARTTIME entry.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  int nused = 1;
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

# ifdef HAVE_INOTIFY
  if (inotify_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = inotify_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
        /* We cannot use epoll.  */
        return;
      nused = 2;
    }
# endif

# ifdef HAVE_NETLINK
  if (nl_status_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = nl_status_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, nl_status_fd, &ev) == -1)
        /* We cannot use epoll.  */
        return;
    }
# endif

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      /* If epoll_wait failed N is -1 and no event is looked at; the
         timeout handling below is still performed.  */
      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd;

# ifndef __ASSUME_ACCEPT4
            fd = -1;
            if (have_accept4 >= 0)
# endif
              {
                fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
                                                  SOCK_NONBLOCK));
# ifndef __ASSUME_ACCEPT4
                /* Remember whether accept4 is implemented so that the
                   probing is done only once.  */
                if (have_accept4 == 0)
                  have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
# endif
              }
# ifndef __ASSUME_ACCEPT4
            if (have_accept4 < 0)
              fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
# endif

            /* Use the descriptor if we have not reached the limit.  */
            if (fd >= 0)
              {
                /* Try to add the new descriptor.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
# ifdef HAVE_INOTIFY
        else if (revs[cnt].data.fd == inotify_fd)
          {
            int ret;
            ret = handle_inotify_events ();
            if (ret == -1)
              {
                /* Something went wrong when reading the inotify
                   data.  Better disable inotify.  */
                dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
                (void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd, NULL);
                close (inotify_fd);
                inotify_fd = -1;
                break;
              }
          }
# endif
# ifdef HAVE_NETLINK
        else if (revs[cnt].data.fd == nl_status_fd)
          {
            char buf[4096];
            /* Read all the data.  We do not interpret it here.  */
            while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
                                             sizeof (buf))) != -1)
              ;

            __bump_nl_timestamp ();
          }
# endif
        else
          {
            /* Remove the descriptor from the epoll descriptor.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      assert (starttime[sock] == 0);
# ifdef HAVE_INOTIFY
      assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
# endif
# ifdef HAVE_NETLINK
      /* Guarded like every other use of the netlink descriptor in this
         function.  */
      assert (nl_status_fd == -1 || starttime[nl_status_fd] == 0);
# endif
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;

            /* The connection was counted when it was accepted and it
               will never reach the fd_ready path above, so it has to
               be uncounted here.  Otherwise NUSED only ever grows and
               ACCEPT_TIMEOUT shrinks with every timed-out client.  */
            --nused;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
2496 /* Start all the threads we want. The initial process is thread no. 1. */
2497 void
2498 start_threads (void)
2500 /* Initialize the conditional variable we will use. The only
2501 non-standard attribute we might use is the clock selection. */
2502 pthread_condattr_t condattr;
2503 pthread_condattr_init (&condattr);
2505 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2506 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2507 /* Determine whether the monotonous clock is available. */
2508 struct timespec dummy;
2509 # if _POSIX_MONOTONIC_CLOCK == 0
2510 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
2511 # endif
2512 # if _POSIX_CLOCK_SELECTION == 0
2513 if (sysconf (_SC_CLOCK_SELECTION) > 0)
2514 # endif
2515 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
2516 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
2517 timeout_clock = CLOCK_MONOTONIC;
2518 #endif
2520 /* Create the attribute for the threads. They are all created
2521 detached. */
2522 pthread_attr_init (&attr);
2523 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
2524 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2525 pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
2527 /* We allow less than LASTDB threads only for debugging. */
2528 if (debug_level == 0)
2529 nthreads = MAX (nthreads, lastdb);
2531 /* Create the threads which prune the databases. */
2532 // XXX Ideally this work would be done by some of the worker threads.
2533 // XXX But this is problematic since we would need to be able to wake
2534 // XXX them up explicitly as well as part of the group handling the
2535 // XXX ready-list. This requires an operation where we can wait on
2536 // XXX two conditional variables at the same time. This operation
2537 // XXX does not exist (yet).
2538 for (long int i = 0; i < lastdb; ++i)
2540 /* Initialize the conditional variable. */
2541 if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
2543 dbg_log (_("could not initialize conditional variable"));
2544 do_exit (1, 0, NULL);
2547 pthread_t th;
2548 if (dbs[i].enabled
2549 && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
2551 dbg_log (_("could not start clean-up thread; terminating"));
2552 do_exit (1, 0, NULL);
2556 pthread_condattr_destroy (&condattr);
2558 for (long int i = 0; i < nthreads; ++i)
2560 pthread_t th;
2561 if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
2563 if (i == 0)
2565 dbg_log (_("could not start any worker thread; terminating"));
2566 do_exit (1, 0, NULL);
2569 break;
2573 /* Now it is safe to let the parent know that we're doing fine and it can
2574 exit. */
2575 notify_parent (0);
2577 /* Determine how much room for descriptors we should initially
2578 allocate. This might need to change later if we cap the number
2579 with MAXCONN. */
2580 const long int nfds = sysconf (_SC_OPEN_MAX);
2581 #define MINCONN 32
2582 #define MAXCONN 16384
2583 if (nfds == -1 || nfds > MAXCONN)
2584 nconns = MAXCONN;
2585 else if (nfds < MINCONN)
2586 nconns = MINCONN;
2587 else
2588 nconns = nfds;
2590 /* We need memory to pass descriptors on to the worker threads. */
2591 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
2592 /* Array to keep track when connection was accepted. */
2593 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
2595 /* In the main thread we execute the loop which handles incoming
2596 connections. */
2597 #ifdef HAVE_EPOLL
2598 int efd = epoll_create (100);
2599 if (efd != -1)
2601 main_loop_epoll (efd);
2602 close (efd);
2604 #endif
2606 main_loop_poll ();
2610 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2611 this function is called, we are not listening on the nscd socket yet so
2612 we can just use the ordinary lookup functions without causing a lockup */
2613 static void
2614 begin_drop_privileges (void)
2616 struct passwd *pwd = getpwnam (server_user);
2618 if (pwd == NULL)
2620 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2621 do_exit (EXIT_FAILURE, 0,
2622 _("Failed to run nscd as user '%s'"), server_user);
2625 server_uid = pwd->pw_uid;
2626 server_gid = pwd->pw_gid;
2628 /* Save the old UID/GID if we have to change back. */
2629 if (paranoia)
2631 old_uid = getuid ();
2632 old_gid = getgid ();
2635 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
2637 /* This really must never happen. */
2638 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2639 do_exit (EXIT_FAILURE, errno,
2640 _("initial getgrouplist failed"));
2643 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
2645 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
2646 == -1)
2648 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2649 do_exit (EXIT_FAILURE, errno, _("getgrouplist failed"));
2654 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2655 run nscd as the user specified in the configuration file. */
2656 static void
2657 finish_drop_privileges (void)
2659 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2660 /* We need to preserve the capabilities to connect to the audit daemon. */
2661 cap_t new_caps = preserve_capabilities ();
2662 #endif
2664 if (setgroups (server_ngroups, server_groups) == -1)
2666 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2667 do_exit (EXIT_FAILURE, errno, _("setgroups failed"));
2670 int res;
2671 if (paranoia)
2672 res = setresgid (server_gid, server_gid, old_gid);
2673 else
2674 res = setgid (server_gid);
2675 if (res == -1)
2677 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2678 do_exit (4, errno, "setgid");
2681 if (paranoia)
2682 res = setresuid (server_uid, server_uid, old_uid);
2683 else
2684 res = setuid (server_uid);
2685 if (res == -1)
2687 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2688 do_exit (4, errno, "setuid");
2691 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2692 /* Remove the temporary capabilities. */
2693 install_real_capabilities (new_caps);
2694 #endif