x86: Consolidate NPTL/non versions of vfork
[glibc.git] / nscd / connections.c
blob180ae7760a4afd78921be7d477d94f5fdd3da3fd
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2014 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>. */
19 #include <alloca.h>
20 #include <assert.h>
21 #include <atomic.h>
22 #include <error.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <grp.h>
26 #include <ifaddrs.h>
27 #include <libintl.h>
28 #include <pthread.h>
29 #include <pwd.h>
30 #include <resolv.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <stdint.h>
35 #include <arpa/inet.h>
36 #ifdef HAVE_NETLINK
37 # include <linux/netlink.h>
38 # include <linux/rtnetlink.h>
39 #endif
40 #ifdef HAVE_EPOLL
41 # include <sys/epoll.h>
42 #endif
43 #ifdef HAVE_INOTIFY
44 # include <sys/inotify.h>
45 #endif
46 #include <sys/mman.h>
47 #include <sys/param.h>
48 #include <sys/poll.h>
49 #ifdef HAVE_SENDFILE
50 # include <sys/sendfile.h>
51 #endif
52 #include <sys/socket.h>
53 #include <sys/stat.h>
54 #include <sys/un.h>
56 #include "nscd.h"
57 #include "dbg_log.h"
58 #include "selinux.h"
59 #include <resolv/resolv.h>
61 #include <kernel-features.h>
/* Support to run nscd as an unprivileged user.  SERVER_USER is the
   account name; it is NULL unless one has been set, and nscd_init only
   drops privileges when it is non-NULL.  The numeric ids and the group
   list below are private to this file.  */
const char *server_user;
static uid_t server_uid;
static gid_t server_gid;
static gid_t *server_groups;
static int server_ngroups;

/* User name and uid associated with statistics requests.  */
const char *stat_user;
uid_t stat_uid;

/* Fallback when the system headers do not provide NGROUPS.  */
#ifndef NGROUPS
# define NGROUPS 32
#endif

/* Thread attributes object shared by code in this file.  */
static pthread_attr_t attr;

/* Privilege dropping is done in two steps, both defined further down.  */
static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
81 /* Map request type to a string. */
82 const char *const serv2str[LASTREQ] =
84 [GETPWBYNAME] = "GETPWBYNAME",
85 [GETPWBYUID] = "GETPWBYUID",
86 [GETGRBYNAME] = "GETGRBYNAME",
87 [GETGRBYGID] = "GETGRBYGID",
88 [GETHOSTBYNAME] = "GETHOSTBYNAME",
89 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
90 [GETHOSTBYADDR] = "GETHOSTBYADDR",
91 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
92 [SHUTDOWN] = "SHUTDOWN",
93 [GETSTAT] = "GETSTAT",
94 [INVALIDATE] = "INVALIDATE",
95 [GETFDPW] = "GETFDPW",
96 [GETFDGR] = "GETFDGR",
97 [GETFDHST] = "GETFDHST",
98 [GETAI] = "GETAI",
99 [INITGROUPS] = "INITGROUPS",
100 [GETSERVBYNAME] = "GETSERVBYNAME",
101 [GETSERVBYPORT] = "GETSERVBYPORT",
102 [GETFDSERV] = "GETFDSERV",
103 [GETNETGRENT] = "GETNETGRENT",
104 [INNETGR] = "INNETGR",
105 [GETFDNETGR] = "GETFDNETGR"
108 /* The control data structures for the services. */
109 struct database_dyn dbs[lastdb] =
111 [pwddb] = {
112 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
113 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
114 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
115 .enabled = 0,
116 .check_file = 1,
117 .persistent = 0,
118 .propagate = 1,
119 .shared = 0,
120 .max_db_size = DEFAULT_MAX_DB_SIZE,
121 .suggested_module = DEFAULT_SUGGESTED_MODULE,
122 .db_filename = _PATH_NSCD_PASSWD_DB,
123 .disabled_iov = &pwd_iov_disabled,
124 .postimeout = 3600,
125 .negtimeout = 20,
126 .wr_fd = -1,
127 .ro_fd = -1,
128 .mmap_used = false
130 [grpdb] = {
131 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
132 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
133 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
134 .enabled = 0,
135 .check_file = 1,
136 .persistent = 0,
137 .propagate = 1,
138 .shared = 0,
139 .max_db_size = DEFAULT_MAX_DB_SIZE,
140 .suggested_module = DEFAULT_SUGGESTED_MODULE,
141 .db_filename = _PATH_NSCD_GROUP_DB,
142 .disabled_iov = &grp_iov_disabled,
143 .postimeout = 3600,
144 .negtimeout = 60,
145 .wr_fd = -1,
146 .ro_fd = -1,
147 .mmap_used = false
149 [hstdb] = {
150 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
151 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
152 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
153 .enabled = 0,
154 .check_file = 1,
155 .persistent = 0,
156 .propagate = 0, /* Not used. */
157 .shared = 0,
158 .max_db_size = DEFAULT_MAX_DB_SIZE,
159 .suggested_module = DEFAULT_SUGGESTED_MODULE,
160 .db_filename = _PATH_NSCD_HOSTS_DB,
161 .disabled_iov = &hst_iov_disabled,
162 .postimeout = 3600,
163 .negtimeout = 20,
164 .wr_fd = -1,
165 .ro_fd = -1,
166 .mmap_used = false
168 [servdb] = {
169 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
170 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
171 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
172 .enabled = 0,
173 .check_file = 1,
174 .persistent = 0,
175 .propagate = 0, /* Not used. */
176 .shared = 0,
177 .max_db_size = DEFAULT_MAX_DB_SIZE,
178 .suggested_module = DEFAULT_SUGGESTED_MODULE,
179 .db_filename = _PATH_NSCD_SERVICES_DB,
180 .disabled_iov = &serv_iov_disabled,
181 .postimeout = 28800,
182 .negtimeout = 20,
183 .wr_fd = -1,
184 .ro_fd = -1,
185 .mmap_used = false
187 [netgrdb] = {
188 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
189 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
190 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
191 .enabled = 0,
192 .check_file = 1,
193 .persistent = 0,
194 .propagate = 0, /* Not used. */
195 .shared = 0,
196 .max_db_size = DEFAULT_MAX_DB_SIZE,
197 .suggested_module = DEFAULT_SUGGESTED_MODULE,
198 .db_filename = _PATH_NSCD_NETGROUP_DB,
199 .disabled_iov = &netgroup_iov_disabled,
200 .postimeout = 28800,
201 .negtimeout = 20,
202 .wr_fd = -1,
203 .ro_fd = -1,
204 .mmap_used = false
209 /* Mapping of request type to database. */
210 static struct
212 bool data_request;
213 struct database_dyn *db;
214 } const reqinfo[LASTREQ] =
216 [GETPWBYNAME] = { true, &dbs[pwddb] },
217 [GETPWBYUID] = { true, &dbs[pwddb] },
218 [GETGRBYNAME] = { true, &dbs[grpdb] },
219 [GETGRBYGID] = { true, &dbs[grpdb] },
220 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
221 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
222 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
223 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
224 [SHUTDOWN] = { false, NULL },
225 [GETSTAT] = { false, NULL },
226 [SHUTDOWN] = { false, NULL },
227 [GETFDPW] = { false, &dbs[pwddb] },
228 [GETFDGR] = { false, &dbs[grpdb] },
229 [GETFDHST] = { false, &dbs[hstdb] },
230 [GETAI] = { true, &dbs[hstdb] },
231 [INITGROUPS] = { true, &dbs[grpdb] },
232 [GETSERVBYNAME] = { true, &dbs[servdb] },
233 [GETSERVBYPORT] = { true, &dbs[servdb] },
234 [GETFDSERV] = { false, &dbs[servdb] },
235 [GETNETGRENT] = { true, &dbs[netgrdb] },
236 [INNETGR] = { true, &dbs[netgrdb] },
237 [GETFDNETGR] = { false, &dbs[netgrdb] }
/* Initial number of threads to use.  -1 means "not configured";
   nscd_init replaces it with a default.  */
int nthreads = -1;
/* Maximum number of threads to use.  */
int max_nthreads = 32;

/* Socket for incoming connections.  */
static int sock;

#ifdef HAVE_INOTIFY
/* Inotify descriptor, or -1 while none is open.  */
int inotify_fd = -1;
#endif

#ifdef HAVE_NETLINK
/* Descriptor for netlink status updates, or -1 while none is open.  */
static int nl_status_fd = -1;
#endif

#ifndef __ASSUME_SOCK_CLOEXEC
/* Negative if SOCK_CLOEXEC is not supported, positive if it is, zero
   before we know the result.  */
static int have_sock_cloexec;
#endif
#ifndef __ASSUME_ACCEPT4
/* Run-time state for accept4 availability.  */
static int have_accept4;
#endif

/* Number of times clients had to wait.  */
unsigned long int client_queued;
/* Send all LEN bytes starting at BUF over socket FD, restarting after
   interrupts and continuing after short writes.  SIGPIPE is suppressed
   via MSG_NOSIGNAL.

   Returns the negative result of send if it fails; otherwise the number
   of bytes actually sent, which is less than LEN only if send reported
   zero bytes.  send is always called at least once, even for LEN == 0.  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cursor = buf;
  size_t left = len;

  for (;;)
    {
      ssize_t sent = TEMP_FAILURE_RETRY (send (fd, cursor, left,
                                               MSG_NOSIGNAL));
      if (sent < 0)
        return sent;
      if (sent == 0)
        break;

      cursor += sent;
      left -= sent;
      if (left == 0)
        break;
    }

  return len - left;
}
#ifdef HAVE_SENDFILE
/* Copy LEN bytes from descriptor FROMFD, starting at offset OFF, to
   TOFD using sendfile.  Interrupted calls are restarted and short
   transfers continued; sendfile advances OFF itself.

   Returns the negative result of sendfile if it fails; otherwise the
   number of bytes transferred, which is less than LEN only if sendfile
   reported zero bytes.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t left = len;

  for (;;)
    {
      ssize_t moved = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off,
                                                    left));
      if (moved < 0)
        return moved;
      if (moved == 0)
        break;

      left -= moved;
      if (left <= 0)
        break;
    }

  return len - left;
}
#endif
/* Markers stored in the per-byte use map while a persistent database is
   verified.  The low bits name the kind of object occupying a byte
   (hash entry, key, data); the flag bits mark the first and last byte
   of an object and whether a "first" reference to it has been seen.  */
enum usekey
  {
    use_not = 0,

    /* Kind of object.  */
    use_he = 1,
#if SEPARATE_KEY
    use_key = 2,
#endif
    use_data = 3,

    /* The following three are not really used, they are symbolic
       constants.  */
    use_first = 1 << 4,
    use_begin = 1 << 5,
    use_end = 1 << 6,

    /* Combinations of kind and position.  */
    use_he_begin = use_he | use_begin,
    use_he_end = use_he | use_end,
#if SEPARATE_KEY
    use_key_begin = use_key | use_begin,
    use_key_end = use_key | use_end,
    use_key_first = use_key_begin | use_first,
#endif
    use_data_begin = use_data | use_begin,
    use_data_end = use_data | use_end,
    use_data_first = use_data_begin | use_first
  };
334 static int
335 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
336 enum usekey use, ref_t start, size_t len)
338 assert (len >= 2);
340 if (start > first_free || start + len > first_free
341 || (start & BLOCK_ALIGN_M1))
342 return 0;
344 if (usemap[start] == use_not)
346 /* Add the start marker. */
347 usemap[start] = use | use_begin;
348 use &= ~use_first;
350 while (--len > 0)
351 if (usemap[++start] != use_not)
352 return 0;
353 else
354 usemap[start] = use;
356 /* Add the end marker. */
357 usemap[start] = use | use_end;
359 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
361 /* Hash entries can't be shared. */
362 if (use == use_he)
363 return 0;
365 usemap[start] |= (use & use_first);
366 use &= ~use_first;
368 while (--len > 1)
369 if (usemap[++start] != use)
370 return 0;
372 if (usemap[++start] != (use | use_end))
373 return 0;
375 else
376 /* Points to a wrong object or somewhere in the middle. */
377 return 0;
379 return 1;
383 /* Verify data in persistent database. */
384 static int
385 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
387 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb
388 || dbnr == netgrdb);
390 time_t now = time (NULL);
392 struct database_pers_head *head = mem;
393 struct database_pers_head head_copy = *head;
395 /* Check that the header that was read matches the head in the database. */
396 if (memcmp (head, readhead, sizeof (*head)) != 0)
397 return 0;
399 /* First some easy tests: make sure the database header is sane. */
400 if (head->version != DB_VERSION
401 || head->header_size != sizeof (*head)
402 /* We allow a timestamp to be one hour ahead of the current time.
403 This should cover daylight saving time changes. */
404 || head->timestamp > now + 60 * 60 + 60
405 || (head->gc_cycle & 1)
406 || head->module == 0
407 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
408 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
409 || head->first_free < 0
410 || head->first_free > head->data_size
411 || (head->first_free & BLOCK_ALIGN_M1) != 0
412 || head->maxnentries < 0
413 || head->maxnsearched < 0)
414 return 0;
416 uint8_t *usemap = calloc (head->first_free, 1);
417 if (usemap == NULL)
418 return 0;
420 const char *data = (char *) &head->array[roundup (head->module,
421 ALIGN / sizeof (ref_t))];
423 nscd_ssize_t he_cnt = 0;
424 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
426 ref_t trail = head->array[cnt];
427 ref_t work = trail;
428 int tick = 0;
430 while (work != ENDREF)
432 if (! check_use (data, head->first_free, usemap, use_he, work,
433 sizeof (struct hashentry)))
434 goto fail;
436 /* Now we know we can dereference the record. */
437 struct hashentry *here = (struct hashentry *) (data + work);
439 ++he_cnt;
441 /* Make sure the record is for this type of service. */
442 if (here->type >= LASTREQ
443 || reqinfo[here->type].db != &dbs[dbnr])
444 goto fail;
446 /* Validate boolean field value. */
447 if (here->first != false && here->first != true)
448 goto fail;
450 if (here->len < 0)
451 goto fail;
453 /* Now the data. */
454 if (here->packet < 0
455 || here->packet > head->first_free
456 || here->packet + sizeof (struct datahead) > head->first_free)
457 goto fail;
459 struct datahead *dh = (struct datahead *) (data + here->packet);
461 if (! check_use (data, head->first_free, usemap,
462 use_data | (here->first ? use_first : 0),
463 here->packet, dh->allocsize))
464 goto fail;
466 if (dh->allocsize < sizeof (struct datahead)
467 || dh->recsize > dh->allocsize
468 || (dh->notfound != false && dh->notfound != true)
469 || (dh->usable != false && dh->usable != true))
470 goto fail;
472 if (here->key < here->packet + sizeof (struct datahead)
473 || here->key > here->packet + dh->allocsize
474 || here->key + here->len > here->packet + dh->allocsize)
476 #if SEPARATE_KEY
477 /* If keys can appear outside of data, this should be done
478 instead. But gc doesn't mark the data in that case. */
479 if (! check_use (data, head->first_free, usemap,
480 use_key | (here->first ? use_first : 0),
481 here->key, here->len))
482 #endif
483 goto fail;
486 work = here->next;
488 if (work == trail)
489 /* A circular list, this must not happen. */
490 goto fail;
491 if (tick)
492 trail = ((struct hashentry *) (data + trail))->next;
493 tick = 1 - tick;
497 if (he_cnt != head->nentries)
498 goto fail;
500 /* See if all data and keys had at least one reference from
501 he->first == true hashentry. */
502 for (ref_t idx = 0; idx < head->first_free; ++idx)
504 #if SEPARATE_KEY
505 if (usemap[idx] == use_key_begin)
506 goto fail;
507 #endif
508 if (usemap[idx] == use_data_begin)
509 goto fail;
512 /* Finally, make sure the database hasn't changed since the first test. */
513 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
514 goto fail;
516 free (usemap);
517 return 1;
519 fail:
520 free (usemap);
521 return 0;
/* Flags added to every open call in this file: request close-on-exec
   where the headers provide O_CLOEXEC, nothing otherwise.  */
#ifndef O_CLOEXEC
# define EXTRA_O_FLAGS 0
#else
# define EXTRA_O_FLAGS O_CLOEXEC
#endif
532 /* Initialize database information structures. */
533 void
534 nscd_init (void)
536 /* Look up unprivileged uid/gid/groups before we start listening on the
537 socket */
538 if (server_user != NULL)
539 begin_drop_privileges ();
541 if (nthreads == -1)
542 /* No configuration for this value, assume a default. */
543 nthreads = 4;
545 for (size_t cnt = 0; cnt < lastdb; ++cnt)
546 if (dbs[cnt].enabled)
548 pthread_rwlock_init (&dbs[cnt].lock, NULL);
549 pthread_mutex_init (&dbs[cnt].memlock, NULL);
551 if (dbs[cnt].persistent)
553 /* Try to open the appropriate file on disk. */
554 int fd = open (dbs[cnt].db_filename, O_RDWR | EXTRA_O_FLAGS);
555 if (fd != -1)
557 char *msg = NULL;
558 struct stat64 st;
559 void *mem;
560 size_t total;
561 struct database_pers_head head;
562 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
563 sizeof (head)));
564 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
566 fail_db_errno:
567 /* The code is single-threaded at this point so
568 using strerror is just fine. */
569 msg = strerror (errno);
570 fail_db:
571 dbg_log (_("invalid persistent database file \"%s\": %s"),
572 dbs[cnt].db_filename, msg);
573 unlink (dbs[cnt].db_filename);
575 else if (head.module == 0 && head.data_size == 0)
577 /* The file has been created, but the head has not
578 been initialized yet. */
579 msg = _("uninitialized header");
580 goto fail_db;
582 else if (head.header_size != (int) sizeof (head))
584 msg = _("header size does not match");
585 goto fail_db;
587 else if ((total = (sizeof (head)
588 + roundup (head.module * sizeof (ref_t),
589 ALIGN)
590 + head.data_size))
591 > st.st_size
592 || total < sizeof (head))
594 msg = _("file size does not match");
595 goto fail_db;
597 /* Note we map with the maximum size allowed for the
598 database. This is likely much larger than the
599 actual file size. This is OK on most OSes since
600 extensions of the underlying file will
601 automatically translate more pages available for
602 memory access. */
603 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
604 PROT_READ | PROT_WRITE,
605 MAP_SHARED, fd, 0))
606 == MAP_FAILED)
607 goto fail_db_errno;
608 else if (!verify_persistent_db (mem, &head, cnt))
610 munmap (mem, total);
611 msg = _("verification failed");
612 goto fail_db;
614 else
616 /* Success. We have the database. */
617 dbs[cnt].head = mem;
618 dbs[cnt].memsize = total;
619 dbs[cnt].data = (char *)
620 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
621 ALIGN / sizeof (ref_t))];
622 dbs[cnt].mmap_used = true;
624 if (dbs[cnt].suggested_module > head.module)
625 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
626 dbnames[cnt]);
628 dbs[cnt].wr_fd = fd;
629 fd = -1;
630 /* We also need a read-only descriptor. */
631 if (dbs[cnt].shared)
633 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
634 O_RDONLY | EXTRA_O_FLAGS);
635 if (dbs[cnt].ro_fd == -1)
636 dbg_log (_("\
637 cannot create read-only descriptor for \"%s\"; no mmap"),
638 dbs[cnt].db_filename);
641 // XXX Shall we test whether the descriptors actually
642 // XXX point to the same file?
645 /* Close the file descriptors in case something went
646 wrong in which case the variable have not been
647 assigned -1. */
648 if (fd != -1)
649 close (fd);
651 else if (errno == EACCES)
652 do_exit (EXIT_FAILURE, 0, _("cannot access '%s'"),
653 dbs[cnt].db_filename);
656 if (dbs[cnt].head == NULL)
658 /* No database loaded. Allocate the data structure,
659 possibly on disk. */
660 struct database_pers_head head;
661 size_t total = (sizeof (head)
662 + roundup (dbs[cnt].suggested_module
663 * sizeof (ref_t), ALIGN)
664 + (dbs[cnt].suggested_module
665 * DEFAULT_DATASIZE_PER_BUCKET));
667 /* Try to create the database. If we do not need a
668 persistent database create a temporary file. */
669 int fd;
670 int ro_fd = -1;
671 if (dbs[cnt].persistent)
673 fd = open (dbs[cnt].db_filename,
674 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | EXTRA_O_FLAGS,
675 S_IRUSR | S_IWUSR);
676 if (fd != -1 && dbs[cnt].shared)
677 ro_fd = open (dbs[cnt].db_filename,
678 O_RDONLY | EXTRA_O_FLAGS);
680 else
682 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
683 fd = mkostemp (fname, EXTRA_O_FLAGS);
685 /* We do not need the file name anymore after we
686 opened another file descriptor in read-only mode. */
687 if (fd != -1)
689 if (dbs[cnt].shared)
690 ro_fd = open (fname, O_RDONLY | EXTRA_O_FLAGS);
692 unlink (fname);
696 if (fd == -1)
698 if (errno == EEXIST)
700 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
701 dbnames[cnt], dbs[cnt].db_filename);
702 do_exit (1, 0, NULL);
705 if (dbs[cnt].persistent)
706 dbg_log (_("cannot create %s; no persistent database used"),
707 dbs[cnt].db_filename);
708 else
709 dbg_log (_("cannot create %s; no sharing possible"),
710 dbs[cnt].db_filename);
712 dbs[cnt].persistent = 0;
713 // XXX remember: no mmap
715 else
717 /* Tell the user if we could not create the read-only
718 descriptor. */
719 if (ro_fd == -1 && dbs[cnt].shared)
720 dbg_log (_("\
721 cannot create read-only descriptor for \"%s\"; no mmap"),
722 dbs[cnt].db_filename);
724 /* Before we create the header, initialize the hash
725 table. That way if we get interrupted while writing
726 the header we can recognize a partially initialized
727 database. */
728 size_t ps = sysconf (_SC_PAGESIZE);
729 char tmpbuf[ps];
730 assert (~ENDREF == 0);
731 memset (tmpbuf, '\xff', ps);
733 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
734 off_t offset = sizeof (head);
736 size_t towrite;
737 if (offset % ps != 0)
739 towrite = MIN (remaining, ps - (offset % ps));
740 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
741 goto write_fail;
742 offset += towrite;
743 remaining -= towrite;
746 while (remaining > ps)
748 if (pwrite (fd, tmpbuf, ps, offset) == -1)
749 goto write_fail;
750 offset += ps;
751 remaining -= ps;
754 if (remaining > 0
755 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
756 goto write_fail;
758 /* Create the header of the file. */
759 struct database_pers_head head =
761 .version = DB_VERSION,
762 .header_size = sizeof (head),
763 .module = dbs[cnt].suggested_module,
764 .data_size = (dbs[cnt].suggested_module
765 * DEFAULT_DATASIZE_PER_BUCKET),
766 .first_free = 0
768 void *mem;
770 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
771 != sizeof (head))
772 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
773 != 0)
774 || (mem = mmap (NULL, dbs[cnt].max_db_size,
775 PROT_READ | PROT_WRITE,
776 MAP_SHARED, fd, 0)) == MAP_FAILED)
778 write_fail:
779 unlink (dbs[cnt].db_filename);
780 dbg_log (_("cannot write to database file %s: %s"),
781 dbs[cnt].db_filename, strerror (errno));
782 dbs[cnt].persistent = 0;
784 else
786 /* Success. */
787 dbs[cnt].head = mem;
788 dbs[cnt].data = (char *)
789 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
790 ALIGN / sizeof (ref_t))];
791 dbs[cnt].memsize = total;
792 dbs[cnt].mmap_used = true;
794 /* Remember the descriptors. */
795 dbs[cnt].wr_fd = fd;
796 dbs[cnt].ro_fd = ro_fd;
797 fd = -1;
798 ro_fd = -1;
801 if (fd != -1)
802 close (fd);
803 if (ro_fd != -1)
804 close (ro_fd);
808 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
809 /* We do not check here whether the O_CLOEXEC provided to the
810 open call was successful or not. The two fcntl calls are
811 only performed once each per process start-up and therefore
812 is not noticeable at all. */
813 if (paranoia
814 && ((dbs[cnt].wr_fd != -1
815 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
816 || (dbs[cnt].ro_fd != -1
817 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
819 dbg_log (_("\
820 cannot set socket to close on exec: %s; disabling paranoia mode"),
821 strerror (errno));
822 paranoia = 0;
824 #endif
826 if (dbs[cnt].head == NULL)
828 /* We do not use the persistent database. Just
829 create an in-memory data structure. */
830 assert (! dbs[cnt].persistent);
832 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
833 + (dbs[cnt].suggested_module
834 * sizeof (ref_t)));
835 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
836 assert (~ENDREF == 0);
837 memset (dbs[cnt].head->array, '\xff',
838 dbs[cnt].suggested_module * sizeof (ref_t));
839 dbs[cnt].head->module = dbs[cnt].suggested_module;
840 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
841 * dbs[cnt].head->module);
842 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
843 dbs[cnt].head->first_free = 0;
845 dbs[cnt].shared = 0;
846 assert (dbs[cnt].ro_fd == -1);
850 /* Create the socket. */
851 #ifndef __ASSUME_SOCK_CLOEXEC
852 sock = -1;
853 if (have_sock_cloexec >= 0)
854 #endif
856 sock = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
857 #ifndef __ASSUME_SOCK_CLOEXEC
858 if (have_sock_cloexec == 0)
859 have_sock_cloexec = sock != -1 || errno != EINVAL ? 1 : -1;
860 #endif
862 #ifndef __ASSUME_SOCK_CLOEXEC
863 if (have_sock_cloexec < 0)
864 sock = socket (AF_UNIX, SOCK_STREAM, 0);
865 #endif
866 if (sock < 0)
868 dbg_log (_("cannot open socket: %s"), strerror (errno));
869 do_exit (errno == EACCES ? 4 : 1, 0, NULL);
871 /* Bind a name to the socket. */
872 struct sockaddr_un sock_addr;
873 sock_addr.sun_family = AF_UNIX;
874 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
875 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
877 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
878 do_exit (errno == EACCES ? 4 : 1, 0, NULL);
881 #ifndef __ASSUME_SOCK_CLOEXEC
882 if (have_sock_cloexec < 0)
884 /* We don't want to get stuck on accept. */
885 int fl = fcntl (sock, F_GETFL);
886 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
888 dbg_log (_("cannot change socket to nonblocking mode: %s"),
889 strerror (errno));
890 do_exit (1, 0, NULL);
893 /* The descriptor needs to be closed on exec. */
894 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
896 dbg_log (_("cannot set socket to close on exec: %s"),
897 strerror (errno));
898 do_exit (1, 0, NULL);
901 #endif
903 /* Set permissions for the socket. */
904 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
906 /* Set the socket up to accept connections. */
907 if (listen (sock, SOMAXCONN) < 0)
909 dbg_log (_("cannot enable socket to accept connections: %s"),
910 strerror (errno));
911 do_exit (1, 0, NULL);
914 #ifdef HAVE_NETLINK
915 if (dbs[hstdb].enabled)
917 /* Try to open netlink socket to monitor network setting changes. */
918 nl_status_fd = socket (AF_NETLINK,
919 SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK,
920 NETLINK_ROUTE);
921 if (nl_status_fd != -1)
923 struct sockaddr_nl snl;
924 memset (&snl, '\0', sizeof (snl));
925 snl.nl_family = AF_NETLINK;
926 /* XXX Is this the best set to use? */
927 snl.nl_groups = (RTMGRP_IPV4_IFADDR | RTMGRP_TC | RTMGRP_IPV4_MROUTE
928 | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_RULE
929 | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_MROUTE
930 | RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFINFO
931 | RTMGRP_IPV6_PREFIX);
933 if (bind (nl_status_fd, (struct sockaddr *) &snl, sizeof (snl)) != 0)
935 close (nl_status_fd);
936 nl_status_fd = -1;
938 else
940 /* Start the timestamp process. */
941 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
942 = __bump_nl_timestamp ();
944 # ifndef __ASSUME_SOCK_CLOEXEC
945 if (have_sock_cloexec < 0)
947 /* We don't want to get stuck on accept. */
948 int fl = fcntl (nl_status_fd, F_GETFL);
949 if (fl == -1
950 || fcntl (nl_status_fd, F_SETFL, fl | O_NONBLOCK) == -1)
952 dbg_log (_("\
953 cannot change socket to nonblocking mode: %s"),
954 strerror (errno));
955 do_exit (1, 0, NULL);
958 /* The descriptor needs to be closed on exec. */
959 if (paranoia
960 && fcntl (nl_status_fd, F_SETFD, FD_CLOEXEC) == -1)
962 dbg_log (_("cannot set socket to close on exec: %s"),
963 strerror (errno));
964 do_exit (1, 0, NULL);
967 # endif
971 #endif
973 /* Change to unprivileged uid/gid/groups if specified in config file */
974 if (server_user != NULL)
975 finish_drop_privileges ();
979 /* Register the file in FINFO as a traced file for the database DBS[DBIX].
981 We support registering multiple files per database. Each call to
982 register_traced_file adds to the list of registered files.
984 When we prune the database, either through timeout or a request to
985 invalidate, we will check to see if any of the registered files has changed.
986 When we accept new connections to handle a cache request we will also
987 check to see if any of the registered files has changed.
989 If we have inotify support then we install an inotify fd to notify us of
990 file deletion or modification, both of which will require we invalidate
991 the cache for the database. Without inotify support we stat the file and
992 store st_mtime to determine if the file has been modified. */
993 void
994 register_traced_file (size_t dbidx, struct traced_file *finfo)
996 /* If the database is disabled or file checking is disabled
997 then ignore the registration. */
998 if (! dbs[dbidx].enabled || ! dbs[dbidx].check_file)
999 return;
1001 if (__glibc_unlikely (debug_level > 0))
1002 dbg_log (_("register trace file %s for database %s"),
1003 finfo->fname, dbnames[dbidx]);
1005 #ifdef HAVE_INOTIFY
1006 if (inotify_fd < 0
1007 || (finfo->inotify_descr = inotify_add_watch (inotify_fd, finfo->fname,
1008 IN_DELETE_SELF
1009 | IN_MODIFY)) < 0)
1010 #endif
1012 /* We need the modification date of the file. */
1013 struct stat64 st;
1015 if (stat64 (finfo->fname, &st) < 0)
1017 /* We cannot stat() the file, disable file checking. */
1018 dbg_log (_("cannot stat() file `%s': %s"),
1019 finfo->fname, strerror (errno));
1020 return;
1023 finfo->inotify_descr = -1;
1024 finfo->mtime = st.st_mtime;
1027 /* Queue up the file name. */
1028 finfo->next = dbs[dbidx].traced_files;
1029 dbs[dbidx].traced_files = finfo;
1033 /* Close the connections. */
1034 void
1035 close_sockets (void)
1037 close (sock);
1041 static void
1042 invalidate_cache (char *key, int fd)
1044 dbtype number;
1045 int32_t resp;
1047 for (number = pwddb; number < lastdb; ++number)
1048 if (strcmp (key, dbnames[number]) == 0)
1050 if (number == hstdb)
1052 struct traced_file *runp = dbs[hstdb].traced_files;
1053 while (runp != NULL)
1054 if (runp->call_res_init)
1056 res_init ();
1057 break;
1059 else
1060 runp = runp->next;
1062 break;
1065 if (number == lastdb)
1067 resp = EINVAL;
1068 writeall (fd, &resp, sizeof (resp));
1069 return;
1072 if (dbs[number].enabled)
1074 pthread_mutex_lock (&dbs[number].prune_run_lock);
1075 prune_cache (&dbs[number], LONG_MAX, fd);
1076 pthread_mutex_unlock (&dbs[number].prune_run_lock);
1078 else
1080 resp = 0;
1081 writeall (fd, &resp, sizeof (resp));
1086 #ifdef SCM_RIGHTS
1087 static void
1088 send_ro_fd (struct database_dyn *db, char *key, int fd)
1090 /* If we do not have an read-only file descriptor do nothing. */
1091 if (db->ro_fd == -1)
1092 return;
1094 /* We need to send some data along with the descriptor. */
1095 uint64_t mapsize = (db->head->data_size
1096 + roundup (db->head->module * sizeof (ref_t), ALIGN)
1097 + sizeof (struct database_pers_head));
1098 struct iovec iov[2];
1099 iov[0].iov_base = key;
1100 iov[0].iov_len = strlen (key) + 1;
1101 iov[1].iov_base = &mapsize;
1102 iov[1].iov_len = sizeof (mapsize);
1104 /* Prepare the control message to transfer the descriptor. */
1105 union
1107 struct cmsghdr hdr;
1108 char bytes[CMSG_SPACE (sizeof (int))];
1109 } buf;
1110 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
1111 .msg_control = buf.bytes,
1112 .msg_controllen = sizeof (buf) };
1113 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
1115 cmsg->cmsg_level = SOL_SOCKET;
1116 cmsg->cmsg_type = SCM_RIGHTS;
1117 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
1119 int *ip = (int *) CMSG_DATA (cmsg);
1120 *ip = db->ro_fd;
1122 msg.msg_controllen = cmsg->cmsg_len;
1124 /* Send the control message. We repeat when we are interrupted but
1125 everything else is ignored. */
1126 #ifndef MSG_NOSIGNAL
1127 # define MSG_NOSIGNAL 0
1128 #endif
1129 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
1131 if (__glibc_unlikely (debug_level > 0))
1132 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
1134 #endif /* SCM_RIGHTS */
1137 /* Handle new request. */
1138 static void
1139 handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
1141 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
1143 if (debug_level > 0)
1144 dbg_log (_("\
1145 cannot handle old request version %d; current version is %d"),
1146 req->version, NSCD_VERSION);
1147 return;
1150 /* Perform the SELinux check before we go on to the standard checks. */
1151 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
1153 if (debug_level > 0)
1155 #ifdef SO_PEERCRED
1156 # ifdef PATH_MAX
1157 char buf[PATH_MAX];
1158 # else
1159 char buf[4096];
1160 # endif
1162 snprintf (buf, sizeof (buf), "/proc/%ld/exe", (long int) pid);
1163 ssize_t n = readlink (buf, buf, sizeof (buf) - 1);
1165 if (n <= 0)
1166 dbg_log (_("\
1167 request from %ld not handled due to missing permission"), (long int) pid);
1168 else
1170 buf[n] = '\0';
1171 dbg_log (_("\
1172 request from '%s' [%ld] not handled due to missing permission"),
1173 buf, (long int) pid);
1175 #else
1176 dbg_log (_("request not handled due to missing permission"));
1177 #endif
1179 return;
1182 struct database_dyn *db = reqinfo[req->type].db;
1184 /* See whether we can service the request from the cache. */
1185 if (__builtin_expect (reqinfo[req->type].data_request, true))
1187 if (__builtin_expect (debug_level, 0) > 0)
1189 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
1191 char buf[INET6_ADDRSTRLEN];
1193 dbg_log ("\t%s (%s)", serv2str[req->type],
1194 inet_ntop (req->type == GETHOSTBYADDR
1195 ? AF_INET : AF_INET6,
1196 key, buf, sizeof (buf)));
1198 else
1199 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1202 /* Is this service enabled? */
1203 if (__glibc_unlikely (!db->enabled))
1205 /* No, sent the prepared record. */
1206 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1207 db->disabled_iov->iov_len,
1208 MSG_NOSIGNAL))
1209 != (ssize_t) db->disabled_iov->iov_len
1210 && __builtin_expect (debug_level, 0) > 0)
1212 /* We have problems sending the result. */
1213 char buf[256];
1214 dbg_log (_("cannot write result: %s"),
1215 strerror_r (errno, buf, sizeof (buf)));
1218 return;
1221 /* Be sure we can read the data. */
1222 if (__glibc_unlikely (pthread_rwlock_tryrdlock (&db->lock) != 0))
1224 ++db->head->rdlockdelayed;
1225 pthread_rwlock_rdlock (&db->lock);
1228 /* See whether we can handle it from the cache. */
1229 struct datahead *cached;
1230 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1231 db, uid);
1232 if (cached != NULL)
1234 /* Hurray it's in the cache. */
1235 ssize_t nwritten;
1237 #ifdef HAVE_SENDFILE
1238 if (__glibc_likely (db->mmap_used))
1240 assert (db->wr_fd != -1);
1241 assert ((char *) cached->data > (char *) db->data);
1242 assert ((char *) cached->data - (char *) db->head
1243 + cached->recsize
1244 <= (sizeof (struct database_pers_head)
1245 + db->head->module * sizeof (ref_t)
1246 + db->head->data_size));
1247 nwritten = sendfileall (fd, db->wr_fd,
1248 (char *) cached->data
1249 - (char *) db->head, cached->recsize);
1250 # ifndef __ASSUME_SENDFILE
1251 if (nwritten == -1 && errno == ENOSYS)
1252 goto use_write;
1253 # endif
1255 else
1256 # ifndef __ASSUME_SENDFILE
1257 use_write:
1258 # endif
1259 #endif
1260 nwritten = writeall (fd, cached->data, cached->recsize);
1262 if (nwritten != cached->recsize
1263 && __builtin_expect (debug_level, 0) > 0)
1265 /* We have problems sending the result. */
1266 char buf[256];
1267 dbg_log (_("cannot write result: %s"),
1268 strerror_r (errno, buf, sizeof (buf)));
1271 pthread_rwlock_unlock (&db->lock);
1273 return;
1276 pthread_rwlock_unlock (&db->lock);
1278 else if (__builtin_expect (debug_level, 0) > 0)
1280 if (req->type == INVALIDATE)
1281 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1282 else
1283 dbg_log ("\t%s", serv2str[req->type]);
1286 /* Handle the request. */
1287 switch (req->type)
1289 case GETPWBYNAME:
1290 addpwbyname (db, fd, req, key, uid);
1291 break;
1293 case GETPWBYUID:
1294 addpwbyuid (db, fd, req, key, uid);
1295 break;
1297 case GETGRBYNAME:
1298 addgrbyname (db, fd, req, key, uid);
1299 break;
1301 case GETGRBYGID:
1302 addgrbygid (db, fd, req, key, uid);
1303 break;
1305 case GETHOSTBYNAME:
1306 addhstbyname (db, fd, req, key, uid);
1307 break;
1309 case GETHOSTBYNAMEv6:
1310 addhstbynamev6 (db, fd, req, key, uid);
1311 break;
1313 case GETHOSTBYADDR:
1314 addhstbyaddr (db, fd, req, key, uid);
1315 break;
1317 case GETHOSTBYADDRv6:
1318 addhstbyaddrv6 (db, fd, req, key, uid);
1319 break;
1321 case GETAI:
1322 addhstai (db, fd, req, key, uid);
1323 break;
1325 case INITGROUPS:
1326 addinitgroups (db, fd, req, key, uid);
1327 break;
1329 case GETSERVBYNAME:
1330 addservbyname (db, fd, req, key, uid);
1331 break;
1333 case GETSERVBYPORT:
1334 addservbyport (db, fd, req, key, uid);
1335 break;
1337 case GETNETGRENT:
1338 addgetnetgrent (db, fd, req, key, uid);
1339 break;
1341 case INNETGR:
1342 addinnetgr (db, fd, req, key, uid);
1343 break;
1345 case GETSTAT:
1346 case SHUTDOWN:
1347 case INVALIDATE:
1349 /* Get the callers credentials. */
1350 #ifdef SO_PEERCRED
1351 struct ucred caller;
1352 socklen_t optlen = sizeof (caller);
1354 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1356 char buf[256];
1358 dbg_log (_("error getting caller's id: %s"),
1359 strerror_r (errno, buf, sizeof (buf)));
1360 break;
1363 uid = caller.uid;
1364 #else
1365 /* Some systems have no SO_PEERCRED implementation. They don't
1366 care about security so we don't as well. */
1367 uid = 0;
1368 #endif
1371 /* Accept shutdown, getstat and invalidate only from root. For
1372 the stat call also allow the user specified in the config file. */
1373 if (req->type == GETSTAT)
1375 if (uid == 0 || uid == stat_uid)
1376 send_stats (fd, dbs);
1378 else if (uid == 0)
1380 if (req->type == INVALIDATE)
1381 invalidate_cache (key, fd);
1382 else
1383 termination_handler (0);
1385 break;
1387 case GETFDPW:
1388 case GETFDGR:
1389 case GETFDHST:
1390 case GETFDSERV:
1391 case GETFDNETGR:
1392 #ifdef SCM_RIGHTS
1393 send_ro_fd (reqinfo[req->type].db, key, fd);
1394 #endif
1395 break;
1397 default:
1398 /* Ignore the command, it's nothing we know. */
1399 break;
1404 /* Restart the process. */
1405 static void
1406 restart (void)
1408 /* First determine the parameters. We do not use the parameters
1409 passed to main() since in case nscd is started by running the
1410 dynamic linker this will not work. Yes, this is not the usual
1411 case but nscd is part of glibc and we occasionally do this. */
1412 size_t buflen = 1024;
1413 char *buf = alloca (buflen);
1414 size_t readlen = 0;
1415 int fd = open ("/proc/self/cmdline", O_RDONLY);
1416 if (fd == -1)
1418 dbg_log (_("\
1419 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1420 strerror (errno));
1422 paranoia = 0;
1423 return;
1426 while (1)
1428 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1429 buflen - readlen));
1430 if (n == -1)
1432 dbg_log (_("\
1433 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1434 strerror (errno));
1436 close (fd);
1437 paranoia = 0;
1438 return;
1441 readlen += n;
1443 if (readlen < buflen)
1444 break;
1446 /* We might have to extend the buffer. */
1447 size_t old_buflen = buflen;
1448 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1449 buf = memmove (newp, buf, old_buflen);
1452 close (fd);
1454 /* Parse the command line. Worst case scenario: every two
1455 characters form one parameter (one character plus NUL). */
1456 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1457 int argc = 0;
1459 char *cp = buf;
1460 while (cp < buf + readlen)
1462 argv[argc++] = cp;
1463 cp = (char *) rawmemchr (cp, '\0') + 1;
1465 argv[argc] = NULL;
1467 /* Second, change back to the old user if we changed it. */
1468 if (server_user != NULL)
1470 if (setresuid (old_uid, old_uid, old_uid) != 0)
1472 dbg_log (_("\
1473 cannot change to old UID: %s; disabling paranoia mode"),
1474 strerror (errno));
1476 paranoia = 0;
1477 return;
1480 if (setresgid (old_gid, old_gid, old_gid) != 0)
1482 dbg_log (_("\
1483 cannot change to old GID: %s; disabling paranoia mode"),
1484 strerror (errno));
1486 setuid (server_uid);
1487 paranoia = 0;
1488 return;
1492 /* Next change back to the old working directory. */
1493 if (chdir (oldcwd) == -1)
1495 dbg_log (_("\
1496 cannot change to old working directory: %s; disabling paranoia mode"),
1497 strerror (errno));
1499 if (server_user != NULL)
1501 setuid (server_uid);
1502 setgid (server_gid);
1504 paranoia = 0;
1505 return;
1508 /* Synchronize memory. */
1509 int32_t certainly[lastdb];
1510 for (int cnt = 0; cnt < lastdb; ++cnt)
1511 if (dbs[cnt].enabled)
1513 /* Make sure nobody keeps using the database. */
1514 dbs[cnt].head->timestamp = 0;
1515 certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
1516 dbs[cnt].head->nscd_certainly_running = 0;
1518 if (dbs[cnt].persistent)
1519 // XXX async OK?
1520 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1523 /* The preparations are done. */
1524 #ifdef PATH_MAX
1525 char pathbuf[PATH_MAX];
1526 #else
1527 char pathbuf[256];
1528 #endif
1529 /* Try to exec the real nscd program so the process name (as reported
1530 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1531 if readlink or the exec with the result of the readlink call fails. */
1532 ssize_t n = readlink ("/proc/self/exe", pathbuf, sizeof (pathbuf) - 1);
1533 if (n != -1)
1535 pathbuf[n] = '\0';
1536 execv (pathbuf, argv);
1538 execv ("/proc/self/exe", argv);
1540 /* If we come here, we will never be able to re-exec. */
1541 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1542 strerror (errno));
1544 if (server_user != NULL)
1546 setuid (server_uid);
1547 setgid (server_gid);
1549 if (chdir ("/") != 0)
1550 dbg_log (_("cannot change current working directory to \"/\": %s"),
1551 strerror (errno));
1552 paranoia = 0;
1554 /* Reenable the databases. */
1555 time_t now = time (NULL);
1556 for (int cnt = 0; cnt < lastdb; ++cnt)
1557 if (dbs[cnt].enabled)
1559 dbs[cnt].head->timestamp = now;
1560 dbs[cnt].head->nscd_certainly_running = certainly[cnt];
1565 /* List of file descriptors. */
1566 struct fdlist
1568 int fd;
1569 struct fdlist *next;
1571 /* Memory allocated for the list. */
1572 static struct fdlist *fdlist;
1573 /* List of currently ready-to-read file descriptors. */
1574 static struct fdlist *readylist;
1576 /* Conditional variable and mutex to signal availability of entries in
1577 READYLIST. The condvar is initialized dynamically since we might
1578 use a different clock depending on availability. */
1579 static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
1580 static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1582 /* The clock to use with the condvar. */
1583 static clockid_t timeout_clock = CLOCK_REALTIME;
1585 /* Number of threads ready to handle the READYLIST. */
1586 static unsigned long int nready;
1589 /* Function for the clean-up threads. */
1590 static void *
1591 __attribute__ ((__noreturn__))
1592 nscd_run_prune (void *p)
1594 const long int my_number = (long int) p;
1595 assert (dbs[my_number].enabled);
1597 int dont_need_update = setup_thread (&dbs[my_number]);
1599 time_t now = time (NULL);
1601 /* We are running. */
1602 dbs[my_number].head->timestamp = now;
1604 struct timespec prune_ts;
1605 if (__glibc_unlikely (clock_gettime (timeout_clock, &prune_ts) == -1))
1606 /* Should never happen. */
1607 abort ();
1609 /* Compute the initial timeout time. Prevent all the timers to go
1610 off at the same time by adding a db-based value. */
1611 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1612 dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;
1614 pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
1615 pthread_mutex_t *prune_run_lock = &dbs[my_number].prune_run_lock;
1616 pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;
1618 pthread_mutex_lock (prune_lock);
1619 while (1)
1621 /* Wait, but not forever. */
1622 int e = 0;
1623 if (! dbs[my_number].clear_cache)
1624 e = pthread_cond_timedwait (prune_cond, prune_lock, &prune_ts);
1625 assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));
1627 time_t next_wait;
1628 now = time (NULL);
1629 if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time
1630 || dbs[my_number].clear_cache)
1632 /* We will determine the new timout values based on the
1633 cache content. Should there be concurrent additions to
1634 the cache which are not accounted for in the cache
1635 pruning we want to know about it. Therefore set the
1636 timeout to the maximum. It will be descreased when adding
1637 new entries to the cache, if necessary. */
1638 dbs[my_number].wakeup_time = MAX_TIMEOUT_VALUE;
1640 /* Unconditionally reset the flag. */
1641 time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
1642 dbs[my_number].clear_cache = 0;
1644 pthread_mutex_unlock (prune_lock);
1646 /* We use a separate lock for running the prune function (instead
1647 of keeping prune_lock locked) because this enables concurrent
1648 invocations of cache_add which might modify the timeout value. */
1649 pthread_mutex_lock (prune_run_lock);
1650 next_wait = prune_cache (&dbs[my_number], prune_now, -1);
1651 pthread_mutex_unlock (prune_run_lock);
1653 next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1654 /* If clients cannot determine for sure whether nscd is running
1655 we need to wake up occasionally to update the timestamp.
1656 Wait 90% of the update period. */
1657 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1658 if (__glibc_unlikely (! dont_need_update))
1660 next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1661 dbs[my_number].head->timestamp = now;
1664 pthread_mutex_lock (prune_lock);
1666 /* Make it known when we will wake up again. */
1667 if (now + next_wait < dbs[my_number].wakeup_time)
1668 dbs[my_number].wakeup_time = now + next_wait;
1669 else
1670 next_wait = dbs[my_number].wakeup_time - now;
1672 else
1673 /* The cache was just pruned. Do not do it again now. Just
1674 use the new timeout value. */
1675 next_wait = dbs[my_number].wakeup_time - now;
1677 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1678 /* Should never happen. */
1679 abort ();
1681 /* Compute next timeout time. */
1682 prune_ts.tv_sec += next_wait;
1687 /* This is the main loop. It is replicated in different threads but
1688 the use of the ready list makes sure only one thread handles an
1689 incoming connection. */
1690 static void *
1691 __attribute__ ((__noreturn__))
1692 nscd_run_worker (void *p)
1694 char buf[256];
1696 /* Initial locking. */
1697 pthread_mutex_lock (&readylist_lock);
1699 /* One more thread available. */
1700 ++nready;
1702 while (1)
1704 while (readylist == NULL)
1705 pthread_cond_wait (&readylist_cond, &readylist_lock);
1707 struct fdlist *it = readylist->next;
1708 if (readylist->next == readylist)
1709 /* Just one entry on the list. */
1710 readylist = NULL;
1711 else
1712 readylist->next = it->next;
1714 /* Extract the information and mark the record ready to be used
1715 again. */
1716 int fd = it->fd;
1717 it->next = NULL;
1719 /* One more thread available. */
1720 --nready;
1722 /* We are done with the list. */
1723 pthread_mutex_unlock (&readylist_lock);
1725 #ifndef __ASSUME_ACCEPT4
1726 if (have_accept4 < 0)
1728 /* We do not want to block on a short read or so. */
1729 int fl = fcntl (fd, F_GETFL);
1730 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1731 goto close_and_out;
1733 #endif
1735 /* Now read the request. */
1736 request_header req;
1737 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1738 != sizeof (req), 0))
1740 /* We failed to read data. Note that this also might mean we
1741 failed because we would have blocked. */
1742 if (debug_level > 0)
1743 dbg_log (_("short read while reading request: %s"),
1744 strerror_r (errno, buf, sizeof (buf)));
1745 goto close_and_out;
1748 /* Check whether this is a valid request type. */
1749 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1750 goto close_and_out;
1752 /* Some systems have no SO_PEERCRED implementation. They don't
1753 care about security so we don't as well. */
1754 uid_t uid = -1;
1755 #ifdef SO_PEERCRED
1756 pid_t pid = 0;
1758 if (__glibc_unlikely (debug_level > 0))
1760 struct ucred caller;
1761 socklen_t optlen = sizeof (caller);
1763 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1764 pid = caller.pid;
1766 #else
1767 const pid_t pid = 0;
1768 #endif
1770 /* It should not be possible to crash the nscd with a silly
1771 request (i.e., a terribly large key). We limit the size to 1kb. */
1772 if (__builtin_expect (req.key_len, 1) < 0
1773 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1775 if (debug_level > 0)
1776 dbg_log (_("key length in request too long: %d"), req.key_len);
1778 else
1780 /* Get the key. */
1781 char keybuf[MAXKEYLEN + 1];
1783 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1784 req.key_len))
1785 != req.key_len, 0))
1787 /* Again, this can also mean we would have blocked. */
1788 if (debug_level > 0)
1789 dbg_log (_("short read while reading request key: %s"),
1790 strerror_r (errno, buf, sizeof (buf)));
1791 goto close_and_out;
1793 keybuf[req.key_len] = '\0';
1795 if (__builtin_expect (debug_level, 0) > 0)
1797 #ifdef SO_PEERCRED
1798 if (pid != 0)
1799 dbg_log (_("\
1800 handle_request: request received (Version = %d) from PID %ld"),
1801 req.version, (long int) pid);
1802 else
1803 #endif
1804 dbg_log (_("\
1805 handle_request: request received (Version = %d)"), req.version);
1808 /* Phew, we got all the data, now process it. */
1809 handle_request (fd, &req, keybuf, uid, pid);
1812 close_and_out:
1813 /* We are done. */
1814 close (fd);
1816 /* Re-locking. */
1817 pthread_mutex_lock (&readylist_lock);
1819 /* One more thread available. */
1820 ++nready;
1822 /* NOTREACHED */
1826 static unsigned int nconns;
1828 static void
1829 fd_ready (int fd)
1831 pthread_mutex_lock (&readylist_lock);
1833 /* Find an empty entry in FDLIST. */
1834 size_t inner;
1835 for (inner = 0; inner < nconns; ++inner)
1836 if (fdlist[inner].next == NULL)
1837 break;
1838 assert (inner < nconns);
1840 fdlist[inner].fd = fd;
1842 if (readylist == NULL)
1843 readylist = fdlist[inner].next = &fdlist[inner];
1844 else
1846 fdlist[inner].next = readylist->next;
1847 readylist = readylist->next = &fdlist[inner];
1850 bool do_signal = true;
1851 if (__glibc_unlikely (nready == 0))
1853 ++client_queued;
1854 do_signal = false;
1856 /* Try to start another thread to help out. */
1857 pthread_t th;
1858 if (nthreads < max_nthreads
1859 && pthread_create (&th, &attr, nscd_run_worker,
1860 (void *) (long int) nthreads) == 0)
1862 /* We got another thread. */
1863 ++nthreads;
1864 /* The new thread might need a kick. */
1865 do_signal = true;
1870 pthread_mutex_unlock (&readylist_lock);
1872 /* Tell one of the worker threads there is work to do. */
1873 if (do_signal)
1874 pthread_cond_signal (&readylist_cond);
1878 /* Check whether restarting should happen. */
1879 static bool
1880 restart_p (time_t now)
1882 return (paranoia && readylist == NULL && nready == nthreads
1883 && now >= restart_time);
/* Times at which the currently pending connections were accepted; a
   zero entry means no time is recorded.  */
static time_t *starttime;
1890 #ifdef HAVE_INOTIFY
/* Fallback for systems which do not define PATH_MAX.  */
# ifndef PATH_MAX
#  define PATH_MAX 1024
# endif

/* Buffer for one inotify event for a changed file.  BUF provides room
   for the event header followed by a name of up to PATH_MAX bytes.  */
union __inev
{
  struct inotify_event i;
  char buf[sizeof (struct inotify_event) + PATH_MAX];
};
1901 /* Process the inotify event in INEV. If the event matches any of the files
1902 registered with a database then mark that database as requiring its cache
1903 to be cleared. We indicate the cache needs clearing by setting
1904 TO_CLEAR[DBCNT] to true for the matching database. */
1905 static inline void
1906 inotify_check_files (bool *to_clear, union __inev *inev)
1908 /* Check which of the files changed. */
1909 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1911 struct traced_file *finfo = dbs[dbcnt].traced_files;
1913 while (finfo != NULL)
1915 /* Inotify event watch descriptor matches. */
1916 if (finfo->inotify_descr == inev->i.wd)
1918 /* Mark cache as needing to be cleared and reinitialize. */
1919 to_clear[dbcnt] = true;
1920 if (finfo->call_res_init)
1921 res_init ();
1922 return;
1925 finfo = finfo->next;
1930 /* If an entry in the array of booleans TO_CLEAR is TRUE then clear the cache
1931 for the associated database, otherwise do nothing. The TO_CLEAR array must
1932 have LASTDB entries. */
1933 static inline void
1934 clear_db_cache (bool *to_clear)
1936 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1937 if (to_clear[dbcnt])
1939 pthread_mutex_lock (&dbs[dbcnt].prune_lock);
1940 dbs[dbcnt].clear_cache = 1;
1941 pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
1942 pthread_cond_signal (&dbs[dbcnt].prune_cond);
1946 #endif
1948 static void
1949 __attribute__ ((__noreturn__))
1950 main_loop_poll (void)
1952 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1953 * sizeof (conns[0]));
1955 conns[0].fd = sock;
1956 conns[0].events = POLLRDNORM;
1957 size_t nused = 1;
1958 size_t firstfree = 1;
1960 #ifdef HAVE_INOTIFY
1961 if (inotify_fd != -1)
1963 conns[1].fd = inotify_fd;
1964 conns[1].events = POLLRDNORM;
1965 nused = 2;
1966 firstfree = 2;
1968 #endif
1970 #ifdef HAVE_NETLINK
1971 size_t idx_nl_status_fd = 0;
1972 if (nl_status_fd != -1)
1974 idx_nl_status_fd = nused;
1975 conns[nused].fd = nl_status_fd;
1976 conns[nused].events = POLLRDNORM;
1977 ++nused;
1978 firstfree = nused;
1980 #endif
1982 while (1)
1984 /* Wait for any event. We wait at most a couple of seconds so
1985 that we can check whether we should close any of the accepted
1986 connections since we have not received a request. */
1987 #define MAX_ACCEPT_TIMEOUT 30
1988 #define MIN_ACCEPT_TIMEOUT 5
1989 #define MAIN_THREAD_TIMEOUT \
1990 (MAX_ACCEPT_TIMEOUT * 1000 \
1991 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1993 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1995 time_t now = time (NULL);
1997 /* If there is a descriptor ready for reading or there is a new
1998 connection, process this now. */
1999 if (n > 0)
2001 if (conns[0].revents != 0)
2003 /* We have a new incoming connection. Accept the connection. */
2004 int fd;
2006 #ifndef __ASSUME_ACCEPT4
2007 fd = -1;
2008 if (have_accept4 >= 0)
2009 #endif
2011 fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
2012 SOCK_NONBLOCK));
2013 #ifndef __ASSUME_ACCEPT4
2014 if (have_accept4 == 0)
2015 have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
2016 #endif
2018 #ifndef __ASSUME_ACCEPT4
2019 if (have_accept4 < 0)
2020 fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
2021 #endif
2023 /* Use the descriptor if we have not reached the limit. */
2024 if (fd >= 0)
2026 if (firstfree < nconns)
2028 conns[firstfree].fd = fd;
2029 conns[firstfree].events = POLLRDNORM;
2030 starttime[firstfree] = now;
2031 if (firstfree >= nused)
2032 nused = firstfree + 1;
2035 ++firstfree;
2036 while (firstfree < nused && conns[firstfree].fd != -1);
2038 else
2039 /* We cannot use the connection so close it. */
2040 close (fd);
2043 --n;
2046 size_t first = 1;
2047 #ifdef HAVE_INOTIFY
2048 if (inotify_fd != -1 && conns[1].fd == inotify_fd)
2050 if (conns[1].revents != 0)
2052 bool to_clear[lastdb] = { false, };
2053 union __inev inev;
2055 /* Read all inotify events for files registered via
2056 register_traced_file(). */
2057 while (1)
2059 ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
2060 sizeof (inev)));
2061 if (nb < (ssize_t) sizeof (struct inotify_event))
2063 if (__builtin_expect (nb == -1 && errno != EAGAIN,
2066 /* Something went wrong when reading the inotify
2067 data. Better disable inotify. */
2068 dbg_log (_("\
2069 disabled inotify after read error %d"),
2070 errno);
2071 conns[1].fd = -1;
2072 firstfree = 1;
2073 if (nused == 2)
2074 nused = 1;
2075 close (inotify_fd);
2076 inotify_fd = -1;
2078 break;
2081 /* Check which of the files changed. */
2082 inotify_check_files (to_clear, &inev);
2085 /* Actually perform the cache clearing. */
2086 clear_db_cache (to_clear);
2088 --n;
2091 first = 2;
2093 #endif
2095 #ifdef HAVE_NETLINK
2096 if (idx_nl_status_fd != 0 && conns[idx_nl_status_fd].revents != 0)
2098 char buf[4096];
2099 /* Read all the data. We do not interpret it here. */
2100 while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2101 sizeof (buf))) != -1)
2104 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
2105 = __bump_nl_timestamp ();
2107 #endif
2109 for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
2110 if (conns[cnt].revents != 0)
2112 fd_ready (conns[cnt].fd);
2114 /* Clean up the CONNS array. */
2115 conns[cnt].fd = -1;
2116 if (cnt < firstfree)
2117 firstfree = cnt;
2118 if (cnt == nused - 1)
2120 --nused;
2121 while (conns[nused - 1].fd == -1);
2123 --n;
2127 /* Now find entries which have timed out. */
2128 assert (nused > 0);
2130 /* We make the timeout length depend on the number of file
2131 descriptors currently used. */
2132 #define ACCEPT_TIMEOUT \
2133 (MAX_ACCEPT_TIMEOUT \
2134 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
2135 time_t laststart = now - ACCEPT_TIMEOUT;
2137 for (size_t cnt = nused - 1; cnt > 0; --cnt)
2139 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
2141 /* Remove the entry, it timed out. */
2142 (void) close (conns[cnt].fd);
2143 conns[cnt].fd = -1;
2145 if (cnt < firstfree)
2146 firstfree = cnt;
2147 if (cnt == nused - 1)
2149 --nused;
2150 while (conns[nused - 1].fd == -1);
2154 if (restart_p (now))
2155 restart ();
2160 #ifdef HAVE_EPOLL
2161 static void
2162 main_loop_epoll (int efd)
2164 struct epoll_event ev = { 0, };
2165 int nused = 1;
2166 size_t highest = 0;
2168 /* Add the socket. */
2169 ev.events = EPOLLRDNORM;
2170 ev.data.fd = sock;
2171 if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
2172 /* We cannot use epoll. */
2173 return;
2175 # ifdef HAVE_INOTIFY
2176 if (inotify_fd != -1)
2178 ev.events = EPOLLRDNORM;
2179 ev.data.fd = inotify_fd;
2180 if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
2181 /* We cannot use epoll. */
2182 return;
2183 nused = 2;
2185 # endif
2187 # ifdef HAVE_NETLINK
2188 if (nl_status_fd != -1)
2190 ev.events = EPOLLRDNORM;
2191 ev.data.fd = nl_status_fd;
2192 if (epoll_ctl (efd, EPOLL_CTL_ADD, nl_status_fd, &ev) == -1)
2193 /* We cannot use epoll. */
2194 return;
2196 # endif
2198 while (1)
2200 struct epoll_event revs[100];
2201 # define nrevs (sizeof (revs) / sizeof (revs[0]))
2203 int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);
2205 time_t now = time (NULL);
2207 for (int cnt = 0; cnt < n; ++cnt)
2208 if (revs[cnt].data.fd == sock)
2210 /* A new connection. */
2211 int fd;
2213 # ifndef __ASSUME_ACCEPT4
2214 fd = -1;
2215 if (have_accept4 >= 0)
2216 # endif
2218 fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
2219 SOCK_NONBLOCK));
2220 # ifndef __ASSUME_ACCEPT4
2221 if (have_accept4 == 0)
2222 have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
2223 # endif
2225 # ifndef __ASSUME_ACCEPT4
2226 if (have_accept4 < 0)
2227 fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
2228 # endif
2230 /* Use the descriptor if we have not reached the limit. */
2231 if (fd >= 0)
2233 /* Try to add the new descriptor. */
2234 ev.data.fd = fd;
2235 if (fd >= nconns
2236 || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
2237 /* The descriptor is too large or something went
2238 wrong. Close the descriptor. */
2239 close (fd);
2240 else
2242 /* Remember when we accepted the connection. */
2243 starttime[fd] = now;
2245 if (fd > highest)
2246 highest = fd;
2248 ++nused;
2252 # ifdef HAVE_INOTIFY
2253 else if (revs[cnt].data.fd == inotify_fd)
2255 bool to_clear[lastdb] = { false, };
2256 union __inev inev;
2258 /* Read all inotify events for files registered via
2259 register_traced_file(). */
2260 while (1)
2262 ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
2263 sizeof (inev)));
2264 if (nb < (ssize_t) sizeof (struct inotify_event))
2266 if (__glibc_unlikely (nb == -1 && errno != EAGAIN))
2268 /* Something went wrong when reading the inotify
2269 data. Better disable inotify. */
2270 dbg_log (_("disabled inotify after read error %d"),
2271 errno);
2272 (void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd,
2273 NULL);
2274 close (inotify_fd);
2275 inotify_fd = -1;
2277 break;
2280 /* Check which of the files changed. */
2281 inotify_check_files(to_clear, &inev);
2284 /* Actually perform the cache clearing. */
2285 clear_db_cache (to_clear);
2287 # endif
2288 # ifdef HAVE_NETLINK
2289 else if (revs[cnt].data.fd == nl_status_fd)
2291 char buf[4096];
2292 /* Read all the data. We do not interpret it here. */
2293 while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2294 sizeof (buf))) != -1)
2297 __bump_nl_timestamp ();
2299 # endif
2300 else
2302 /* Remove the descriptor from the epoll descriptor. */
2303 (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);
2305 /* Get a worker to handle the request. */
2306 fd_ready (revs[cnt].data.fd);
2308 /* Reset the time. */
2309 starttime[revs[cnt].data.fd] = 0;
2310 if (revs[cnt].data.fd == highest)
2312 --highest;
2313 while (highest > 0 && starttime[highest] == 0);
2315 --nused;
2318 /* Now look for descriptors for accepted connections which have
2319 no reply in too long of a time. */
2320 time_t laststart = now - ACCEPT_TIMEOUT;
2321 assert (starttime[sock] == 0);
2322 assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
2323 assert (nl_status_fd == -1 || starttime[nl_status_fd] == 0);
2324 for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
2325 if (starttime[cnt] != 0 && starttime[cnt] < laststart)
2327 /* We are waiting for this one for too long. Close it. */
2328 (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);
2330 (void) close (cnt);
2332 starttime[cnt] = 0;
2333 if (cnt == highest)
2334 --highest;
2336 else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
2337 --highest;
2339 if (restart_p (now))
2340 restart ();
2343 #endif
2346 /* Start all the threads we want. The initial process is thread no. 1. */
2347 void
2348 start_threads (void)
2350 /* Initialize the conditional variable we will use. The only
2351 non-standard attribute we might use is the clock selection. */
2352 pthread_condattr_t condattr;
2353 pthread_condattr_init (&condattr);
2355 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2356 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2357 /* Determine whether the monotonous clock is available. */
2358 struct timespec dummy;
2359 # if _POSIX_MONOTONIC_CLOCK == 0
2360 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
2361 # endif
2362 # if _POSIX_CLOCK_SELECTION == 0
2363 if (sysconf (_SC_CLOCK_SELECTION) > 0)
2364 # endif
2365 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
2366 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
2367 timeout_clock = CLOCK_MONOTONIC;
2368 #endif
2370 /* Create the attribute for the threads. They are all created
2371 detached. */
2372 pthread_attr_init (&attr);
2373 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
2374 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2375 pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
2377 /* We allow less than LASTDB threads only for debugging. */
2378 if (debug_level == 0)
2379 nthreads = MAX (nthreads, lastdb);
2381 /* Create the threads which prune the databases. */
2382 // XXX Ideally this work would be done by some of the worker threads.
2383 // XXX But this is problematic since we would need to be able to wake
2384 // XXX them up explicitly as well as part of the group handling the
2385 // XXX ready-list. This requires an operation where we can wait on
2386 // XXX two conditional variables at the same time. This operation
2387 // XXX does not exist (yet).
2388 for (long int i = 0; i < lastdb; ++i)
2390 /* Initialize the conditional variable. */
2391 if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
2393 dbg_log (_("could not initialize conditional variable"));
2394 do_exit (1, 0, NULL);
2397 pthread_t th;
2398 if (dbs[i].enabled
2399 && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
2401 dbg_log (_("could not start clean-up thread; terminating"));
2402 do_exit (1, 0, NULL);
2406 pthread_condattr_destroy (&condattr);
2408 for (long int i = 0; i < nthreads; ++i)
2410 pthread_t th;
2411 if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
2413 if (i == 0)
2415 dbg_log (_("could not start any worker thread; terminating"));
2416 do_exit (1, 0, NULL);
2419 break;
2423 /* Now it is safe to let the parent know that we're doing fine and it can
2424 exit. */
2425 notify_parent (0);
2427 /* Determine how much room for descriptors we should initially
2428 allocate. This might need to change later if we cap the number
2429 with MAXCONN. */
2430 const long int nfds = sysconf (_SC_OPEN_MAX);
2431 #define MINCONN 32
2432 #define MAXCONN 16384
2433 if (nfds == -1 || nfds > MAXCONN)
2434 nconns = MAXCONN;
2435 else if (nfds < MINCONN)
2436 nconns = MINCONN;
2437 else
2438 nconns = nfds;
2440 /* We need memory to pass descriptors on to the worker threads. */
2441 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
2442 /* Array to keep track when connection was accepted. */
2443 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
2445 /* In the main thread we execute the loop which handles incoming
2446 connections. */
2447 #ifdef HAVE_EPOLL
2448 int efd = epoll_create (100);
2449 if (efd != -1)
2451 main_loop_epoll (efd);
2452 close (efd);
2454 #endif
2456 main_loop_poll ();
2460 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2461 this function is called, we are not listening on the nscd socket yet so
2462 we can just use the ordinary lookup functions without causing a lockup */
2463 static void
2464 begin_drop_privileges (void)
2466 struct passwd *pwd = getpwnam (server_user);
2468 if (pwd == NULL)
2470 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2471 do_exit (EXIT_FAILURE, 0,
2472 _("Failed to run nscd as user '%s'"), server_user);
2475 server_uid = pwd->pw_uid;
2476 server_gid = pwd->pw_gid;
2478 /* Save the old UID/GID if we have to change back. */
2479 if (paranoia)
2481 old_uid = getuid ();
2482 old_gid = getgid ();
2485 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
2487 /* This really must never happen. */
2488 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2489 do_exit (EXIT_FAILURE, errno,
2490 _("initial getgrouplist failed"));
2493 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
2495 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
2496 == -1)
2498 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2499 do_exit (EXIT_FAILURE, errno, _("getgrouplist failed"));
2504 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2505 run nscd as the user specified in the configuration file. */
2506 static void
2507 finish_drop_privileges (void)
2509 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2510 /* We need to preserve the capabilities to connect to the audit daemon. */
2511 cap_t new_caps = preserve_capabilities ();
2512 #endif
2514 if (setgroups (server_ngroups, server_groups) == -1)
2516 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2517 do_exit (EXIT_FAILURE, errno, _("setgroups failed"));
2520 int res;
2521 if (paranoia)
2522 res = setresgid (server_gid, server_gid, old_gid);
2523 else
2524 res = setgid (server_gid);
2525 if (res == -1)
2527 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2528 do_exit (4, errno, "setgid");
2531 if (paranoia)
2532 res = setresuid (server_uid, server_uid, old_uid);
2533 else
2534 res = setuid (server_uid);
2535 if (res == -1)
2537 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2538 do_exit (4, errno, "setuid");
2541 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2542 /* Remove the temporary capabilities. */
2543 install_real_capabilities (new_caps);
2544 #endif