Disable x87 inline functions for x86-64
[glibc.git] / nscd / connections.c
blob77b9ac40a339ba22f5a6a68be0a5f5e3020fdd98
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2012 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>. */
19 #include <alloca.h>
20 #include <assert.h>
21 #include <atomic.h>
22 #include <error.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <grp.h>
26 #include <ifaddrs.h>
27 #include <libintl.h>
28 #include <pthread.h>
29 #include <pwd.h>
30 #include <resolv.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <arpa/inet.h>
35 #ifdef HAVE_NETLINK
36 # include <linux/netlink.h>
37 # include <linux/rtnetlink.h>
38 #endif
39 #ifdef HAVE_EPOLL
40 # include <sys/epoll.h>
41 #endif
42 #ifdef HAVE_INOTIFY
43 # include <sys/inotify.h>
44 #endif
45 #include <sys/mman.h>
46 #include <sys/param.h>
47 #include <sys/poll.h>
48 #ifdef HAVE_SENDFILE
49 # include <sys/sendfile.h>
50 #endif
51 #include <sys/socket.h>
52 #include <sys/stat.h>
53 #include <sys/un.h>
55 #include "nscd.h"
56 #include "dbg_log.h"
57 #include "selinux.h"
58 #include <resolv/resolv.h>
60 #include <kernel-features.h>
/* Support to run nscd as an unprivileged user.  When server_user is
   non-NULL, nscd_init brackets its setup work with
   begin_drop_privileges / finish_drop_privileges.  */
const char *server_user;
const char *stat_user;
uid_t stat_uid;

static uid_t server_uid;
static gid_t server_gid;
static gid_t *server_groups;
static int server_ngroups;
#ifndef NGROUPS
# define NGROUPS 32
#endif

static pthread_attr_t attr;

static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
80 /* Map request type to a string. */
81 const char *const serv2str[LASTREQ] =
83 [GETPWBYNAME] = "GETPWBYNAME",
84 [GETPWBYUID] = "GETPWBYUID",
85 [GETGRBYNAME] = "GETGRBYNAME",
86 [GETGRBYGID] = "GETGRBYGID",
87 [GETHOSTBYNAME] = "GETHOSTBYNAME",
88 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
89 [GETHOSTBYADDR] = "GETHOSTBYADDR",
90 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
91 [SHUTDOWN] = "SHUTDOWN",
92 [GETSTAT] = "GETSTAT",
93 [INVALIDATE] = "INVALIDATE",
94 [GETFDPW] = "GETFDPW",
95 [GETFDGR] = "GETFDGR",
96 [GETFDHST] = "GETFDHST",
97 [GETAI] = "GETAI",
98 [INITGROUPS] = "INITGROUPS",
99 [GETSERVBYNAME] = "GETSERVBYNAME",
100 [GETSERVBYPORT] = "GETSERVBYPORT",
101 [GETFDSERV] = "GETFDSERV",
102 [GETNETGRENT] = "GETNETGRENT",
103 [INNETGR] = "INNETGR",
104 [GETFDNETGR] = "GETFDNETGR"
107 /* The control data structures for the services. */
108 struct database_dyn dbs[lastdb] =
110 [pwddb] = {
111 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
112 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
113 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
114 .enabled = 0,
115 .check_file = 1,
116 .persistent = 0,
117 .propagate = 1,
118 .shared = 0,
119 .max_db_size = DEFAULT_MAX_DB_SIZE,
120 .suggested_module = DEFAULT_SUGGESTED_MODULE,
121 .db_filename = _PATH_NSCD_PASSWD_DB,
122 .disabled_iov = &pwd_iov_disabled,
123 .postimeout = 3600,
124 .negtimeout = 20,
125 .wr_fd = -1,
126 .ro_fd = -1,
127 .mmap_used = false
129 [grpdb] = {
130 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
131 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
132 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
133 .enabled = 0,
134 .check_file = 1,
135 .persistent = 0,
136 .propagate = 1,
137 .shared = 0,
138 .max_db_size = DEFAULT_MAX_DB_SIZE,
139 .suggested_module = DEFAULT_SUGGESTED_MODULE,
140 .db_filename = _PATH_NSCD_GROUP_DB,
141 .disabled_iov = &grp_iov_disabled,
142 .postimeout = 3600,
143 .negtimeout = 60,
144 .wr_fd = -1,
145 .ro_fd = -1,
146 .mmap_used = false
148 [hstdb] = {
149 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
150 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
151 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
152 .enabled = 0,
153 .check_file = 1,
154 .persistent = 0,
155 .propagate = 0, /* Not used. */
156 .shared = 0,
157 .max_db_size = DEFAULT_MAX_DB_SIZE,
158 .suggested_module = DEFAULT_SUGGESTED_MODULE,
159 .db_filename = _PATH_NSCD_HOSTS_DB,
160 .disabled_iov = &hst_iov_disabled,
161 .postimeout = 3600,
162 .negtimeout = 20,
163 .wr_fd = -1,
164 .ro_fd = -1,
165 .mmap_used = false
167 [servdb] = {
168 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
169 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
170 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
171 .enabled = 0,
172 .check_file = 1,
173 .persistent = 0,
174 .propagate = 0, /* Not used. */
175 .shared = 0,
176 .max_db_size = DEFAULT_MAX_DB_SIZE,
177 .suggested_module = DEFAULT_SUGGESTED_MODULE,
178 .db_filename = _PATH_NSCD_SERVICES_DB,
179 .disabled_iov = &serv_iov_disabled,
180 .postimeout = 28800,
181 .negtimeout = 20,
182 .wr_fd = -1,
183 .ro_fd = -1,
184 .mmap_used = false
186 [netgrdb] = {
187 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
188 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
189 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
190 .enabled = 0,
191 .check_file = 1,
192 .persistent = 0,
193 .propagate = 0, /* Not used. */
194 .shared = 0,
195 .max_db_size = DEFAULT_MAX_DB_SIZE,
196 .suggested_module = DEFAULT_SUGGESTED_MODULE,
197 .db_filename = _PATH_NSCD_NETGROUP_DB,
198 .disabled_iov = &netgroup_iov_disabled,
199 .postimeout = 28800,
200 .negtimeout = 20,
201 .wr_fd = -1,
202 .ro_fd = -1,
203 .mmap_used = false
208 /* Mapping of request type to database. */
209 static struct
211 bool data_request;
212 struct database_dyn *db;
213 } const reqinfo[LASTREQ] =
215 [GETPWBYNAME] = { true, &dbs[pwddb] },
216 [GETPWBYUID] = { true, &dbs[pwddb] },
217 [GETGRBYNAME] = { true, &dbs[grpdb] },
218 [GETGRBYGID] = { true, &dbs[grpdb] },
219 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
220 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
221 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
222 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
223 [SHUTDOWN] = { false, NULL },
224 [GETSTAT] = { false, NULL },
225 [SHUTDOWN] = { false, NULL },
226 [GETFDPW] = { false, &dbs[pwddb] },
227 [GETFDGR] = { false, &dbs[grpdb] },
228 [GETFDHST] = { false, &dbs[hstdb] },
229 [GETAI] = { true, &dbs[hstdb] },
230 [INITGROUPS] = { true, &dbs[grpdb] },
231 [GETSERVBYNAME] = { true, &dbs[servdb] },
232 [GETSERVBYPORT] = { true, &dbs[servdb] },
233 [GETFDSERV] = { false, &dbs[servdb] },
234 [GETNETGRENT] = { true, &dbs[netgrdb] },
235 [INNETGR] = { true, &dbs[netgrdb] },
236 [GETFDNETGR] = { false, &dbs[netgrdb] }
/* Initial number of threads to use.  -1 means the value was not
   configured; nscd_init then picks a default.  */
int nthreads = -1;
/* Maximum number of threads to use.  */
int max_nthreads = 32;

/* Number of times clients had to wait.  */
unsigned long int client_queued;

/* Socket for incoming connections.  */
static int sock;

#ifdef HAVE_INOTIFY
/* Inotify descriptor.  */
int inotify_fd = -1;
#endif

#ifdef HAVE_NETLINK
/* Descriptor for netlink status updates.  */
static int nl_status_fd = -1;
#endif

#ifndef __ASSUME_SOCK_CLOEXEC
/* Negative if SOCK_CLOEXEC is not supported, positive if it is, zero
   before we know the result.  */
static int have_sock_cloexec;
#endif
#ifndef __ASSUME_ACCEPT4
/* NOTE(review): presumably the same tri-state convention, for accept4;
   its users are outside this part of the file -- confirm.  */
static int have_accept4;
#endif
/* Send all LEN bytes of BUF over socket FD, retrying after interrupts
   and short writes.  Returns the number of bytes actually sent, which
   is less than LEN if send reported zero progress, or the negative
   send result on error.  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cursor = buf;
  size_t left = len;
  ssize_t sent;

  for (;;)
    {
      sent = TEMP_FAILURE_RETRY (send (fd, cursor, left, MSG_NOSIGNAL));
      if (sent <= 0)
        break;

      cursor += sent;
      left -= sent;
      if (left == 0)
        break;
    }

  if (sent < 0)
    return sent;
  return len - left;
}
#ifdef HAVE_SENDFILE
/* Copy LEN bytes starting at offset OFF of FROMFD to TOFD with
   sendfile, retrying after interrupts and partial transfers.  Returns
   the number of bytes transferred, or the negative sendfile result on
   error.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t left = len;
  ssize_t moved;

  for (;;)
    {
      moved = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, left));
      if (moved <= 0)
        break;

      left -= moved;
      if (left <= 0)
        break;
    }

  if (moved < 0)
    return moved;
  return len - left;
}
#endif
/* Values stored in the usage map built by verify_persistent_db: the
   low bits name the kind of object occupying a byte, the high bits
   flag where an object begins and ends.  */
enum usekey
{
  /* Byte not claimed by any object yet.  */
  use_not = 0,

  /* Object kinds.  */
  use_he = 1,
#if SEPARATE_KEY
  use_key = 2,
#endif
  use_data = 3,

  /* The following three are not really used, they are symbolic
     constants combined with the object kinds below.  */
  use_first = 16,
  use_begin = 32,
  use_end = 64,

  use_he_begin = use_he | use_begin,
  use_he_end = use_he | use_end,
#if SEPARATE_KEY
  use_key_begin = use_key | use_begin,
  use_key_end = use_key | use_end,
  use_key_first = use_key_begin | use_first,
#endif
  use_data_begin = use_data | use_begin,
  use_data_end = use_data | use_end,
  use_data_first = use_data_begin | use_first
};
333 static int
334 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
335 enum usekey use, ref_t start, size_t len)
337 assert (len >= 2);
339 if (start > first_free || start + len > first_free
340 || (start & BLOCK_ALIGN_M1))
341 return 0;
343 if (usemap[start] == use_not)
345 /* Add the start marker. */
346 usemap[start] = use | use_begin;
347 use &= ~use_first;
349 while (--len > 0)
350 if (usemap[++start] != use_not)
351 return 0;
352 else
353 usemap[start] = use;
355 /* Add the end marker. */
356 usemap[start] = use | use_end;
358 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
360 /* Hash entries can't be shared. */
361 if (use == use_he)
362 return 0;
364 usemap[start] |= (use & use_first);
365 use &= ~use_first;
367 while (--len > 1)
368 if (usemap[++start] != use)
369 return 0;
371 if (usemap[++start] != (use | use_end))
372 return 0;
374 else
375 /* Points to a wrong object or somewhere in the middle. */
376 return 0;
378 return 1;
382 /* Verify data in persistent database. */
383 static int
384 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
386 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb
387 || dbnr == netgrdb);
389 time_t now = time (NULL);
391 struct database_pers_head *head = mem;
392 struct database_pers_head head_copy = *head;
394 /* Check that the header that was read matches the head in the database. */
395 if (memcmp (head, readhead, sizeof (*head)) != 0)
396 return 0;
398 /* First some easy tests: make sure the database header is sane. */
399 if (head->version != DB_VERSION
400 || head->header_size != sizeof (*head)
401 /* We allow a timestamp to be one hour ahead of the current time.
402 This should cover daylight saving time changes. */
403 || head->timestamp > now + 60 * 60 + 60
404 || (head->gc_cycle & 1)
405 || head->module == 0
406 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
407 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
408 || head->first_free < 0
409 || head->first_free > head->data_size
410 || (head->first_free & BLOCK_ALIGN_M1) != 0
411 || head->maxnentries < 0
412 || head->maxnsearched < 0)
413 return 0;
415 uint8_t *usemap = calloc (head->first_free, 1);
416 if (usemap == NULL)
417 return 0;
419 const char *data = (char *) &head->array[roundup (head->module,
420 ALIGN / sizeof (ref_t))];
422 nscd_ssize_t he_cnt = 0;
423 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
425 ref_t trail = head->array[cnt];
426 ref_t work = trail;
427 int tick = 0;
429 while (work != ENDREF)
431 if (! check_use (data, head->first_free, usemap, use_he, work,
432 sizeof (struct hashentry)))
433 goto fail;
435 /* Now we know we can dereference the record. */
436 struct hashentry *here = (struct hashentry *) (data + work);
438 ++he_cnt;
440 /* Make sure the record is for this type of service. */
441 if (here->type >= LASTREQ
442 || reqinfo[here->type].db != &dbs[dbnr])
443 goto fail;
445 /* Validate boolean field value. */
446 if (here->first != false && here->first != true)
447 goto fail;
449 if (here->len < 0)
450 goto fail;
452 /* Now the data. */
453 if (here->packet < 0
454 || here->packet > head->first_free
455 || here->packet + sizeof (struct datahead) > head->first_free)
456 goto fail;
458 struct datahead *dh = (struct datahead *) (data + here->packet);
460 if (! check_use (data, head->first_free, usemap,
461 use_data | (here->first ? use_first : 0),
462 here->packet, dh->allocsize))
463 goto fail;
465 if (dh->allocsize < sizeof (struct datahead)
466 || dh->recsize > dh->allocsize
467 || (dh->notfound != false && dh->notfound != true)
468 || (dh->usable != false && dh->usable != true))
469 goto fail;
471 if (here->key < here->packet + sizeof (struct datahead)
472 || here->key > here->packet + dh->allocsize
473 || here->key + here->len > here->packet + dh->allocsize)
475 #if SEPARATE_KEY
476 /* If keys can appear outside of data, this should be done
477 instead. But gc doesn't mark the data in that case. */
478 if (! check_use (data, head->first_free, usemap,
479 use_key | (here->first ? use_first : 0),
480 here->key, here->len))
481 #endif
482 goto fail;
485 work = here->next;
487 if (work == trail)
488 /* A circular list, this must not happen. */
489 goto fail;
490 if (tick)
491 trail = ((struct hashentry *) (data + trail))->next;
492 tick = 1 - tick;
496 if (he_cnt != head->nentries)
497 goto fail;
499 /* See if all data and keys had at least one reference from
500 he->first == true hashentry. */
501 for (ref_t idx = 0; idx < head->first_free; ++idx)
503 #if SEPARATE_KEY
504 if (usemap[idx] == use_key_begin)
505 goto fail;
506 #endif
507 if (usemap[idx] == use_data_begin)
508 goto fail;
511 /* Finally, make sure the database hasn't changed since the first test. */
512 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
513 goto fail;
515 free (usemap);
516 return 1;
518 fail:
519 free (usemap);
520 return 0;
/* Flags added to every open call in nscd_init: request close-on-exec
   where the platform defines O_CLOEXEC, nothing otherwise.  */
#ifndef O_CLOEXEC
# define EXTRA_O_FLAGS 0
#else
# define EXTRA_O_FLAGS O_CLOEXEC
#endif
531 /* Initialize database information structures. */
532 void
533 nscd_init (void)
535 /* Look up unprivileged uid/gid/groups before we start listening on the
536 socket */
537 if (server_user != NULL)
538 begin_drop_privileges ();
540 if (nthreads == -1)
541 /* No configuration for this value, assume a default. */
542 nthreads = 4;
544 for (size_t cnt = 0; cnt < lastdb; ++cnt)
545 if (dbs[cnt].enabled)
547 pthread_rwlock_init (&dbs[cnt].lock, NULL);
548 pthread_mutex_init (&dbs[cnt].memlock, NULL);
550 if (dbs[cnt].persistent)
552 /* Try to open the appropriate file on disk. */
553 int fd = open (dbs[cnt].db_filename, O_RDWR | EXTRA_O_FLAGS);
554 if (fd != -1)
556 char *msg = NULL;
557 struct stat64 st;
558 void *mem;
559 size_t total;
560 struct database_pers_head head;
561 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
562 sizeof (head)));
563 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
565 fail_db_errno:
566 /* The code is single-threaded at this point so
567 using strerror is just fine. */
568 msg = strerror (errno);
569 fail_db:
570 dbg_log (_("invalid persistent database file \"%s\": %s"),
571 dbs[cnt].db_filename, msg);
572 unlink (dbs[cnt].db_filename);
574 else if (head.module == 0 && head.data_size == 0)
576 /* The file has been created, but the head has not
577 been initialized yet. */
578 msg = _("uninitialized header");
579 goto fail_db;
581 else if (head.header_size != (int) sizeof (head))
583 msg = _("header size does not match");
584 goto fail_db;
586 else if ((total = (sizeof (head)
587 + roundup (head.module * sizeof (ref_t),
588 ALIGN)
589 + head.data_size))
590 > st.st_size
591 || total < sizeof (head))
593 msg = _("file size does not match");
594 goto fail_db;
596 /* Note we map with the maximum size allowed for the
597 database. This is likely much larger than the
598 actual file size. This is OK on most OSes since
599 extensions of the underlying file will
600 automatically translate more pages available for
601 memory access. */
602 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
603 PROT_READ | PROT_WRITE,
604 MAP_SHARED, fd, 0))
605 == MAP_FAILED)
606 goto fail_db_errno;
607 else if (!verify_persistent_db (mem, &head, cnt))
609 munmap (mem, total);
610 msg = _("verification failed");
611 goto fail_db;
613 else
615 /* Success. We have the database. */
616 dbs[cnt].head = mem;
617 dbs[cnt].memsize = total;
618 dbs[cnt].data = (char *)
619 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
620 ALIGN / sizeof (ref_t))];
621 dbs[cnt].mmap_used = true;
623 if (dbs[cnt].suggested_module > head.module)
624 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
625 dbnames[cnt]);
627 dbs[cnt].wr_fd = fd;
628 fd = -1;
629 /* We also need a read-only descriptor. */
630 if (dbs[cnt].shared)
632 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
633 O_RDONLY | EXTRA_O_FLAGS);
634 if (dbs[cnt].ro_fd == -1)
635 dbg_log (_("\
636 cannot create read-only descriptor for \"%s\"; no mmap"),
637 dbs[cnt].db_filename);
640 // XXX Shall we test whether the descriptors actually
641 // XXX point to the same file?
644 /* Close the file descriptors in case something went
645 wrong in which case the variable have not been
646 assigned -1. */
647 if (fd != -1)
648 close (fd);
650 else if (errno == EACCES)
651 error (EXIT_FAILURE, 0, _("cannot access '%s'"),
652 dbs[cnt].db_filename);
655 if (dbs[cnt].head == NULL)
657 /* No database loaded. Allocate the data structure,
658 possibly on disk. */
659 struct database_pers_head head;
660 size_t total = (sizeof (head)
661 + roundup (dbs[cnt].suggested_module
662 * sizeof (ref_t), ALIGN)
663 + (dbs[cnt].suggested_module
664 * DEFAULT_DATASIZE_PER_BUCKET));
666 /* Try to create the database. If we do not need a
667 persistent database create a temporary file. */
668 int fd;
669 int ro_fd = -1;
670 if (dbs[cnt].persistent)
672 fd = open (dbs[cnt].db_filename,
673 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | EXTRA_O_FLAGS,
674 S_IRUSR | S_IWUSR);
675 if (fd != -1 && dbs[cnt].shared)
676 ro_fd = open (dbs[cnt].db_filename,
677 O_RDONLY | EXTRA_O_FLAGS);
679 else
681 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
682 fd = mkostemp (fname, EXTRA_O_FLAGS);
684 /* We do not need the file name anymore after we
685 opened another file descriptor in read-only mode. */
686 if (fd != -1)
688 if (dbs[cnt].shared)
689 ro_fd = open (fname, O_RDONLY | EXTRA_O_FLAGS);
691 unlink (fname);
695 if (fd == -1)
697 if (errno == EEXIST)
699 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
700 dbnames[cnt], dbs[cnt].db_filename);
701 // XXX Correct way to terminate?
702 exit (1);
705 if (dbs[cnt].persistent)
706 dbg_log (_("cannot create %s; no persistent database used"),
707 dbs[cnt].db_filename);
708 else
709 dbg_log (_("cannot create %s; no sharing possible"),
710 dbs[cnt].db_filename);
712 dbs[cnt].persistent = 0;
713 // XXX remember: no mmap
715 else
717 /* Tell the user if we could not create the read-only
718 descriptor. */
719 if (ro_fd == -1 && dbs[cnt].shared)
720 dbg_log (_("\
721 cannot create read-only descriptor for \"%s\"; no mmap"),
722 dbs[cnt].db_filename);
724 /* Before we create the header, initialiye the hash
725 table. So that if we get interrupted if writing
726 the header we can recognize a partially initialized
727 database. */
728 size_t ps = sysconf (_SC_PAGESIZE);
729 char tmpbuf[ps];
730 assert (~ENDREF == 0);
731 memset (tmpbuf, '\xff', ps);
733 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
734 off_t offset = sizeof (head);
736 size_t towrite;
737 if (offset % ps != 0)
739 towrite = MIN (remaining, ps - (offset % ps));
740 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
741 goto write_fail;
742 offset += towrite;
743 remaining -= towrite;
746 while (remaining > ps)
748 if (pwrite (fd, tmpbuf, ps, offset) == -1)
749 goto write_fail;
750 offset += ps;
751 remaining -= ps;
754 if (remaining > 0
755 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
756 goto write_fail;
758 /* Create the header of the file. */
759 struct database_pers_head head =
761 .version = DB_VERSION,
762 .header_size = sizeof (head),
763 .module = dbs[cnt].suggested_module,
764 .data_size = (dbs[cnt].suggested_module
765 * DEFAULT_DATASIZE_PER_BUCKET),
766 .first_free = 0
768 void *mem;
770 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
771 != sizeof (head))
772 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
773 != 0)
774 || (mem = mmap (NULL, dbs[cnt].max_db_size,
775 PROT_READ | PROT_WRITE,
776 MAP_SHARED, fd, 0)) == MAP_FAILED)
778 write_fail:
779 unlink (dbs[cnt].db_filename);
780 dbg_log (_("cannot write to database file %s: %s"),
781 dbs[cnt].db_filename, strerror (errno));
782 dbs[cnt].persistent = 0;
784 else
786 /* Success. */
787 dbs[cnt].head = mem;
788 dbs[cnt].data = (char *)
789 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
790 ALIGN / sizeof (ref_t))];
791 dbs[cnt].memsize = total;
792 dbs[cnt].mmap_used = true;
794 /* Remember the descriptors. */
795 dbs[cnt].wr_fd = fd;
796 dbs[cnt].ro_fd = ro_fd;
797 fd = -1;
798 ro_fd = -1;
801 if (fd != -1)
802 close (fd);
803 if (ro_fd != -1)
804 close (ro_fd);
808 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
809 /* We do not check here whether the O_CLOEXEC provided to the
810 open call was successful or not. The two fcntl calls are
811 only performed once each per process start-up and therefore
812 is not noticeable at all. */
813 if (paranoia
814 && ((dbs[cnt].wr_fd != -1
815 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
816 || (dbs[cnt].ro_fd != -1
817 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
819 dbg_log (_("\
820 cannot set socket to close on exec: %s; disabling paranoia mode"),
821 strerror (errno));
822 paranoia = 0;
824 #endif
826 if (dbs[cnt].head == NULL)
828 /* We do not use the persistent database. Just
829 create an in-memory data structure. */
830 assert (! dbs[cnt].persistent);
832 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
833 + (dbs[cnt].suggested_module
834 * sizeof (ref_t)));
835 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
836 assert (~ENDREF == 0);
837 memset (dbs[cnt].head->array, '\xff',
838 dbs[cnt].suggested_module * sizeof (ref_t));
839 dbs[cnt].head->module = dbs[cnt].suggested_module;
840 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
841 * dbs[cnt].head->module);
842 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
843 dbs[cnt].head->first_free = 0;
845 dbs[cnt].shared = 0;
846 assert (dbs[cnt].ro_fd == -1);
850 /* Create the socket. */
851 #ifndef __ASSUME_SOCK_CLOEXEC
852 sock = -1;
853 if (have_sock_cloexec >= 0)
854 #endif
856 sock = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
857 #ifndef __ASSUME_SOCK_CLOEXEC
858 if (have_sock_cloexec == 0)
859 have_sock_cloexec = sock != -1 || errno != EINVAL ? 1 : -1;
860 #endif
862 #ifndef __ASSUME_SOCK_CLOEXEC
863 if (have_sock_cloexec < 0)
864 sock = socket (AF_UNIX, SOCK_STREAM, 0);
865 #endif
866 if (sock < 0)
868 dbg_log (_("cannot open socket: %s"), strerror (errno));
869 exit (errno == EACCES ? 4 : 1);
871 /* Bind a name to the socket. */
872 struct sockaddr_un sock_addr;
873 sock_addr.sun_family = AF_UNIX;
874 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
875 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
877 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
878 exit (errno == EACCES ? 4 : 1);
881 #ifndef __ASSUME_SOCK_CLOEXEC
882 if (have_sock_cloexec < 0)
884 /* We don't want to get stuck on accept. */
885 int fl = fcntl (sock, F_GETFL);
886 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
888 dbg_log (_("cannot change socket to nonblocking mode: %s"),
889 strerror (errno));
890 exit (1);
893 /* The descriptor needs to be closed on exec. */
894 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
896 dbg_log (_("cannot set socket to close on exec: %s"),
897 strerror (errno));
898 exit (1);
901 #endif
903 /* Set permissions for the socket. */
904 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
906 /* Set the socket up to accept connections. */
907 if (listen (sock, SOMAXCONN) < 0)
909 dbg_log (_("cannot enable socket to accept connections: %s"),
910 strerror (errno));
911 exit (1);
914 #ifdef HAVE_NETLINK
915 if (dbs[hstdb].enabled)
917 /* Try to open netlink socket to monitor network setting changes. */
918 nl_status_fd = socket (AF_NETLINK,
919 SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK,
920 NETLINK_ROUTE);
921 if (nl_status_fd != -1)
923 struct sockaddr_nl snl;
924 memset (&snl, '\0', sizeof (snl));
925 snl.nl_family = AF_NETLINK;
926 /* XXX Is this the best set to use? */
927 snl.nl_groups = (RTMGRP_IPV4_IFADDR | RTMGRP_TC | RTMGRP_IPV4_MROUTE
928 | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_RULE
929 | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_MROUTE
930 | RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFINFO
931 | RTMGRP_IPV6_PREFIX);
933 if (bind (nl_status_fd, (struct sockaddr *) &snl, sizeof (snl)) != 0)
935 close (nl_status_fd);
936 nl_status_fd = -1;
938 else
940 /* Start the timestamp process. */
941 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
942 = __bump_nl_timestamp ();
944 # ifndef __ASSUME_SOCK_CLOEXEC
945 if (have_sock_cloexec < 0)
947 /* We don't want to get stuck on accept. */
948 int fl = fcntl (nl_status_fd, F_GETFL);
949 if (fl == -1
950 || fcntl (nl_status_fd, F_SETFL, fl | O_NONBLOCK) == -1)
952 dbg_log (_("\
953 cannot change socket to nonblocking mode: %s"),
954 strerror (errno));
955 exit (1);
958 /* The descriptor needs to be closed on exec. */
959 if (paranoia
960 && fcntl (nl_status_fd, F_SETFD, FD_CLOEXEC) == -1)
962 dbg_log (_("cannot set socket to close on exec: %s"),
963 strerror (errno));
964 exit (1);
967 # endif
971 #endif
973 /* Change to unprivileged uid/gid/groups if specified in config file */
974 if (server_user != NULL)
975 finish_drop_privileges ();
979 void
980 register_traced_file (size_t dbidx, struct traced_file *finfo)
982 if (! dbs[dbidx].enabled || ! dbs[dbidx].check_file)
983 return;
985 if (__builtin_expect (debug_level > 0, 0))
986 dbg_log (_("register trace file %s for database %s"),
987 finfo->fname, dbnames[dbidx]);
989 #ifdef HAVE_INOTIFY
990 if (inotify_fd < 0
991 || (finfo->inotify_descr = inotify_add_watch (inotify_fd, finfo->fname,
992 IN_DELETE_SELF
993 | IN_MODIFY)) < 0)
994 #endif
996 /* We need the modification date of the file. */
997 struct stat64 st;
999 if (stat64 (finfo->fname, &st) < 0)
1001 /* We cannot stat() the file, disable file checking. */
1002 dbg_log (_("cannot stat() file `%s': %s"),
1003 finfo->fname, strerror (errno));
1004 return;
1007 finfo->inotify_descr = -1;
1008 finfo->mtime = st.st_mtime;
1011 /* Queue up the file name. */
1012 finfo->next = dbs[dbidx].traced_files;
1013 dbs[dbidx].traced_files = finfo;
1017 /* Close the connections. */
1018 void
1019 close_sockets (void)
1021 close (sock);
1025 static void
1026 invalidate_cache (char *key, int fd)
1028 dbtype number;
1029 int32_t resp;
1031 for (number = pwddb; number < lastdb; ++number)
1032 if (strcmp (key, dbnames[number]) == 0)
1034 if (number == hstdb)
1036 struct traced_file *runp = dbs[hstdb].traced_files;
1037 while (runp != NULL)
1038 if (runp->call_res_init)
1040 res_init ();
1041 break;
1043 else
1044 runp = runp->next;
1046 break;
1049 if (number == lastdb)
1051 resp = EINVAL;
1052 writeall (fd, &resp, sizeof (resp));
1053 return;
1056 if (dbs[number].enabled)
1058 pthread_mutex_lock (&dbs[number].prune_run_lock);
1059 prune_cache (&dbs[number], LONG_MAX, fd);
1060 pthread_mutex_unlock (&dbs[number].prune_run_lock);
1062 else
1064 resp = 0;
1065 writeall (fd, &resp, sizeof (resp));
1070 #ifdef SCM_RIGHTS
1071 static void
1072 send_ro_fd (struct database_dyn *db, char *key, int fd)
1074 /* If we do not have an read-only file descriptor do nothing. */
1075 if (db->ro_fd == -1)
1076 return;
1078 /* We need to send some data along with the descriptor. */
1079 uint64_t mapsize = (db->head->data_size
1080 + roundup (db->head->module * sizeof (ref_t), ALIGN)
1081 + sizeof (struct database_pers_head));
1082 struct iovec iov[2];
1083 iov[0].iov_base = key;
1084 iov[0].iov_len = strlen (key) + 1;
1085 iov[1].iov_base = &mapsize;
1086 iov[1].iov_len = sizeof (mapsize);
1088 /* Prepare the control message to transfer the descriptor. */
1089 union
1091 struct cmsghdr hdr;
1092 char bytes[CMSG_SPACE (sizeof (int))];
1093 } buf;
1094 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
1095 .msg_control = buf.bytes,
1096 .msg_controllen = sizeof (buf) };
1097 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
1099 cmsg->cmsg_level = SOL_SOCKET;
1100 cmsg->cmsg_type = SCM_RIGHTS;
1101 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
1103 int *ip = (int *) CMSG_DATA (cmsg);
1104 *ip = db->ro_fd;
1106 msg.msg_controllen = cmsg->cmsg_len;
1108 /* Send the control message. We repeat when we are interrupted but
1109 everything else is ignored. */
1110 #ifndef MSG_NOSIGNAL
1111 # define MSG_NOSIGNAL 0
1112 #endif
1113 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
1115 if (__builtin_expect (debug_level > 0, 0))
1116 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
1118 #endif /* SCM_RIGHTS */
1121 /* Handle new request. */
1122 static void
1123 handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
1125 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
1127 if (debug_level > 0)
1128 dbg_log (_("\
1129 cannot handle old request version %d; current version is %d"),
1130 req->version, NSCD_VERSION);
1131 return;
1134 /* Perform the SELinux check before we go on to the standard checks. */
1135 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
1137 if (debug_level > 0)
1139 #ifdef SO_PEERCRED
1140 # ifdef PATH_MAX
1141 char buf[PATH_MAX];
1142 # else
1143 char buf[4096];
1144 # endif
1146 snprintf (buf, sizeof (buf), "/proc/%ld/exe", (long int) pid);
1147 ssize_t n = readlink (buf, buf, sizeof (buf) - 1);
1149 if (n <= 0)
1150 dbg_log (_("\
1151 request from %ld not handled due to missing permission"), (long int) pid);
1152 else
1154 buf[n] = '\0';
1155 dbg_log (_("\
1156 request from '%s' [%ld] not handled due to missing permission"),
1157 buf, (long int) pid);
1159 #else
1160 dbg_log (_("request not handled due to missing permission"));
1161 #endif
1163 return;
1166 struct database_dyn *db = reqinfo[req->type].db;
1168 /* See whether we can service the request from the cache. */
1169 if (__builtin_expect (reqinfo[req->type].data_request, true))
1171 if (__builtin_expect (debug_level, 0) > 0)
1173 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
1175 char buf[INET6_ADDRSTRLEN];
1177 dbg_log ("\t%s (%s)", serv2str[req->type],
1178 inet_ntop (req->type == GETHOSTBYADDR
1179 ? AF_INET : AF_INET6,
1180 key, buf, sizeof (buf)));
1182 else
1183 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1186 /* Is this service enabled? */
1187 if (__builtin_expect (!db->enabled, 0))
1189 /* No, sent the prepared record. */
1190 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1191 db->disabled_iov->iov_len,
1192 MSG_NOSIGNAL))
1193 != (ssize_t) db->disabled_iov->iov_len
1194 && __builtin_expect (debug_level, 0) > 0)
1196 /* We have problems sending the result. */
1197 char buf[256];
1198 dbg_log (_("cannot write result: %s"),
1199 strerror_r (errno, buf, sizeof (buf)));
1202 return;
1205 /* Be sure we can read the data. */
1206 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
1208 ++db->head->rdlockdelayed;
1209 pthread_rwlock_rdlock (&db->lock);
1212 /* See whether we can handle it from the cache. */
1213 struct datahead *cached;
1214 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1215 db, uid);
1216 if (cached != NULL)
1218 /* Hurray it's in the cache. */
1219 ssize_t nwritten;
1221 #ifdef HAVE_SENDFILE
1222 if (__builtin_expect (db->mmap_used, 1))
1224 assert (db->wr_fd != -1);
1225 assert ((char *) cached->data > (char *) db->data);
1226 assert ((char *) cached->data - (char *) db->head
1227 + cached->recsize
1228 <= (sizeof (struct database_pers_head)
1229 + db->head->module * sizeof (ref_t)
1230 + db->head->data_size));
1231 nwritten = sendfileall (fd, db->wr_fd,
1232 (char *) cached->data
1233 - (char *) db->head, cached->recsize);
1234 # ifndef __ASSUME_SENDFILE
1235 if (nwritten == -1 && errno == ENOSYS)
1236 goto use_write;
1237 # endif
1239 else
1240 # ifndef __ASSUME_SENDFILE
1241 use_write:
1242 # endif
1243 #endif
1244 nwritten = writeall (fd, cached->data, cached->recsize);
1246 if (nwritten != cached->recsize
1247 && __builtin_expect (debug_level, 0) > 0)
1249 /* We have problems sending the result. */
1250 char buf[256];
1251 dbg_log (_("cannot write result: %s"),
1252 strerror_r (errno, buf, sizeof (buf)));
1255 pthread_rwlock_unlock (&db->lock);
1257 return;
1260 pthread_rwlock_unlock (&db->lock);
1262 else if (__builtin_expect (debug_level, 0) > 0)
1264 if (req->type == INVALIDATE)
1265 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1266 else
1267 dbg_log ("\t%s", serv2str[req->type]);
1270 /* Handle the request. */
1271 switch (req->type)
1273 case GETPWBYNAME:
1274 addpwbyname (db, fd, req, key, uid);
1275 break;
1277 case GETPWBYUID:
1278 addpwbyuid (db, fd, req, key, uid);
1279 break;
1281 case GETGRBYNAME:
1282 addgrbyname (db, fd, req, key, uid);
1283 break;
1285 case GETGRBYGID:
1286 addgrbygid (db, fd, req, key, uid);
1287 break;
1289 case GETHOSTBYNAME:
1290 addhstbyname (db, fd, req, key, uid);
1291 break;
1293 case GETHOSTBYNAMEv6:
1294 addhstbynamev6 (db, fd, req, key, uid);
1295 break;
1297 case GETHOSTBYADDR:
1298 addhstbyaddr (db, fd, req, key, uid);
1299 break;
1301 case GETHOSTBYADDRv6:
1302 addhstbyaddrv6 (db, fd, req, key, uid);
1303 break;
1305 case GETAI:
1306 addhstai (db, fd, req, key, uid);
1307 break;
1309 case INITGROUPS:
1310 addinitgroups (db, fd, req, key, uid);
1311 break;
1313 case GETSERVBYNAME:
1314 addservbyname (db, fd, req, key, uid);
1315 break;
1317 case GETSERVBYPORT:
1318 addservbyport (db, fd, req, key, uid);
1319 break;
1321 case GETNETGRENT:
1322 addgetnetgrent (db, fd, req, key, uid);
1323 break;
1325 case INNETGR:
1326 addinnetgr (db, fd, req, key, uid);
1327 break;
1329 case GETSTAT:
1330 case SHUTDOWN:
1331 case INVALIDATE:
1333 /* Get the callers credentials. */
1334 #ifdef SO_PEERCRED
1335 struct ucred caller;
1336 socklen_t optlen = sizeof (caller);
1338 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1340 char buf[256];
1342 dbg_log (_("error getting caller's id: %s"),
1343 strerror_r (errno, buf, sizeof (buf)));
1344 break;
1347 uid = caller.uid;
1348 #else
1349 /* Some systems have no SO_PEERCRED implementation. They don't
1350 care about security so we don't as well. */
1351 uid = 0;
1352 #endif
1355 /* Accept shutdown, getstat and invalidate only from root. For
1356 the stat call also allow the user specified in the config file. */
1357 if (req->type == GETSTAT)
1359 if (uid == 0 || uid == stat_uid)
1360 send_stats (fd, dbs);
1362 else if (uid == 0)
1364 if (req->type == INVALIDATE)
1365 invalidate_cache (key, fd);
1366 else
1367 termination_handler (0);
1369 break;
1371 case GETFDPW:
1372 case GETFDGR:
1373 case GETFDHST:
1374 case GETFDSERV:
1375 case GETFDNETGR:
1376 #ifdef SCM_RIGHTS
1377 send_ro_fd (reqinfo[req->type].db, key, fd);
1378 #endif
1379 break;
1381 default:
1382 /* Ignore the command, it's nothing we know. */
1383 break;
1388 /* Restart the process. */
1389 static void
1390 restart (void)
1392 /* First determine the parameters. We do not use the parameters
1393 passed to main() since in case nscd is started by running the
1394 dynamic linker this will not work. Yes, this is not the usual
1395 case but nscd is part of glibc and we occasionally do this. */
1396 size_t buflen = 1024;
1397 char *buf = alloca (buflen);
1398 size_t readlen = 0;
1399 int fd = open ("/proc/self/cmdline", O_RDONLY);
1400 if (fd == -1)
1402 dbg_log (_("\
1403 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1404 strerror (errno));
1406 paranoia = 0;
1407 return;
1410 while (1)
1412 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1413 buflen - readlen));
1414 if (n == -1)
1416 dbg_log (_("\
1417 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1418 strerror (errno));
1420 close (fd);
1421 paranoia = 0;
1422 return;
1425 readlen += n;
1427 if (readlen < buflen)
1428 break;
1430 /* We might have to extend the buffer. */
1431 size_t old_buflen = buflen;
1432 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1433 buf = memmove (newp, buf, old_buflen);
1436 close (fd);
1438 /* Parse the command line. Worst case scenario: every two
1439 characters form one parameter (one character plus NUL). */
1440 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1441 int argc = 0;
1443 char *cp = buf;
1444 while (cp < buf + readlen)
1446 argv[argc++] = cp;
1447 cp = (char *) rawmemchr (cp, '\0') + 1;
1449 argv[argc] = NULL;
1451 /* Second, change back to the old user if we changed it. */
1452 if (server_user != NULL)
1454 if (setresuid (old_uid, old_uid, old_uid) != 0)
1456 dbg_log (_("\
1457 cannot change to old UID: %s; disabling paranoia mode"),
1458 strerror (errno));
1460 paranoia = 0;
1461 return;
1464 if (setresgid (old_gid, old_gid, old_gid) != 0)
1466 dbg_log (_("\
1467 cannot change to old GID: %s; disabling paranoia mode"),
1468 strerror (errno));
1470 setuid (server_uid);
1471 paranoia = 0;
1472 return;
1476 /* Next change back to the old working directory. */
1477 if (chdir (oldcwd) == -1)
1479 dbg_log (_("\
1480 cannot change to old working directory: %s; disabling paranoia mode"),
1481 strerror (errno));
1483 if (server_user != NULL)
1485 setuid (server_uid);
1486 setgid (server_gid);
1488 paranoia = 0;
1489 return;
1492 /* Synchronize memory. */
1493 int32_t certainly[lastdb];
1494 for (int cnt = 0; cnt < lastdb; ++cnt)
1495 if (dbs[cnt].enabled)
1497 /* Make sure nobody keeps using the database. */
1498 dbs[cnt].head->timestamp = 0;
1499 certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
1500 dbs[cnt].head->nscd_certainly_running = 0;
1502 if (dbs[cnt].persistent)
1503 // XXX async OK?
1504 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1507 /* The preparations are done. */
1508 #ifdef PATH_MAX
1509 char pathbuf[PATH_MAX];
1510 #else
1511 char pathbuf[256];
1512 #endif
1513 /* Try to exec the real nscd program so the process name (as reported
1514 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1515 if readlink or the exec with the result of the readlink call fails. */
1516 ssize_t n = readlink ("/proc/self/exe", pathbuf, sizeof (pathbuf) - 1);
1517 if (n != -1)
1519 pathbuf[n] = '\0';
1520 execv (pathbuf, argv);
1522 execv ("/proc/self/exe", argv);
1524 /* If we come here, we will never be able to re-exec. */
1525 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1526 strerror (errno));
1528 if (server_user != NULL)
1530 setuid (server_uid);
1531 setgid (server_gid);
1533 if (chdir ("/") != 0)
1534 dbg_log (_("cannot change current working directory to \"/\": %s"),
1535 strerror (errno));
1536 paranoia = 0;
1538 /* Reenable the databases. */
1539 time_t now = time (NULL);
1540 for (int cnt = 0; cnt < lastdb; ++cnt)
1541 if (dbs[cnt].enabled)
1543 dbs[cnt].head->timestamp = now;
1544 dbs[cnt].head->nscd_certainly_running = certainly[cnt];
1549 /* List of file descriptors. */
1550 struct fdlist
1552 int fd;
1553 struct fdlist *next;
1555 /* Memory allocated for the list. */
1556 static struct fdlist *fdlist;
1557 /* List of currently ready-to-read file descriptors. */
1558 static struct fdlist *readylist;
1560 /* Conditional variable and mutex to signal availability of entries in
1561 READYLIST. The condvar is initialized dynamically since we might
1562 use a different clock depending on availability. */
1563 static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
1564 static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1566 /* The clock to use with the condvar. */
1567 static clockid_t timeout_clock = CLOCK_REALTIME;
1569 /* Number of threads ready to handle the READYLIST. */
1570 static unsigned long int nready;
1573 /* Function for the clean-up threads. */
1574 static void *
1575 __attribute__ ((__noreturn__))
1576 nscd_run_prune (void *p)
1578 const long int my_number = (long int) p;
1579 assert (dbs[my_number].enabled);
1581 int dont_need_update = setup_thread (&dbs[my_number]);
1583 time_t now = time (NULL);
1585 /* We are running. */
1586 dbs[my_number].head->timestamp = now;
1588 struct timespec prune_ts;
1589 if (__builtin_expect (clock_gettime (timeout_clock, &prune_ts) == -1, 0))
1590 /* Should never happen. */
1591 abort ();
1593 /* Compute the initial timeout time. Prevent all the timers to go
1594 off at the same time by adding a db-based value. */
1595 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1596 dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;
1598 pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
1599 pthread_mutex_t *prune_run_lock = &dbs[my_number].prune_run_lock;
1600 pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;
1602 pthread_mutex_lock (prune_lock);
1603 while (1)
1605 /* Wait, but not forever. */
1606 int e = 0;
1607 if (! dbs[my_number].clear_cache)
1608 e = pthread_cond_timedwait (prune_cond, prune_lock, &prune_ts);
1609 assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));
1611 time_t next_wait;
1612 now = time (NULL);
1613 if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time
1614 || dbs[my_number].clear_cache)
1616 /* We will determine the new timout values based on the
1617 cache content. Should there be concurrent additions to
1618 the cache which are not accounted for in the cache
1619 pruning we want to know about it. Therefore set the
1620 timeout to the maximum. It will be descreased when adding
1621 new entries to the cache, if necessary. */
1622 dbs[my_number].wakeup_time = MAX_TIMEOUT_VALUE;
1624 /* Unconditionally reset the flag. */
1625 time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
1626 dbs[my_number].clear_cache = 0;
1628 pthread_mutex_unlock (prune_lock);
1630 /* We use a separate lock for running the prune function (instead
1631 of keeping prune_lock locked) because this enables concurrent
1632 invocations of cache_add which might modify the timeout value. */
1633 pthread_mutex_lock (prune_run_lock);
1634 next_wait = prune_cache (&dbs[my_number], prune_now, -1);
1635 pthread_mutex_unlock (prune_run_lock);
1637 next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1638 /* If clients cannot determine for sure whether nscd is running
1639 we need to wake up occasionally to update the timestamp.
1640 Wait 90% of the update period. */
1641 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1642 if (__builtin_expect (! dont_need_update, 0))
1644 next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1645 dbs[my_number].head->timestamp = now;
1648 pthread_mutex_lock (prune_lock);
1650 /* Make it known when we will wake up again. */
1651 if (now + next_wait < dbs[my_number].wakeup_time)
1652 dbs[my_number].wakeup_time = now + next_wait;
1653 else
1654 next_wait = dbs[my_number].wakeup_time - now;
1656 else
1657 /* The cache was just pruned. Do not do it again now. Just
1658 use the new timeout value. */
1659 next_wait = dbs[my_number].wakeup_time - now;
1661 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1662 /* Should never happen. */
1663 abort ();
1665 /* Compute next timeout time. */
1666 prune_ts.tv_sec += next_wait;
1671 /* This is the main loop. It is replicated in different threads but
1672 the use of the ready list makes sure only one thread handles an
1673 incoming connection. */
1674 static void *
1675 __attribute__ ((__noreturn__))
1676 nscd_run_worker (void *p)
1678 char buf[256];
1680 /* Initial locking. */
1681 pthread_mutex_lock (&readylist_lock);
1683 /* One more thread available. */
1684 ++nready;
1686 while (1)
1688 while (readylist == NULL)
1689 pthread_cond_wait (&readylist_cond, &readylist_lock);
1691 struct fdlist *it = readylist->next;
1692 if (readylist->next == readylist)
1693 /* Just one entry on the list. */
1694 readylist = NULL;
1695 else
1696 readylist->next = it->next;
1698 /* Extract the information and mark the record ready to be used
1699 again. */
1700 int fd = it->fd;
1701 it->next = NULL;
1703 /* One more thread available. */
1704 --nready;
1706 /* We are done with the list. */
1707 pthread_mutex_unlock (&readylist_lock);
1709 #ifndef __ASSUME_ACCEPT4
1710 if (have_accept4 < 0)
1712 /* We do not want to block on a short read or so. */
1713 int fl = fcntl (fd, F_GETFL);
1714 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1715 goto close_and_out;
1717 #endif
1719 /* Now read the request. */
1720 request_header req;
1721 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1722 != sizeof (req), 0))
1724 /* We failed to read data. Note that this also might mean we
1725 failed because we would have blocked. */
1726 if (debug_level > 0)
1727 dbg_log (_("short read while reading request: %s"),
1728 strerror_r (errno, buf, sizeof (buf)));
1729 goto close_and_out;
1732 /* Check whether this is a valid request type. */
1733 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1734 goto close_and_out;
1736 /* Some systems have no SO_PEERCRED implementation. They don't
1737 care about security so we don't as well. */
1738 uid_t uid = -1;
1739 #ifdef SO_PEERCRED
1740 pid_t pid = 0;
1742 if (__builtin_expect (debug_level > 0, 0))
1744 struct ucred caller;
1745 socklen_t optlen = sizeof (caller);
1747 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1748 pid = caller.pid;
1750 #else
1751 const pid_t pid = 0;
1752 #endif
1754 /* It should not be possible to crash the nscd with a silly
1755 request (i.e., a terribly large key). We limit the size to 1kb. */
1756 if (__builtin_expect (req.key_len, 1) < 0
1757 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1759 if (debug_level > 0)
1760 dbg_log (_("key length in request too long: %d"), req.key_len);
1762 else
1764 /* Get the key. */
1765 char keybuf[MAXKEYLEN];
1767 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1768 req.key_len))
1769 != req.key_len, 0))
1771 /* Again, this can also mean we would have blocked. */
1772 if (debug_level > 0)
1773 dbg_log (_("short read while reading request key: %s"),
1774 strerror_r (errno, buf, sizeof (buf)));
1775 goto close_and_out;
1778 if (__builtin_expect (debug_level, 0) > 0)
1780 #ifdef SO_PEERCRED
1781 if (pid != 0)
1782 dbg_log (_("\
1783 handle_request: request received (Version = %d) from PID %ld"),
1784 req.version, (long int) pid);
1785 else
1786 #endif
1787 dbg_log (_("\
1788 handle_request: request received (Version = %d)"), req.version);
1791 /* Phew, we got all the data, now process it. */
1792 handle_request (fd, &req, keybuf, uid, pid);
1795 close_and_out:
1796 /* We are done. */
1797 close (fd);
1799 /* Re-locking. */
1800 pthread_mutex_lock (&readylist_lock);
1802 /* One more thread available. */
1803 ++nready;
1805 /* NOTREACHED */
1809 static unsigned int nconns;
1811 static void
1812 fd_ready (int fd)
1814 pthread_mutex_lock (&readylist_lock);
1816 /* Find an empty entry in FDLIST. */
1817 size_t inner;
1818 for (inner = 0; inner < nconns; ++inner)
1819 if (fdlist[inner].next == NULL)
1820 break;
1821 assert (inner < nconns);
1823 fdlist[inner].fd = fd;
1825 if (readylist == NULL)
1826 readylist = fdlist[inner].next = &fdlist[inner];
1827 else
1829 fdlist[inner].next = readylist->next;
1830 readylist = readylist->next = &fdlist[inner];
1833 bool do_signal = true;
1834 if (__builtin_expect (nready == 0, 0))
1836 ++client_queued;
1837 do_signal = false;
1839 /* Try to start another thread to help out. */
1840 pthread_t th;
1841 if (nthreads < max_nthreads
1842 && pthread_create (&th, &attr, nscd_run_worker,
1843 (void *) (long int) nthreads) == 0)
1845 /* We got another thread. */
1846 ++nthreads;
1847 /* The new thread might need a kick. */
1848 do_signal = true;
1853 pthread_mutex_unlock (&readylist_lock);
1855 /* Tell one of the worker threads there is work to do. */
1856 if (do_signal)
1857 pthread_cond_signal (&readylist_cond);
1861 /* Check whether restarting should happen. */
1862 static inline int
1863 restart_p (time_t now)
1865 return (paranoia && readylist == NULL && nready == nthreads
1866 && now >= restart_time);
1870 /* Array for times a connection was accepted. */
1871 static time_t *starttime;
1874 static void
1875 __attribute__ ((__noreturn__))
1876 main_loop_poll (void)
1878 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1879 * sizeof (conns[0]));
1881 conns[0].fd = sock;
1882 conns[0].events = POLLRDNORM;
1883 size_t nused = 1;
1884 size_t firstfree = 1;
1886 #ifdef HAVE_INOTIFY
1887 if (inotify_fd != -1)
1889 conns[1].fd = inotify_fd;
1890 conns[1].events = POLLRDNORM;
1891 nused = 2;
1892 firstfree = 2;
1894 #endif
1896 #ifdef HAVE_NETLINK
1897 size_t idx_nl_status_fd = 0;
1898 if (nl_status_fd != -1)
1900 idx_nl_status_fd = nused;
1901 conns[nused].fd = nl_status_fd;
1902 conns[nused].events = POLLRDNORM;
1903 ++nused;
1904 firstfree = nused;
1906 #endif
1908 while (1)
1910 /* Wait for any event. We wait at most a couple of seconds so
1911 that we can check whether we should close any of the accepted
1912 connections since we have not received a request. */
1913 #define MAX_ACCEPT_TIMEOUT 30
1914 #define MIN_ACCEPT_TIMEOUT 5
1915 #define MAIN_THREAD_TIMEOUT \
1916 (MAX_ACCEPT_TIMEOUT * 1000 \
1917 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1919 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1921 time_t now = time (NULL);
1923 /* If there is a descriptor ready for reading or there is a new
1924 connection, process this now. */
1925 if (n > 0)
1927 if (conns[0].revents != 0)
1929 /* We have a new incoming connection. Accept the connection. */
1930 int fd;
1932 #ifndef __ASSUME_ACCEPT4
1933 fd = -1;
1934 if (have_accept4 >= 0)
1935 #endif
1937 fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
1938 SOCK_NONBLOCK));
1939 #ifndef __ASSUME_ACCEPT4
1940 if (have_accept4 == 0)
1941 have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
1942 #endif
1944 #ifndef __ASSUME_ACCEPT4
1945 if (have_accept4 < 0)
1946 fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
1947 #endif
1949 /* Use the descriptor if we have not reached the limit. */
1950 if (fd >= 0)
1952 if (firstfree < nconns)
1954 conns[firstfree].fd = fd;
1955 conns[firstfree].events = POLLRDNORM;
1956 starttime[firstfree] = now;
1957 if (firstfree >= nused)
1958 nused = firstfree + 1;
1961 ++firstfree;
1962 while (firstfree < nused && conns[firstfree].fd != -1);
1964 else
1965 /* We cannot use the connection so close it. */
1966 close (fd);
1969 --n;
1972 size_t first = 1;
1973 #ifdef HAVE_INOTIFY
1974 if (inotify_fd != -1 && conns[1].fd == inotify_fd)
1976 if (conns[1].revents != 0)
1978 bool to_clear[lastdb] = { false, };
1979 union
1981 # ifndef PATH_MAX
1982 # define PATH_MAX 1024
1983 # endif
1984 struct inotify_event i;
1985 char buf[sizeof (struct inotify_event) + PATH_MAX];
1986 } inev;
1988 while (1)
1990 ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
1991 sizeof (inev)));
1992 if (nb < (ssize_t) sizeof (struct inotify_event))
1994 if (__builtin_expect (nb == -1 && errno != EAGAIN,
1997 /* Something went wrong when reading the inotify
1998 data. Better disable inotify. */
1999 dbg_log (_("\
2000 disabled inotify after read error %d"),
2001 errno);
2002 conns[1].fd = -1;
2003 firstfree = 1;
2004 if (nused == 2)
2005 nused = 1;
2006 close (inotify_fd);
2007 inotify_fd = -1;
2009 break;
2012 /* Check which of the files changed. */
2013 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
2015 struct traced_file *finfo = dbs[dbcnt].traced_files;
2017 while (finfo != NULL)
2019 if (finfo->inotify_descr == inev.i.wd)
2021 to_clear[dbcnt] = true;
2022 if (finfo->call_res_init)
2023 res_init ();
2024 goto next;
2027 finfo = finfo->next;
2030 next:;
2033 /* Actually perform the cache clearing. */
2034 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
2035 if (to_clear[dbcnt])
2037 pthread_mutex_lock (&dbs[dbcnt].prune_lock);
2038 dbs[dbcnt].clear_cache = 1;
2039 pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
2040 pthread_cond_signal (&dbs[dbcnt].prune_cond);
2043 --n;
2046 first = 2;
2048 #endif
2050 #ifdef HAVE_NETLINK
2051 if (idx_nl_status_fd != 0 && conns[idx_nl_status_fd].revents != 0)
2053 char buf[4096];
2054 /* Read all the data. We do not interpret it here. */
2055 while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2056 sizeof (buf))) != -1)
2059 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
2060 = __bump_nl_timestamp ();
2062 #endif
2064 for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
2065 if (conns[cnt].revents != 0)
2067 fd_ready (conns[cnt].fd);
2069 /* Clean up the CONNS array. */
2070 conns[cnt].fd = -1;
2071 if (cnt < firstfree)
2072 firstfree = cnt;
2073 if (cnt == nused - 1)
2075 --nused;
2076 while (conns[nused - 1].fd == -1);
2078 --n;
2082 /* Now find entries which have timed out. */
2083 assert (nused > 0);
2085 /* We make the timeout length depend on the number of file
2086 descriptors currently used. */
2087 #define ACCEPT_TIMEOUT \
2088 (MAX_ACCEPT_TIMEOUT \
2089 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
2090 time_t laststart = now - ACCEPT_TIMEOUT;
2092 for (size_t cnt = nused - 1; cnt > 0; --cnt)
2094 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
2096 /* Remove the entry, it timed out. */
2097 (void) close (conns[cnt].fd);
2098 conns[cnt].fd = -1;
2100 if (cnt < firstfree)
2101 firstfree = cnt;
2102 if (cnt == nused - 1)
2104 --nused;
2105 while (conns[nused - 1].fd == -1);
2109 if (restart_p (now))
2110 restart ();
#ifdef HAVE_EPOLL
/* Main loop of the main thread, based on the epoll descriptor EFD.
   The listening socket and, if available, the inotify and netlink
   descriptors are registered with EFD.  Accepted connections are
   registered as well and passed on to a worker thread once they become
   readable; connections which stay silent for too long are closed.
   STARTTIME is indexed by descriptor number here, zero meaning that
   the descriptor is not a pending connection.

   The function returns only if one of the initial descriptors cannot
   be registered, i.e., if epoll cannot be used.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  /* Number of registered descriptors; only used to scale the timeouts.  */
  int nused = 1;
  /* Upper bound for the descriptor numbers with a recorded start time.  */
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

# ifdef HAVE_INOTIFY
  if (inotify_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = inotify_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
        /* We cannot use epoll.  */
        return;
      nused = 2;
    }
# endif

# ifdef HAVE_NETLINK
  if (nl_status_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = nl_status_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, nl_status_fd, &ev) == -1)
        /* We cannot use epoll.  */
        return;
    }
# endif

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd;

# ifndef __ASSUME_ACCEPT4
            fd = -1;
            if (have_accept4 >= 0)
# endif
              {
                fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
                                                  SOCK_NONBLOCK));
# ifndef __ASSUME_ACCEPT4
                if (have_accept4 == 0)
                  have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
# endif
              }

# ifndef __ASSUME_ACCEPT4
            if (have_accept4 < 0)
              fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
# endif

            /* Use the descriptor if we have not reached the limit.  */
            if (fd >= 0)
              {
                /* Try to add the new descriptor.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
# ifdef HAVE_INOTIFY
        else if (revs[cnt].data.fd == inotify_fd)
          {
            bool to_clear[lastdb] = { false, };
            union
            {
              struct inotify_event i;
              char buf[sizeof (struct inotify_event) + PATH_MAX];
            } inev;

            while (1)
              {
                ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
                                                       sizeof (inev)));
                if (nb < (ssize_t) sizeof (struct inotify_event))
                  {
                    if (__builtin_expect (nb == -1 && errno != EAGAIN, 0))
                      {
                        /* Something went wrong when reading the inotify
                           data.  Better disable inotify.  */
                        dbg_log (_("disabled inotify after read error %d"),
                                 errno);
                        (void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd,
                                          NULL);
                        close (inotify_fd);
                        inotify_fd = -1;
                      }
                    break;
                  }

                /* Check which of the files changed.  */
                for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
                  {
                    struct traced_file *finfo = dbs[dbcnt].traced_files;

                    while (finfo != NULL)
                      {
                        if (finfo->inotify_descr == inev.i.wd)
                          {
                            to_clear[dbcnt] = true;
                            if (finfo->call_res_init)
                              res_init ();
                            goto next;
                          }

                        finfo = finfo->next;
                      }
                  }
              next:;
              }

            /* Actually perform the cache clearing.  */
            for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
              if (to_clear[dbcnt])
                {
                  pthread_mutex_lock (&dbs[dbcnt].prune_lock);
                  dbs[dbcnt].clear_cache = 1;
                  pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
                  pthread_cond_signal (&dbs[dbcnt].prune_cond);
                }
          }
# endif
# ifdef HAVE_NETLINK
        else if (revs[cnt].data.fd == nl_status_fd)
          {
            char buf[4096];
            /* Read all the data.  We do not interpret it here.  */
            while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
                                             sizeof (buf))) != -1)
              ;

            /* Record the new timestamp in the header of the hosts
               database, exactly as the poll-based loop does.  */
            dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
              = __bump_nl_timestamp ();
          }
# endif
        else
          {
            /* Remove the descriptor from the epoll descriptor.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      assert (starttime[sock] == 0);
      assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
      assert (nl_status_fd == -1 || starttime[nl_status_fd] == 0);
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;

            /* The connection was counted when it was accepted.  Without
               this the count only ever grows for connections which time
               out and the timeouts derived from it keep shrinking.  */
            --nused;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
2327 /* Start all the threads we want. The initial process is thread no. 1. */
2328 void
2329 start_threads (void)
2331 /* Initialize the conditional variable we will use. The only
2332 non-standard attribute we might use is the clock selection. */
2333 pthread_condattr_t condattr;
2334 pthread_condattr_init (&condattr);
2336 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2337 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2338 /* Determine whether the monotonous clock is available. */
2339 struct timespec dummy;
2340 # if _POSIX_MONOTONIC_CLOCK == 0
2341 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
2342 # endif
2343 # if _POSIX_CLOCK_SELECTION == 0
2344 if (sysconf (_SC_CLOCK_SELECTION) > 0)
2345 # endif
2346 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
2347 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
2348 timeout_clock = CLOCK_MONOTONIC;
2349 #endif
2351 /* Create the attribute for the threads. They are all created
2352 detached. */
2353 pthread_attr_init (&attr);
2354 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
2355 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2356 pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
2358 /* We allow less than LASTDB threads only for debugging. */
2359 if (debug_level == 0)
2360 nthreads = MAX (nthreads, lastdb);
2362 /* Create the threads which prune the databases. */
2363 // XXX Ideally this work would be done by some of the worker threads.
2364 // XXX But this is problematic since we would need to be able to wake
2365 // XXX them up explicitly as well as part of the group handling the
2366 // XXX ready-list. This requires an operation where we can wait on
2367 // XXX two conditional variables at the same time. This operation
2368 // XXX does not exist (yet).
2369 for (long int i = 0; i < lastdb; ++i)
2371 /* Initialize the conditional variable. */
2372 if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
2374 dbg_log (_("could not initialize conditional variable"));
2375 exit (1);
2378 pthread_t th;
2379 if (dbs[i].enabled
2380 && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
2382 dbg_log (_("could not start clean-up thread; terminating"));
2383 exit (1);
2387 pthread_condattr_destroy (&condattr);
2389 for (long int i = 0; i < nthreads; ++i)
2391 pthread_t th;
2392 if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
2394 if (i == 0)
2396 dbg_log (_("could not start any worker thread; terminating"));
2397 exit (1);
2400 break;
2404 /* Determine how much room for descriptors we should initially
2405 allocate. This might need to change later if we cap the number
2406 with MAXCONN. */
2407 const long int nfds = sysconf (_SC_OPEN_MAX);
2408 #define MINCONN 32
2409 #define MAXCONN 16384
2410 if (nfds == -1 || nfds > MAXCONN)
2411 nconns = MAXCONN;
2412 else if (nfds < MINCONN)
2413 nconns = MINCONN;
2414 else
2415 nconns = nfds;
2417 /* We need memory to pass descriptors on to the worker threads. */
2418 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
2419 /* Array to keep track when connection was accepted. */
2420 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
2422 /* In the main thread we execute the loop which handles incoming
2423 connections. */
2424 #ifdef HAVE_EPOLL
2425 int efd = epoll_create (100);
2426 if (efd != -1)
2428 main_loop_epoll (efd);
2429 close (efd);
2431 #endif
2433 main_loop_poll ();
2437 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2438 this function is called, we are not listening on the nscd socket yet so
2439 we can just use the ordinary lookup functions without causing a lockup */
2440 static void
2441 begin_drop_privileges (void)
2443 struct passwd *pwd = getpwnam (server_user);
2445 if (pwd == NULL)
2447 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2448 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
2449 server_user);
2452 server_uid = pwd->pw_uid;
2453 server_gid = pwd->pw_gid;
2455 /* Save the old UID/GID if we have to change back. */
2456 if (paranoia)
2458 old_uid = getuid ();
2459 old_gid = getgid ();
2462 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
2464 /* This really must never happen. */
2465 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2466 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
2469 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
2471 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
2472 == -1)
2474 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2475 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
2480 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2481 run nscd as the user specified in the configuration file. */
2482 static void
2483 finish_drop_privileges (void)
2485 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2486 /* We need to preserve the capabilities to connect to the audit daemon. */
2487 cap_t new_caps = preserve_capabilities ();
2488 #endif
2490 if (setgroups (server_ngroups, server_groups) == -1)
2492 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2493 error (EXIT_FAILURE, errno, _("setgroups failed"));
2496 int res;
2497 if (paranoia)
2498 res = setresgid (server_gid, server_gid, old_gid);
2499 else
2500 res = setgid (server_gid);
2501 if (res == -1)
2503 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2504 perror ("setgid");
2505 exit (4);
2508 if (paranoia)
2509 res = setresuid (server_uid, server_uid, old_uid);
2510 else
2511 res = setuid (server_uid);
2512 if (res == -1)
2514 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2515 perror ("setuid");
2516 exit (4);
2519 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2520 /* Remove the temporary capabilities. */
2521 install_real_capabilities (new_caps);
2522 #endif