Cache network interface information
[glibc.git] / nscd / connections.c
blob1b8a9bdba74c3c3572a73860dd6fa28816df7448
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2007, 2008, 2009, 2011 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20 #include <alloca.h>
21 #include <assert.h>
22 #include <atomic.h>
23 #include <error.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <grp.h>
27 #include <ifaddrs.h>
28 #include <libintl.h>
29 #include <pthread.h>
30 #include <pwd.h>
31 #include <resolv.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <unistd.h>
35 #include <arpa/inet.h>
36 #ifdef HAVE_NETLINK
37 # include <netlink/netlink.h>
38 #endif
39 #ifdef HAVE_EPOLL
40 # include <sys/epoll.h>
41 #endif
42 #ifdef HAVE_INOTIFY
43 # include <sys/inotify.h>
44 #endif
45 #include <sys/mman.h>
46 #include <sys/param.h>
47 #include <sys/poll.h>
48 #ifdef HAVE_SENDFILE
49 # include <sys/sendfile.h>
50 #endif
51 #include <sys/socket.h>
52 #include <sys/stat.h>
53 #include <sys/un.h>
55 #include "nscd.h"
56 #include "dbg_log.h"
57 #include "selinux.h"
58 #include <resolv/resolv.h>
59 #ifdef HAVE_SENDFILE
60 # include <kernel-features.h>
61 #endif
/* Support to run nscd as an unprivileged user.  nscd_init only calls
   begin_drop_privileges/finish_drop_privileges when server_user is
   non-NULL.  */
const char *server_user;
const char *stat_user;
uid_t stat_uid;

/* File-local identity data; presumably filled in by
   begin_drop_privileges -- confirm against its definition.  */
static uid_t server_uid;
static gid_t server_gid;
static gid_t *server_groups;
static int server_ngroups;

/* Fallback for systems whose headers do not define NGROUPS.  */
#ifndef NGROUPS
# define NGROUPS 32
#endif

static pthread_attr_t attr;

static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
81 /* Map request type to a string. */
82 const char *const serv2str[LASTREQ] =
84 [GETPWBYNAME] = "GETPWBYNAME",
85 [GETPWBYUID] = "GETPWBYUID",
86 [GETGRBYNAME] = "GETGRBYNAME",
87 [GETGRBYGID] = "GETGRBYGID",
88 [GETHOSTBYNAME] = "GETHOSTBYNAME",
89 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
90 [GETHOSTBYADDR] = "GETHOSTBYADDR",
91 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
92 [SHUTDOWN] = "SHUTDOWN",
93 [GETSTAT] = "GETSTAT",
94 [INVALIDATE] = "INVALIDATE",
95 [GETFDPW] = "GETFDPW",
96 [GETFDGR] = "GETFDGR",
97 [GETFDHST] = "GETFDHST",
98 [GETAI] = "GETAI",
99 [INITGROUPS] = "INITGROUPS",
100 [GETSERVBYNAME] = "GETSERVBYNAME",
101 [GETSERVBYPORT] = "GETSERVBYPORT",
102 [GETFDSERV] = "GETFDSERV",
103 [GETNETGRENT] = "GETNETGRENT",
104 [INNETGR] = "INNETGR",
105 [GETFDNETGR] = "GETFDNETGR"
108 /* The control data structures for the services. */
109 struct database_dyn dbs[lastdb] =
111 [pwddb] = {
112 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
113 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
114 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
115 .enabled = 0,
116 .check_file = 1,
117 .persistent = 0,
118 .propagate = 1,
119 .shared = 0,
120 .max_db_size = DEFAULT_MAX_DB_SIZE,
121 .suggested_module = DEFAULT_SUGGESTED_MODULE,
122 .db_filename = _PATH_NSCD_PASSWD_DB,
123 .disabled_iov = &pwd_iov_disabled,
124 .postimeout = 3600,
125 .negtimeout = 20,
126 .wr_fd = -1,
127 .ro_fd = -1,
128 .mmap_used = false
130 [grpdb] = {
131 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
132 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
133 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
134 .enabled = 0,
135 .check_file = 1,
136 .persistent = 0,
137 .propagate = 1,
138 .shared = 0,
139 .max_db_size = DEFAULT_MAX_DB_SIZE,
140 .suggested_module = DEFAULT_SUGGESTED_MODULE,
141 .db_filename = _PATH_NSCD_GROUP_DB,
142 .disabled_iov = &grp_iov_disabled,
143 .postimeout = 3600,
144 .negtimeout = 60,
145 .wr_fd = -1,
146 .ro_fd = -1,
147 .mmap_used = false
149 [hstdb] = {
150 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
151 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
152 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
153 .enabled = 0,
154 .check_file = 1,
155 .persistent = 0,
156 .propagate = 0, /* Not used. */
157 .shared = 0,
158 .max_db_size = DEFAULT_MAX_DB_SIZE,
159 .suggested_module = DEFAULT_SUGGESTED_MODULE,
160 .db_filename = _PATH_NSCD_HOSTS_DB,
161 .disabled_iov = &hst_iov_disabled,
162 .postimeout = 3600,
163 .negtimeout = 20,
164 .wr_fd = -1,
165 .ro_fd = -1,
166 .mmap_used = false
168 [servdb] = {
169 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
170 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
171 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
172 .enabled = 0,
173 .check_file = 1,
174 .persistent = 0,
175 .propagate = 0, /* Not used. */
176 .shared = 0,
177 .max_db_size = DEFAULT_MAX_DB_SIZE,
178 .suggested_module = DEFAULT_SUGGESTED_MODULE,
179 .db_filename = _PATH_NSCD_SERVICES_DB,
180 .disabled_iov = &serv_iov_disabled,
181 .postimeout = 28800,
182 .negtimeout = 20,
183 .wr_fd = -1,
184 .ro_fd = -1,
185 .mmap_used = false
187 [netgrdb] = {
188 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
189 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
190 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
191 .enabled = 0,
192 .check_file = 1,
193 .persistent = 0,
194 .propagate = 0, /* Not used. */
195 .shared = 0,
196 .max_db_size = DEFAULT_MAX_DB_SIZE,
197 .suggested_module = DEFAULT_SUGGESTED_MODULE,
198 .db_filename = _PATH_NSCD_NETGROUP_DB,
199 .disabled_iov = &netgroup_iov_disabled,
200 .postimeout = 28800,
201 .negtimeout = 20,
202 .wr_fd = -1,
203 .ro_fd = -1,
204 .mmap_used = false
209 /* Mapping of request type to database. */
210 static struct
212 bool data_request;
213 struct database_dyn *db;
214 } const reqinfo[LASTREQ] =
216 [GETPWBYNAME] = { true, &dbs[pwddb] },
217 [GETPWBYUID] = { true, &dbs[pwddb] },
218 [GETGRBYNAME] = { true, &dbs[grpdb] },
219 [GETGRBYGID] = { true, &dbs[grpdb] },
220 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
221 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
222 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
223 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
224 [SHUTDOWN] = { false, NULL },
225 [GETSTAT] = { false, NULL },
226 [SHUTDOWN] = { false, NULL },
227 [GETFDPW] = { false, &dbs[pwddb] },
228 [GETFDGR] = { false, &dbs[grpdb] },
229 [GETFDHST] = { false, &dbs[hstdb] },
230 [GETAI] = { true, &dbs[hstdb] },
231 [INITGROUPS] = { true, &dbs[grpdb] },
232 [GETSERVBYNAME] = { true, &dbs[servdb] },
233 [GETSERVBYPORT] = { true, &dbs[servdb] },
234 [GETFDSERV] = { false, &dbs[servdb] },
235 [GETNETGRENT] = { true, &dbs[netgrdb] },
236 [INNETGR] = { true, &dbs[netgrdb] },
237 [GETFDNETGR] = { false, &dbs[netgrdb] }
/* Initial number of threads to use.  -1 means "not configured";
   nscd_init then substitutes its default.  */
int nthreads = -1;
/* Maximum number of threads to use.  */
int max_nthreads = 32;

/* Socket for incoming connections.  */
static int sock;

#ifdef HAVE_INOTIFY
/* Inotify descriptor.  */
int inotify_fd = -1;
#endif

#ifdef HAVE_NETLINK
/* Descriptor for netlink status updates.  */
static int nl_status_fd = -1;
#endif

#ifndef __ASSUME_SOCK_CLOEXEC
/* Negative if SOCK_CLOEXEC is not supported, positive if it is, zero
   before we know the result.  */
static int have_sock_cloexec;
#endif
#ifndef __ASSUME_ACCEPT4
static int have_accept4;
#endif

/* Number of times clients had to wait.  */
unsigned long int client_queued;
/* Send all LEN bytes of BUF over the socket FD, retrying after short
   writes and after interruption by a signal.  At least one send call
   is always made, even for LEN == 0.

   Returns the negative send result if a call fails; otherwise the
   number of bytes actually transmitted, which is less than LEN only
   when send reported zero bytes.  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cursor = buf;
  size_t left = len;
  ssize_t sent;

  for (;;)
    {
      sent = TEMP_FAILURE_RETRY (send (fd, cursor, left, MSG_NOSIGNAL));
      if (sent <= 0)
        break;

      cursor += sent;
      left -= sent;
      if (left == 0)
        break;
    }

  if (sent < 0)
    return sent;

  return len - left;
}
#ifdef HAVE_SENDFILE
/* Transfer LEN bytes starting at offset OFF of FROMFD to TOFD with
   sendfile, retrying after partial transfers and after interruption
   by a signal.  At least one sendfile call is always made.

   Returns the negative sendfile result if a call fails; otherwise the
   number of bytes transferred.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t left = len;
  ssize_t moved;

  for (;;)
    {
      /* sendfile advances OFF itself, so only the count is adjusted.  */
      moved = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, left));
      if (moved <= 0)
        break;

      left -= moved;
      if (left <= 0)
        break;
    }

  if (moved < 0)
    return moved;

  return len - left;
}
#endif
/* Values stored in the per-byte use map that check_use fills in while
   a persistent database is verified.  The low bits name the kind of
   object occupying a byte; use_begin/use_end flag its first and last
   byte, and use_first is OR'ed in by callers for entries whose
   `first' flag is set.  */
enum usekey
  {
    use_not = 0,

    /* The following three are not really used, they are symbolic
       constants.  */
    use_first = 16,
    use_begin = 32,
    use_end = 64,

    /* Hash entry.  */
    use_he = 1,
    use_he_begin = use_he | use_begin,
    use_he_end = use_he | use_end,

#if SEPARATE_KEY
    /* Key stored outside of its data record.  */
    use_key = 2,
    use_key_begin = use_key | use_begin,
    use_key_end = use_key | use_end,
    use_key_first = use_key_begin | use_first,
#endif

    /* Data record.  */
    use_data = 3,
    use_data_begin = use_data | use_begin,
    use_data_end = use_data | use_end,
    use_data_first = use_data_begin | use_first
  };
334 static int
335 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
336 enum usekey use, ref_t start, size_t len)
338 assert (len >= 2);
340 if (start > first_free || start + len > first_free
341 || (start & BLOCK_ALIGN_M1))
342 return 0;
344 if (usemap[start] == use_not)
346 /* Add the start marker. */
347 usemap[start] = use | use_begin;
348 use &= ~use_first;
350 while (--len > 0)
351 if (usemap[++start] != use_not)
352 return 0;
353 else
354 usemap[start] = use;
356 /* Add the end marker. */
357 usemap[start] = use | use_end;
359 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
361 /* Hash entries can't be shared. */
362 if (use == use_he)
363 return 0;
365 usemap[start] |= (use & use_first);
366 use &= ~use_first;
368 while (--len > 1)
369 if (usemap[++start] != use)
370 return 0;
372 if (usemap[++start] != (use | use_end))
373 return 0;
375 else
376 /* Points to a wrong object or somewhere in the middle. */
377 return 0;
379 return 1;
383 /* Verify data in persistent database. */
384 static int
385 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
387 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb
388 || dbnr == netgrdb);
390 time_t now = time (NULL);
392 struct database_pers_head *head = mem;
393 struct database_pers_head head_copy = *head;
395 /* Check that the header that was read matches the head in the database. */
396 if (memcmp (head, readhead, sizeof (*head)) != 0)
397 return 0;
399 /* First some easy tests: make sure the database header is sane. */
400 if (head->version != DB_VERSION
401 || head->header_size != sizeof (*head)
402 /* We allow a timestamp to be one hour ahead of the current time.
403 This should cover daylight saving time changes. */
404 || head->timestamp > now + 60 * 60 + 60
405 || (head->gc_cycle & 1)
406 || head->module == 0
407 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
408 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
409 || head->first_free < 0
410 || head->first_free > head->data_size
411 || (head->first_free & BLOCK_ALIGN_M1) != 0
412 || head->maxnentries < 0
413 || head->maxnsearched < 0)
414 return 0;
416 uint8_t *usemap = calloc (head->first_free, 1);
417 if (usemap == NULL)
418 return 0;
420 const char *data = (char *) &head->array[roundup (head->module,
421 ALIGN / sizeof (ref_t))];
423 nscd_ssize_t he_cnt = 0;
424 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
426 ref_t trail = head->array[cnt];
427 ref_t work = trail;
428 int tick = 0;
430 while (work != ENDREF)
432 if (! check_use (data, head->first_free, usemap, use_he, work,
433 sizeof (struct hashentry)))
434 goto fail;
436 /* Now we know we can dereference the record. */
437 struct hashentry *here = (struct hashentry *) (data + work);
439 ++he_cnt;
441 /* Make sure the record is for this type of service. */
442 if (here->type >= LASTREQ
443 || reqinfo[here->type].db != &dbs[dbnr])
444 goto fail;
446 /* Validate boolean field value. */
447 if (here->first != false && here->first != true)
448 goto fail;
450 if (here->len < 0)
451 goto fail;
453 /* Now the data. */
454 if (here->packet < 0
455 || here->packet > head->first_free
456 || here->packet + sizeof (struct datahead) > head->first_free)
457 goto fail;
459 struct datahead *dh = (struct datahead *) (data + here->packet);
461 if (! check_use (data, head->first_free, usemap,
462 use_data | (here->first ? use_first : 0),
463 here->packet, dh->allocsize))
464 goto fail;
466 if (dh->allocsize < sizeof (struct datahead)
467 || dh->recsize > dh->allocsize
468 || (dh->notfound != false && dh->notfound != true)
469 || (dh->usable != false && dh->usable != true))
470 goto fail;
472 if (here->key < here->packet + sizeof (struct datahead)
473 || here->key > here->packet + dh->allocsize
474 || here->key + here->len > here->packet + dh->allocsize)
476 #if SEPARATE_KEY
477 /* If keys can appear outside of data, this should be done
478 instead. But gc doesn't mark the data in that case. */
479 if (! check_use (data, head->first_free, usemap,
480 use_key | (here->first ? use_first : 0),
481 here->key, here->len))
482 #endif
483 goto fail;
486 work = here->next;
488 if (work == trail)
489 /* A circular list, this must not happen. */
490 goto fail;
491 if (tick)
492 trail = ((struct hashentry *) (data + trail))->next;
493 tick = 1 - tick;
497 if (he_cnt != head->nentries)
498 goto fail;
500 /* See if all data and keys had at least one reference from
501 he->first == true hashentry. */
502 for (ref_t idx = 0; idx < head->first_free; ++idx)
504 #if SEPARATE_KEY
505 if (usemap[idx] == use_key_begin)
506 goto fail;
507 #endif
508 if (usemap[idx] == use_data_begin)
509 goto fail;
512 /* Finally, make sure the database hasn't changed since the first test. */
513 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
514 goto fail;
516 free (usemap);
517 return 1;
519 fail:
520 free (usemap);
521 return 0;
/* Extra flag passed to every open/mkostemp call in nscd_init: request
   close-on-exec where the system headers provide O_CLOEXEC, nothing
   otherwise.  */
#ifndef O_CLOEXEC
# define EXTRA_O_FLAGS 0
#else
# define EXTRA_O_FLAGS O_CLOEXEC
#endif
532 /* Initialize database information structures. */
533 void
534 nscd_init (void)
536 /* Look up unprivileged uid/gid/groups before we start listening on the
537 socket */
538 if (server_user != NULL)
539 begin_drop_privileges ();
541 if (nthreads == -1)
542 /* No configuration for this value, assume a default. */
543 nthreads = 4;
545 for (size_t cnt = 0; cnt < lastdb; ++cnt)
546 if (dbs[cnt].enabled)
548 pthread_rwlock_init (&dbs[cnt].lock, NULL);
549 pthread_mutex_init (&dbs[cnt].memlock, NULL);
551 if (dbs[cnt].persistent)
553 /* Try to open the appropriate file on disk. */
554 int fd = open (dbs[cnt].db_filename, O_RDWR | EXTRA_O_FLAGS);
555 if (fd != -1)
557 char *msg = NULL;
558 struct stat64 st;
559 void *mem;
560 size_t total;
561 struct database_pers_head head;
562 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
563 sizeof (head)));
564 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
566 fail_db_errno:
567 /* The code is single-threaded at this point so
568 using strerror is just fine. */
569 msg = strerror (errno);
570 fail_db:
571 dbg_log (_("invalid persistent database file \"%s\": %s"),
572 dbs[cnt].db_filename, msg);
573 unlink (dbs[cnt].db_filename);
575 else if (head.module == 0 && head.data_size == 0)
577 /* The file has been created, but the head has not
578 been initialized yet. */
579 msg = _("uninitialized header");
580 goto fail_db;
582 else if (head.header_size != (int) sizeof (head))
584 msg = _("header size does not match");
585 goto fail_db;
587 else if ((total = (sizeof (head)
588 + roundup (head.module * sizeof (ref_t),
589 ALIGN)
590 + head.data_size))
591 > st.st_size
592 || total < sizeof (head))
594 msg = _("file size does not match");
595 goto fail_db;
597 /* Note we map with the maximum size allowed for the
598 database. This is likely much larger than the
599 actual file size. This is OK on most OSes since
600 extensions of the underlying file will
601 automatically translate more pages available for
602 memory access. */
603 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
604 PROT_READ | PROT_WRITE,
605 MAP_SHARED, fd, 0))
606 == MAP_FAILED)
607 goto fail_db_errno;
608 else if (!verify_persistent_db (mem, &head, cnt))
610 munmap (mem, total);
611 msg = _("verification failed");
612 goto fail_db;
614 else
616 /* Success. We have the database. */
617 dbs[cnt].head = mem;
618 dbs[cnt].memsize = total;
619 dbs[cnt].data = (char *)
620 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
621 ALIGN / sizeof (ref_t))];
622 dbs[cnt].mmap_used = true;
624 if (dbs[cnt].suggested_module > head.module)
625 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
626 dbnames[cnt]);
628 dbs[cnt].wr_fd = fd;
629 fd = -1;
630 /* We also need a read-only descriptor. */
631 if (dbs[cnt].shared)
633 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
634 O_RDONLY | EXTRA_O_FLAGS);
635 if (dbs[cnt].ro_fd == -1)
636 dbg_log (_("\
637 cannot create read-only descriptor for \"%s\"; no mmap"),
638 dbs[cnt].db_filename);
641 // XXX Shall we test whether the descriptors actually
642 // XXX point to the same file?
645 /* Close the file descriptors in case something went
646 wrong in which case the variable have not been
647 assigned -1. */
648 if (fd != -1)
649 close (fd);
651 else if (errno == EACCES)
652 error (EXIT_FAILURE, 0, _("cannot access '%s'"),
653 dbs[cnt].db_filename);
656 if (dbs[cnt].head == NULL)
658 /* No database loaded. Allocate the data structure,
659 possibly on disk. */
660 struct database_pers_head head;
661 size_t total = (sizeof (head)
662 + roundup (dbs[cnt].suggested_module
663 * sizeof (ref_t), ALIGN)
664 + (dbs[cnt].suggested_module
665 * DEFAULT_DATASIZE_PER_BUCKET));
667 /* Try to create the database. If we do not need a
668 persistent database create a temporary file. */
669 int fd;
670 int ro_fd = -1;
671 if (dbs[cnt].persistent)
673 fd = open (dbs[cnt].db_filename,
674 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | EXTRA_O_FLAGS,
675 S_IRUSR | S_IWUSR);
676 if (fd != -1 && dbs[cnt].shared)
677 ro_fd = open (dbs[cnt].db_filename,
678 O_RDONLY | EXTRA_O_FLAGS);
680 else
682 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
683 fd = mkostemp (fname, EXTRA_O_FLAGS);
685 /* We do not need the file name anymore after we
686 opened another file descriptor in read-only mode. */
687 if (fd != -1)
689 if (dbs[cnt].shared)
690 ro_fd = open (fname, O_RDONLY | EXTRA_O_FLAGS);
692 unlink (fname);
696 if (fd == -1)
698 if (errno == EEXIST)
700 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
701 dbnames[cnt], dbs[cnt].db_filename);
702 // XXX Correct way to terminate?
703 exit (1);
706 if (dbs[cnt].persistent)
707 dbg_log (_("cannot create %s; no persistent database used"),
708 dbs[cnt].db_filename);
709 else
710 dbg_log (_("cannot create %s; no sharing possible"),
711 dbs[cnt].db_filename);
713 dbs[cnt].persistent = 0;
714 // XXX remember: no mmap
716 else
718 /* Tell the user if we could not create the read-only
719 descriptor. */
720 if (ro_fd == -1 && dbs[cnt].shared)
721 dbg_log (_("\
722 cannot create read-only descriptor for \"%s\"; no mmap"),
723 dbs[cnt].db_filename);
725 /* Before we create the header, initialiye the hash
726 table. So that if we get interrupted if writing
727 the header we can recognize a partially initialized
728 database. */
729 size_t ps = sysconf (_SC_PAGESIZE);
730 char tmpbuf[ps];
731 assert (~ENDREF == 0);
732 memset (tmpbuf, '\xff', ps);
734 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
735 off_t offset = sizeof (head);
737 size_t towrite;
738 if (offset % ps != 0)
740 towrite = MIN (remaining, ps - (offset % ps));
741 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
742 goto write_fail;
743 offset += towrite;
744 remaining -= towrite;
747 while (remaining > ps)
749 if (pwrite (fd, tmpbuf, ps, offset) == -1)
750 goto write_fail;
751 offset += ps;
752 remaining -= ps;
755 if (remaining > 0
756 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
757 goto write_fail;
759 /* Create the header of the file. */
760 struct database_pers_head head =
762 .version = DB_VERSION,
763 .header_size = sizeof (head),
764 .module = dbs[cnt].suggested_module,
765 .data_size = (dbs[cnt].suggested_module
766 * DEFAULT_DATASIZE_PER_BUCKET),
767 .first_free = 0
769 void *mem;
771 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
772 != sizeof (head))
773 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
774 != 0)
775 || (mem = mmap (NULL, dbs[cnt].max_db_size,
776 PROT_READ | PROT_WRITE,
777 MAP_SHARED, fd, 0)) == MAP_FAILED)
779 write_fail:
780 unlink (dbs[cnt].db_filename);
781 dbg_log (_("cannot write to database file %s: %s"),
782 dbs[cnt].db_filename, strerror (errno));
783 dbs[cnt].persistent = 0;
785 else
787 /* Success. */
788 dbs[cnt].head = mem;
789 dbs[cnt].data = (char *)
790 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
791 ALIGN / sizeof (ref_t))];
792 dbs[cnt].memsize = total;
793 dbs[cnt].mmap_used = true;
795 /* Remember the descriptors. */
796 dbs[cnt].wr_fd = fd;
797 dbs[cnt].ro_fd = ro_fd;
798 fd = -1;
799 ro_fd = -1;
802 if (fd != -1)
803 close (fd);
804 if (ro_fd != -1)
805 close (ro_fd);
809 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
810 /* We do not check here whether the O_CLOEXEC provided to the
811 open call was successful or not. The two fcntl calls are
812 only performed once each per process start-up and therefore
813 is not noticeable at all. */
814 if (paranoia
815 && ((dbs[cnt].wr_fd != -1
816 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
817 || (dbs[cnt].ro_fd != -1
818 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
820 dbg_log (_("\
821 cannot set socket to close on exec: %s; disabling paranoia mode"),
822 strerror (errno));
823 paranoia = 0;
825 #endif
827 if (dbs[cnt].head == NULL)
829 /* We do not use the persistent database. Just
830 create an in-memory data structure. */
831 assert (! dbs[cnt].persistent);
833 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
834 + (dbs[cnt].suggested_module
835 * sizeof (ref_t)));
836 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
837 assert (~ENDREF == 0);
838 memset (dbs[cnt].head->array, '\xff',
839 dbs[cnt].suggested_module * sizeof (ref_t));
840 dbs[cnt].head->module = dbs[cnt].suggested_module;
841 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
842 * dbs[cnt].head->module);
843 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
844 dbs[cnt].head->first_free = 0;
846 dbs[cnt].shared = 0;
847 assert (dbs[cnt].ro_fd == -1);
851 /* Create the socket. */
852 #ifndef __ASSUME_SOCK_CLOEXEC
853 sock = -1;
854 if (have_sock_cloexec >= 0)
855 #endif
857 sock = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
858 #ifndef __ASSUME_SOCK_CLOEXEC
859 if (have_sock_cloexec == 0)
860 have_sock_cloexec = sock != -1 || errno != EINVAL ? 1 : -1;
861 #endif
863 #ifndef __ASSUME_SOCK_CLOEXEC
864 if (have_sock_cloexec < 0)
865 sock = socket (AF_UNIX, SOCK_STREAM, 0);
866 #endif
867 if (sock < 0)
869 dbg_log (_("cannot open socket: %s"), strerror (errno));
870 exit (errno == EACCES ? 4 : 1);
872 /* Bind a name to the socket. */
873 struct sockaddr_un sock_addr;
874 sock_addr.sun_family = AF_UNIX;
875 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
876 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
878 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
879 exit (errno == EACCES ? 4 : 1);
882 #ifndef __ASSUME_SOCK_CLOEXEC
883 if (have_sock_cloexec < 0)
885 /* We don't want to get stuck on accept. */
886 int fl = fcntl (sock, F_GETFL);
887 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
889 dbg_log (_("cannot change socket to nonblocking mode: %s"),
890 strerror (errno));
891 exit (1);
894 /* The descriptor needs to be closed on exec. */
895 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
897 dbg_log (_("cannot set socket to close on exec: %s"),
898 strerror (errno));
899 exit (1);
902 #endif
904 /* Set permissions for the socket. */
905 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
907 /* Set the socket up to accept connections. */
908 if (listen (sock, SOMAXCONN) < 0)
910 dbg_log (_("cannot enable socket to accept connections: %s"),
911 strerror (errno));
912 exit (1);
915 #ifdef HAVE_NETLINK
916 if (dbs[hstdb].enabled)
918 /* Try to open netlink socket to monitor network setting changes. */
919 nl_status_fd = socket (AF_NETLINK,
920 SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK,
921 NETLINK_ROUTE);
922 if (nl_status_fd != -1)
924 struct sockaddr_nl snl;
925 memset (&snl, '\0', sizeof (snl));
926 snl.nl_family = AF_NETLINK;
927 /* XXX Is this the best set to use? */
928 snl.nl_groups = (RTMGRP_IPV4_IFADDR | RTMGRP_TC | RTMGRP_IPV4_MROUTE
929 | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_RULE
930 | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_MROUTE
931 | RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFINFO
932 | RTMGRP_IPV6_PREFIX);
934 if (bind (nl_status_fd, (struct sockaddr *) &snl, sizeof (snl)) != 0)
936 close (nl_status_fd);
937 nl_status_fd = -1;
939 else
941 /* Start the timestamp process. */
942 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
943 = __bump_nl_timestamp ();
945 # ifndef __ASSUME_SOCK_CLOEXEC
946 if (have_sock_cloexec < 0)
948 /* We don't want to get stuck on accept. */
949 int fl = fcntl (nl_status_fd, F_GETFL);
950 if (fl == -1
951 || fcntl (nl_status_fd, F_SETFL, fl | O_NONBLOCK) == -1)
953 dbg_log (_("\
954 cannot change socket to nonblocking mode: %s"),
955 strerror (errno));
956 exit (1);
959 /* The descriptor needs to be closed on exec. */
960 if (paranoia
961 && fcntl (nl_status_fd, F_SETFD, FD_CLOEXEC) == -1)
963 dbg_log (_("cannot set socket to close on exec: %s"),
964 strerror (errno));
965 exit (1);
968 # endif
972 #endif
974 /* Change to unprivileged uid/gid/groups if specified in config file */
975 if (server_user != NULL)
976 finish_drop_privileges ();
980 void
981 register_traced_file (size_t dbidx, struct traced_file *finfo)
983 if (! dbs[dbidx].enabled || ! dbs[dbidx].check_file)
984 return;
986 if (__builtin_expect (debug_level > 0, 0))
987 dbg_log (_("register trace file %s for database %s"),
988 finfo->fname, dbnames[dbidx]);
990 #ifdef HAVE_INOTIFY
991 if (inotify_fd < 0
992 || (finfo->inotify_descr = inotify_add_watch (inotify_fd, finfo->fname,
993 IN_DELETE_SELF
994 | IN_MODIFY)) < 0)
995 #endif
997 /* We need the modification date of the file. */
998 struct stat64 st;
1000 if (stat64 (finfo->fname, &st) < 0)
1002 /* We cannot stat() the file, disable file checking. */
1003 dbg_log (_("cannot stat() file `%s': %s"),
1004 finfo->fname, strerror (errno));
1005 return;
1008 finfo->inotify_descr = -1;
1009 finfo->mtime = st.st_mtime;
1012 /* Queue up the file name. */
1013 finfo->next = dbs[dbidx].traced_files;
1014 dbs[dbidx].traced_files = finfo;
1018 /* Close the connections. */
1019 void
1020 close_sockets (void)
1022 close (sock);
1026 static void
1027 invalidate_cache (char *key, int fd)
1029 dbtype number;
1030 int32_t resp;
1032 for (number = pwddb; number < lastdb; ++number)
1033 if (strcmp (key, dbnames[number]) == 0)
1035 if (number == hstdb)
1037 struct traced_file *runp = dbs[hstdb].traced_files;
1038 while (runp != NULL)
1039 if (runp->call_res_init)
1041 res_init ();
1042 break;
1044 else
1045 runp = runp->next;
1047 break;
1050 if (number == lastdb)
1052 resp = EINVAL;
1053 writeall (fd, &resp, sizeof (resp));
1054 return;
1057 if (dbs[number].enabled)
1059 pthread_mutex_lock (&dbs[number].prune_run_lock);
1060 prune_cache (&dbs[number], LONG_MAX, fd);
1061 pthread_mutex_unlock (&dbs[number].prune_run_lock);
1063 else
1065 resp = 0;
1066 writeall (fd, &resp, sizeof (resp));
1071 #ifdef SCM_RIGHTS
1072 static void
1073 send_ro_fd (struct database_dyn *db, char *key, int fd)
1075 /* If we do not have an read-only file descriptor do nothing. */
1076 if (db->ro_fd == -1)
1077 return;
1079 /* We need to send some data along with the descriptor. */
1080 uint64_t mapsize = (db->head->data_size
1081 + roundup (db->head->module * sizeof (ref_t), ALIGN)
1082 + sizeof (struct database_pers_head));
1083 struct iovec iov[2];
1084 iov[0].iov_base = key;
1085 iov[0].iov_len = strlen (key) + 1;
1086 iov[1].iov_base = &mapsize;
1087 iov[1].iov_len = sizeof (mapsize);
1089 /* Prepare the control message to transfer the descriptor. */
1090 union
1092 struct cmsghdr hdr;
1093 char bytes[CMSG_SPACE (sizeof (int))];
1094 } buf;
1095 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
1096 .msg_control = buf.bytes,
1097 .msg_controllen = sizeof (buf) };
1098 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
1100 cmsg->cmsg_level = SOL_SOCKET;
1101 cmsg->cmsg_type = SCM_RIGHTS;
1102 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
1104 int *ip = (int *) CMSG_DATA (cmsg);
1105 *ip = db->ro_fd;
1107 msg.msg_controllen = cmsg->cmsg_len;
1109 /* Send the control message. We repeat when we are interrupted but
1110 everything else is ignored. */
1111 #ifndef MSG_NOSIGNAL
1112 # define MSG_NOSIGNAL 0
1113 #endif
1114 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
1116 if (__builtin_expect (debug_level > 0, 0))
1117 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
1119 #endif /* SCM_RIGHTS */
1122 /* Handle new request. */
1123 static void
1124 handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
1126 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
1128 if (debug_level > 0)
1129 dbg_log (_("\
1130 cannot handle old request version %d; current version is %d"),
1131 req->version, NSCD_VERSION);
1132 return;
1135 /* Perform the SELinux check before we go on to the standard checks. */
1136 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
1138 if (debug_level > 0)
1140 #ifdef SO_PEERCRED
1141 # ifdef PATH_MAX
1142 char buf[PATH_MAX];
1143 # else
1144 char buf[4096];
1145 # endif
1147 snprintf (buf, sizeof (buf), "/proc/%ld/exe", (long int) pid);
1148 ssize_t n = readlink (buf, buf, sizeof (buf) - 1);
1150 if (n <= 0)
1151 dbg_log (_("\
1152 request from %ld not handled due to missing permission"), (long int) pid);
1153 else
1155 buf[n] = '\0';
1156 dbg_log (_("\
1157 request from '%s' [%ld] not handled due to missing permission"),
1158 buf, (long int) pid);
1160 #else
1161 dbg_log (_("request not handled due to missing permission"));
1162 #endif
1164 return;
1167 struct database_dyn *db = reqinfo[req->type].db;
1169 /* See whether we can service the request from the cache. */
1170 if (__builtin_expect (reqinfo[req->type].data_request, true))
1172 if (__builtin_expect (debug_level, 0) > 0)
1174 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
1176 char buf[INET6_ADDRSTRLEN];
1178 dbg_log ("\t%s (%s)", serv2str[req->type],
1179 inet_ntop (req->type == GETHOSTBYADDR
1180 ? AF_INET : AF_INET6,
1181 key, buf, sizeof (buf)));
1183 else
1184 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1187 /* Is this service enabled? */
1188 if (__builtin_expect (!db->enabled, 0))
1190 /* No, sent the prepared record. */
1191 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1192 db->disabled_iov->iov_len,
1193 MSG_NOSIGNAL))
1194 != (ssize_t) db->disabled_iov->iov_len
1195 && __builtin_expect (debug_level, 0) > 0)
1197 /* We have problems sending the result. */
1198 char buf[256];
1199 dbg_log (_("cannot write result: %s"),
1200 strerror_r (errno, buf, sizeof (buf)));
1203 return;
1206 /* Be sure we can read the data. */
1207 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
1209 ++db->head->rdlockdelayed;
1210 pthread_rwlock_rdlock (&db->lock);
1213 /* See whether we can handle it from the cache. */
1214 struct datahead *cached;
1215 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1216 db, uid);
1217 if (cached != NULL)
1219 /* Hurray it's in the cache. */
1220 ssize_t nwritten;
1222 #ifdef HAVE_SENDFILE
1223 if (__builtin_expect (db->mmap_used, 1))
1225 assert (db->wr_fd != -1);
1226 assert ((char *) cached->data > (char *) db->data);
1227 assert ((char *) cached->data - (char *) db->head
1228 + cached->recsize
1229 <= (sizeof (struct database_pers_head)
1230 + db->head->module * sizeof (ref_t)
1231 + db->head->data_size));
1232 nwritten = sendfileall (fd, db->wr_fd,
1233 (char *) cached->data
1234 - (char *) db->head, cached->recsize);
1235 # ifndef __ASSUME_SENDFILE
1236 if (nwritten == -1 && errno == ENOSYS)
1237 goto use_write;
1238 # endif
1240 else
1241 # ifndef __ASSUME_SENDFILE
1242 use_write:
1243 # endif
1244 #endif
1245 nwritten = writeall (fd, cached->data, cached->recsize);
1247 if (nwritten != cached->recsize
1248 && __builtin_expect (debug_level, 0) > 0)
1250 /* We have problems sending the result. */
1251 char buf[256];
1252 dbg_log (_("cannot write result: %s"),
1253 strerror_r (errno, buf, sizeof (buf)));
1256 pthread_rwlock_unlock (&db->lock);
1258 return;
1261 pthread_rwlock_unlock (&db->lock);
1263 else if (__builtin_expect (debug_level, 0) > 0)
1265 if (req->type == INVALIDATE)
1266 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1267 else
1268 dbg_log ("\t%s", serv2str[req->type]);
1271 /* Handle the request. */
1272 switch (req->type)
1274 case GETPWBYNAME:
1275 addpwbyname (db, fd, req, key, uid);
1276 break;
1278 case GETPWBYUID:
1279 addpwbyuid (db, fd, req, key, uid);
1280 break;
1282 case GETGRBYNAME:
1283 addgrbyname (db, fd, req, key, uid);
1284 break;
1286 case GETGRBYGID:
1287 addgrbygid (db, fd, req, key, uid);
1288 break;
1290 case GETHOSTBYNAME:
1291 addhstbyname (db, fd, req, key, uid);
1292 break;
1294 case GETHOSTBYNAMEv6:
1295 addhstbynamev6 (db, fd, req, key, uid);
1296 break;
1298 case GETHOSTBYADDR:
1299 addhstbyaddr (db, fd, req, key, uid);
1300 break;
1302 case GETHOSTBYADDRv6:
1303 addhstbyaddrv6 (db, fd, req, key, uid);
1304 break;
1306 case GETAI:
1307 addhstai (db, fd, req, key, uid);
1308 break;
1310 case INITGROUPS:
1311 addinitgroups (db, fd, req, key, uid);
1312 break;
1314 case GETSERVBYNAME:
1315 addservbyname (db, fd, req, key, uid);
1316 break;
1318 case GETSERVBYPORT:
1319 addservbyport (db, fd, req, key, uid);
1320 break;
1322 case GETNETGRENT:
1323 addgetnetgrent (db, fd, req, key, uid);
1324 break;
1326 case INNETGR:
1327 addinnetgr (db, fd, req, key, uid);
1328 break;
1330 case GETSTAT:
1331 case SHUTDOWN:
1332 case INVALIDATE:
1334 /* Get the callers credentials. */
1335 #ifdef SO_PEERCRED
1336 struct ucred caller;
1337 socklen_t optlen = sizeof (caller);
1339 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1341 char buf[256];
1343 dbg_log (_("error getting caller's id: %s"),
1344 strerror_r (errno, buf, sizeof (buf)));
1345 break;
1348 uid = caller.uid;
1349 #else
1350 /* Some systems have no SO_PEERCRED implementation. They don't
1351 care about security so we don't as well. */
1352 uid = 0;
1353 #endif
1356 /* Accept shutdown, getstat and invalidate only from root. For
1357 the stat call also allow the user specified in the config file. */
1358 if (req->type == GETSTAT)
1360 if (uid == 0 || uid == stat_uid)
1361 send_stats (fd, dbs);
1363 else if (uid == 0)
1365 if (req->type == INVALIDATE)
1366 invalidate_cache (key, fd);
1367 else
1368 termination_handler (0);
1370 break;
1372 case GETFDPW:
1373 case GETFDGR:
1374 case GETFDHST:
1375 case GETFDSERV:
1376 case GETFDNETGR:
1377 #ifdef SCM_RIGHTS
1378 send_ro_fd (reqinfo[req->type].db, key, fd);
1379 #endif
1380 break;
1382 default:
1383 /* Ignore the command, it's nothing we know. */
1384 break;
1389 /* Restart the process. */
1390 static void
1391 restart (void)
1393 /* First determine the parameters. We do not use the parameters
1394 passed to main() since in case nscd is started by running the
1395 dynamic linker this will not work. Yes, this is not the usual
1396 case but nscd is part of glibc and we occasionally do this. */
1397 size_t buflen = 1024;
1398 char *buf = alloca (buflen);
1399 size_t readlen = 0;
1400 int fd = open ("/proc/self/cmdline", O_RDONLY);
1401 if (fd == -1)
1403 dbg_log (_("\
1404 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1405 strerror (errno));
1407 paranoia = 0;
1408 return;
1411 while (1)
1413 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1414 buflen - readlen));
1415 if (n == -1)
1417 dbg_log (_("\
1418 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1419 strerror (errno));
1421 close (fd);
1422 paranoia = 0;
1423 return;
1426 readlen += n;
1428 if (readlen < buflen)
1429 break;
1431 /* We might have to extend the buffer. */
1432 size_t old_buflen = buflen;
1433 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1434 buf = memmove (newp, buf, old_buflen);
1437 close (fd);
1439 /* Parse the command line. Worst case scenario: every two
1440 characters form one parameter (one character plus NUL). */
1441 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1442 int argc = 0;
1444 char *cp = buf;
1445 while (cp < buf + readlen)
1447 argv[argc++] = cp;
1448 cp = (char *) rawmemchr (cp, '\0') + 1;
1450 argv[argc] = NULL;
1452 /* Second, change back to the old user if we changed it. */
1453 if (server_user != NULL)
1455 if (setresuid (old_uid, old_uid, old_uid) != 0)
1457 dbg_log (_("\
1458 cannot change to old UID: %s; disabling paranoia mode"),
1459 strerror (errno));
1461 paranoia = 0;
1462 return;
1465 if (setresgid (old_gid, old_gid, old_gid) != 0)
1467 dbg_log (_("\
1468 cannot change to old GID: %s; disabling paranoia mode"),
1469 strerror (errno));
1471 setuid (server_uid);
1472 paranoia = 0;
1473 return;
1477 /* Next change back to the old working directory. */
1478 if (chdir (oldcwd) == -1)
1480 dbg_log (_("\
1481 cannot change to old working directory: %s; disabling paranoia mode"),
1482 strerror (errno));
1484 if (server_user != NULL)
1486 setuid (server_uid);
1487 setgid (server_gid);
1489 paranoia = 0;
1490 return;
1493 /* Synchronize memory. */
1494 int32_t certainly[lastdb];
1495 for (int cnt = 0; cnt < lastdb; ++cnt)
1496 if (dbs[cnt].enabled)
1498 /* Make sure nobody keeps using the database. */
1499 dbs[cnt].head->timestamp = 0;
1500 certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
1501 dbs[cnt].head->nscd_certainly_running = 0;
1503 if (dbs[cnt].persistent)
1504 // XXX async OK?
1505 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1508 /* The preparations are done. */
1509 #ifdef PATH_MAX
1510 char pathbuf[PATH_MAX];
1511 #else
1512 char pathbuf[256];
1513 #endif
1514 /* Try to exec the real nscd program so the process name (as reported
1515 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1516 if readlink or the exec with the result of the readlink call fails. */
1517 ssize_t n = readlink ("/proc/self/exe", pathbuf, sizeof (pathbuf) - 1);
1518 if (n != -1)
1520 pathbuf[n] = '\0';
1521 execv (pathbuf, argv);
1523 execv ("/proc/self/exe", argv);
1525 /* If we come here, we will never be able to re-exec. */
1526 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1527 strerror (errno));
1529 if (server_user != NULL)
1531 setuid (server_uid);
1532 setgid (server_gid);
1534 if (chdir ("/") != 0)
1535 dbg_log (_("cannot change current working directory to \"/\": %s"),
1536 strerror (errno));
1537 paranoia = 0;
1539 /* Reenable the databases. */
1540 time_t now = time (NULL);
1541 for (int cnt = 0; cnt < lastdb; ++cnt)
1542 if (dbs[cnt].enabled)
1544 dbs[cnt].head->timestamp = now;
1545 dbs[cnt].head->nscd_certainly_running = certainly[cnt];
/* List of file descriptors.  Entries are linked through NEXT; an entry
   whose NEXT is NULL is treated as unused by fd_ready.  */
struct fdlist
{
  int fd;
  struct fdlist *next;
};
/* Memory allocated for the list.  */
static struct fdlist *fdlist;
/* List of currently ready-to-read file descriptors.  This is a circular
   list; READYLIST points to the most recently queued entry and
   READYLIST->next is the entry a worker takes next.  */
static struct fdlist *readylist;

/* Conditional variable and mutex to signal availability of entries in
   READYLIST.  READYLIST_COND is statically initialized; the worker
   threads wait on it without a timeout.  The per-database prune
   condvars, which are used with TIMEOUT_CLOCK, are the ones that are
   initialized dynamically (in start_threads).  */
static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;

/* The clock to use with the timed condvar waits.  start_threads
   switches this to CLOCK_MONOTONIC when that clock can be used.  */
static clockid_t timeout_clock = CLOCK_REALTIME;

/* Number of threads ready to handle the READYLIST.  */
static unsigned long int nready;
1574 /* Function for the clean-up threads. */
1575 static void *
1576 __attribute__ ((__noreturn__))
1577 nscd_run_prune (void *p)
1579 const long int my_number = (long int) p;
1580 assert (dbs[my_number].enabled);
1582 int dont_need_update = setup_thread (&dbs[my_number]);
1584 time_t now = time (NULL);
1586 /* We are running. */
1587 dbs[my_number].head->timestamp = now;
1589 struct timespec prune_ts;
1590 if (__builtin_expect (clock_gettime (timeout_clock, &prune_ts) == -1, 0))
1591 /* Should never happen. */
1592 abort ();
1594 /* Compute the initial timeout time. Prevent all the timers to go
1595 off at the same time by adding a db-based value. */
1596 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1597 dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;
1599 pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
1600 pthread_mutex_t *prune_run_lock = &dbs[my_number].prune_run_lock;
1601 pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;
1603 pthread_mutex_lock (prune_lock);
1604 while (1)
1606 /* Wait, but not forever. */
1607 int e = 0;
1608 if (! dbs[my_number].clear_cache)
1609 e = pthread_cond_timedwait (prune_cond, prune_lock, &prune_ts);
1610 assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));
1612 time_t next_wait;
1613 now = time (NULL);
1614 if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time
1615 || dbs[my_number].clear_cache)
1617 /* We will determine the new timout values based on the
1618 cache content. Should there be concurrent additions to
1619 the cache which are not accounted for in the cache
1620 pruning we want to know about it. Therefore set the
1621 timeout to the maximum. It will be descreased when adding
1622 new entries to the cache, if necessary. */
1623 dbs[my_number].wakeup_time = MAX_TIMEOUT_VALUE;
1625 /* Unconditionally reset the flag. */
1626 time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
1627 dbs[my_number].clear_cache = 0;
1629 pthread_mutex_unlock (prune_lock);
1631 /* We use a separate lock for running the prune function (instead
1632 of keeping prune_lock locked) because this enables concurrent
1633 invocations of cache_add which might modify the timeout value. */
1634 pthread_mutex_lock (prune_run_lock);
1635 next_wait = prune_cache (&dbs[my_number], prune_now, -1);
1636 pthread_mutex_unlock (prune_run_lock);
1638 next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1639 /* If clients cannot determine for sure whether nscd is running
1640 we need to wake up occasionally to update the timestamp.
1641 Wait 90% of the update period. */
1642 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1643 if (__builtin_expect (! dont_need_update, 0))
1645 next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1646 dbs[my_number].head->timestamp = now;
1649 pthread_mutex_lock (prune_lock);
1651 /* Make it known when we will wake up again. */
1652 if (now + next_wait < dbs[my_number].wakeup_time)
1653 dbs[my_number].wakeup_time = now + next_wait;
1654 else
1655 next_wait = dbs[my_number].wakeup_time - now;
1657 else
1658 /* The cache was just pruned. Do not do it again now. Just
1659 use the new timeout value. */
1660 next_wait = dbs[my_number].wakeup_time - now;
1662 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1663 /* Should never happen. */
1664 abort ();
1666 /* Compute next timeout time. */
1667 prune_ts.tv_sec += next_wait;
1672 /* This is the main loop. It is replicated in different threads but
1673 the use of the ready list makes sure only one thread handles an
1674 incoming connection. */
1675 static void *
1676 __attribute__ ((__noreturn__))
1677 nscd_run_worker (void *p)
1679 char buf[256];
1681 /* Initial locking. */
1682 pthread_mutex_lock (&readylist_lock);
1684 /* One more thread available. */
1685 ++nready;
1687 while (1)
1689 while (readylist == NULL)
1690 pthread_cond_wait (&readylist_cond, &readylist_lock);
1692 struct fdlist *it = readylist->next;
1693 if (readylist->next == readylist)
1694 /* Just one entry on the list. */
1695 readylist = NULL;
1696 else
1697 readylist->next = it->next;
1699 /* Extract the information and mark the record ready to be used
1700 again. */
1701 int fd = it->fd;
1702 it->next = NULL;
1704 /* One more thread available. */
1705 --nready;
1707 /* We are done with the list. */
1708 pthread_mutex_unlock (&readylist_lock);
1710 #ifndef __ASSUME_ACCEPT4
1711 if (have_accept4 < 0)
1713 /* We do not want to block on a short read or so. */
1714 int fl = fcntl (fd, F_GETFL);
1715 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1716 goto close_and_out;
1718 #endif
1720 /* Now read the request. */
1721 request_header req;
1722 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1723 != sizeof (req), 0))
1725 /* We failed to read data. Note that this also might mean we
1726 failed because we would have blocked. */
1727 if (debug_level > 0)
1728 dbg_log (_("short read while reading request: %s"),
1729 strerror_r (errno, buf, sizeof (buf)));
1730 goto close_and_out;
1733 /* Check whether this is a valid request type. */
1734 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1735 goto close_and_out;
1737 /* Some systems have no SO_PEERCRED implementation. They don't
1738 care about security so we don't as well. */
1739 uid_t uid = -1;
1740 #ifdef SO_PEERCRED
1741 pid_t pid = 0;
1743 if (__builtin_expect (debug_level > 0, 0))
1745 struct ucred caller;
1746 socklen_t optlen = sizeof (caller);
1748 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1749 pid = caller.pid;
1751 #else
1752 const pid_t pid = 0;
1753 #endif
1755 /* It should not be possible to crash the nscd with a silly
1756 request (i.e., a terribly large key). We limit the size to 1kb. */
1757 if (__builtin_expect (req.key_len, 1) < 0
1758 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1760 if (debug_level > 0)
1761 dbg_log (_("key length in request too long: %d"), req.key_len);
1763 else
1765 /* Get the key. */
1766 char keybuf[MAXKEYLEN];
1768 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1769 req.key_len))
1770 != req.key_len, 0))
1772 /* Again, this can also mean we would have blocked. */
1773 if (debug_level > 0)
1774 dbg_log (_("short read while reading request key: %s"),
1775 strerror_r (errno, buf, sizeof (buf)));
1776 goto close_and_out;
1779 if (__builtin_expect (debug_level, 0) > 0)
1781 #ifdef SO_PEERCRED
1782 if (pid != 0)
1783 dbg_log (_("\
1784 handle_request: request received (Version = %d) from PID %ld"),
1785 req.version, (long int) pid);
1786 else
1787 #endif
1788 dbg_log (_("\
1789 handle_request: request received (Version = %d)"), req.version);
1792 /* Phew, we got all the data, now process it. */
1793 handle_request (fd, &req, keybuf, uid, pid);
1796 close_and_out:
1797 /* We are done. */
1798 close (fd);
1800 /* Re-locking. */
1801 pthread_mutex_lock (&readylist_lock);
1803 /* One more thread available. */
1804 ++nready;
1806 /* NOTREACHED */
1810 static unsigned int nconns;
1812 static void
1813 fd_ready (int fd)
1815 pthread_mutex_lock (&readylist_lock);
1817 /* Find an empty entry in FDLIST. */
1818 size_t inner;
1819 for (inner = 0; inner < nconns; ++inner)
1820 if (fdlist[inner].next == NULL)
1821 break;
1822 assert (inner < nconns);
1824 fdlist[inner].fd = fd;
1826 if (readylist == NULL)
1827 readylist = fdlist[inner].next = &fdlist[inner];
1828 else
1830 fdlist[inner].next = readylist->next;
1831 readylist = readylist->next = &fdlist[inner];
1834 bool do_signal = true;
1835 if (__builtin_expect (nready == 0, 0))
1837 ++client_queued;
1838 do_signal = false;
1840 /* Try to start another thread to help out. */
1841 pthread_t th;
1842 if (nthreads < max_nthreads
1843 && pthread_create (&th, &attr, nscd_run_worker,
1844 (void *) (long int) nthreads) == 0)
1846 /* We got another thread. */
1847 ++nthreads;
1848 /* The new thread might need a kick. */
1849 do_signal = true;
1854 pthread_mutex_unlock (&readylist_lock);
1856 /* Tell one of the worker threads there is work to do. */
1857 if (do_signal)
1858 pthread_cond_signal (&readylist_cond);
1862 /* Check whether restarting should happen. */
1863 static inline int
1864 restart_p (time_t now)
1866 return (paranoia && readylist == NULL && nready == nthreads
1867 && now >= restart_time);
1871 /* Array for times a connection was accepted. */
1872 static time_t *starttime;
1875 static void
1876 __attribute__ ((__noreturn__))
1877 main_loop_poll (void)
1879 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1880 * sizeof (conns[0]));
1882 conns[0].fd = sock;
1883 conns[0].events = POLLRDNORM;
1884 size_t nused = 1;
1885 size_t firstfree = 1;
1887 #ifdef HAVE_INOTIFY
1888 if (inotify_fd != -1)
1890 conns[1].fd = inotify_fd;
1891 conns[1].events = POLLRDNORM;
1892 nused = 2;
1893 firstfree = 2;
1895 #endif
1897 #ifdef HAVE_NETLINK
1898 size_t idx_nl_status_fd = 0;
1899 if (nl_status_fd != -1)
1901 idx_nl_status_fd = nused;
1902 conns[nused].fd = nl_status_fd;
1903 conns[nused].events = POLLRDNORM;
1904 ++nused;
1905 firstfree = nused;
1907 #endif
1909 while (1)
1911 /* Wait for any event. We wait at most a couple of seconds so
1912 that we can check whether we should close any of the accepted
1913 connections since we have not received a request. */
1914 #define MAX_ACCEPT_TIMEOUT 30
1915 #define MIN_ACCEPT_TIMEOUT 5
1916 #define MAIN_THREAD_TIMEOUT \
1917 (MAX_ACCEPT_TIMEOUT * 1000 \
1918 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1920 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1922 time_t now = time (NULL);
1924 /* If there is a descriptor ready for reading or there is a new
1925 connection, process this now. */
1926 if (n > 0)
1928 if (conns[0].revents != 0)
1930 /* We have a new incoming connection. Accept the connection. */
1931 int fd;
1933 #ifndef __ASSUME_ACCEPT4
1934 fd = -1;
1935 if (have_accept4 >= 0)
1936 #endif
1938 fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
1939 SOCK_NONBLOCK));
1940 #ifndef __ASSUME_ACCEPT4
1941 if (have_accept4 == 0)
1942 have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
1943 #endif
1945 #ifndef __ASSUME_ACCEPT4
1946 if (have_accept4 < 0)
1947 fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
1948 #endif
1950 /* Use the descriptor if we have not reached the limit. */
1951 if (fd >= 0)
1953 if (firstfree < nconns)
1955 conns[firstfree].fd = fd;
1956 conns[firstfree].events = POLLRDNORM;
1957 starttime[firstfree] = now;
1958 if (firstfree >= nused)
1959 nused = firstfree + 1;
1962 ++firstfree;
1963 while (firstfree < nused && conns[firstfree].fd != -1);
1965 else
1966 /* We cannot use the connection so close it. */
1967 close (fd);
1970 --n;
1973 size_t first = 1;
1974 #ifdef HAVE_INOTIFY
1975 if (inotify_fd != -1 && conns[1].fd == inotify_fd)
1977 if (conns[1].revents != 0)
1979 bool to_clear[lastdb] = { false, };
1980 union
1982 # ifndef PATH_MAX
1983 # define PATH_MAX 1024
1984 # endif
1985 struct inotify_event i;
1986 char buf[sizeof (struct inotify_event) + PATH_MAX];
1987 } inev;
1989 while (1)
1991 ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
1992 sizeof (inev)));
1993 if (nb < (ssize_t) sizeof (struct inotify_event))
1995 if (__builtin_expect (nb == -1 && errno != EAGAIN,
1998 /* Something went wrong when reading the inotify
1999 data. Better disable inotify. */
2000 dbg_log (_("\
2001 disabled inotify after read error %d"),
2002 errno);
2003 conns[1].fd = -1;
2004 firstfree = 1;
2005 if (nused == 2)
2006 nused = 1;
2007 close (inotify_fd);
2008 inotify_fd = -1;
2010 break;
2013 /* Check which of the files changed. */
2014 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
2016 struct traced_file *finfo = dbs[dbcnt].traced_files;
2018 while (finfo != NULL)
2020 if (finfo->inotify_descr == inev.i.wd)
2022 to_clear[dbcnt] = true;
2023 if (finfo->call_res_init)
2024 res_init ();
2025 goto next;
2028 finfo = finfo->next;
2031 next:;
2034 /* Actually perform the cache clearing. */
2035 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
2036 if (to_clear[dbcnt])
2038 pthread_mutex_lock (&dbs[dbcnt].prune_lock);
2039 dbs[dbcnt].clear_cache = 1;
2040 pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
2041 pthread_cond_signal (&dbs[dbcnt].prune_cond);
2044 --n;
2047 first = 2;
2049 #endif
2051 #ifdef HAVE_NETLINK
2052 if (idx_nl_status_fd != 0 && conns[idx_nl_status_fd].revents != 0)
2054 char buf[4096];
2055 /* Read all the data. We do not interpret it here. */
2056 while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2057 sizeof (buf))) != -1)
2060 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
2061 = __bump_nl_timestamp ();
2063 #endif
2065 for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
2066 if (conns[cnt].revents != 0)
2068 fd_ready (conns[cnt].fd);
2070 /* Clean up the CONNS array. */
2071 conns[cnt].fd = -1;
2072 if (cnt < firstfree)
2073 firstfree = cnt;
2074 if (cnt == nused - 1)
2076 --nused;
2077 while (conns[nused - 1].fd == -1);
2079 --n;
2083 /* Now find entries which have timed out. */
2084 assert (nused > 0);
2086 /* We make the timeout length depend on the number of file
2087 descriptors currently used. */
2088 #define ACCEPT_TIMEOUT \
2089 (MAX_ACCEPT_TIMEOUT \
2090 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
2091 time_t laststart = now - ACCEPT_TIMEOUT;
2093 for (size_t cnt = nused - 1; cnt > 0; --cnt)
2095 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
2097 /* Remove the entry, it timed out. */
2098 (void) close (conns[cnt].fd);
2099 conns[cnt].fd = -1;
2101 if (cnt < firstfree)
2102 firstfree = cnt;
2103 if (cnt == nused - 1)
2105 --nused;
2106 while (conns[nused - 1].fd == -1);
2110 if (restart_p (now))
2111 restart ();
#ifdef HAVE_EPOLL
/* Main loop based on epoll, using the epoll descriptor EFD.  The
   function returns only if one of the permanent descriptors (listening
   socket, inotify, netlink) cannot be registered with EFD; the caller
   then has to fall back to another main loop.  Otherwise it loops
   forever: new connections are accepted and registered, connections
   with data are handed to the worker threads via fd_ready, and
   connections which stay silent for too long are closed.  STARTTIME is
   indexed by descriptor number here.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  int nused = 1;
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

# ifdef HAVE_INOTIFY
  if (inotify_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = inotify_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
        /* We cannot use epoll.  */
        return;
      nused = 2;
    }
# endif

# ifdef HAVE_NETLINK
  if (nl_status_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = nl_status_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, nl_status_fd, &ev) == -1)
        /* We cannot use epoll.  */
        return;
    }
# endif

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd;

# ifndef __ASSUME_ACCEPT4
            fd = -1;
            if (have_accept4 >= 0)
# endif
              {
                fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
                                                  SOCK_NONBLOCK));
# ifndef __ASSUME_ACCEPT4
                /* Remember whether accept4 is usable at all.  */
                if (have_accept4 == 0)
                  have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
# endif
              }

# ifndef __ASSUME_ACCEPT4
            if (have_accept4 < 0)
              fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
# endif

            /* Use the descriptor if we have not reached the limit.  */
            if (fd >= 0)
              {
                /* Try to add the new descriptor.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
# ifdef HAVE_INOTIFY
        else if (revs[cnt].data.fd == inotify_fd)
          {
            bool to_clear[lastdb] = { false, };
            union
            {
              struct inotify_event i;
              char buf[sizeof (struct inotify_event) + PATH_MAX];
            } inev;

            while (1)
              {
                ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
                                                       sizeof (inev)));
                if (nb < (ssize_t) sizeof (struct inotify_event))
                  {
                    if (__builtin_expect (nb == -1 && errno != EAGAIN, 0))
                      {
                        /* Something went wrong when reading the inotify
                           data.  Better disable inotify.  */
                        dbg_log (_("disabled inotify after read error %d"),
                                 errno);
                        (void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd,
                                          NULL);
                        close (inotify_fd);
                        inotify_fd = -1;
                      }
                    break;
                  }

                /* Check which of the files changed.  */
                for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
                  {
                    struct traced_file *finfo = dbs[dbcnt].traced_files;

                    while (finfo != NULL)
                      {
                        if (finfo->inotify_descr == inev.i.wd)
                          {
                            to_clear[dbcnt] = true;
                            if (finfo->call_res_init)
                              res_init ();
                            goto next;
                          }

                        finfo = finfo->next;
                      }
                  }
              next:;
              }

            /* Actually perform the cache clearing.  */
            for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
              if (to_clear[dbcnt])
                {
                  pthread_mutex_lock (&dbs[dbcnt].prune_lock);
                  dbs[dbcnt].clear_cache = 1;
                  pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
                  pthread_cond_signal (&dbs[dbcnt].prune_cond);
                }
          }
# endif
# ifdef HAVE_NETLINK
        else if (revs[cnt].data.fd == nl_status_fd)
          {
            char buf[4096];
            /* Read all the data.  We do not interpret it here.  */
            while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
                                             sizeof (buf))) != -1)
              ;

            /* Record the new timestamp in the hosts database header,
               exactly as the poll-based main loop does.  Merely
               bumping the timestamp without storing it leaves the
               value in the database header unchanged.  */
            dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
              = __bump_nl_timestamp ();
          }
# endif
        else
          {
            /* Remove the descriptor from the epoll descriptor.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      assert (starttime[sock] == 0);
      assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
      assert (nl_status_fd == -1 || starttime[nl_status_fd] == 0);
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
2328 /* Start all the threads we want. The initial process is thread no. 1. */
2329 void
2330 start_threads (void)
2332 /* Initialize the conditional variable we will use. The only
2333 non-standard attribute we might use is the clock selection. */
2334 pthread_condattr_t condattr;
2335 pthread_condattr_init (&condattr);
2337 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2338 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2339 /* Determine whether the monotonous clock is available. */
2340 struct timespec dummy;
2341 # if _POSIX_MONOTONIC_CLOCK == 0
2342 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
2343 # endif
2344 # if _POSIX_CLOCK_SELECTION == 0
2345 if (sysconf (_SC_CLOCK_SELECTION) > 0)
2346 # endif
2347 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
2348 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
2349 timeout_clock = CLOCK_MONOTONIC;
2350 #endif
2352 /* Create the attribute for the threads. They are all created
2353 detached. */
2354 pthread_attr_init (&attr);
2355 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
2356 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2357 pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
2359 /* We allow less than LASTDB threads only for debugging. */
2360 if (debug_level == 0)
2361 nthreads = MAX (nthreads, lastdb);
2363 /* Create the threads which prune the databases. */
2364 // XXX Ideally this work would be done by some of the worker threads.
2365 // XXX But this is problematic since we would need to be able to wake
2366 // XXX them up explicitly as well as part of the group handling the
2367 // XXX ready-list. This requires an operation where we can wait on
2368 // XXX two conditional variables at the same time. This operation
2369 // XXX does not exist (yet).
2370 for (long int i = 0; i < lastdb; ++i)
2372 /* Initialize the conditional variable. */
2373 if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
2375 dbg_log (_("could not initialize conditional variable"));
2376 exit (1);
2379 pthread_t th;
2380 if (dbs[i].enabled
2381 && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
2383 dbg_log (_("could not start clean-up thread; terminating"));
2384 exit (1);
2388 pthread_condattr_destroy (&condattr);
2390 for (long int i = 0; i < nthreads; ++i)
2392 pthread_t th;
2393 if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
2395 if (i == 0)
2397 dbg_log (_("could not start any worker thread; terminating"));
2398 exit (1);
2401 break;
2405 /* Determine how much room for descriptors we should initially
2406 allocate. This might need to change later if we cap the number
2407 with MAXCONN. */
2408 const long int nfds = sysconf (_SC_OPEN_MAX);
2409 #define MINCONN 32
2410 #define MAXCONN 16384
2411 if (nfds == -1 || nfds > MAXCONN)
2412 nconns = MAXCONN;
2413 else if (nfds < MINCONN)
2414 nconns = MINCONN;
2415 else
2416 nconns = nfds;
2418 /* We need memory to pass descriptors on to the worker threads. */
2419 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
2420 /* Array to keep track when connection was accepted. */
2421 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
2423 /* In the main thread we execute the loop which handles incoming
2424 connections. */
2425 #ifdef HAVE_EPOLL
2426 int efd = epoll_create (100);
2427 if (efd != -1)
2429 main_loop_epoll (efd);
2430 close (efd);
2432 #endif
2434 main_loop_poll ();
2438 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2439 this function is called, we are not listening on the nscd socket yet so
2440 we can just use the ordinary lookup functions without causing a lockup */
2441 static void
2442 begin_drop_privileges (void)
2444 struct passwd *pwd = getpwnam (server_user);
2446 if (pwd == NULL)
2448 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2449 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
2450 server_user);
2453 server_uid = pwd->pw_uid;
2454 server_gid = pwd->pw_gid;
2456 /* Save the old UID/GID if we have to change back. */
2457 if (paranoia)
2459 old_uid = getuid ();
2460 old_gid = getgid ();
2463 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
2465 /* This really must never happen. */
2466 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2467 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
2470 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
2472 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
2473 == -1)
2475 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2476 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
2481 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2482 run nscd as the user specified in the configuration file. */
2483 static void
2484 finish_drop_privileges (void)
2486 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2487 /* We need to preserve the capabilities to connect to the audit daemon. */
2488 cap_t new_caps = preserve_capabilities ();
2489 #endif
2491 if (setgroups (server_ngroups, server_groups) == -1)
2493 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2494 error (EXIT_FAILURE, errno, _("setgroups failed"));
2497 int res;
2498 if (paranoia)
2499 res = setresgid (server_gid, server_gid, old_gid);
2500 else
2501 res = setgid (server_gid);
2502 if (res == -1)
2504 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2505 perror ("setgid");
2506 exit (4);
2509 if (paranoia)
2510 res = setresuid (server_uid, server_uid, old_uid);
2511 else
2512 res = setuid (server_uid);
2513 if (res == -1)
2515 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2516 perror ("setuid");
2517 exit (4);
2520 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2521 /* Remove the temporary capabilities. */
2522 install_real_capabilities (new_caps);
2523 #endif