alpha: Fix tls-macros.h
[glibc.git] / nscd / connections.c
blobe54d4f213adb911a679b361f3036173982091a43
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2013 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>. */
19 #include <alloca.h>
20 #include <assert.h>
21 #include <atomic.h>
22 #include <error.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <grp.h>
26 #include <ifaddrs.h>
27 #include <libintl.h>
28 #include <pthread.h>
29 #include <pwd.h>
30 #include <resolv.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <stdint.h>
35 #include <arpa/inet.h>
36 #ifdef HAVE_NETLINK
37 # include <linux/netlink.h>
38 # include <linux/rtnetlink.h>
39 #endif
40 #ifdef HAVE_EPOLL
41 # include <sys/epoll.h>
42 #endif
43 #ifdef HAVE_INOTIFY
44 # include <sys/inotify.h>
45 #endif
46 #include <sys/mman.h>
47 #include <sys/param.h>
48 #include <sys/poll.h>
49 #ifdef HAVE_SENDFILE
50 # include <sys/sendfile.h>
51 #endif
52 #include <sys/socket.h>
53 #include <sys/stat.h>
54 #include <sys/un.h>
56 #include "nscd.h"
57 #include "dbg_log.h"
58 #include "selinux.h"
59 #include <resolv/resolv.h>
61 #include <kernel-features.h>
/* Support to run nscd as an unprivileged user.  */

/* When non-NULL, nscd_init calls begin_drop_privileges before it sets up
   the databases and finish_drop_privileges once the sockets exist.  */
const char *server_user;
static uid_t server_uid;
static gid_t server_gid;
const char *stat_user;
uid_t stat_uid;
static gid_t *server_groups;
#ifndef NGROUPS
# define NGROUPS 32
#endif
static int server_ngroups;

static pthread_attr_t attr;

static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
81 /* Map request type to a string. */
82 const char *const serv2str[LASTREQ] =
84 [GETPWBYNAME] = "GETPWBYNAME",
85 [GETPWBYUID] = "GETPWBYUID",
86 [GETGRBYNAME] = "GETGRBYNAME",
87 [GETGRBYGID] = "GETGRBYGID",
88 [GETHOSTBYNAME] = "GETHOSTBYNAME",
89 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
90 [GETHOSTBYADDR] = "GETHOSTBYADDR",
91 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
92 [SHUTDOWN] = "SHUTDOWN",
93 [GETSTAT] = "GETSTAT",
94 [INVALIDATE] = "INVALIDATE",
95 [GETFDPW] = "GETFDPW",
96 [GETFDGR] = "GETFDGR",
97 [GETFDHST] = "GETFDHST",
98 [GETAI] = "GETAI",
99 [INITGROUPS] = "INITGROUPS",
100 [GETSERVBYNAME] = "GETSERVBYNAME",
101 [GETSERVBYPORT] = "GETSERVBYPORT",
102 [GETFDSERV] = "GETFDSERV",
103 [GETNETGRENT] = "GETNETGRENT",
104 [INNETGR] = "INNETGR",
105 [GETFDNETGR] = "GETFDNETGR"
108 /* The control data structures for the services. */
109 struct database_dyn dbs[lastdb] =
111 [pwddb] = {
112 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
113 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
114 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
115 .enabled = 0,
116 .check_file = 1,
117 .persistent = 0,
118 .propagate = 1,
119 .shared = 0,
120 .max_db_size = DEFAULT_MAX_DB_SIZE,
121 .suggested_module = DEFAULT_SUGGESTED_MODULE,
122 .db_filename = _PATH_NSCD_PASSWD_DB,
123 .disabled_iov = &pwd_iov_disabled,
124 .postimeout = 3600,
125 .negtimeout = 20,
126 .wr_fd = -1,
127 .ro_fd = -1,
128 .mmap_used = false
130 [grpdb] = {
131 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
132 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
133 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
134 .enabled = 0,
135 .check_file = 1,
136 .persistent = 0,
137 .propagate = 1,
138 .shared = 0,
139 .max_db_size = DEFAULT_MAX_DB_SIZE,
140 .suggested_module = DEFAULT_SUGGESTED_MODULE,
141 .db_filename = _PATH_NSCD_GROUP_DB,
142 .disabled_iov = &grp_iov_disabled,
143 .postimeout = 3600,
144 .negtimeout = 60,
145 .wr_fd = -1,
146 .ro_fd = -1,
147 .mmap_used = false
149 [hstdb] = {
150 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
151 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
152 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
153 .enabled = 0,
154 .check_file = 1,
155 .persistent = 0,
156 .propagate = 0, /* Not used. */
157 .shared = 0,
158 .max_db_size = DEFAULT_MAX_DB_SIZE,
159 .suggested_module = DEFAULT_SUGGESTED_MODULE,
160 .db_filename = _PATH_NSCD_HOSTS_DB,
161 .disabled_iov = &hst_iov_disabled,
162 .postimeout = 3600,
163 .negtimeout = 20,
164 .wr_fd = -1,
165 .ro_fd = -1,
166 .mmap_used = false
168 [servdb] = {
169 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
170 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
171 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
172 .enabled = 0,
173 .check_file = 1,
174 .persistent = 0,
175 .propagate = 0, /* Not used. */
176 .shared = 0,
177 .max_db_size = DEFAULT_MAX_DB_SIZE,
178 .suggested_module = DEFAULT_SUGGESTED_MODULE,
179 .db_filename = _PATH_NSCD_SERVICES_DB,
180 .disabled_iov = &serv_iov_disabled,
181 .postimeout = 28800,
182 .negtimeout = 20,
183 .wr_fd = -1,
184 .ro_fd = -1,
185 .mmap_used = false
187 [netgrdb] = {
188 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
189 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
190 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
191 .enabled = 0,
192 .check_file = 1,
193 .persistent = 0,
194 .propagate = 0, /* Not used. */
195 .shared = 0,
196 .max_db_size = DEFAULT_MAX_DB_SIZE,
197 .suggested_module = DEFAULT_SUGGESTED_MODULE,
198 .db_filename = _PATH_NSCD_NETGROUP_DB,
199 .disabled_iov = &netgroup_iov_disabled,
200 .postimeout = 28800,
201 .negtimeout = 20,
202 .wr_fd = -1,
203 .ro_fd = -1,
204 .mmap_used = false
209 /* Mapping of request type to database. */
210 static struct
212 bool data_request;
213 struct database_dyn *db;
214 } const reqinfo[LASTREQ] =
216 [GETPWBYNAME] = { true, &dbs[pwddb] },
217 [GETPWBYUID] = { true, &dbs[pwddb] },
218 [GETGRBYNAME] = { true, &dbs[grpdb] },
219 [GETGRBYGID] = { true, &dbs[grpdb] },
220 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
221 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
222 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
223 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
224 [SHUTDOWN] = { false, NULL },
225 [GETSTAT] = { false, NULL },
226 [SHUTDOWN] = { false, NULL },
227 [GETFDPW] = { false, &dbs[pwddb] },
228 [GETFDGR] = { false, &dbs[grpdb] },
229 [GETFDHST] = { false, &dbs[hstdb] },
230 [GETAI] = { true, &dbs[hstdb] },
231 [INITGROUPS] = { true, &dbs[grpdb] },
232 [GETSERVBYNAME] = { true, &dbs[servdb] },
233 [GETSERVBYPORT] = { true, &dbs[servdb] },
234 [GETFDSERV] = { false, &dbs[servdb] },
235 [GETNETGRENT] = { true, &dbs[netgrdb] },
236 [INNETGR] = { true, &dbs[netgrdb] },
237 [GETFDNETGR] = { false, &dbs[netgrdb] }
/* Initial number of threads to use.  -1 means "not configured";
   nscd_init replaces it with a default.  */
int nthreads = -1;
/* Maximum number of threads to use.  */
int max_nthreads = 32;

/* Socket for incoming connections.  */
static int sock;

#ifdef HAVE_INOTIFY
/* Inotify descriptor.  */
int inotify_fd = -1;
#endif

#ifdef HAVE_NETLINK
/* Descriptor for netlink status updates.  */
static int nl_status_fd = -1;
#endif

#ifndef __ASSUME_SOCK_CLOEXEC
/* Negative if SOCK_CLOEXEC is not supported, positive if it is, zero
   before we know the result.  */
static int have_sock_cloexec;
#endif
#ifndef __ASSUME_ACCEPT4
static int have_accept4;
#endif

/* Number of times clients had to wait.  */
unsigned long int client_queued;
/* Send LEN bytes starting at BUF over the socket FD, repeating the send
   call until everything has been transmitted.  Interrupted calls are
   retried and MSG_NOSIGNAL is passed to send.

   Returns the negative result of send if a call fails, otherwise the
   number of bytes sent.  That number is smaller than LEN only if send
   returned zero before all data was written.  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  size_t n = len;
  ssize_t ret;

  do
    {
      ret = TEMP_FAILURE_RETRY (send (fd, buf, n, MSG_NOSIGNAL));
      if (ret <= 0)
        break;
      /* Advance past the part which has been sent.  */
      buf = (const char *) buf + ret;
      n -= ret;
    }
  while (n > 0);

  return ret < 0 ? ret : len - n;
}
#ifdef HAVE_SENDFILE
/* Transfer LEN bytes, starting at offset OFF of the file FROMFD, to the
   descriptor TOFD with sendfile, repeating the call until everything
   has been transferred.  Interrupted calls are retried.

   Returns the negative result of sendfile if a call fails, otherwise
   the number of bytes transferred.  That number is smaller than LEN only
   if sendfile returned zero early.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t n = len;
  ssize_t ret;

  do
    {
      /* sendfile advances OFF itself, only the count has to be updated.  */
      ret = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, n));
      if (ret <= 0)
        break;
      n -= ret;
    }
  while (n > 0);

  return ret < 0 ? ret : len - n;
}
#endif
/* Marks stored in the use map built while verifying a persistent
   database.  The low bits name the kind of object occupying a byte, the
   high bits flag the first and last byte of the object.  */
enum usekey
  {
    use_not = 0,
    /* The following three are not really used, they are symbolic constants.  */
    use_first = 16,
    use_begin = 32,
    use_end = 64,

    use_he = 1,
    use_he_begin = use_he | use_begin,
    use_he_end = use_he | use_end,
#if SEPARATE_KEY
    use_key = 2,
    use_key_begin = use_key | use_begin,
    use_key_end = use_key | use_end,
    use_key_first = use_key_begin | use_first,
#endif
    use_data = 3,
    use_data_begin = use_data | use_begin,
    use_data_end = use_data | use_end,
    use_data_first = use_data_begin | use_first
  };
334 static int
335 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
336 enum usekey use, ref_t start, size_t len)
338 assert (len >= 2);
340 if (start > first_free || start + len > first_free
341 || (start & BLOCK_ALIGN_M1))
342 return 0;
344 if (usemap[start] == use_not)
346 /* Add the start marker. */
347 usemap[start] = use | use_begin;
348 use &= ~use_first;
350 while (--len > 0)
351 if (usemap[++start] != use_not)
352 return 0;
353 else
354 usemap[start] = use;
356 /* Add the end marker. */
357 usemap[start] = use | use_end;
359 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
361 /* Hash entries can't be shared. */
362 if (use == use_he)
363 return 0;
365 usemap[start] |= (use & use_first);
366 use &= ~use_first;
368 while (--len > 1)
369 if (usemap[++start] != use)
370 return 0;
372 if (usemap[++start] != (use | use_end))
373 return 0;
375 else
376 /* Points to a wrong object or somewhere in the middle. */
377 return 0;
379 return 1;
383 /* Verify data in persistent database. */
384 static int
385 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
387 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb
388 || dbnr == netgrdb);
390 time_t now = time (NULL);
392 struct database_pers_head *head = mem;
393 struct database_pers_head head_copy = *head;
395 /* Check that the header that was read matches the head in the database. */
396 if (memcmp (head, readhead, sizeof (*head)) != 0)
397 return 0;
399 /* First some easy tests: make sure the database header is sane. */
400 if (head->version != DB_VERSION
401 || head->header_size != sizeof (*head)
402 /* We allow a timestamp to be one hour ahead of the current time.
403 This should cover daylight saving time changes. */
404 || head->timestamp > now + 60 * 60 + 60
405 || (head->gc_cycle & 1)
406 || head->module == 0
407 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
408 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
409 || head->first_free < 0
410 || head->first_free > head->data_size
411 || (head->first_free & BLOCK_ALIGN_M1) != 0
412 || head->maxnentries < 0
413 || head->maxnsearched < 0)
414 return 0;
416 uint8_t *usemap = calloc (head->first_free, 1);
417 if (usemap == NULL)
418 return 0;
420 const char *data = (char *) &head->array[roundup (head->module,
421 ALIGN / sizeof (ref_t))];
423 nscd_ssize_t he_cnt = 0;
424 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
426 ref_t trail = head->array[cnt];
427 ref_t work = trail;
428 int tick = 0;
430 while (work != ENDREF)
432 if (! check_use (data, head->first_free, usemap, use_he, work,
433 sizeof (struct hashentry)))
434 goto fail;
436 /* Now we know we can dereference the record. */
437 struct hashentry *here = (struct hashentry *) (data + work);
439 ++he_cnt;
441 /* Make sure the record is for this type of service. */
442 if (here->type >= LASTREQ
443 || reqinfo[here->type].db != &dbs[dbnr])
444 goto fail;
446 /* Validate boolean field value. */
447 if (here->first != false && here->first != true)
448 goto fail;
450 if (here->len < 0)
451 goto fail;
453 /* Now the data. */
454 if (here->packet < 0
455 || here->packet > head->first_free
456 || here->packet + sizeof (struct datahead) > head->first_free)
457 goto fail;
459 struct datahead *dh = (struct datahead *) (data + here->packet);
461 if (! check_use (data, head->first_free, usemap,
462 use_data | (here->first ? use_first : 0),
463 here->packet, dh->allocsize))
464 goto fail;
466 if (dh->allocsize < sizeof (struct datahead)
467 || dh->recsize > dh->allocsize
468 || (dh->notfound != false && dh->notfound != true)
469 || (dh->usable != false && dh->usable != true))
470 goto fail;
472 if (here->key < here->packet + sizeof (struct datahead)
473 || here->key > here->packet + dh->allocsize
474 || here->key + here->len > here->packet + dh->allocsize)
476 #if SEPARATE_KEY
477 /* If keys can appear outside of data, this should be done
478 instead. But gc doesn't mark the data in that case. */
479 if (! check_use (data, head->first_free, usemap,
480 use_key | (here->first ? use_first : 0),
481 here->key, here->len))
482 #endif
483 goto fail;
486 work = here->next;
488 if (work == trail)
489 /* A circular list, this must not happen. */
490 goto fail;
491 if (tick)
492 trail = ((struct hashentry *) (data + trail))->next;
493 tick = 1 - tick;
497 if (he_cnt != head->nentries)
498 goto fail;
500 /* See if all data and keys had at least one reference from
501 he->first == true hashentry. */
502 for (ref_t idx = 0; idx < head->first_free; ++idx)
504 #if SEPARATE_KEY
505 if (usemap[idx] == use_key_begin)
506 goto fail;
507 #endif
508 if (usemap[idx] == use_data_begin)
509 goto fail;
512 /* Finally, make sure the database hasn't changed since the first test. */
513 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
514 goto fail;
516 free (usemap);
517 return 1;
519 fail:
520 free (usemap);
521 return 0;
/* Flags added to every open call for a database file: O_CLOEXEC where
   the platform defines it, nothing otherwise.  */
#ifdef O_CLOEXEC
# define EXTRA_O_FLAGS O_CLOEXEC
#else
# define EXTRA_O_FLAGS 0
#endif
532 /* Initialize database information structures. */
533 void
534 nscd_init (void)
536 /* Look up unprivileged uid/gid/groups before we start listening on the
537 socket */
538 if (server_user != NULL)
539 begin_drop_privileges ();
541 if (nthreads == -1)
542 /* No configuration for this value, assume a default. */
543 nthreads = 4;
545 for (size_t cnt = 0; cnt < lastdb; ++cnt)
546 if (dbs[cnt].enabled)
548 pthread_rwlock_init (&dbs[cnt].lock, NULL);
549 pthread_mutex_init (&dbs[cnt].memlock, NULL);
551 if (dbs[cnt].persistent)
553 /* Try to open the appropriate file on disk. */
554 int fd = open (dbs[cnt].db_filename, O_RDWR | EXTRA_O_FLAGS);
555 if (fd != -1)
557 char *msg = NULL;
558 struct stat64 st;
559 void *mem;
560 size_t total;
561 struct database_pers_head head;
562 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
563 sizeof (head)));
564 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
566 fail_db_errno:
567 /* The code is single-threaded at this point so
568 using strerror is just fine. */
569 msg = strerror (errno);
570 fail_db:
571 dbg_log (_("invalid persistent database file \"%s\": %s"),
572 dbs[cnt].db_filename, msg);
573 unlink (dbs[cnt].db_filename);
575 else if (head.module == 0 && head.data_size == 0)
577 /* The file has been created, but the head has not
578 been initialized yet. */
579 msg = _("uninitialized header");
580 goto fail_db;
582 else if (head.header_size != (int) sizeof (head))
584 msg = _("header size does not match");
585 goto fail_db;
587 else if ((total = (sizeof (head)
588 + roundup (head.module * sizeof (ref_t),
589 ALIGN)
590 + head.data_size))
591 > st.st_size
592 || total < sizeof (head))
594 msg = _("file size does not match");
595 goto fail_db;
597 /* Note we map with the maximum size allowed for the
598 database. This is likely much larger than the
599 actual file size. This is OK on most OSes since
600 extensions of the underlying file will
601 automatically translate more pages available for
602 memory access. */
603 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
604 PROT_READ | PROT_WRITE,
605 MAP_SHARED, fd, 0))
606 == MAP_FAILED)
607 goto fail_db_errno;
608 else if (!verify_persistent_db (mem, &head, cnt))
610 munmap (mem, total);
611 msg = _("verification failed");
612 goto fail_db;
614 else
616 /* Success. We have the database. */
617 dbs[cnt].head = mem;
618 dbs[cnt].memsize = total;
619 dbs[cnt].data = (char *)
620 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
621 ALIGN / sizeof (ref_t))];
622 dbs[cnt].mmap_used = true;
624 if (dbs[cnt].suggested_module > head.module)
625 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
626 dbnames[cnt]);
628 dbs[cnt].wr_fd = fd;
629 fd = -1;
630 /* We also need a read-only descriptor. */
631 if (dbs[cnt].shared)
633 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
634 O_RDONLY | EXTRA_O_FLAGS);
635 if (dbs[cnt].ro_fd == -1)
636 dbg_log (_("\
637 cannot create read-only descriptor for \"%s\"; no mmap"),
638 dbs[cnt].db_filename);
641 // XXX Shall we test whether the descriptors actually
642 // XXX point to the same file?
645 /* Close the file descriptors in case something went
646 wrong in which case the variable have not been
647 assigned -1. */
648 if (fd != -1)
649 close (fd);
651 else if (errno == EACCES)
652 error (EXIT_FAILURE, 0, _("cannot access '%s'"),
653 dbs[cnt].db_filename);
656 if (dbs[cnt].head == NULL)
658 /* No database loaded. Allocate the data structure,
659 possibly on disk. */
660 struct database_pers_head head;
661 size_t total = (sizeof (head)
662 + roundup (dbs[cnt].suggested_module
663 * sizeof (ref_t), ALIGN)
664 + (dbs[cnt].suggested_module
665 * DEFAULT_DATASIZE_PER_BUCKET));
667 /* Try to create the database. If we do not need a
668 persistent database create a temporary file. */
669 int fd;
670 int ro_fd = -1;
671 if (dbs[cnt].persistent)
673 fd = open (dbs[cnt].db_filename,
674 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | EXTRA_O_FLAGS,
675 S_IRUSR | S_IWUSR);
676 if (fd != -1 && dbs[cnt].shared)
677 ro_fd = open (dbs[cnt].db_filename,
678 O_RDONLY | EXTRA_O_FLAGS);
680 else
682 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
683 fd = mkostemp (fname, EXTRA_O_FLAGS);
685 /* We do not need the file name anymore after we
686 opened another file descriptor in read-only mode. */
687 if (fd != -1)
689 if (dbs[cnt].shared)
690 ro_fd = open (fname, O_RDONLY | EXTRA_O_FLAGS);
692 unlink (fname);
696 if (fd == -1)
698 if (errno == EEXIST)
700 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
701 dbnames[cnt], dbs[cnt].db_filename);
702 // XXX Correct way to terminate?
703 exit (1);
706 if (dbs[cnt].persistent)
707 dbg_log (_("cannot create %s; no persistent database used"),
708 dbs[cnt].db_filename);
709 else
710 dbg_log (_("cannot create %s; no sharing possible"),
711 dbs[cnt].db_filename);
713 dbs[cnt].persistent = 0;
714 // XXX remember: no mmap
716 else
718 /* Tell the user if we could not create the read-only
719 descriptor. */
720 if (ro_fd == -1 && dbs[cnt].shared)
721 dbg_log (_("\
722 cannot create read-only descriptor for \"%s\"; no mmap"),
723 dbs[cnt].db_filename);
725 /* Before we create the header, initialize the hash
726 table. That way if we get interrupted while writing
727 the header we can recognize a partially initialized
728 database. */
729 size_t ps = sysconf (_SC_PAGESIZE);
730 char tmpbuf[ps];
731 assert (~ENDREF == 0);
732 memset (tmpbuf, '\xff', ps);
734 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
735 off_t offset = sizeof (head);
737 size_t towrite;
738 if (offset % ps != 0)
740 towrite = MIN (remaining, ps - (offset % ps));
741 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
742 goto write_fail;
743 offset += towrite;
744 remaining -= towrite;
747 while (remaining > ps)
749 if (pwrite (fd, tmpbuf, ps, offset) == -1)
750 goto write_fail;
751 offset += ps;
752 remaining -= ps;
755 if (remaining > 0
756 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
757 goto write_fail;
759 /* Create the header of the file. */
760 struct database_pers_head head =
762 .version = DB_VERSION,
763 .header_size = sizeof (head),
764 .module = dbs[cnt].suggested_module,
765 .data_size = (dbs[cnt].suggested_module
766 * DEFAULT_DATASIZE_PER_BUCKET),
767 .first_free = 0
769 void *mem;
771 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
772 != sizeof (head))
773 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
774 != 0)
775 || (mem = mmap (NULL, dbs[cnt].max_db_size,
776 PROT_READ | PROT_WRITE,
777 MAP_SHARED, fd, 0)) == MAP_FAILED)
779 write_fail:
780 unlink (dbs[cnt].db_filename);
781 dbg_log (_("cannot write to database file %s: %s"),
782 dbs[cnt].db_filename, strerror (errno));
783 dbs[cnt].persistent = 0;
785 else
787 /* Success. */
788 dbs[cnt].head = mem;
789 dbs[cnt].data = (char *)
790 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
791 ALIGN / sizeof (ref_t))];
792 dbs[cnt].memsize = total;
793 dbs[cnt].mmap_used = true;
795 /* Remember the descriptors. */
796 dbs[cnt].wr_fd = fd;
797 dbs[cnt].ro_fd = ro_fd;
798 fd = -1;
799 ro_fd = -1;
802 if (fd != -1)
803 close (fd);
804 if (ro_fd != -1)
805 close (ro_fd);
809 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
810 /* We do not check here whether the O_CLOEXEC provided to the
811 open call was successful or not. The two fcntl calls are
812 only performed once each per process start-up and therefore
813 is not noticeable at all. */
814 if (paranoia
815 && ((dbs[cnt].wr_fd != -1
816 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
817 || (dbs[cnt].ro_fd != -1
818 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
820 dbg_log (_("\
821 cannot set socket to close on exec: %s; disabling paranoia mode"),
822 strerror (errno));
823 paranoia = 0;
825 #endif
827 if (dbs[cnt].head == NULL)
829 /* We do not use the persistent database. Just
830 create an in-memory data structure. */
831 assert (! dbs[cnt].persistent);
833 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
834 + (dbs[cnt].suggested_module
835 * sizeof (ref_t)));
836 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
837 assert (~ENDREF == 0);
838 memset (dbs[cnt].head->array, '\xff',
839 dbs[cnt].suggested_module * sizeof (ref_t));
840 dbs[cnt].head->module = dbs[cnt].suggested_module;
841 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
842 * dbs[cnt].head->module);
843 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
844 dbs[cnt].head->first_free = 0;
846 dbs[cnt].shared = 0;
847 assert (dbs[cnt].ro_fd == -1);
851 /* Create the socket. */
852 #ifndef __ASSUME_SOCK_CLOEXEC
853 sock = -1;
854 if (have_sock_cloexec >= 0)
855 #endif
857 sock = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
858 #ifndef __ASSUME_SOCK_CLOEXEC
859 if (have_sock_cloexec == 0)
860 have_sock_cloexec = sock != -1 || errno != EINVAL ? 1 : -1;
861 #endif
863 #ifndef __ASSUME_SOCK_CLOEXEC
864 if (have_sock_cloexec < 0)
865 sock = socket (AF_UNIX, SOCK_STREAM, 0);
866 #endif
867 if (sock < 0)
869 dbg_log (_("cannot open socket: %s"), strerror (errno));
870 exit (errno == EACCES ? 4 : 1);
872 /* Bind a name to the socket. */
873 struct sockaddr_un sock_addr;
874 sock_addr.sun_family = AF_UNIX;
875 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
876 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
878 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
879 exit (errno == EACCES ? 4 : 1);
882 #ifndef __ASSUME_SOCK_CLOEXEC
883 if (have_sock_cloexec < 0)
885 /* We don't want to get stuck on accept. */
886 int fl = fcntl (sock, F_GETFL);
887 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
889 dbg_log (_("cannot change socket to nonblocking mode: %s"),
890 strerror (errno));
891 exit (1);
894 /* The descriptor needs to be closed on exec. */
895 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
897 dbg_log (_("cannot set socket to close on exec: %s"),
898 strerror (errno));
899 exit (1);
902 #endif
904 /* Set permissions for the socket. */
905 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
907 /* Set the socket up to accept connections. */
908 if (listen (sock, SOMAXCONN) < 0)
910 dbg_log (_("cannot enable socket to accept connections: %s"),
911 strerror (errno));
912 exit (1);
915 #ifdef HAVE_NETLINK
916 if (dbs[hstdb].enabled)
918 /* Try to open netlink socket to monitor network setting changes. */
919 nl_status_fd = socket (AF_NETLINK,
920 SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK,
921 NETLINK_ROUTE);
922 if (nl_status_fd != -1)
924 struct sockaddr_nl snl;
925 memset (&snl, '\0', sizeof (snl));
926 snl.nl_family = AF_NETLINK;
927 /* XXX Is this the best set to use? */
928 snl.nl_groups = (RTMGRP_IPV4_IFADDR | RTMGRP_TC | RTMGRP_IPV4_MROUTE
929 | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_RULE
930 | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_MROUTE
931 | RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFINFO
932 | RTMGRP_IPV6_PREFIX);
934 if (bind (nl_status_fd, (struct sockaddr *) &snl, sizeof (snl)) != 0)
936 close (nl_status_fd);
937 nl_status_fd = -1;
939 else
941 /* Start the timestamp process. */
942 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
943 = __bump_nl_timestamp ();
945 # ifndef __ASSUME_SOCK_CLOEXEC
946 if (have_sock_cloexec < 0)
948 /* We don't want to get stuck on accept. */
949 int fl = fcntl (nl_status_fd, F_GETFL);
950 if (fl == -1
951 || fcntl (nl_status_fd, F_SETFL, fl | O_NONBLOCK) == -1)
953 dbg_log (_("\
954 cannot change socket to nonblocking mode: %s"),
955 strerror (errno));
956 exit (1);
959 /* The descriptor needs to be closed on exec. */
960 if (paranoia
961 && fcntl (nl_status_fd, F_SETFD, FD_CLOEXEC) == -1)
963 dbg_log (_("cannot set socket to close on exec: %s"),
964 strerror (errno));
965 exit (1);
968 # endif
972 #endif
974 /* Change to unprivileged uid/gid/groups if specified in config file */
975 if (server_user != NULL)
976 finish_drop_privileges ();
980 /* Register the file in FINFO as a traced file for the database DBS[DBIX].
982 We support registering multiple files per database. Each call to
983 register_traced_file adds to the list of registered files.
985 When we prune the database, either through timeout or a request to
986 invalidate, we will check to see if any of the registered files has changed.
987 When we accept new connections to handle a cache request we will also
988 check to see if any of the registered files has changed.
990 If we have inotify support then we install an inotify fd to notify us of
991 file deletion or modification, both of which will require we invalidate
992 the cache for the database. Without inotify support we stat the file and
993 store st_mtime to determine if the file has been modified. */
994 void
995 register_traced_file (size_t dbidx, struct traced_file *finfo)
997 /* If the database is disabled or file checking is disabled
998 then ignore the registration. */
999 if (! dbs[dbidx].enabled || ! dbs[dbidx].check_file)
1000 return;
1002 if (__builtin_expect (debug_level > 0, 0))
1003 dbg_log (_("register trace file %s for database %s"),
1004 finfo->fname, dbnames[dbidx]);
1006 #ifdef HAVE_INOTIFY
1007 if (inotify_fd < 0
1008 || (finfo->inotify_descr = inotify_add_watch (inotify_fd, finfo->fname,
1009 IN_DELETE_SELF
1010 | IN_MODIFY)) < 0)
1011 #endif
1013 /* We need the modification date of the file. */
1014 struct stat64 st;
1016 if (stat64 (finfo->fname, &st) < 0)
1018 /* We cannot stat() the file, disable file checking. */
1019 dbg_log (_("cannot stat() file `%s': %s"),
1020 finfo->fname, strerror (errno));
1021 return;
1024 finfo->inotify_descr = -1;
1025 finfo->mtime = st.st_mtime;
1028 /* Queue up the file name. */
1029 finfo->next = dbs[dbidx].traced_files;
1030 dbs[dbidx].traced_files = finfo;
1034 /* Close the connections. */
1035 void
1036 close_sockets (void)
1038 close (sock);
1042 static void
1043 invalidate_cache (char *key, int fd)
1045 dbtype number;
1046 int32_t resp;
1048 for (number = pwddb; number < lastdb; ++number)
1049 if (strcmp (key, dbnames[number]) == 0)
1051 if (number == hstdb)
1053 struct traced_file *runp = dbs[hstdb].traced_files;
1054 while (runp != NULL)
1055 if (runp->call_res_init)
1057 res_init ();
1058 break;
1060 else
1061 runp = runp->next;
1063 break;
1066 if (number == lastdb)
1068 resp = EINVAL;
1069 writeall (fd, &resp, sizeof (resp));
1070 return;
1073 if (dbs[number].enabled)
1075 pthread_mutex_lock (&dbs[number].prune_run_lock);
1076 prune_cache (&dbs[number], LONG_MAX, fd);
1077 pthread_mutex_unlock (&dbs[number].prune_run_lock);
1079 else
1081 resp = 0;
1082 writeall (fd, &resp, sizeof (resp));
1087 #ifdef SCM_RIGHTS
1088 static void
1089 send_ro_fd (struct database_dyn *db, char *key, int fd)
1091 /* If we do not have an read-only file descriptor do nothing. */
1092 if (db->ro_fd == -1)
1093 return;
1095 /* We need to send some data along with the descriptor. */
1096 uint64_t mapsize = (db->head->data_size
1097 + roundup (db->head->module * sizeof (ref_t), ALIGN)
1098 + sizeof (struct database_pers_head));
1099 struct iovec iov[2];
1100 iov[0].iov_base = key;
1101 iov[0].iov_len = strlen (key) + 1;
1102 iov[1].iov_base = &mapsize;
1103 iov[1].iov_len = sizeof (mapsize);
1105 /* Prepare the control message to transfer the descriptor. */
1106 union
1108 struct cmsghdr hdr;
1109 char bytes[CMSG_SPACE (sizeof (int))];
1110 } buf;
1111 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
1112 .msg_control = buf.bytes,
1113 .msg_controllen = sizeof (buf) };
1114 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
1116 cmsg->cmsg_level = SOL_SOCKET;
1117 cmsg->cmsg_type = SCM_RIGHTS;
1118 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
1120 int *ip = (int *) CMSG_DATA (cmsg);
1121 *ip = db->ro_fd;
1123 msg.msg_controllen = cmsg->cmsg_len;
1125 /* Send the control message. We repeat when we are interrupted but
1126 everything else is ignored. */
1127 #ifndef MSG_NOSIGNAL
1128 # define MSG_NOSIGNAL 0
1129 #endif
1130 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
1132 if (__builtin_expect (debug_level > 0, 0))
1133 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
1135 #endif /* SCM_RIGHTS */
1138 /* Handle new request.  FD is the connected client socket, REQ the
   request header already read from it and KEY the key bytes that followed.
   UID is forwarded to the cache lookup and to the add* handlers; for
   GETSTAT, SHUTDOWN and INVALIDATE it is overwritten below with the peer
   credentials (or with 0 when SO_PEERCRED is not available).  PID is only
   used to build the log message when the SELinux check rejects the
   request.  */
1139 static void
1140 handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
/* A request whose version differs from NSCD_VERSION is dropped without
   sending any reply.  */
1142 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
1144 if (debug_level > 0)
1145 dbg_log (_("\
1146 cannot handle old request version %d; current version is %d"),
1147 req->version, NSCD_VERSION);
1148 return;
1151 /* Perform the SELinux check before we go on to the standard checks. */
1152 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
1154 if (debug_level > 0)
1156 #ifdef SO_PEERCRED
1157 # ifdef PATH_MAX
1158 char buf[PATH_MAX];
1159 # else
1160 char buf[4096];
1161 # endif
/* BUF first holds the /proc/PID/exe path and is then reused as the
   output buffer of readlink, so the path is overwritten by the link
   target.  */
1163 snprintf (buf, sizeof (buf), "/proc/%ld/exe", (long int) pid);
1164 ssize_t n = readlink (buf, buf, sizeof (buf) - 1);
1166 if (n <= 0)
1167 dbg_log (_("\
1168 request from %ld not handled due to missing permission"), (long int) pid);
1169 else
/* readlink does not NUL-terminate its result.  */
1171 buf[n] = '\0';
1172 dbg_log (_("\
1173 request from '%s' [%ld] not handled due to missing permission"),
1174 buf, (long int) pid);
1176 #else
1177 dbg_log (_("request not handled due to missing permission"));
1178 #endif
1180 return;
1183 struct database_dyn *db = reqinfo[req->type].db;
1185 /* See whether we can service the request from the cache. */
1186 if (__builtin_expect (reqinfo[req->type].data_request, true))
1188 if (__builtin_expect (debug_level, 0) > 0)
/* For address lookups KEY is a binary address and has to be converted
   for logging; every other data request logs KEY as a string.  */
1190 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
1192 char buf[INET6_ADDRSTRLEN];
1194 dbg_log ("\t%s (%s)", serv2str[req->type],
1195 inet_ntop (req->type == GETHOSTBYADDR
1196 ? AF_INET : AF_INET6,
1197 key, buf, sizeof (buf)));
1199 else
1200 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1203 /* Is this service enabled? */
1204 if (__builtin_expect (!db->enabled, 0))
1206 /* No, send the prepared record. */
1207 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1208 db->disabled_iov->iov_len,
1209 MSG_NOSIGNAL))
1210 != (ssize_t) db->disabled_iov->iov_len
1211 && __builtin_expect (debug_level, 0) > 0)
1213 /* We have problems sending the result. */
1214 char buf[256];
1215 dbg_log (_("cannot write result: %s"),
1216 strerror_r (errno, buf, sizeof (buf)));
1219 return;
1222 /* Be sure we can read the data. */
/* Try the read lock without blocking first so that contention can be
   counted in rdlockdelayed before falling back to the blocking call.  */
1223 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
1225 ++db->head->rdlockdelayed;
1226 pthread_rwlock_rdlock (&db->lock);
1229 /* See whether we can handle it from the cache. */
1230 struct datahead *cached;
1231 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1232 db, uid);
1233 if (cached != NULL)
1235 /* Hurray it's in the cache. */
1236 ssize_t nwritten;
/* With HAVE_SENDFILE and a mapped database the record is sent straight
   from the database file descriptor; the offset passed is the record's
   distance from the start of the mapping (db->head).  Without
   __ASSUME_SENDFILE an ENOSYS result jumps to use_write, which is the
   same writeall call used when the database is not mapped.  */
1238 #ifdef HAVE_SENDFILE
1239 if (__builtin_expect (db->mmap_used, 1))
1241 assert (db->wr_fd != -1);
1242 assert ((char *) cached->data > (char *) db->data);
1243 assert ((char *) cached->data - (char *) db->head
1244 + cached->recsize
1245 <= (sizeof (struct database_pers_head)
1246 + db->head->module * sizeof (ref_t)
1247 + db->head->data_size));
1248 nwritten = sendfileall (fd, db->wr_fd,
1249 (char *) cached->data
1250 - (char *) db->head, cached->recsize);
1251 # ifndef __ASSUME_SENDFILE
1252 if (nwritten == -1 && errno == ENOSYS)
1253 goto use_write;
1254 # endif
1256 else
1257 # ifndef __ASSUME_SENDFILE
1258 use_write:
1259 # endif
1260 #endif
1261 nwritten = writeall (fd, cached->data, cached->recsize);
1263 if (nwritten != cached->recsize
1264 && __builtin_expect (debug_level, 0) > 0)
1266 /* We have problems sending the result. */
1267 char buf[256];
1268 dbg_log (_("cannot write result: %s"),
1269 strerror_r (errno, buf, sizeof (buf)));
1272 pthread_rwlock_unlock (&db->lock);
1274 return;
/* Cache miss: release the read lock before the request is dispatched
   below.  */
1277 pthread_rwlock_unlock (&db->lock);
1279 else if (__builtin_expect (debug_level, 0) > 0)
/* Non-data requests: only INVALIDATE logs its key.  */
1281 if (req->type == INVALIDATE)
1282 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1283 else
1284 dbg_log ("\t%s", serv2str[req->type]);
1287 /* Handle the request. */
/* Reached for data requests that missed the cache and for all non-data
   requests.  */
1288 switch (req->type)
1290 case GETPWBYNAME:
1291 addpwbyname (db, fd, req, key, uid);
1292 break;
1294 case GETPWBYUID:
1295 addpwbyuid (db, fd, req, key, uid);
1296 break;
1298 case GETGRBYNAME:
1299 addgrbyname (db, fd, req, key, uid);
1300 break;
1302 case GETGRBYGID:
1303 addgrbygid (db, fd, req, key, uid);
1304 break;
1306 case GETHOSTBYNAME:
1307 addhstbyname (db, fd, req, key, uid);
1308 break;
1310 case GETHOSTBYNAMEv6:
1311 addhstbynamev6 (db, fd, req, key, uid);
1312 break;
1314 case GETHOSTBYADDR:
1315 addhstbyaddr (db, fd, req, key, uid);
1316 break;
1318 case GETHOSTBYADDRv6:
1319 addhstbyaddrv6 (db, fd, req, key, uid);
1320 break;
1322 case GETAI:
1323 addhstai (db, fd, req, key, uid);
1324 break;
1326 case INITGROUPS:
1327 addinitgroups (db, fd, req, key, uid);
1328 break;
1330 case GETSERVBYNAME:
1331 addservbyname (db, fd, req, key, uid);
1332 break;
1334 case GETSERVBYPORT:
1335 addservbyport (db, fd, req, key, uid);
1336 break;
1338 case GETNETGRENT:
1339 addgetnetgrent (db, fd, req, key, uid);
1340 break;
1342 case INNETGR:
1343 addinnetgr (db, fd, req, key, uid);
1344 break;
1346 case GETSTAT:
1347 case SHUTDOWN:
1348 case INVALIDATE:
1350 /* Get the callers credentials. */
1351 #ifdef SO_PEERCRED
1352 struct ucred caller;
1353 socklen_t optlen = sizeof (caller);
1355 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1357 char buf[256];
1359 dbg_log (_("error getting caller's id: %s"),
1360 strerror_r (errno, buf, sizeof (buf)));
/* Without credentials the administrative request is not executed.  */
1361 break;
1364 uid = caller.uid;
1365 #else
1366 /* Some systems have no SO_PEERCRED implementation. They don't
1367 care about security so we don't as well. */
1368 uid = 0;
1369 #endif
1372 /* Accept shutdown, getstat and invalidate only from root. For
1373 the stat call also allow the user specified in the config file. */
1374 if (req->type == GETSTAT)
1376 if (uid == 0 || uid == stat_uid)
1377 send_stats (fd, dbs);
1379 else if (uid == 0)
1381 if (req->type == INVALIDATE)
1382 invalidate_cache (key, fd);
1383 else
1384 termination_handler (0);
1386 break;
1388 case GETFDPW:
1389 case GETFDGR:
1390 case GETFDHST:
1391 case GETFDSERV:
1392 case GETFDNETGR:
/* Without SCM_RIGHTS these requests are accepted but nothing is sent.  */
1393 #ifdef SCM_RIGHTS
1394 send_ro_fd (reqinfo[req->type].db, key, fd);
1395 #endif
1396 break;
1398 default:
1399 /* Ignore the command, it's nothing we know. */
1400 break;
1405 /* Restart the process. */
1406 static void
1407 restart (void)
1409 /* First determine the parameters. We do not use the parameters
1410 passed to main() since in case nscd is started by running the
1411 dynamic linker this will not work. Yes, this is not the usual
1412 case but nscd is part of glibc and we occasionally do this. */
1413 size_t buflen = 1024;
1414 char *buf = alloca (buflen);
1415 size_t readlen = 0;
1416 int fd = open ("/proc/self/cmdline", O_RDONLY);
1417 if (fd == -1)
1419 dbg_log (_("\
1420 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1421 strerror (errno));
1423 paranoia = 0;
1424 return;
1427 while (1)
1429 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1430 buflen - readlen));
1431 if (n == -1)
1433 dbg_log (_("\
1434 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1435 strerror (errno));
1437 close (fd);
1438 paranoia = 0;
1439 return;
1442 readlen += n;
1444 if (readlen < buflen)
1445 break;
1447 /* We might have to extend the buffer. */
1448 size_t old_buflen = buflen;
1449 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1450 buf = memmove (newp, buf, old_buflen);
1453 close (fd);
1455 /* Parse the command line. Worst case scenario: every two
1456 characters form one parameter (one character plus NUL). */
1457 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1458 int argc = 0;
1460 char *cp = buf;
1461 while (cp < buf + readlen)
1463 argv[argc++] = cp;
1464 cp = (char *) rawmemchr (cp, '\0') + 1;
1466 argv[argc] = NULL;
1468 /* Second, change back to the old user if we changed it. */
1469 if (server_user != NULL)
1471 if (setresuid (old_uid, old_uid, old_uid) != 0)
1473 dbg_log (_("\
1474 cannot change to old UID: %s; disabling paranoia mode"),
1475 strerror (errno));
1477 paranoia = 0;
1478 return;
1481 if (setresgid (old_gid, old_gid, old_gid) != 0)
1483 dbg_log (_("\
1484 cannot change to old GID: %s; disabling paranoia mode"),
1485 strerror (errno));
1487 setuid (server_uid);
1488 paranoia = 0;
1489 return;
1493 /* Next change back to the old working directory. */
1494 if (chdir (oldcwd) == -1)
1496 dbg_log (_("\
1497 cannot change to old working directory: %s; disabling paranoia mode"),
1498 strerror (errno));
1500 if (server_user != NULL)
1502 setuid (server_uid);
1503 setgid (server_gid);
1505 paranoia = 0;
1506 return;
1509 /* Synchronize memory. */
1510 int32_t certainly[lastdb];
1511 for (int cnt = 0; cnt < lastdb; ++cnt)
1512 if (dbs[cnt].enabled)
1514 /* Make sure nobody keeps using the database. */
1515 dbs[cnt].head->timestamp = 0;
1516 certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
1517 dbs[cnt].head->nscd_certainly_running = 0;
1519 if (dbs[cnt].persistent)
1520 // XXX async OK?
1521 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1524 /* The preparations are done. */
1525 #ifdef PATH_MAX
1526 char pathbuf[PATH_MAX];
1527 #else
1528 char pathbuf[256];
1529 #endif
1530 /* Try to exec the real nscd program so the process name (as reported
1531 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1532 if readlink or the exec with the result of the readlink call fails. */
1533 ssize_t n = readlink ("/proc/self/exe", pathbuf, sizeof (pathbuf) - 1);
1534 if (n != -1)
1536 pathbuf[n] = '\0';
1537 execv (pathbuf, argv);
1539 execv ("/proc/self/exe", argv);
1541 /* If we come here, we will never be able to re-exec. */
1542 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1543 strerror (errno));
1545 if (server_user != NULL)
1547 setuid (server_uid);
1548 setgid (server_gid);
1550 if (chdir ("/") != 0)
1551 dbg_log (_("cannot change current working directory to \"/\": %s"),
1552 strerror (errno));
1553 paranoia = 0;
1555 /* Reenable the databases. */
1556 time_t now = time (NULL);
1557 for (int cnt = 0; cnt < lastdb; ++cnt)
1558 if (dbs[cnt].enabled)
1560 dbs[cnt].head->timestamp = now;
1561 dbs[cnt].head->nscd_certainly_running = certainly[cnt];
1566 /* List of file descriptors.  Entries double as free-slot markers:
   fd_ready treats an entry whose NEXT is NULL as unused, and the worker
   threads reset NEXT to NULL once they have taken the descriptor.  */
1567 struct fdlist
/* Descriptor of the connection waiting to be served.  */
1569 int fd;
/* Next entry on the ready list, or NULL while the entry is unused.  */
1570 struct fdlist *next;
1572 /* Memory allocated for the list. */
1573 static struct fdlist *fdlist;
1574 /* List of currently ready-to-read file descriptors.  The list is
   circular: a single entry points to itself, and the workers dequeue
   READYLIST->next.  NULL means the list is empty.  */
1575 static struct fdlist *readylist;
1577 /* Conditional variable and mutex to signal availability of entries in
1578 READYLIST.  Both are statically initialized here; the clock selection
1579 made in start_threads is applied to the per-database prune condvars.  */
1580 static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
1581 static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1583 /* The clock to use with the condvar.  Read with clock_gettime by the
   prune threads to compute their timed-wait deadlines.  */
1584 static clockid_t timeout_clock = CLOCK_REALTIME;
1586 /* Number of threads ready to handle the READYLIST.  Modified with
   READYLIST_LOCK held.  */
1587 static unsigned long int nready;
1590 /* Function for the clean-up threads.  P carries the index into DBS of
   the database this thread looks after (an integer cast to a pointer).
   The thread never returns: it sleeps on the database's prune_cond until
   the computed wake-up time or until clear_cache is set, runs prune_cache
   and computes the next wake-up time.  */
1591 static void *
1592 __attribute__ ((__noreturn__))
1593 nscd_run_prune (void *p)
1595 const long int my_number = (long int) p;
1596 assert (dbs[my_number].enabled);
/* A zero result means the timestamp in the database head has to be
   refreshed periodically; see the use of DONT_NEED_UPDATE below.  */
1598 int dont_need_update = setup_thread (&dbs[my_number]);
1600 time_t now = time (NULL);
1602 /* We are running. */
1603 dbs[my_number].head->timestamp = now;
1605 struct timespec prune_ts;
1606 if (__builtin_expect (clock_gettime (timeout_clock, &prune_ts) == -1, 0))
1607 /* Should never happen. */
1608 abort ();
1610 /* Compute the initial timeout time. Prevent all the timers to go
1611 off at the same time by adding a db-based value. */
/* PRUNE_TS is an absolute deadline on TIMEOUT_CLOCK for the condvar wait,
   wakeup_time is the matching deadline expressed in time() seconds.  */
1612 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1613 dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;
1615 pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
1616 pthread_mutex_t *prune_run_lock = &dbs[my_number].prune_run_lock;
1617 pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;
1619 pthread_mutex_lock (prune_lock);
1620 while (1)
1622 /* Wait, but not forever. */
/* The wait is skipped entirely when a cache clear is already pending.  */
1623 int e = 0;
1624 if (! dbs[my_number].clear_cache)
1625 e = pthread_cond_timedwait (prune_cond, prune_lock, &prune_ts);
1626 assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));
1628 time_t next_wait;
1629 now = time (NULL);
1630 if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time
1631 || dbs[my_number].clear_cache)
1633 /* We will determine the new timeout values based on the
1634 cache content. Should there be concurrent additions to
1635 the cache which are not accounted for in the cache
1636 pruning we want to know about it. Therefore set the
1637 timeout to the maximum. It will be decreased when adding
1638 new entries to the cache, if necessary. */
1639 dbs[my_number].wakeup_time = MAX_TIMEOUT_VALUE;
1641 /* Unconditionally reset the flag. */
/* When a clear was requested prune_cache is called with LONG_MAX as the
   current time instead of NOW.  */
1642 time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
1643 dbs[my_number].clear_cache = 0;
1645 pthread_mutex_unlock (prune_lock);
1647 /* We use a separate lock for running the prune function (instead
1648 of keeping prune_lock locked) because this enables concurrent
1649 invocations of cache_add which might modify the timeout value. */
1650 pthread_mutex_lock (prune_run_lock);
1651 next_wait = prune_cache (&dbs[my_number], prune_now, -1);
1652 pthread_mutex_unlock (prune_run_lock);
/* Never wake up more often than every CACHE_PRUNE_INTERVAL seconds.  */
1654 next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1655 /* If clients cannot determine for sure whether nscd is running
1656 we need to wake up occasionally to update the timestamp.
1657 Wait 90% of the update period. */
1658 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1659 if (__builtin_expect (! dont_need_update, 0))
1661 next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1662 dbs[my_number].head->timestamp = now;
1665 pthread_mutex_lock (prune_lock);
1667 /* Make it known when we will wake up again. */
/* wakeup_time may have been lowered while PRUNE_LOCK was released; the
   earlier of the two deadlines wins.  */
1668 if (now + next_wait < dbs[my_number].wakeup_time)
1669 dbs[my_number].wakeup_time = now + next_wait;
1670 else
1671 next_wait = dbs[my_number].wakeup_time - now;
1673 else
1674 /* The cache was just pruned. Do not do it again now. Just
1675 use the new timeout value. */
1676 next_wait = dbs[my_number].wakeup_time - now;
1678 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1679 /* Should never happen. */
1680 abort ();
1682 /* Compute next timeout time. */
1683 prune_ts.tv_sec += next_wait;
1688 /* This is the main loop. It is replicated in different threads but
1689 the use of the ready list makes sure only one thread handles an
1690 incoming connection.  The argument P is not used.  Each iteration
   takes one descriptor off READYLIST, reads the request header and key
   from it, hands them to handle_request and closes the descriptor.  */
1691 static void *
1692 __attribute__ ((__noreturn__))
1693 nscd_run_worker (void *p)
/* Scratch buffer for strerror_r in the log messages below.  */
1695 char buf[256];
1697 /* Initial locking. */
1698 pthread_mutex_lock (&readylist_lock);
1700 /* One more thread available. */
1701 ++nready;
1703 while (1)
/* READYLIST_LOCK is held at the top of every iteration.  */
1705 while (readylist == NULL)
1706 pthread_cond_wait (&readylist_cond, &readylist_lock);
/* Dequeue READYLIST->next from the circular list.  */
1708 struct fdlist *it = readylist->next;
1709 if (readylist->next == readylist)
1710 /* Just one entry on the list. */
1711 readylist = NULL;
1712 else
1713 readylist->next = it->next;
1715 /* Extract the information and mark the record ready to be used
1716 again. */
1717 int fd = it->fd;
1718 it->next = NULL;
1720 /* One thread less available: this one is busy now. */
1721 --nready;
1723 /* We are done with the list. */
1724 pthread_mutex_unlock (&readylist_lock);
1726 #ifndef __ASSUME_ACCEPT4
/* When accept4 turned out to be unavailable the descriptor came from
   plain accept and has to be made non-blocking here.  */
1727 if (have_accept4 < 0)
1729 /* We do not want to block on a short read or so. */
1730 int fl = fcntl (fd, F_GETFL);
1731 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1732 goto close_and_out;
1734 #endif
1736 /* Now read the request. */
1737 request_header req;
1738 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1739 != sizeof (req), 0))
1741 /* We failed to read data. Note that this also might mean we
1742 failed because we would have blocked. */
1743 if (debug_level > 0)
1744 dbg_log (_("short read while reading request: %s"),
1745 strerror_r (errno, buf, sizeof (buf)));
1746 goto close_and_out;
1749 /* Check whether this is a valid request type. */
1750 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1751 goto close_and_out;
1753 /* Some systems have no SO_PEERCRED implementation. They don't
1754 care about security so we don't as well. */
/* UID is always passed on as -1 from here; the peer credentials are only
   queried for the PID, and only when debugging output is enabled.  */
1755 uid_t uid = -1;
1756 #ifdef SO_PEERCRED
1757 pid_t pid = 0;
1759 if (__builtin_expect (debug_level > 0, 0))
1761 struct ucred caller;
1762 socklen_t optlen = sizeof (caller);
1764 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1765 pid = caller.pid;
1767 #else
1768 const pid_t pid = 0;
1769 #endif
1771 /* It should not be possible to crash the nscd with a silly
1772 request (i.e., a terribly large key). We limit the size to
   MAXKEYLEN bytes; negative lengths are rejected as well.  */
1773 if (__builtin_expect (req.key_len, 1) < 0
1774 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1776 if (debug_level > 0)
1777 dbg_log (_("key length in request too long: %d"), req.key_len);
1779 else
1781 /* Get the key. */
/* One extra byte so the key can always be NUL-terminated below.  */
1782 char keybuf[MAXKEYLEN + 1];
1784 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1785 req.key_len))
1786 != req.key_len, 0))
1788 /* Again, this can also mean we would have blocked. */
1789 if (debug_level > 0)
1790 dbg_log (_("short read while reading request key: %s"),
1791 strerror_r (errno, buf, sizeof (buf)));
1792 goto close_and_out;
1794 keybuf[req.key_len] = '\0';
1796 if (__builtin_expect (debug_level, 0) > 0)
1798 #ifdef SO_PEERCRED
1799 if (pid != 0)
1800 dbg_log (_("\
1801 handle_request: request received (Version = %d) from PID %ld"),
1802 req.version, (long int) pid);
1803 else
1804 #endif
1805 dbg_log (_("\
1806 handle_request: request received (Version = %d)"), req.version);
1809 /* Phew, we got all the data, now process it. */
1810 handle_request (fd, &req, keybuf, uid, pid);
1813 close_and_out:
1814 /* We are done. */
1815 close (fd);
1817 /* Re-locking. */
1818 pthread_mutex_lock (&readylist_lock);
1820 /* One more thread available. */
1821 ++nready;
1823 /* NOTREACHED */
1827 static unsigned int nconns;
1829 static void
1830 fd_ready (int fd)
1832 pthread_mutex_lock (&readylist_lock);
1834 /* Find an empty entry in FDLIST. */
1835 size_t inner;
1836 for (inner = 0; inner < nconns; ++inner)
1837 if (fdlist[inner].next == NULL)
1838 break;
1839 assert (inner < nconns);
1841 fdlist[inner].fd = fd;
1843 if (readylist == NULL)
1844 readylist = fdlist[inner].next = &fdlist[inner];
1845 else
1847 fdlist[inner].next = readylist->next;
1848 readylist = readylist->next = &fdlist[inner];
1851 bool do_signal = true;
1852 if (__builtin_expect (nready == 0, 0))
1854 ++client_queued;
1855 do_signal = false;
1857 /* Try to start another thread to help out. */
1858 pthread_t th;
1859 if (nthreads < max_nthreads
1860 && pthread_create (&th, &attr, nscd_run_worker,
1861 (void *) (long int) nthreads) == 0)
1863 /* We got another thread. */
1864 ++nthreads;
1865 /* The new thread might need a kick. */
1866 do_signal = true;
1871 pthread_mutex_unlock (&readylist_lock);
1873 /* Tell one of the worker threads there is work to do. */
1874 if (do_signal)
1875 pthread_cond_signal (&readylist_cond);
1879 /* Check whether restarting should happen. */
1880 static bool
1881 restart_p (time_t now)
1883 return (paranoia && readylist == NULL && nready == nthreads
1884 && now >= restart_time);
1888 /* Array for times a connection was accepted.  It has NCONNS entries.
   The poll loop indexes it by slot number in its pollfd array, the epoll
   loop indexes it by file descriptor and uses 0 to mean "not waiting".  */
1889 static time_t *starttime;
1891 #ifdef HAVE_INOTIFY
1892 /* Inotify event for changed file.  The union is what the main loops
   pass to read on the inotify descriptor: BUF makes it large enough for
   one event header followed by up to PATH_MAX further bytes, and I gives
   typed access to the header.  */
1893 union __inev
1895 struct inotify_event i;
/* Fall back to 1024 when the system headers do not define PATH_MAX.  */
1896 # ifndef PATH_MAX
1897 # define PATH_MAX 1024
1898 # endif
1899 char buf[sizeof (struct inotify_event) + PATH_MAX];
1902 /* Process the inotify event in INEV. If the event matches any of the files
1903 registered with a database then mark that database as requiring its cache
1904 to be cleared. We indicate the cache needs clearing by setting
1905 TO_CLEAR[DBCNT] to true for the matching database. */
1906 static inline void
1907 inotify_check_files (bool *to_clear, union __inev *inev)
1909 /* Check which of the files changed. */
1910 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1912 struct traced_file *finfo = dbs[dbcnt].traced_files;
1914 while (finfo != NULL)
1916 /* Inotify event watch descriptor matches. */
1917 if (finfo->inotify_descr == inev->i.wd)
1919 /* Mark cache as needing to be cleared and reinitialize. */
1920 to_clear[dbcnt] = true;
1921 if (finfo->call_res_init)
1922 res_init ();
1923 return;
1926 finfo = finfo->next;
1931 /* If an entry in the array of booleans TO_CLEAR is TRUE then clear the cache
1932 for the associated database, otherwise do nothing. The TO_CLEAR array must
1933 have LASTDB entries. */
1934 static inline void
1935 clear_db_cache (bool *to_clear)
1937 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1938 if (to_clear[dbcnt])
1940 pthread_mutex_lock (&dbs[dbcnt].prune_lock);
1941 dbs[dbcnt].clear_cache = 1;
1942 pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
1943 pthread_cond_signal (&dbs[dbcnt].prune_cond);
1947 #endif
1949 static void
1950 __attribute__ ((__noreturn__))
1951 main_loop_poll (void)
1953 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1954 * sizeof (conns[0]));
1956 conns[0].fd = sock;
1957 conns[0].events = POLLRDNORM;
1958 size_t nused = 1;
1959 size_t firstfree = 1;
1961 #ifdef HAVE_INOTIFY
1962 if (inotify_fd != -1)
1964 conns[1].fd = inotify_fd;
1965 conns[1].events = POLLRDNORM;
1966 nused = 2;
1967 firstfree = 2;
1969 #endif
1971 #ifdef HAVE_NETLINK
1972 size_t idx_nl_status_fd = 0;
1973 if (nl_status_fd != -1)
1975 idx_nl_status_fd = nused;
1976 conns[nused].fd = nl_status_fd;
1977 conns[nused].events = POLLRDNORM;
1978 ++nused;
1979 firstfree = nused;
1981 #endif
1983 while (1)
1985 /* Wait for any event. We wait at most a couple of seconds so
1986 that we can check whether we should close any of the accepted
1987 connections since we have not received a request. */
1988 #define MAX_ACCEPT_TIMEOUT 30
1989 #define MIN_ACCEPT_TIMEOUT 5
1990 #define MAIN_THREAD_TIMEOUT \
1991 (MAX_ACCEPT_TIMEOUT * 1000 \
1992 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1994 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1996 time_t now = time (NULL);
1998 /* If there is a descriptor ready for reading or there is a new
1999 connection, process this now. */
2000 if (n > 0)
2002 if (conns[0].revents != 0)
2004 /* We have a new incoming connection. Accept the connection. */
2005 int fd;
2007 #ifndef __ASSUME_ACCEPT4
2008 fd = -1;
2009 if (have_accept4 >= 0)
2010 #endif
2012 fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
2013 SOCK_NONBLOCK));
2014 #ifndef __ASSUME_ACCEPT4
2015 if (have_accept4 == 0)
2016 have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
2017 #endif
2019 #ifndef __ASSUME_ACCEPT4
2020 if (have_accept4 < 0)
2021 fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
2022 #endif
2024 /* Use the descriptor if we have not reached the limit. */
2025 if (fd >= 0)
2027 if (firstfree < nconns)
2029 conns[firstfree].fd = fd;
2030 conns[firstfree].events = POLLRDNORM;
2031 starttime[firstfree] = now;
2032 if (firstfree >= nused)
2033 nused = firstfree + 1;
2036 ++firstfree;
2037 while (firstfree < nused && conns[firstfree].fd != -1);
2039 else
2040 /* We cannot use the connection so close it. */
2041 close (fd);
2044 --n;
2047 size_t first = 1;
2048 #ifdef HAVE_INOTIFY
2049 if (inotify_fd != -1 && conns[1].fd == inotify_fd)
2051 if (conns[1].revents != 0)
2053 bool to_clear[lastdb] = { false, };
2054 union __inev inev;
2056 /* Read all inotify events for files registered via
2057 register_traced_file(). */
2058 while (1)
2060 ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
2061 sizeof (inev)));
2062 if (nb < (ssize_t) sizeof (struct inotify_event))
2064 if (__builtin_expect (nb == -1 && errno != EAGAIN,
2067 /* Something went wrong when reading the inotify
2068 data. Better disable inotify. */
2069 dbg_log (_("\
2070 disabled inotify after read error %d"),
2071 errno);
2072 conns[1].fd = -1;
2073 firstfree = 1;
2074 if (nused == 2)
2075 nused = 1;
2076 close (inotify_fd);
2077 inotify_fd = -1;
2079 break;
2082 /* Check which of the files changed. */
2083 inotify_check_files (to_clear, &inev);
2086 /* Actually perform the cache clearing. */
2087 clear_db_cache (to_clear);
2089 --n;
2092 first = 2;
2094 #endif
2096 #ifdef HAVE_NETLINK
2097 if (idx_nl_status_fd != 0 && conns[idx_nl_status_fd].revents != 0)
2099 char buf[4096];
2100 /* Read all the data. We do not interpret it here. */
2101 while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2102 sizeof (buf))) != -1)
2105 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
2106 = __bump_nl_timestamp ();
2108 #endif
2110 for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
2111 if (conns[cnt].revents != 0)
2113 fd_ready (conns[cnt].fd);
2115 /* Clean up the CONNS array. */
2116 conns[cnt].fd = -1;
2117 if (cnt < firstfree)
2118 firstfree = cnt;
2119 if (cnt == nused - 1)
2121 --nused;
2122 while (conns[nused - 1].fd == -1);
2124 --n;
2128 /* Now find entries which have timed out. */
2129 assert (nused > 0);
2131 /* We make the timeout length depend on the number of file
2132 descriptors currently used. */
2133 #define ACCEPT_TIMEOUT \
2134 (MAX_ACCEPT_TIMEOUT \
2135 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
2136 time_t laststart = now - ACCEPT_TIMEOUT;
2138 for (size_t cnt = nused - 1; cnt > 0; --cnt)
2140 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
2142 /* Remove the entry, it timed out. */
2143 (void) close (conns[cnt].fd);
2144 conns[cnt].fd = -1;
2146 if (cnt < firstfree)
2147 firstfree = cnt;
2148 if (cnt == nused - 1)
2150 --nused;
2151 while (conns[nused - 1].fd == -1);
2155 if (restart_p (now))
2156 restart ();
#ifdef HAVE_EPOLL
/* Main loop of the initial thread, epoll based.  EFD is the epoll
   descriptor to use.  The function only returns if one of the initial
   epoll_ctl registrations fails, in which case the caller falls back to
   another loop; otherwise it runs forever.  STARTTIME is indexed by file
   descriptor here and a value of 0 means the descriptor is not an
   accepted connection waiting for its request.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  int nused = 1;
  /* Highest descriptor which may have a nonzero STARTTIME entry.  */
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

# ifdef HAVE_INOTIFY
  if (inotify_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = inotify_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
        /* We cannot use epoll.  */
        return;
      nused = 2;
    }
# endif

# ifdef HAVE_NETLINK
  if (nl_status_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = nl_status_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, nl_status_fd, &ev) == -1)
        /* We cannot use epoll.  */
        return;
    }
# endif

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd;

# ifndef __ASSUME_ACCEPT4
            fd = -1;
            if (have_accept4 >= 0)
# endif
              {
                fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
                                                  SOCK_NONBLOCK));
# ifndef __ASSUME_ACCEPT4
                /* Remember whether accept4 is usable at all.  */
                if (have_accept4 == 0)
                  have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
# endif
              }
# ifndef __ASSUME_ACCEPT4
            if (have_accept4 < 0)
              fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
# endif

            /* Use the descriptor if we have not reached the limit.  */
            if (fd >= 0)
              {
                /* Try to add the new descriptor.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
# ifdef HAVE_INOTIFY
        else if (revs[cnt].data.fd == inotify_fd)
          {
            bool to_clear[lastdb] = { false, };
            union __inev inev;

            /* Read all inotify events for files registered via
               register_traced_file().  */
            while (1)
              {
                ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
                                                       sizeof (inev)));
                if (nb < (ssize_t) sizeof (struct inotify_event))
                  {
                    if (__builtin_expect (nb == -1 && errno != EAGAIN, 0))
                      {
                        /* Something went wrong when reading the inotify
                           data.  Better disable inotify.  */
                        dbg_log (_("disabled inotify after read error %d"),
                                 errno);
                        (void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd,
                                          NULL);
                        close (inotify_fd);
                        inotify_fd = -1;
                      }
                    break;
                  }

                /* Check which of the files changed.  */
                inotify_check_files (to_clear, &inev);
              }

            /* Actually perform the cache clearing.  */
            clear_db_cache (to_clear);
          }
# endif
# ifdef HAVE_NETLINK
        else if (revs[cnt].data.fd == nl_status_fd)
          {
            char buf[4096];
            /* Read all the data.  We do not interpret it here.  */
            while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
                                             sizeof (buf))) != -1)
              ;

            /* Store the new timestamp in the hosts database head, as
               the poll based loop does.  Bumping the timestamp without
               recording it there leaves the stored value stale.  */
            dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
              = __bump_nl_timestamp ();
          }
# endif
        else
          {
            /* Remove the descriptor from the epoll descriptor.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      assert (starttime[sock] == 0);
      assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
      assert (nl_status_fd == -1 || starttime[nl_status_fd] == 0);
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
2347 /* Start all the threads we want. The initial process is thread no. 1. */
2348 void
2349 start_threads (void)
2351 /* Initialize the conditional variable we will use. The only
2352 non-standard attribute we might use is the clock selection. */
2353 pthread_condattr_t condattr;
2354 pthread_condattr_init (&condattr);
2356 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2357 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2358 /* Determine whether the monotonous clock is available. */
2359 struct timespec dummy;
2360 # if _POSIX_MONOTONIC_CLOCK == 0
2361 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
2362 # endif
2363 # if _POSIX_CLOCK_SELECTION == 0
2364 if (sysconf (_SC_CLOCK_SELECTION) > 0)
2365 # endif
2366 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
2367 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
2368 timeout_clock = CLOCK_MONOTONIC;
2369 #endif
2371 /* Create the attribute for the threads. They are all created
2372 detached. */
2373 pthread_attr_init (&attr);
2374 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
2375 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2376 pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
2378 /* We allow less than LASTDB threads only for debugging. */
2379 if (debug_level == 0)
2380 nthreads = MAX (nthreads, lastdb);
2382 /* Create the threads which prune the databases. */
2383 // XXX Ideally this work would be done by some of the worker threads.
2384 // XXX But this is problematic since we would need to be able to wake
2385 // XXX them up explicitly as well as part of the group handling the
2386 // XXX ready-list. This requires an operation where we can wait on
2387 // XXX two conditional variables at the same time. This operation
2388 // XXX does not exist (yet).
2389 for (long int i = 0; i < lastdb; ++i)
2391 /* Initialize the conditional variable. */
2392 if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
2394 dbg_log (_("could not initialize conditional variable"));
2395 exit (1);
2398 pthread_t th;
2399 if (dbs[i].enabled
2400 && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
2402 dbg_log (_("could not start clean-up thread; terminating"));
2403 exit (1);
2407 pthread_condattr_destroy (&condattr);
2409 for (long int i = 0; i < nthreads; ++i)
2411 pthread_t th;
2412 if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
2414 if (i == 0)
2416 dbg_log (_("could not start any worker thread; terminating"));
2417 exit (1);
2420 break;
2424 /* Determine how much room for descriptors we should initially
2425 allocate. This might need to change later if we cap the number
2426 with MAXCONN. */
2427 const long int nfds = sysconf (_SC_OPEN_MAX);
2428 #define MINCONN 32
2429 #define MAXCONN 16384
2430 if (nfds == -1 || nfds > MAXCONN)
2431 nconns = MAXCONN;
2432 else if (nfds < MINCONN)
2433 nconns = MINCONN;
2434 else
2435 nconns = nfds;
2437 /* We need memory to pass descriptors on to the worker threads. */
2438 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
2439 /* Array to keep track when connection was accepted. */
2440 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
2442 /* In the main thread we execute the loop which handles incoming
2443 connections. */
2444 #ifdef HAVE_EPOLL
2445 int efd = epoll_create (100);
2446 if (efd != -1)
2448 main_loop_epoll (efd);
2449 close (efd);
2451 #endif
2453 main_loop_poll ();
2457 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2458 this function is called, we are not listening on the nscd socket yet so
2459 we can just use the ordinary lookup functions without causing a lockup */
2460 static void
2461 begin_drop_privileges (void)
2463 struct passwd *pwd = getpwnam (server_user);
2465 if (pwd == NULL)
2467 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2468 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
2469 server_user);
2472 server_uid = pwd->pw_uid;
2473 server_gid = pwd->pw_gid;
2475 /* Save the old UID/GID if we have to change back. */
2476 if (paranoia)
2478 old_uid = getuid ();
2479 old_gid = getgid ();
2482 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
2484 /* This really must never happen. */
2485 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2486 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
2489 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
2491 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
2492 == -1)
2494 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2495 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
2500 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2501 run nscd as the user specified in the configuration file. */
2502 static void
2503 finish_drop_privileges (void)
2505 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2506 /* We need to preserve the capabilities to connect to the audit daemon. */
2507 cap_t new_caps = preserve_capabilities ();
2508 #endif
2510 if (setgroups (server_ngroups, server_groups) == -1)
2512 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2513 error (EXIT_FAILURE, errno, _("setgroups failed"));
2516 int res;
2517 if (paranoia)
2518 res = setresgid (server_gid, server_gid, old_gid);
2519 else
2520 res = setgid (server_gid);
2521 if (res == -1)
2523 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2524 perror ("setgid");
2525 exit (4);
2528 if (paranoia)
2529 res = setresuid (server_uid, server_uid, old_uid);
2530 else
2531 res = setuid (server_uid);
2532 if (res == -1)
2534 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2535 perror ("setuid");
2536 exit (4);
2539 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2540 /* Remove the temporary capabilities. */
2541 install_real_capabilities (new_caps);
2542 #endif