nptl: Fix comment typo in fork.c.
[glibc.git] / nscd / connections.c
blob56c327971b95e89b7f773bbdf05516c0ed190d80
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2013 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>. */
19 #include <alloca.h>
20 #include <assert.h>
21 #include <atomic.h>
22 #include <error.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <grp.h>
26 #include <ifaddrs.h>
27 #include <libintl.h>
28 #include <pthread.h>
29 #include <pwd.h>
30 #include <resolv.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <arpa/inet.h>
35 #ifdef HAVE_NETLINK
36 # include <linux/netlink.h>
37 # include <linux/rtnetlink.h>
38 #endif
39 #ifdef HAVE_EPOLL
40 # include <sys/epoll.h>
41 #endif
42 #ifdef HAVE_INOTIFY
43 # include <sys/inotify.h>
44 #endif
45 #include <sys/mman.h>
46 #include <sys/param.h>
47 #include <sys/poll.h>
48 #ifdef HAVE_SENDFILE
49 # include <sys/sendfile.h>
50 #endif
51 #include <sys/socket.h>
52 #include <sys/stat.h>
53 #include <sys/un.h>
55 #include "nscd.h"
56 #include "dbg_log.h"
57 #include "selinux.h"
58 #include <resolv/resolv.h>
60 #include <kernel-features.h>
63 /* Support to run nscd as an unprivileged user */
/* Name of the unprivileged user.  nscd_init calls begin_drop_privileges
   and finish_drop_privileges only when this is non-NULL.  */
64 const char *server_user;
/* Presumably the uid/gid resolved for server_user; they are not assigned
   in this part of the file -- TODO confirm.  */
65 static uid_t server_uid;
66 static gid_t server_gid;
/* NOTE(review): stat_user/stat_uid are not used in this part of the file;
   presumably they identify a user allowed to query statistics -- confirm.  */
67 const char *stat_user;
68 uid_t stat_uid;
/* Presumably the supplementary group list of server_user and its length
   (see server_ngroups below) -- TODO confirm.  */
69 static gid_t *server_groups;
/* Fallback value used only when no header has defined NGROUPS.  */
70 #ifndef NGROUPS
71 # define NGROUPS 32
72 #endif
73 static int server_ngroups;
/* Thread attribute object; not referenced in this part of the file.  */
75 static pthread_attr_t attr;
/* Forward declarations; both are called from nscd_init when server_user
   is set, the first before the socket is created and the second at the
   very end.  */
77 static void begin_drop_privileges (void);
78 static void finish_drop_privileges (void);
80 /* Map request type to a string. */
/* The table is indexed by the request type constant (designated
   initializers, LASTREQ entries).  handle_request passes
   serv2str[req->type] to dbg_log when debug output is enabled.  */
81 const char *const serv2str[LASTREQ] =
83 [GETPWBYNAME] = "GETPWBYNAME",
84 [GETPWBYUID] = "GETPWBYUID",
85 [GETGRBYNAME] = "GETGRBYNAME",
86 [GETGRBYGID] = "GETGRBYGID",
87 [GETHOSTBYNAME] = "GETHOSTBYNAME",
88 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
89 [GETHOSTBYADDR] = "GETHOSTBYADDR",
90 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
91 [SHUTDOWN] = "SHUTDOWN",
92 [GETSTAT] = "GETSTAT",
93 [INVALIDATE] = "INVALIDATE",
94 [GETFDPW] = "GETFDPW",
95 [GETFDGR] = "GETFDGR",
96 [GETFDHST] = "GETFDHST",
97 [GETAI] = "GETAI",
98 [INITGROUPS] = "INITGROUPS",
99 [GETSERVBYNAME] = "GETSERVBYNAME",
100 [GETSERVBYPORT] = "GETSERVBYPORT",
101 [GETFDSERV] = "GETFDSERV",
102 [GETNETGRENT] = "GETNETGRENT",
103 [INNETGR] = "INNETGR",
104 [GETFDNETGR] = "GETFDNETGR"
107 /* The control data structures for the services. */
/* One statically initialized entry per database (pwddb, grpdb, hstdb,
   servdb, netgrdb).  Every entry starts out disabled, non-persistent and
   non-shared, with file checking on and both descriptors set to -1;
   nscd_init only sets up entries whose .enabled flag is non-zero.  The
   entries differ in file name, prepared "disabled" reply, the propagate
   flag and the timeouts (presumably seconds -- TODO confirm).  */
108 struct database_dyn dbs[lastdb] =
/* Password database: negative timeout 20, propagate on.  */
110 [pwddb] = {
111 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
112 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
113 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
114 .enabled = 0,
115 .check_file = 1,
116 .persistent = 0,
117 .propagate = 1,
118 .shared = 0,
119 .max_db_size = DEFAULT_MAX_DB_SIZE,
120 .suggested_module = DEFAULT_SUGGESTED_MODULE,
121 .db_filename = _PATH_NSCD_PASSWD_DB,
122 .disabled_iov = &pwd_iov_disabled,
123 .postimeout = 3600,
124 .negtimeout = 20,
125 .wr_fd = -1,
126 .ro_fd = -1,
127 .mmap_used = false
/* Group database: same as above except for a negative timeout of 60.  */
129 [grpdb] = {
130 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
131 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
132 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
133 .enabled = 0,
134 .check_file = 1,
135 .persistent = 0,
136 .propagate = 1,
137 .shared = 0,
138 .max_db_size = DEFAULT_MAX_DB_SIZE,
139 .suggested_module = DEFAULT_SUGGESTED_MODULE,
140 .db_filename = _PATH_NSCD_GROUP_DB,
141 .disabled_iov = &grp_iov_disabled,
142 .postimeout = 3600,
143 .negtimeout = 60,
144 .wr_fd = -1,
145 .ro_fd = -1,
146 .mmap_used = false
/* Hosts database.  */
148 [hstdb] = {
149 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
150 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
151 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
152 .enabled = 0,
153 .check_file = 1,
154 .persistent = 0,
155 .propagate = 0, /* Not used. */
156 .shared = 0,
157 .max_db_size = DEFAULT_MAX_DB_SIZE,
158 .suggested_module = DEFAULT_SUGGESTED_MODULE,
159 .db_filename = _PATH_NSCD_HOSTS_DB,
160 .disabled_iov = &hst_iov_disabled,
161 .postimeout = 3600,
162 .negtimeout = 20,
163 .wr_fd = -1,
164 .ro_fd = -1,
165 .mmap_used = false
/* Services database: positive timeout 28800.  */
167 [servdb] = {
168 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
169 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
170 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
171 .enabled = 0,
172 .check_file = 1,
173 .persistent = 0,
174 .propagate = 0, /* Not used. */
175 .shared = 0,
176 .max_db_size = DEFAULT_MAX_DB_SIZE,
177 .suggested_module = DEFAULT_SUGGESTED_MODULE,
178 .db_filename = _PATH_NSCD_SERVICES_DB,
179 .disabled_iov = &serv_iov_disabled,
180 .postimeout = 28800,
181 .negtimeout = 20,
182 .wr_fd = -1,
183 .ro_fd = -1,
184 .mmap_used = false
/* Netgroup database: positive timeout 28800.  */
186 [netgrdb] = {
187 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
188 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
189 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
190 .enabled = 0,
191 .check_file = 1,
192 .persistent = 0,
193 .propagate = 0, /* Not used. */
194 .shared = 0,
195 .max_db_size = DEFAULT_MAX_DB_SIZE,
196 .suggested_module = DEFAULT_SUGGESTED_MODULE,
197 .db_filename = _PATH_NSCD_NETGROUP_DB,
198 .disabled_iov = &netgroup_iov_disabled,
199 .postimeout = 28800,
200 .negtimeout = 20,
201 .wr_fd = -1,
202 .ro_fd = -1,
203 .mmap_used = false
208 /* Mapping of request type to database. */
209 static struct
211 bool data_request;
212 struct database_dyn *db;
213 } const reqinfo[LASTREQ] =
215 [GETPWBYNAME] = { true, &dbs[pwddb] },
216 [GETPWBYUID] = { true, &dbs[pwddb] },
217 [GETGRBYNAME] = { true, &dbs[grpdb] },
218 [GETGRBYGID] = { true, &dbs[grpdb] },
219 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
220 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
221 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
222 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
223 [SHUTDOWN] = { false, NULL },
224 [GETSTAT] = { false, NULL },
225 [SHUTDOWN] = { false, NULL },
226 [GETFDPW] = { false, &dbs[pwddb] },
227 [GETFDGR] = { false, &dbs[grpdb] },
228 [GETFDHST] = { false, &dbs[hstdb] },
229 [GETAI] = { true, &dbs[hstdb] },
230 [INITGROUPS] = { true, &dbs[grpdb] },
231 [GETSERVBYNAME] = { true, &dbs[servdb] },
232 [GETSERVBYPORT] = { true, &dbs[servdb] },
233 [GETFDSERV] = { false, &dbs[servdb] },
234 [GETNETGRENT] = { true, &dbs[netgrdb] },
235 [INNETGR] = { true, &dbs[netgrdb] },
236 [GETFDNETGR] = { false, &dbs[netgrdb] }
240 /* Initial number of threads to use. */
/* -1 means "not configured"; nscd_init replaces it with 4.  */
241 int nthreads = -1;
242 /* Maximum number of threads to use. */
243 int max_nthreads = 32;
245 /* Socket for incoming connections. */
/* Created, bound to _PATH_NSCDSOCKET and put into listening state by
   nscd_init; closed by close_sockets.  */
246 static int sock;
248 #ifdef HAVE_INOTIFY
249 /* Inotify descriptor. */
/* register_traced_file adds watches to it when it is non-negative.  */
250 int inotify_fd = -1;
251 #endif
253 #ifdef HAVE_NETLINK
254 /* Descriptor for netlink status updates. */
/* Opened by nscd_init when the hosts database is enabled; stays -1 if the
   socket cannot be created or bound.  */
255 static int nl_status_fd = -1;
256 #endif
258 #ifndef __ASSUME_SOCK_CLOEXEC
259 /* Negative if SOCK_CLOEXEC is not supported, positive if it is, zero
260 before we know the result. */
261 static int have_sock_cloexec;
262 #endif
263 #ifndef __ASSUME_ACCEPT4
/* NOTE(review): presumably the same tri-state as have_sock_cloexec, but
   for accept4; it is not used in this part of the file -- confirm.  */
264 static int have_accept4;
265 #endif
267 /* Number of times clients had to wait. */
268 unsigned long int client_queued;
/* Send LEN bytes starting at BUF over FD, calling send (with
   MSG_NOSIGNAL, restarted on EINTR through TEMP_FAILURE_RETRY) until
   everything has been sent or send returns zero or a negative value.

   Returns the negative send result if the last call failed; otherwise
   the number of bytes actually sent, which is smaller than LEN when send
   returned zero early.  */
271 ssize_t
272 writeall (int fd, const void *buf, size_t len)
/* N counts the bytes still to be sent.  */
274 size_t n = len;
275 ssize_t ret;
278 ret = TEMP_FAILURE_RETRY (send (fd, buf, n, MSG_NOSIGNAL));
279 if (ret <= 0)
280 break;
/* Skip over the part that has been sent.  */
281 buf = (const char *) buf + ret;
282 n -= ret;
284 while (n > 0);
285 return ret < 0 ? ret : len - n;
289 #ifdef HAVE_SENDFILE
/* Copy LEN bytes from descriptor FROMFD, starting at file offset OFF, to
   TOFD using sendfile (restarted on EINTR through TEMP_FAILURE_RETRY).
   sendfile is handed the address of OFF, so it tracks the position
   between calls.  The loop stops when everything is copied or sendfile
   returns zero or a negative value.

   Returns the negative sendfile result if the last call failed;
   otherwise the number of bytes actually transferred.  Only compiled
   when HAVE_SENDFILE is defined.  */
290 ssize_t
291 sendfileall (int tofd, int fromfd, off_t off, size_t len)
/* N counts the bytes still to be transferred.  */
293 ssize_t n = len;
294 ssize_t ret;
298 ret = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, n));
299 if (ret <= 0)
300 break;
301 n -= ret;
303 while (n > 0);
304 return ret < 0 ? ret : len - n;
306 #endif
/* Per-byte markers stored in the usemap that verify_persistent_db
   allocates and check_use fills in.  The low bits name the kind of object
   covering the byte (hash entry, key, data); use_begin and use_end are
   OR-ed into the first and last byte of an object, and use_first is OR-ed
   into the first byte when the referencing hash entry has its `first'
   flag set.  */
309 enum usekey
/* Byte not claimed by any object yet.  */
311 use_not = 0,
312 /* The following three are not really used, they are symbolic constants. */
313 use_first = 16,
314 use_begin = 32,
315 use_end = 64,
/* Bytes of a struct hashentry.  */
317 use_he = 1,
318 use_he_begin = use_he | use_begin,
319 use_he_end = use_he | use_end,
/* Key markers exist only when keys are tracked separately from data.  */
320 #if SEPARATE_KEY
321 use_key = 2,
322 use_key_begin = use_key | use_begin,
323 use_key_end = use_key | use_end,
324 use_key_first = use_key_begin | use_first,
325 #endif
/* Bytes of a data record (struct datahead plus payload).  */
326 use_data = 3,
327 use_data_begin = use_data | use_begin,
328 use_data_end = use_data | use_end,
329 use_data_first = use_data_begin | use_first
333 static int
334 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
335 enum usekey use, ref_t start, size_t len)
337 assert (len >= 2);
339 if (start > first_free || start + len > first_free
340 || (start & BLOCK_ALIGN_M1))
341 return 0;
343 if (usemap[start] == use_not)
345 /* Add the start marker. */
346 usemap[start] = use | use_begin;
347 use &= ~use_first;
349 while (--len > 0)
350 if (usemap[++start] != use_not)
351 return 0;
352 else
353 usemap[start] = use;
355 /* Add the end marker. */
356 usemap[start] = use | use_end;
358 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
360 /* Hash entries can't be shared. */
361 if (use == use_he)
362 return 0;
364 usemap[start] |= (use & use_first);
365 use &= ~use_first;
367 while (--len > 1)
368 if (usemap[++start] != use)
369 return 0;
371 if (usemap[++start] != (use | use_end))
372 return 0;
374 else
375 /* Points to a wrong object or somewhere in the middle. */
376 return 0;
378 return 1;
382 /* Verify data in persistent database. */
383 static int
384 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
386 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb
387 || dbnr == netgrdb);
389 time_t now = time (NULL);
391 struct database_pers_head *head = mem;
392 struct database_pers_head head_copy = *head;
394 /* Check that the header that was read matches the head in the database. */
395 if (memcmp (head, readhead, sizeof (*head)) != 0)
396 return 0;
398 /* First some easy tests: make sure the database header is sane. */
399 if (head->version != DB_VERSION
400 || head->header_size != sizeof (*head)
401 /* We allow a timestamp to be one hour ahead of the current time.
402 This should cover daylight saving time changes. */
403 || head->timestamp > now + 60 * 60 + 60
404 || (head->gc_cycle & 1)
405 || head->module == 0
406 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
407 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
408 || head->first_free < 0
409 || head->first_free > head->data_size
410 || (head->first_free & BLOCK_ALIGN_M1) != 0
411 || head->maxnentries < 0
412 || head->maxnsearched < 0)
413 return 0;
415 uint8_t *usemap = calloc (head->first_free, 1);
416 if (usemap == NULL)
417 return 0;
419 const char *data = (char *) &head->array[roundup (head->module,
420 ALIGN / sizeof (ref_t))];
422 nscd_ssize_t he_cnt = 0;
423 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
425 ref_t trail = head->array[cnt];
426 ref_t work = trail;
427 int tick = 0;
429 while (work != ENDREF)
431 if (! check_use (data, head->first_free, usemap, use_he, work,
432 sizeof (struct hashentry)))
433 goto fail;
435 /* Now we know we can dereference the record. */
436 struct hashentry *here = (struct hashentry *) (data + work);
438 ++he_cnt;
440 /* Make sure the record is for this type of service. */
441 if (here->type >= LASTREQ
442 || reqinfo[here->type].db != &dbs[dbnr])
443 goto fail;
445 /* Validate boolean field value. */
446 if (here->first != false && here->first != true)
447 goto fail;
449 if (here->len < 0)
450 goto fail;
452 /* Now the data. */
453 if (here->packet < 0
454 || here->packet > head->first_free
455 || here->packet + sizeof (struct datahead) > head->first_free)
456 goto fail;
458 struct datahead *dh = (struct datahead *) (data + here->packet);
460 if (! check_use (data, head->first_free, usemap,
461 use_data | (here->first ? use_first : 0),
462 here->packet, dh->allocsize))
463 goto fail;
465 if (dh->allocsize < sizeof (struct datahead)
466 || dh->recsize > dh->allocsize
467 || (dh->notfound != false && dh->notfound != true)
468 || (dh->usable != false && dh->usable != true))
469 goto fail;
471 if (here->key < here->packet + sizeof (struct datahead)
472 || here->key > here->packet + dh->allocsize
473 || here->key + here->len > here->packet + dh->allocsize)
475 #if SEPARATE_KEY
476 /* If keys can appear outside of data, this should be done
477 instead. But gc doesn't mark the data in that case. */
478 if (! check_use (data, head->first_free, usemap,
479 use_key | (here->first ? use_first : 0),
480 here->key, here->len))
481 #endif
482 goto fail;
485 work = here->next;
487 if (work == trail)
488 /* A circular list, this must not happen. */
489 goto fail;
490 if (tick)
491 trail = ((struct hashentry *) (data + trail))->next;
492 tick = 1 - tick;
496 if (he_cnt != head->nentries)
497 goto fail;
499 /* See if all data and keys had at least one reference from
500 he->first == true hashentry. */
501 for (ref_t idx = 0; idx < head->first_free; ++idx)
503 #if SEPARATE_KEY
504 if (usemap[idx] == use_key_begin)
505 goto fail;
506 #endif
507 if (usemap[idx] == use_data_begin)
508 goto fail;
511 /* Finally, make sure the database hasn't changed since the first test. */
512 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
513 goto fail;
515 free (usemap);
516 return 1;
518 fail:
519 free (usemap);
520 return 0;
/* Extra flag OR-ed into the open and mkostemp calls in nscd_init:
   O_CLOEXEC where the platform defines it, nothing otherwise.  */
524 #ifdef O_CLOEXEC
525 # define EXTRA_O_FLAGS O_CLOEXEC
526 #else
527 # define EXTRA_O_FLAGS 0
528 #endif
531 /* Initialize database information structures. */
532 void
533 nscd_init (void)
535 /* Look up unprivileged uid/gid/groups before we start listening on the
536 socket */
537 if (server_user != NULL)
538 begin_drop_privileges ();
540 if (nthreads == -1)
541 /* No configuration for this value, assume a default. */
542 nthreads = 4;
544 for (size_t cnt = 0; cnt < lastdb; ++cnt)
545 if (dbs[cnt].enabled)
547 pthread_rwlock_init (&dbs[cnt].lock, NULL);
548 pthread_mutex_init (&dbs[cnt].memlock, NULL);
550 if (dbs[cnt].persistent)
552 /* Try to open the appropriate file on disk. */
553 int fd = open (dbs[cnt].db_filename, O_RDWR | EXTRA_O_FLAGS);
554 if (fd != -1)
556 char *msg = NULL;
557 struct stat64 st;
558 void *mem;
559 size_t total;
560 struct database_pers_head head;
561 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
562 sizeof (head)));
563 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
565 fail_db_errno:
566 /* The code is single-threaded at this point so
567 using strerror is just fine. */
568 msg = strerror (errno);
569 fail_db:
570 dbg_log (_("invalid persistent database file \"%s\": %s"),
571 dbs[cnt].db_filename, msg);
572 unlink (dbs[cnt].db_filename);
574 else if (head.module == 0 && head.data_size == 0)
576 /* The file has been created, but the head has not
577 been initialized yet. */
578 msg = _("uninitialized header");
579 goto fail_db;
581 else if (head.header_size != (int) sizeof (head))
583 msg = _("header size does not match");
584 goto fail_db;
586 else if ((total = (sizeof (head)
587 + roundup (head.module * sizeof (ref_t),
588 ALIGN)
589 + head.data_size))
590 > st.st_size
591 || total < sizeof (head))
593 msg = _("file size does not match");
594 goto fail_db;
596 /* Note we map with the maximum size allowed for the
597 database. This is likely much larger than the
598 actual file size. This is OK on most OSes since
599 extensions of the underlying file will
600 automatically translate more pages available for
601 memory access. */
602 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
603 PROT_READ | PROT_WRITE,
604 MAP_SHARED, fd, 0))
605 == MAP_FAILED)
606 goto fail_db_errno;
607 else if (!verify_persistent_db (mem, &head, cnt))
609 munmap (mem, total);
610 msg = _("verification failed");
611 goto fail_db;
613 else
615 /* Success. We have the database. */
616 dbs[cnt].head = mem;
617 dbs[cnt].memsize = total;
618 dbs[cnt].data = (char *)
619 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
620 ALIGN / sizeof (ref_t))];
621 dbs[cnt].mmap_used = true;
623 if (dbs[cnt].suggested_module > head.module)
624 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
625 dbnames[cnt]);
627 dbs[cnt].wr_fd = fd;
628 fd = -1;
629 /* We also need a read-only descriptor. */
630 if (dbs[cnt].shared)
632 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
633 O_RDONLY | EXTRA_O_FLAGS);
634 if (dbs[cnt].ro_fd == -1)
635 dbg_log (_("\
636 cannot create read-only descriptor for \"%s\"; no mmap"),
637 dbs[cnt].db_filename);
640 // XXX Shall we test whether the descriptors actually
641 // XXX point to the same file?
644 /* Close the file descriptors in case something went
645 wrong in which case the variable have not been
646 assigned -1. */
647 if (fd != -1)
648 close (fd);
650 else if (errno == EACCES)
651 error (EXIT_FAILURE, 0, _("cannot access '%s'"),
652 dbs[cnt].db_filename);
655 if (dbs[cnt].head == NULL)
657 /* No database loaded. Allocate the data structure,
658 possibly on disk. */
659 struct database_pers_head head;
660 size_t total = (sizeof (head)
661 + roundup (dbs[cnt].suggested_module
662 * sizeof (ref_t), ALIGN)
663 + (dbs[cnt].suggested_module
664 * DEFAULT_DATASIZE_PER_BUCKET));
666 /* Try to create the database. If we do not need a
667 persistent database create a temporary file. */
668 int fd;
669 int ro_fd = -1;
670 if (dbs[cnt].persistent)
672 fd = open (dbs[cnt].db_filename,
673 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | EXTRA_O_FLAGS,
674 S_IRUSR | S_IWUSR);
675 if (fd != -1 && dbs[cnt].shared)
676 ro_fd = open (dbs[cnt].db_filename,
677 O_RDONLY | EXTRA_O_FLAGS);
679 else
681 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
682 fd = mkostemp (fname, EXTRA_O_FLAGS);
684 /* We do not need the file name anymore after we
685 opened another file descriptor in read-only mode. */
686 if (fd != -1)
688 if (dbs[cnt].shared)
689 ro_fd = open (fname, O_RDONLY | EXTRA_O_FLAGS);
691 unlink (fname);
695 if (fd == -1)
697 if (errno == EEXIST)
699 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
700 dbnames[cnt], dbs[cnt].db_filename);
701 // XXX Correct way to terminate?
702 exit (1);
705 if (dbs[cnt].persistent)
706 dbg_log (_("cannot create %s; no persistent database used"),
707 dbs[cnt].db_filename);
708 else
709 dbg_log (_("cannot create %s; no sharing possible"),
710 dbs[cnt].db_filename);
712 dbs[cnt].persistent = 0;
713 // XXX remember: no mmap
715 else
717 /* Tell the user if we could not create the read-only
718 descriptor. */
719 if (ro_fd == -1 && dbs[cnt].shared)
720 dbg_log (_("\
721 cannot create read-only descriptor for \"%s\"; no mmap"),
722 dbs[cnt].db_filename);
724 /* Before we create the header, initialiye the hash
725 table. So that if we get interrupted if writing
726 the header we can recognize a partially initialized
727 database. */
728 size_t ps = sysconf (_SC_PAGESIZE);
729 char tmpbuf[ps];
730 assert (~ENDREF == 0);
731 memset (tmpbuf, '\xff', ps);
733 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
734 off_t offset = sizeof (head);
736 size_t towrite;
737 if (offset % ps != 0)
739 towrite = MIN (remaining, ps - (offset % ps));
740 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
741 goto write_fail;
742 offset += towrite;
743 remaining -= towrite;
746 while (remaining > ps)
748 if (pwrite (fd, tmpbuf, ps, offset) == -1)
749 goto write_fail;
750 offset += ps;
751 remaining -= ps;
754 if (remaining > 0
755 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
756 goto write_fail;
758 /* Create the header of the file. */
759 struct database_pers_head head =
761 .version = DB_VERSION,
762 .header_size = sizeof (head),
763 .module = dbs[cnt].suggested_module,
764 .data_size = (dbs[cnt].suggested_module
765 * DEFAULT_DATASIZE_PER_BUCKET),
766 .first_free = 0
768 void *mem;
770 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
771 != sizeof (head))
772 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
773 != 0)
774 || (mem = mmap (NULL, dbs[cnt].max_db_size,
775 PROT_READ | PROT_WRITE,
776 MAP_SHARED, fd, 0)) == MAP_FAILED)
778 write_fail:
779 unlink (dbs[cnt].db_filename);
780 dbg_log (_("cannot write to database file %s: %s"),
781 dbs[cnt].db_filename, strerror (errno));
782 dbs[cnt].persistent = 0;
784 else
786 /* Success. */
787 dbs[cnt].head = mem;
788 dbs[cnt].data = (char *)
789 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
790 ALIGN / sizeof (ref_t))];
791 dbs[cnt].memsize = total;
792 dbs[cnt].mmap_used = true;
794 /* Remember the descriptors. */
795 dbs[cnt].wr_fd = fd;
796 dbs[cnt].ro_fd = ro_fd;
797 fd = -1;
798 ro_fd = -1;
801 if (fd != -1)
802 close (fd);
803 if (ro_fd != -1)
804 close (ro_fd);
808 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
809 /* We do not check here whether the O_CLOEXEC provided to the
810 open call was successful or not. The two fcntl calls are
811 only performed once each per process start-up and therefore
812 is not noticeable at all. */
813 if (paranoia
814 && ((dbs[cnt].wr_fd != -1
815 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
816 || (dbs[cnt].ro_fd != -1
817 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
819 dbg_log (_("\
820 cannot set socket to close on exec: %s; disabling paranoia mode"),
821 strerror (errno));
822 paranoia = 0;
824 #endif
826 if (dbs[cnt].head == NULL)
828 /* We do not use the persistent database. Just
829 create an in-memory data structure. */
830 assert (! dbs[cnt].persistent);
832 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
833 + (dbs[cnt].suggested_module
834 * sizeof (ref_t)));
835 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
836 assert (~ENDREF == 0);
837 memset (dbs[cnt].head->array, '\xff',
838 dbs[cnt].suggested_module * sizeof (ref_t));
839 dbs[cnt].head->module = dbs[cnt].suggested_module;
840 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
841 * dbs[cnt].head->module);
842 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
843 dbs[cnt].head->first_free = 0;
845 dbs[cnt].shared = 0;
846 assert (dbs[cnt].ro_fd == -1);
850 /* Create the socket. */
851 #ifndef __ASSUME_SOCK_CLOEXEC
852 sock = -1;
853 if (have_sock_cloexec >= 0)
854 #endif
856 sock = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
857 #ifndef __ASSUME_SOCK_CLOEXEC
858 if (have_sock_cloexec == 0)
859 have_sock_cloexec = sock != -1 || errno != EINVAL ? 1 : -1;
860 #endif
862 #ifndef __ASSUME_SOCK_CLOEXEC
863 if (have_sock_cloexec < 0)
864 sock = socket (AF_UNIX, SOCK_STREAM, 0);
865 #endif
866 if (sock < 0)
868 dbg_log (_("cannot open socket: %s"), strerror (errno));
869 exit (errno == EACCES ? 4 : 1);
871 /* Bind a name to the socket. */
872 struct sockaddr_un sock_addr;
873 sock_addr.sun_family = AF_UNIX;
874 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
875 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
877 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
878 exit (errno == EACCES ? 4 : 1);
881 #ifndef __ASSUME_SOCK_CLOEXEC
882 if (have_sock_cloexec < 0)
884 /* We don't want to get stuck on accept. */
885 int fl = fcntl (sock, F_GETFL);
886 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
888 dbg_log (_("cannot change socket to nonblocking mode: %s"),
889 strerror (errno));
890 exit (1);
893 /* The descriptor needs to be closed on exec. */
894 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
896 dbg_log (_("cannot set socket to close on exec: %s"),
897 strerror (errno));
898 exit (1);
901 #endif
903 /* Set permissions for the socket. */
904 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
906 /* Set the socket up to accept connections. */
907 if (listen (sock, SOMAXCONN) < 0)
909 dbg_log (_("cannot enable socket to accept connections: %s"),
910 strerror (errno));
911 exit (1);
914 #ifdef HAVE_NETLINK
915 if (dbs[hstdb].enabled)
917 /* Try to open netlink socket to monitor network setting changes. */
918 nl_status_fd = socket (AF_NETLINK,
919 SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK,
920 NETLINK_ROUTE);
921 if (nl_status_fd != -1)
923 struct sockaddr_nl snl;
924 memset (&snl, '\0', sizeof (snl));
925 snl.nl_family = AF_NETLINK;
926 /* XXX Is this the best set to use? */
927 snl.nl_groups = (RTMGRP_IPV4_IFADDR | RTMGRP_TC | RTMGRP_IPV4_MROUTE
928 | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_RULE
929 | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_MROUTE
930 | RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFINFO
931 | RTMGRP_IPV6_PREFIX);
933 if (bind (nl_status_fd, (struct sockaddr *) &snl, sizeof (snl)) != 0)
935 close (nl_status_fd);
936 nl_status_fd = -1;
938 else
940 /* Start the timestamp process. */
941 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
942 = __bump_nl_timestamp ();
944 # ifndef __ASSUME_SOCK_CLOEXEC
945 if (have_sock_cloexec < 0)
947 /* We don't want to get stuck on accept. */
948 int fl = fcntl (nl_status_fd, F_GETFL);
949 if (fl == -1
950 || fcntl (nl_status_fd, F_SETFL, fl | O_NONBLOCK) == -1)
952 dbg_log (_("\
953 cannot change socket to nonblocking mode: %s"),
954 strerror (errno));
955 exit (1);
958 /* The descriptor needs to be closed on exec. */
959 if (paranoia
960 && fcntl (nl_status_fd, F_SETFD, FD_CLOEXEC) == -1)
962 dbg_log (_("cannot set socket to close on exec: %s"),
963 strerror (errno));
964 exit (1);
967 # endif
971 #endif
973 /* Change to unprivileged uid/gid/groups if specified in config file */
974 if (server_user != NULL)
975 finish_drop_privileges ();
979 /* Register the file in FINFO as a traced file for the database DBS[DBIDX].
981 We support registering multiple files per database. Each call to
982 register_traced_file adds to the list of registered files.
984 When we prune the database, either through timeout or a request to
985 invalidate, we will check to see if any of the registered files has changed.
986 When we accept new connections to handle a cache request we will also
987 check to see if any of the registered files has changed.
989 If we have inotify support then we install an inotify fd to notify us of
990 file deletion or modification, both of which will require we invalidate
991 the cache for the database. Without inotify support we stat the file and
992 store st_mtime to determine if the file has been modified. */
993 void
994 register_traced_file (size_t dbidx, struct traced_file *finfo)
996 /* If the database is disabled or file checking is disabled
997 then ignore the registration. */
998 if (! dbs[dbidx].enabled || ! dbs[dbidx].check_file)
999 return;
1001 if (__builtin_expect (debug_level > 0, 0))
1002 dbg_log (_("register trace file %s for database %s"),
1003 finfo->fname, dbnames[dbidx]);
/* With inotify support the stat fallback below is used only when there
   is no inotify descriptor or the watch could not be added; without
   inotify support it is always used.  */
1005 #ifdef HAVE_INOTIFY
1006 if (inotify_fd < 0
1007 || (finfo->inotify_descr = inotify_add_watch (inotify_fd, finfo->fname,
1008 IN_DELETE_SELF
1009 | IN_MODIFY)) < 0)
1010 #endif
1012 /* We need the modification date of the file. */
1013 struct stat64 st;
1015 if (stat64 (finfo->fname, &st) < 0)
1017 /* We cannot stat() the file, disable file checking. */
/* The early return means the file is not added to the list of traced
   files.  */
1018 dbg_log (_("cannot stat() file `%s': %s"),
1019 finfo->fname, strerror (errno));
1020 return;
/* Mark the entry as not watched through inotify and remember the
   modification time seen now.  */
1023 finfo->inotify_descr = -1;
1024 finfo->mtime = st.st_mtime;
1027 /* Queue up the file name. */
/* FINFO is linked in at the head of the database's list.  */
1028 finfo->next = dbs[dbidx].traced_files;
1029 dbs[dbidx].traced_files = finfo;
1033 /* Close the connections. */
/* Only the listening socket SOCK is closed here; the return value of
   close is ignored.  */
1034 void
1035 close_sockets (void)
1037 close (sock);
/* Handle a request to invalidate one database.  KEY is the database
   name, compared against the entries of dbnames; FD is the descriptor a
   status word is written to.

   For an unknown name EINVAL is written to FD.  For a known but disabled
   database 0 is written.  For an enabled database prune_cache is called
   with LONG_MAX as the time argument while prune_run_lock is held; FD is
   passed on to prune_cache (presumably so that it sends the reply itself
   -- TODO confirm).  */
1041 static void
1042 invalidate_cache (char *key, int fd)
1044 dbtype number;
1045 int32_t resp;
/* Look up the database named KEY; NUMBER ends up as lastdb if there is
   no match.  */
1047 for (number = pwddb; number < lastdb; ++number)
1048 if (strcmp (key, dbnames[number]) == 0)
/* For the hosts database call res_init once if any traced file has its
   call_res_init flag set.  */
1050 if (number == hstdb)
1052 struct traced_file *runp = dbs[hstdb].traced_files;
1053 while (runp != NULL)
1054 if (runp->call_res_init)
1056 res_init ();
1057 break;
1059 else
1060 runp = runp->next;
1062 break;
/* Unknown database name.  */
1065 if (number == lastdb)
1067 resp = EINVAL;
1068 writeall (fd, &resp, sizeof (resp));
1069 return;
1072 if (dbs[number].enabled)
1074 pthread_mutex_lock (&dbs[number].prune_run_lock);
1075 prune_cache (&dbs[number], LONG_MAX, fd);
1076 pthread_mutex_unlock (&dbs[number].prune_run_lock);
1078 else
/* Nothing to prune for a disabled database; report success.  */
1080 resp = 0;
1081 writeall (fd, &resp, sizeof (resp));
1086 #ifdef SCM_RIGHTS
/* Pass the read-only descriptor of database DB to the peer connected on
   FD as SCM_RIGHTS ancillary data of a single sendmsg call.  The regular
   payload of the message is KEY (including its terminating NUL) followed
   by a 64-bit value computed from the database header: data size plus
   the rounded-up hash table size plus the header size.

   Nothing is sent when the database has no read-only descriptor.  A
   failing sendmsg is ignored apart from being retried on EINTR.  Only
   compiled when SCM_RIGHTS is defined.  */
1087 static void
1088 send_ro_fd (struct database_dyn *db, char *key, int fd)
1090 /* If we do not have a read-only file descriptor do nothing. */
1091 if (db->ro_fd == -1)
1092 return;
1094 /* We need to send some data along with the descriptor. */
1095 uint64_t mapsize = (db->head->data_size
1096 + roundup (db->head->module * sizeof (ref_t), ALIGN)
1097 + sizeof (struct database_pers_head));
1098 struct iovec iov[2];
1099 iov[0].iov_base = key;
1100 iov[0].iov_len = strlen (key) + 1;
1101 iov[1].iov_base = &mapsize;
1102 iov[1].iov_len = sizeof (mapsize);
1104 /* Prepare the control message to transfer the descriptor. */
/* The union provides a buffer of CMSG_SPACE (sizeof (int)) bytes that is
   also suitably aligned for a struct cmsghdr.  */
1105 union
1107 struct cmsghdr hdr;
1108 char bytes[CMSG_SPACE (sizeof (int))];
1109 } buf;
1110 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
1111 .msg_control = buf.bytes,
1112 .msg_controllen = sizeof (buf) };
1113 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
1115 cmsg->cmsg_level = SOL_SOCKET;
1116 cmsg->cmsg_type = SCM_RIGHTS;
1117 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
/* The descriptor itself is the payload of the control message.  */
1119 int *ip = (int *) CMSG_DATA (cmsg);
1120 *ip = db->ro_fd;
/* Shrink the control length to the one message actually filled in.  */
1122 msg.msg_controllen = cmsg->cmsg_len;
1124 /* Send the control message. We repeat when we are interrupted but
1125 everything else is ignored. */
1126 #ifndef MSG_NOSIGNAL
1127 # define MSG_NOSIGNAL 0
1128 #endif
1129 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
1131 if (__builtin_expect (debug_level > 0, 0))
1132 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
1134 #endif /* SCM_RIGHTS */
1137 /* Handle new request. */
1138 static void
1139 handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
1141 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
1143 if (debug_level > 0)
1144 dbg_log (_("\
1145 cannot handle old request version %d; current version is %d"),
1146 req->version, NSCD_VERSION);
1147 return;
1150 /* Perform the SELinux check before we go on to the standard checks. */
1151 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
1153 if (debug_level > 0)
1155 #ifdef SO_PEERCRED
1156 # ifdef PATH_MAX
1157 char buf[PATH_MAX];
1158 # else
1159 char buf[4096];
1160 # endif
1162 snprintf (buf, sizeof (buf), "/proc/%ld/exe", (long int) pid);
1163 ssize_t n = readlink (buf, buf, sizeof (buf) - 1);
1165 if (n <= 0)
1166 dbg_log (_("\
1167 request from %ld not handled due to missing permission"), (long int) pid);
1168 else
1170 buf[n] = '\0';
1171 dbg_log (_("\
1172 request from '%s' [%ld] not handled due to missing permission"),
1173 buf, (long int) pid);
1175 #else
1176 dbg_log (_("request not handled due to missing permission"));
1177 #endif
1179 return;
1182 struct database_dyn *db = reqinfo[req->type].db;
1184 /* See whether we can service the request from the cache. */
1185 if (__builtin_expect (reqinfo[req->type].data_request, true))
1187 if (__builtin_expect (debug_level, 0) > 0)
1189 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
1191 char buf[INET6_ADDRSTRLEN];
1193 dbg_log ("\t%s (%s)", serv2str[req->type],
1194 inet_ntop (req->type == GETHOSTBYADDR
1195 ? AF_INET : AF_INET6,
1196 key, buf, sizeof (buf)));
1198 else
1199 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1202 /* Is this service enabled? */
1203 if (__builtin_expect (!db->enabled, 0))
1205 /* No, send the prepared record. */
1206 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1207 db->disabled_iov->iov_len,
1208 MSG_NOSIGNAL))
1209 != (ssize_t) db->disabled_iov->iov_len
1210 && __builtin_expect (debug_level, 0) > 0)
1212 /* We have problems sending the result. */
1213 char buf[256];
1214 dbg_log (_("cannot write result: %s"),
1215 strerror_r (errno, buf, sizeof (buf)));
1218 return;
1221 /* Be sure we can read the data. */
1222 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
1224 ++db->head->rdlockdelayed;
1225 pthread_rwlock_rdlock (&db->lock);
1228 /* See whether we can handle it from the cache. */
1229 struct datahead *cached;
1230 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1231 db, uid);
1232 if (cached != NULL)
1234 /* Hurray it's in the cache. */
1235 ssize_t nwritten;
1237 #ifdef HAVE_SENDFILE
1238 if (__builtin_expect (db->mmap_used, 1))
1240 assert (db->wr_fd != -1);
1241 assert ((char *) cached->data > (char *) db->data);
1242 assert ((char *) cached->data - (char *) db->head
1243 + cached->recsize
1244 <= (sizeof (struct database_pers_head)
1245 + db->head->module * sizeof (ref_t)
1246 + db->head->data_size));
1247 nwritten = sendfileall (fd, db->wr_fd,
1248 (char *) cached->data
1249 - (char *) db->head, cached->recsize);
1250 # ifndef __ASSUME_SENDFILE
1251 if (nwritten == -1 && errno == ENOSYS)
1252 goto use_write;
1253 # endif
1255 else
1256 # ifndef __ASSUME_SENDFILE
1257 use_write:
1258 # endif
1259 #endif
1260 nwritten = writeall (fd, cached->data, cached->recsize);
1262 if (nwritten != cached->recsize
1263 && __builtin_expect (debug_level, 0) > 0)
1265 /* We have problems sending the result. */
1266 char buf[256];
1267 dbg_log (_("cannot write result: %s"),
1268 strerror_r (errno, buf, sizeof (buf)));
1271 pthread_rwlock_unlock (&db->lock);
1273 return;
1276 pthread_rwlock_unlock (&db->lock);
1278 else if (__builtin_expect (debug_level, 0) > 0)
1280 if (req->type == INVALIDATE)
1281 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1282 else
1283 dbg_log ("\t%s", serv2str[req->type]);
1286 /* Handle the request. */
1287 switch (req->type)
1289 case GETPWBYNAME:
1290 addpwbyname (db, fd, req, key, uid);
1291 break;
1293 case GETPWBYUID:
1294 addpwbyuid (db, fd, req, key, uid);
1295 break;
1297 case GETGRBYNAME:
1298 addgrbyname (db, fd, req, key, uid);
1299 break;
1301 case GETGRBYGID:
1302 addgrbygid (db, fd, req, key, uid);
1303 break;
1305 case GETHOSTBYNAME:
1306 addhstbyname (db, fd, req, key, uid);
1307 break;
1309 case GETHOSTBYNAMEv6:
1310 addhstbynamev6 (db, fd, req, key, uid);
1311 break;
1313 case GETHOSTBYADDR:
1314 addhstbyaddr (db, fd, req, key, uid);
1315 break;
1317 case GETHOSTBYADDRv6:
1318 addhstbyaddrv6 (db, fd, req, key, uid);
1319 break;
1321 case GETAI:
1322 addhstai (db, fd, req, key, uid);
1323 break;
1325 case INITGROUPS:
1326 addinitgroups (db, fd, req, key, uid);
1327 break;
1329 case GETSERVBYNAME:
1330 addservbyname (db, fd, req, key, uid);
1331 break;
1333 case GETSERVBYPORT:
1334 addservbyport (db, fd, req, key, uid);
1335 break;
1337 case GETNETGRENT:
1338 addgetnetgrent (db, fd, req, key, uid);
1339 break;
1341 case INNETGR:
1342 addinnetgr (db, fd, req, key, uid);
1343 break;
1345 case GETSTAT:
1346 case SHUTDOWN:
1347 case INVALIDATE:
1349 /* Get the callers credentials. */
1350 #ifdef SO_PEERCRED
1351 struct ucred caller;
1352 socklen_t optlen = sizeof (caller);
1354 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1356 char buf[256];
1358 dbg_log (_("error getting caller's id: %s"),
1359 strerror_r (errno, buf, sizeof (buf)));
1360 break;
1363 uid = caller.uid;
1364 #else
1365 /* Some systems have no SO_PEERCRED implementation. They don't
1366 care about security so we don't as well. */
1367 uid = 0;
1368 #endif
1371 /* Accept shutdown, getstat and invalidate only from root. For
1372 the stat call also allow the user specified in the config file. */
1373 if (req->type == GETSTAT)
1375 if (uid == 0 || uid == stat_uid)
1376 send_stats (fd, dbs);
1378 else if (uid == 0)
1380 if (req->type == INVALIDATE)
1381 invalidate_cache (key, fd);
1382 else
1383 termination_handler (0);
1385 break;
1387 case GETFDPW:
1388 case GETFDGR:
1389 case GETFDHST:
1390 case GETFDSERV:
1391 case GETFDNETGR:
1392 #ifdef SCM_RIGHTS
1393 send_ro_fd (reqinfo[req->type].db, key, fd);
1394 #endif
1395 break;
1397 default:
1398 /* Ignore the command, it's nothing we know. */
1399 break;
1404 /* Restart the process. */
1405 static void
1406 restart (void)
1408 /* First determine the parameters. We do not use the parameters
1409 passed to main() since in case nscd is started by running the
1410 dynamic linker this will not work. Yes, this is not the usual
1411 case but nscd is part of glibc and we occasionally do this. */
1412 size_t buflen = 1024;
1413 char *buf = alloca (buflen);
1414 size_t readlen = 0;
1415 int fd = open ("/proc/self/cmdline", O_RDONLY);
1416 if (fd == -1)
1418 dbg_log (_("\
1419 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1420 strerror (errno));
1422 paranoia = 0;
1423 return;
1426 while (1)
1428 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1429 buflen - readlen));
1430 if (n == -1)
1432 dbg_log (_("\
1433 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1434 strerror (errno));
1436 close (fd);
1437 paranoia = 0;
1438 return;
1441 readlen += n;
1443 if (readlen < buflen)
1444 break;
1446 /* We might have to extend the buffer. */
1447 size_t old_buflen = buflen;
1448 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1449 buf = memmove (newp, buf, old_buflen);
1452 close (fd);
1454 /* Parse the command line. Worst case scenario: every two
1455 characters form one parameter (one character plus NUL). */
1456 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1457 int argc = 0;
1459 char *cp = buf;
1460 while (cp < buf + readlen)
1462 argv[argc++] = cp;
1463 cp = (char *) rawmemchr (cp, '\0') + 1;
1465 argv[argc] = NULL;
1467 /* Second, change back to the old user if we changed it. */
1468 if (server_user != NULL)
1470 if (setresuid (old_uid, old_uid, old_uid) != 0)
1472 dbg_log (_("\
1473 cannot change to old UID: %s; disabling paranoia mode"),
1474 strerror (errno));
1476 paranoia = 0;
1477 return;
1480 if (setresgid (old_gid, old_gid, old_gid) != 0)
1482 dbg_log (_("\
1483 cannot change to old GID: %s; disabling paranoia mode"),
1484 strerror (errno));
1486 setuid (server_uid);
1487 paranoia = 0;
1488 return;
1492 /* Next change back to the old working directory. */
1493 if (chdir (oldcwd) == -1)
1495 dbg_log (_("\
1496 cannot change to old working directory: %s; disabling paranoia mode"),
1497 strerror (errno));
1499 if (server_user != NULL)
1501 setuid (server_uid);
1502 setgid (server_gid);
1504 paranoia = 0;
1505 return;
1508 /* Synchronize memory. */
1509 int32_t certainly[lastdb];
1510 for (int cnt = 0; cnt < lastdb; ++cnt)
1511 if (dbs[cnt].enabled)
1513 /* Make sure nobody keeps using the database. */
1514 dbs[cnt].head->timestamp = 0;
1515 certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
1516 dbs[cnt].head->nscd_certainly_running = 0;
1518 if (dbs[cnt].persistent)
1519 // XXX async OK?
1520 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1523 /* The preparations are done. */
1524 #ifdef PATH_MAX
1525 char pathbuf[PATH_MAX];
1526 #else
1527 char pathbuf[256];
1528 #endif
1529 /* Try to exec the real nscd program so the process name (as reported
1530 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1531 if readlink or the exec with the result of the readlink call fails. */
1532 ssize_t n = readlink ("/proc/self/exe", pathbuf, sizeof (pathbuf) - 1);
1533 if (n != -1)
1535 pathbuf[n] = '\0';
1536 execv (pathbuf, argv);
1538 execv ("/proc/self/exe", argv);
1540 /* If we come here, we will never be able to re-exec. */
1541 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1542 strerror (errno));
1544 if (server_user != NULL)
1546 setuid (server_uid);
1547 setgid (server_gid);
1549 if (chdir ("/") != 0)
1550 dbg_log (_("cannot change current working directory to \"/\": %s"),
1551 strerror (errno));
1552 paranoia = 0;
1554 /* Reenable the databases. */
1555 time_t now = time (NULL);
1556 for (int cnt = 0; cnt < lastdb; ++cnt)
1557 if (dbs[cnt].enabled)
1559 dbs[cnt].head->timestamp = now;
1560 dbs[cnt].head->nscd_certainly_running = certainly[cnt];
1565 /* List of file descriptors. */
1566 struct fdlist
1568 int fd;
1569 struct fdlist *next;
1571 /* Memory allocated for the list. */
1572 static struct fdlist *fdlist;
1573 /* List of currently ready-to-read file descriptors. */
1574 static struct fdlist *readylist;
1576 /* Conditional variable and mutex to signal availability of entries in
1577 READYLIST. The condvar is initialized dynamically since we might
1578 use a different clock depending on availability. */
1579 static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
1580 static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1582 /* The clock to use with the condvar. */
1583 static clockid_t timeout_clock = CLOCK_REALTIME;
1585 /* Number of threads ready to handle the READYLIST. */
1586 static unsigned long int nready;
1589 /* Function for the clean-up threads. */
1590 static void *
1591 __attribute__ ((__noreturn__))
1592 nscd_run_prune (void *p)
1594 const long int my_number = (long int) p;
1595 assert (dbs[my_number].enabled);
1597 int dont_need_update = setup_thread (&dbs[my_number]);
1599 time_t now = time (NULL);
1601 /* We are running. */
1602 dbs[my_number].head->timestamp = now;
1604 struct timespec prune_ts;
1605 if (__builtin_expect (clock_gettime (timeout_clock, &prune_ts) == -1, 0))
1606 /* Should never happen. */
1607 abort ();
1609 /* Compute the initial timeout time. Prevent all the timers to go
1610 off at the same time by adding a db-based value. */
1611 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1612 dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;
1614 pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
1615 pthread_mutex_t *prune_run_lock = &dbs[my_number].prune_run_lock;
1616 pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;
1618 pthread_mutex_lock (prune_lock);
1619 while (1)
1621 /* Wait, but not forever. */
1622 int e = 0;
1623 if (! dbs[my_number].clear_cache)
1624 e = pthread_cond_timedwait (prune_cond, prune_lock, &prune_ts);
1625 assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));
1627 time_t next_wait;
1628 now = time (NULL);
1629 if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time
1630 || dbs[my_number].clear_cache)
1632 /* We will determine the new timout values based on the
1633 cache content. Should there be concurrent additions to
1634 the cache which are not accounted for in the cache
1635 pruning we want to know about it. Therefore set the
1636 timeout to the maximum. It will be descreased when adding
1637 new entries to the cache, if necessary. */
1638 dbs[my_number].wakeup_time = MAX_TIMEOUT_VALUE;
1640 /* Unconditionally reset the flag. */
1641 time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
1642 dbs[my_number].clear_cache = 0;
1644 pthread_mutex_unlock (prune_lock);
1646 /* We use a separate lock for running the prune function (instead
1647 of keeping prune_lock locked) because this enables concurrent
1648 invocations of cache_add which might modify the timeout value. */
1649 pthread_mutex_lock (prune_run_lock);
1650 next_wait = prune_cache (&dbs[my_number], prune_now, -1);
1651 pthread_mutex_unlock (prune_run_lock);
1653 next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1654 /* If clients cannot determine for sure whether nscd is running
1655 we need to wake up occasionally to update the timestamp.
1656 Wait 90% of the update period. */
1657 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1658 if (__builtin_expect (! dont_need_update, 0))
1660 next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1661 dbs[my_number].head->timestamp = now;
1664 pthread_mutex_lock (prune_lock);
1666 /* Make it known when we will wake up again. */
1667 if (now + next_wait < dbs[my_number].wakeup_time)
1668 dbs[my_number].wakeup_time = now + next_wait;
1669 else
1670 next_wait = dbs[my_number].wakeup_time - now;
1672 else
1673 /* The cache was just pruned. Do not do it again now. Just
1674 use the new timeout value. */
1675 next_wait = dbs[my_number].wakeup_time - now;
1677 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1678 /* Should never happen. */
1679 abort ();
1681 /* Compute next timeout time. */
1682 prune_ts.tv_sec += next_wait;
1687 /* This is the main loop. It is replicated in different threads but
1688 the use of the ready list makes sure only one thread handles an
1689 incoming connection. */
1690 static void *
1691 __attribute__ ((__noreturn__))
1692 nscd_run_worker (void *p)
1694 char buf[256];
1696 /* Initial locking. */
1697 pthread_mutex_lock (&readylist_lock);
1699 /* One more thread available. */
1700 ++nready;
1702 while (1)
1704 while (readylist == NULL)
1705 pthread_cond_wait (&readylist_cond, &readylist_lock);
1707 struct fdlist *it = readylist->next;
1708 if (readylist->next == readylist)
1709 /* Just one entry on the list. */
1710 readylist = NULL;
1711 else
1712 readylist->next = it->next;
1714 /* Extract the information and mark the record ready to be used
1715 again. */
1716 int fd = it->fd;
1717 it->next = NULL;
1719 /* One more thread available. */
1720 --nready;
1722 /* We are done with the list. */
1723 pthread_mutex_unlock (&readylist_lock);
1725 #ifndef __ASSUME_ACCEPT4
1726 if (have_accept4 < 0)
1728 /* We do not want to block on a short read or so. */
1729 int fl = fcntl (fd, F_GETFL);
1730 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1731 goto close_and_out;
1733 #endif
1735 /* Now read the request. */
1736 request_header req;
1737 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1738 != sizeof (req), 0))
1740 /* We failed to read data. Note that this also might mean we
1741 failed because we would have blocked. */
1742 if (debug_level > 0)
1743 dbg_log (_("short read while reading request: %s"),
1744 strerror_r (errno, buf, sizeof (buf)));
1745 goto close_and_out;
1748 /* Check whether this is a valid request type. */
1749 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1750 goto close_and_out;
1752 /* Some systems have no SO_PEERCRED implementation. They don't
1753 care about security so we don't as well. */
1754 uid_t uid = -1;
1755 #ifdef SO_PEERCRED
1756 pid_t pid = 0;
1758 if (__builtin_expect (debug_level > 0, 0))
1760 struct ucred caller;
1761 socklen_t optlen = sizeof (caller);
1763 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1764 pid = caller.pid;
1766 #else
1767 const pid_t pid = 0;
1768 #endif
1770 /* It should not be possible to crash the nscd with a silly
1771 request (i.e., a terribly large key). We limit the size to 1kb. */
1772 if (__builtin_expect (req.key_len, 1) < 0
1773 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1775 if (debug_level > 0)
1776 dbg_log (_("key length in request too long: %d"), req.key_len);
1778 else
1780 /* Get the key. */
1781 char keybuf[MAXKEYLEN];
1783 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1784 req.key_len))
1785 != req.key_len, 0))
1787 /* Again, this can also mean we would have blocked. */
1788 if (debug_level > 0)
1789 dbg_log (_("short read while reading request key: %s"),
1790 strerror_r (errno, buf, sizeof (buf)));
1791 goto close_and_out;
1794 if (__builtin_expect (debug_level, 0) > 0)
1796 #ifdef SO_PEERCRED
1797 if (pid != 0)
1798 dbg_log (_("\
1799 handle_request: request received (Version = %d) from PID %ld"),
1800 req.version, (long int) pid);
1801 else
1802 #endif
1803 dbg_log (_("\
1804 handle_request: request received (Version = %d)"), req.version);
1807 /* Phew, we got all the data, now process it. */
1808 handle_request (fd, &req, keybuf, uid, pid);
1811 close_and_out:
1812 /* We are done. */
1813 close (fd);
1815 /* Re-locking. */
1816 pthread_mutex_lock (&readylist_lock);
1818 /* One more thread available. */
1819 ++nready;
1821 /* NOTREACHED */
1825 static unsigned int nconns;
1827 static void
1828 fd_ready (int fd)
1830 pthread_mutex_lock (&readylist_lock);
1832 /* Find an empty entry in FDLIST. */
1833 size_t inner;
1834 for (inner = 0; inner < nconns; ++inner)
1835 if (fdlist[inner].next == NULL)
1836 break;
1837 assert (inner < nconns);
1839 fdlist[inner].fd = fd;
1841 if (readylist == NULL)
1842 readylist = fdlist[inner].next = &fdlist[inner];
1843 else
1845 fdlist[inner].next = readylist->next;
1846 readylist = readylist->next = &fdlist[inner];
1849 bool do_signal = true;
1850 if (__builtin_expect (nready == 0, 0))
1852 ++client_queued;
1853 do_signal = false;
1855 /* Try to start another thread to help out. */
1856 pthread_t th;
1857 if (nthreads < max_nthreads
1858 && pthread_create (&th, &attr, nscd_run_worker,
1859 (void *) (long int) nthreads) == 0)
1861 /* We got another thread. */
1862 ++nthreads;
1863 /* The new thread might need a kick. */
1864 do_signal = true;
1869 pthread_mutex_unlock (&readylist_lock);
1871 /* Tell one of the worker threads there is work to do. */
1872 if (do_signal)
1873 pthread_cond_signal (&readylist_cond);
1877 /* Check whether restarting should happen. */
1878 static bool
1879 restart_p (time_t now)
1881 return (paranoia && readylist == NULL && nready == nthreads
1882 && now >= restart_time);
1886 /* Array for times a connection was accepted. */
1887 static time_t *starttime;
1889 #ifdef HAVE_INOTIFY
1890 /* Inotify event for changed file. */
1891 union __inev
1893 struct inotify_event i;
1894 # ifndef PATH_MAX
1895 # define PATH_MAX 1024
1896 # endif
1897 char buf[sizeof (struct inotify_event) + PATH_MAX];
1900 /* Process the inotify event in INEV. If the event matches any of the files
1901 registered with a database then mark that database as requiring its cache
1902 to be cleared. We indicate the cache needs clearing by setting
1903 TO_CLEAR[DBCNT] to true for the matching database. */
1904 static inline void
1905 inotify_check_files (bool *to_clear, union __inev *inev)
1907 /* Check which of the files changed. */
1908 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1910 struct traced_file *finfo = dbs[dbcnt].traced_files;
1912 while (finfo != NULL)
1914 /* Inotify event watch descriptor matches. */
1915 if (finfo->inotify_descr == inev->i.wd)
1917 /* Mark cache as needing to be cleared and reinitialize. */
1918 to_clear[dbcnt] = true;
1919 if (finfo->call_res_init)
1920 res_init ();
1921 return;
1924 finfo = finfo->next;
1929 /* If an entry in the array of booleans TO_CLEAR is TRUE then clear the cache
1930 for the associated database, otherwise do nothing. The TO_CLEAR array must
1931 have LASTDB entries. */
1932 static inline void
1933 clear_db_cache (bool *to_clear)
1935 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1936 if (to_clear[dbcnt])
1938 pthread_mutex_lock (&dbs[dbcnt].prune_lock);
1939 dbs[dbcnt].clear_cache = 1;
1940 pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
1941 pthread_cond_signal (&dbs[dbcnt].prune_cond);
1945 #endif
1947 static void
1948 __attribute__ ((__noreturn__))
1949 main_loop_poll (void)
1951 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1952 * sizeof (conns[0]));
1954 conns[0].fd = sock;
1955 conns[0].events = POLLRDNORM;
1956 size_t nused = 1;
1957 size_t firstfree = 1;
1959 #ifdef HAVE_INOTIFY
1960 if (inotify_fd != -1)
1962 conns[1].fd = inotify_fd;
1963 conns[1].events = POLLRDNORM;
1964 nused = 2;
1965 firstfree = 2;
1967 #endif
1969 #ifdef HAVE_NETLINK
1970 size_t idx_nl_status_fd = 0;
1971 if (nl_status_fd != -1)
1973 idx_nl_status_fd = nused;
1974 conns[nused].fd = nl_status_fd;
1975 conns[nused].events = POLLRDNORM;
1976 ++nused;
1977 firstfree = nused;
1979 #endif
1981 while (1)
1983 /* Wait for any event. We wait at most a couple of seconds so
1984 that we can check whether we should close any of the accepted
1985 connections since we have not received a request. */
1986 #define MAX_ACCEPT_TIMEOUT 30
1987 #define MIN_ACCEPT_TIMEOUT 5
1988 #define MAIN_THREAD_TIMEOUT \
1989 (MAX_ACCEPT_TIMEOUT * 1000 \
1990 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1992 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1994 time_t now = time (NULL);
1996 /* If there is a descriptor ready for reading or there is a new
1997 connection, process this now. */
1998 if (n > 0)
2000 if (conns[0].revents != 0)
2002 /* We have a new incoming connection. Accept the connection. */
2003 int fd;
2005 #ifndef __ASSUME_ACCEPT4
2006 fd = -1;
2007 if (have_accept4 >= 0)
2008 #endif
2010 fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
2011 SOCK_NONBLOCK));
2012 #ifndef __ASSUME_ACCEPT4
2013 if (have_accept4 == 0)
2014 have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
2015 #endif
2017 #ifndef __ASSUME_ACCEPT4
2018 if (have_accept4 < 0)
2019 fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
2020 #endif
2022 /* Use the descriptor if we have not reached the limit. */
2023 if (fd >= 0)
2025 if (firstfree < nconns)
2027 conns[firstfree].fd = fd;
2028 conns[firstfree].events = POLLRDNORM;
2029 starttime[firstfree] = now;
2030 if (firstfree >= nused)
2031 nused = firstfree + 1;
2034 ++firstfree;
2035 while (firstfree < nused && conns[firstfree].fd != -1);
2037 else
2038 /* We cannot use the connection so close it. */
2039 close (fd);
2042 --n;
2045 size_t first = 1;
2046 #ifdef HAVE_INOTIFY
2047 if (inotify_fd != -1 && conns[1].fd == inotify_fd)
2049 if (conns[1].revents != 0)
2051 bool to_clear[lastdb] = { false, };
2052 union __inev inev;
2054 /* Read all inotify events for files registered via
2055 register_traced_file(). */
2056 while (1)
2058 ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
2059 sizeof (inev)));
2060 if (nb < (ssize_t) sizeof (struct inotify_event))
2062 if (__builtin_expect (nb == -1 && errno != EAGAIN,
2065 /* Something went wrong when reading the inotify
2066 data. Better disable inotify. */
2067 dbg_log (_("\
2068 disabled inotify after read error %d"),
2069 errno);
2070 conns[1].fd = -1;
2071 firstfree = 1;
2072 if (nused == 2)
2073 nused = 1;
2074 close (inotify_fd);
2075 inotify_fd = -1;
2077 break;
2080 /* Check which of the files changed. */
2081 inotify_check_files (to_clear, &inev);
2084 /* Actually perform the cache clearing. */
2085 clear_db_cache (to_clear);
2087 --n;
2090 first = 2;
2092 #endif
2094 #ifdef HAVE_NETLINK
2095 if (idx_nl_status_fd != 0 && conns[idx_nl_status_fd].revents != 0)
2097 char buf[4096];
2098 /* Read all the data. We do not interpret it here. */
2099 while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2100 sizeof (buf))) != -1)
2103 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
2104 = __bump_nl_timestamp ();
2106 #endif
2108 for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
2109 if (conns[cnt].revents != 0)
2111 fd_ready (conns[cnt].fd);
2113 /* Clean up the CONNS array. */
2114 conns[cnt].fd = -1;
2115 if (cnt < firstfree)
2116 firstfree = cnt;
2117 if (cnt == nused - 1)
2119 --nused;
2120 while (conns[nused - 1].fd == -1);
2122 --n;
2126 /* Now find entries which have timed out. */
2127 assert (nused > 0);
2129 /* We make the timeout length depend on the number of file
2130 descriptors currently used. */
2131 #define ACCEPT_TIMEOUT \
2132 (MAX_ACCEPT_TIMEOUT \
2133 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
2134 time_t laststart = now - ACCEPT_TIMEOUT;
2136 for (size_t cnt = nused - 1; cnt > 0; --cnt)
2138 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
2140 /* Remove the entry, it timed out. */
2141 (void) close (conns[cnt].fd);
2142 conns[cnt].fd = -1;
2144 if (cnt < firstfree)
2145 firstfree = cnt;
2146 if (cnt == nused - 1)
2148 --nused;
2149 while (conns[nused - 1].fd == -1);
2153 if (restart_p (now))
2154 restart ();
#ifdef HAVE_EPOLL
/* Main loop of the main thread, based on epoll with the epoll
   descriptor EFD.  The function returns only if one of the permanent
   descriptors (listening socket, inotify, netlink) cannot be registered
   with EFD; the caller then falls back to the poll based loop.

   Accepted connections are tracked by descriptor number: STARTTIME[fd]
   is nonzero while the connection waits for its request and HIGHEST is
   an upper bound for the descriptors with a nonzero entry.  NUSED counts
   the tracked descriptors and only influences the timeouts.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  int nused = 1;
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

# ifdef HAVE_INOTIFY
  if (inotify_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = inotify_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
        /* We cannot use epoll.  */
        return;
      nused = 2;
    }
# endif

# ifdef HAVE_NETLINK
  if (nl_status_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = nl_status_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, nl_status_fd, &ev) == -1)
        /* We cannot use epoll.  */
        return;
    }
# endif

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        {
          if (revs[cnt].data.fd == sock)
            {
              /* A new connection.  */
              int fd;

# ifndef __ASSUME_ACCEPT4
              fd = -1;
              if (have_accept4 >= 0)
# endif
                {
                  fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
                                                    SOCK_NONBLOCK));
# ifndef __ASSUME_ACCEPT4
                  /* Remember whether accept4 is implemented.  */
                  if (have_accept4 == 0)
                    have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
# endif
                }
# ifndef __ASSUME_ACCEPT4
              if (have_accept4 < 0)
                fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
# endif

              /* Use the descriptor if we have not reached the limit.  */
              if (fd >= 0)
                {
                  /* Try to add the new descriptor.  */
                  ev.data.fd = fd;
                  if (fd >= nconns
                      || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                    /* The descriptor is too large or something went
                       wrong.  Close the descriptor.  */
                    close (fd);
                  else
                    {
                      /* Remember when we accepted the connection.  */
                      starttime[fd] = now;

                      if (fd > highest)
                        highest = fd;

                      ++nused;
                    }
                }
            }
# ifdef HAVE_INOTIFY
          else if (revs[cnt].data.fd == inotify_fd)
            {
              bool to_clear[lastdb] = { false, };
              union __inev inev;

              /* Read all inotify events for files registered via
                 register_traced_file().  */
              while (1)
                {
                  ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
                                                         sizeof (inev)));
                  if (nb < (ssize_t) sizeof (struct inotify_event))
                    {
                      if (__builtin_expect (nb == -1 && errno != EAGAIN, 0))
                        {
                          /* Something went wrong when reading the inotify
                             data.  Better disable inotify.  */
                          dbg_log (_("disabled inotify after read error %d"),
                                   errno);
                          (void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd,
                                            NULL);
                          close (inotify_fd);
                          inotify_fd = -1;
                        }
                      break;
                    }

                  /* Check which of the files changed.  */
                  inotify_check_files (to_clear, &inev);
                }

              /* Actually perform the cache clearing.  */
              clear_db_cache (to_clear);
            }
# endif
# ifdef HAVE_NETLINK
          else if (revs[cnt].data.fd == nl_status_fd)
            {
              char buf[4096];
              /* Read all the data.  We do not interpret it here.  */
              while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
                                               sizeof (buf))) != -1)
                ;

              /* Record the new timestamp in the hosts database, exactly
                 as the poll based loop does; merely bumping the counter
                 would leave the stored value stale.  */
              dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
                = __bump_nl_timestamp ();
            }
# endif
          else
            {
              /* Remove the descriptor from the epoll descriptor.  */
              (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

              /* Get a worker to handle the request.  */
              fd_ready (revs[cnt].data.fd);

              /* Reset the time.  */
              starttime[revs[cnt].data.fd] = 0;
              if (revs[cnt].data.fd == highest)
                do
                  --highest;
                while (highest > 0 && starttime[highest] == 0);

              --nused;
            }
        }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      assert (starttime[sock] == 0);
      assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
      assert (nl_status_fd == -1 || starttime[nl_status_fd] == 0);
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;

            /* The connection is gone, so it no longer counts as used.
               Without this the count only ever grows when connections
               time out, which shortens the timeouts more and more.  */
            --nused;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
2345 /* Start all the threads we want. The initial process is thread no. 1. */
2346 void
2347 start_threads (void)
2349 /* Initialize the conditional variable we will use. The only
2350 non-standard attribute we might use is the clock selection. */
2351 pthread_condattr_t condattr;
2352 pthread_condattr_init (&condattr);
2354 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2355 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2356 /* Determine whether the monotonous clock is available. */
2357 struct timespec dummy;
2358 # if _POSIX_MONOTONIC_CLOCK == 0
2359 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
2360 # endif
2361 # if _POSIX_CLOCK_SELECTION == 0
2362 if (sysconf (_SC_CLOCK_SELECTION) > 0)
2363 # endif
2364 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
2365 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
2366 timeout_clock = CLOCK_MONOTONIC;
2367 #endif
2369 /* Create the attribute for the threads. They are all created
2370 detached. */
2371 pthread_attr_init (&attr);
2372 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
2373 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2374 pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
2376 /* We allow less than LASTDB threads only for debugging. */
2377 if (debug_level == 0)
2378 nthreads = MAX (nthreads, lastdb);
2380 /* Create the threads which prune the databases. */
2381 // XXX Ideally this work would be done by some of the worker threads.
2382 // XXX But this is problematic since we would need to be able to wake
2383 // XXX them up explicitly as well as part of the group handling the
2384 // XXX ready-list. This requires an operation where we can wait on
2385 // XXX two conditional variables at the same time. This operation
2386 // XXX does not exist (yet).
2387 for (long int i = 0; i < lastdb; ++i)
2389 /* Initialize the conditional variable. */
2390 if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
2392 dbg_log (_("could not initialize conditional variable"));
2393 exit (1);
2396 pthread_t th;
2397 if (dbs[i].enabled
2398 && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
2400 dbg_log (_("could not start clean-up thread; terminating"));
2401 exit (1);
2405 pthread_condattr_destroy (&condattr);
2407 for (long int i = 0; i < nthreads; ++i)
2409 pthread_t th;
2410 if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
2412 if (i == 0)
2414 dbg_log (_("could not start any worker thread; terminating"));
2415 exit (1);
2418 break;
2422 /* Determine how much room for descriptors we should initially
2423 allocate. This might need to change later if we cap the number
2424 with MAXCONN. */
2425 const long int nfds = sysconf (_SC_OPEN_MAX);
2426 #define MINCONN 32
2427 #define MAXCONN 16384
2428 if (nfds == -1 || nfds > MAXCONN)
2429 nconns = MAXCONN;
2430 else if (nfds < MINCONN)
2431 nconns = MINCONN;
2432 else
2433 nconns = nfds;
2435 /* We need memory to pass descriptors on to the worker threads. */
2436 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
2437 /* Array to keep track when connection was accepted. */
2438 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
2440 /* In the main thread we execute the loop which handles incoming
2441 connections. */
2442 #ifdef HAVE_EPOLL
2443 int efd = epoll_create (100);
2444 if (efd != -1)
2446 main_loop_epoll (efd);
2447 close (efd);
2449 #endif
2451 main_loop_poll ();
2455 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2456 this function is called, we are not listening on the nscd socket yet so
2457 we can just use the ordinary lookup functions without causing a lockup */
2458 static void
2459 begin_drop_privileges (void)
2461 struct passwd *pwd = getpwnam (server_user);
2463 if (pwd == NULL)
2465 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2466 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
2467 server_user);
2470 server_uid = pwd->pw_uid;
2471 server_gid = pwd->pw_gid;
2473 /* Save the old UID/GID if we have to change back. */
2474 if (paranoia)
2476 old_uid = getuid ();
2477 old_gid = getgid ();
2480 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
2482 /* This really must never happen. */
2483 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2484 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
2487 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
2489 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
2490 == -1)
2492 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2493 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
2498 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2499 run nscd as the user specified in the configuration file. */
2500 static void
2501 finish_drop_privileges (void)
2503 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2504 /* We need to preserve the capabilities to connect to the audit daemon. */
2505 cap_t new_caps = preserve_capabilities ();
2506 #endif
2508 if (setgroups (server_ngroups, server_groups) == -1)
2510 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2511 error (EXIT_FAILURE, errno, _("setgroups failed"));
2514 int res;
2515 if (paranoia)
2516 res = setresgid (server_gid, server_gid, old_gid);
2517 else
2518 res = setgid (server_gid);
2519 if (res == -1)
2521 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2522 perror ("setgid");
2523 exit (4);
2526 if (paranoia)
2527 res = setresuid (server_uid, server_uid, old_uid);
2528 else
2529 res = setuid (server_uid);
2530 if (res == -1)
2532 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2533 perror ("setuid");
2534 exit (4);
2537 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2538 /* Remove the temporary capabilities. */
2539 install_real_capabilities (new_caps);
2540 #endif