/* Source: glibc.git, nscd/connections.c
   (blob f3b16f7246eb864a49a43c90ce646f7724d9255a), as of the commit
   "Avoid array-bounds warning for strncat on i586 (bug 20260)".  */
/* Inner loops of cache daemon.
   Copyright (C) 1998-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.  */
19 #include <alloca.h>
20 #include <assert.h>
21 #include <atomic.h>
22 #include <error.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <grp.h>
26 #include <ifaddrs.h>
27 #include <libintl.h>
28 #include <pthread.h>
29 #include <pwd.h>
30 #include <resolv.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <stdint.h>
35 #include <arpa/inet.h>
36 #ifdef HAVE_NETLINK
37 # include <linux/netlink.h>
38 # include <linux/rtnetlink.h>
39 #endif
40 #ifdef HAVE_EPOLL
41 # include <sys/epoll.h>
42 #endif
43 #ifdef HAVE_INOTIFY
44 # include <sys/inotify.h>
45 #endif
46 #include <sys/mman.h>
47 #include <sys/param.h>
48 #include <sys/poll.h>
49 #ifdef HAVE_SENDFILE
50 # include <sys/sendfile.h>
51 #endif
52 #include <sys/socket.h>
53 #include <sys/stat.h>
54 #include <sys/un.h>
56 #include "nscd.h"
57 #include "dbg_log.h"
58 #include "selinux.h"
59 #include <resolv/resolv.h>
61 #include <kernel-features.h>
62 #include <libc-internal.h>
/* Support to run nscd as an unprivileged user.  When SERVER_USER is
   non-NULL, nscd_init calls begin_drop_privileges before the sockets
   are set up and finish_drop_privileges afterwards.  */
const char *server_user;
/* NOTE(review): presumably filled in by begin_drop_privileges from the
   SERVER_USER account; their use is not visible in this part of the
   file.  */
static uid_t server_uid;
static gid_t server_gid;
const char *stat_user;
uid_t stat_uid;
static gid_t *server_groups;
#ifndef NGROUPS
# define NGROUPS 32
#endif
static int server_ngroups;

static pthread_attr_t attr;

static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
82 /* Map request type to a string. */
83 const char *const serv2str[LASTREQ] =
85 [GETPWBYNAME] = "GETPWBYNAME",
86 [GETPWBYUID] = "GETPWBYUID",
87 [GETGRBYNAME] = "GETGRBYNAME",
88 [GETGRBYGID] = "GETGRBYGID",
89 [GETHOSTBYNAME] = "GETHOSTBYNAME",
90 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
91 [GETHOSTBYADDR] = "GETHOSTBYADDR",
92 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
93 [SHUTDOWN] = "SHUTDOWN",
94 [GETSTAT] = "GETSTAT",
95 [INVALIDATE] = "INVALIDATE",
96 [GETFDPW] = "GETFDPW",
97 [GETFDGR] = "GETFDGR",
98 [GETFDHST] = "GETFDHST",
99 [GETAI] = "GETAI",
100 [INITGROUPS] = "INITGROUPS",
101 [GETSERVBYNAME] = "GETSERVBYNAME",
102 [GETSERVBYPORT] = "GETSERVBYPORT",
103 [GETFDSERV] = "GETFDSERV",
104 [GETNETGRENT] = "GETNETGRENT",
105 [INNETGR] = "INNETGR",
106 [GETFDNETGR] = "GETFDNETGR"
109 /* The control data structures for the services. */
110 struct database_dyn dbs[lastdb] =
112 [pwddb] = {
113 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
114 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
115 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
116 .enabled = 0,
117 .check_file = 1,
118 .persistent = 0,
119 .propagate = 1,
120 .shared = 0,
121 .max_db_size = DEFAULT_MAX_DB_SIZE,
122 .suggested_module = DEFAULT_SUGGESTED_MODULE,
123 .db_filename = _PATH_NSCD_PASSWD_DB,
124 .disabled_iov = &pwd_iov_disabled,
125 .postimeout = 3600,
126 .negtimeout = 20,
127 .wr_fd = -1,
128 .ro_fd = -1,
129 .mmap_used = false
131 [grpdb] = {
132 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
133 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
134 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
135 .enabled = 0,
136 .check_file = 1,
137 .persistent = 0,
138 .propagate = 1,
139 .shared = 0,
140 .max_db_size = DEFAULT_MAX_DB_SIZE,
141 .suggested_module = DEFAULT_SUGGESTED_MODULE,
142 .db_filename = _PATH_NSCD_GROUP_DB,
143 .disabled_iov = &grp_iov_disabled,
144 .postimeout = 3600,
145 .negtimeout = 60,
146 .wr_fd = -1,
147 .ro_fd = -1,
148 .mmap_used = false
150 [hstdb] = {
151 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
152 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
153 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
154 .enabled = 0,
155 .check_file = 1,
156 .persistent = 0,
157 .propagate = 0, /* Not used. */
158 .shared = 0,
159 .max_db_size = DEFAULT_MAX_DB_SIZE,
160 .suggested_module = DEFAULT_SUGGESTED_MODULE,
161 .db_filename = _PATH_NSCD_HOSTS_DB,
162 .disabled_iov = &hst_iov_disabled,
163 .postimeout = 3600,
164 .negtimeout = 20,
165 .wr_fd = -1,
166 .ro_fd = -1,
167 .mmap_used = false
169 [servdb] = {
170 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
171 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
172 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
173 .enabled = 0,
174 .check_file = 1,
175 .persistent = 0,
176 .propagate = 0, /* Not used. */
177 .shared = 0,
178 .max_db_size = DEFAULT_MAX_DB_SIZE,
179 .suggested_module = DEFAULT_SUGGESTED_MODULE,
180 .db_filename = _PATH_NSCD_SERVICES_DB,
181 .disabled_iov = &serv_iov_disabled,
182 .postimeout = 28800,
183 .negtimeout = 20,
184 .wr_fd = -1,
185 .ro_fd = -1,
186 .mmap_used = false
188 [netgrdb] = {
189 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
190 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
191 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
192 .enabled = 0,
193 .check_file = 1,
194 .persistent = 0,
195 .propagate = 0, /* Not used. */
196 .shared = 0,
197 .max_db_size = DEFAULT_MAX_DB_SIZE,
198 .suggested_module = DEFAULT_SUGGESTED_MODULE,
199 .db_filename = _PATH_NSCD_NETGROUP_DB,
200 .disabled_iov = &netgroup_iov_disabled,
201 .postimeout = 28800,
202 .negtimeout = 20,
203 .wr_fd = -1,
204 .ro_fd = -1,
205 .mmap_used = false
210 /* Mapping of request type to database. */
211 static struct
213 bool data_request;
214 struct database_dyn *db;
215 } const reqinfo[LASTREQ] =
217 [GETPWBYNAME] = { true, &dbs[pwddb] },
218 [GETPWBYUID] = { true, &dbs[pwddb] },
219 [GETGRBYNAME] = { true, &dbs[grpdb] },
220 [GETGRBYGID] = { true, &dbs[grpdb] },
221 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
222 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
223 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
224 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
225 [SHUTDOWN] = { false, NULL },
226 [GETSTAT] = { false, NULL },
227 [SHUTDOWN] = { false, NULL },
228 [GETFDPW] = { false, &dbs[pwddb] },
229 [GETFDGR] = { false, &dbs[grpdb] },
230 [GETFDHST] = { false, &dbs[hstdb] },
231 [GETAI] = { true, &dbs[hstdb] },
232 [INITGROUPS] = { true, &dbs[grpdb] },
233 [GETSERVBYNAME] = { true, &dbs[servdb] },
234 [GETSERVBYPORT] = { true, &dbs[servdb] },
235 [GETFDSERV] = { false, &dbs[servdb] },
236 [GETNETGRENT] = { true, &dbs[netgrdb] },
237 [INNETGR] = { true, &dbs[netgrdb] },
238 [GETFDNETGR] = { false, &dbs[netgrdb] }
/* Initial number of threads to use.  The value -1 means "not
   configured"; nscd_init replaces it with a default.  */
int nthreads = -1;
/* Maximum number of threads to use.  */
int max_nthreads = 32;

/* Socket for incoming connections.  */
static int sock;

#ifdef HAVE_INOTIFY
/* Inotify descriptor.  */
int inotify_fd = -1;
#endif

#ifdef HAVE_NETLINK
/* Descriptor for netlink status updates.  */
static int nl_status_fd = -1;
#endif

#ifndef __ASSUME_ACCEPT4
static int have_accept4;
#endif

/* Number of times clients had to wait.  */
unsigned long int client_queued;
/* Send all LEN bytes of BUF over the socket FD, restarting after
   interrupts and continuing after short writes.  Returns the negative
   result of send if the last call failed, otherwise the number of
   bytes actually sent (less than LEN if send returned zero).  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  size_t n = len;
  ssize_t ret;

  do
    {
      ret = TEMP_FAILURE_RETRY (send (fd, buf, n, MSG_NOSIGNAL));
      if (ret <= 0)
        break;
      buf = (const char *) buf + ret;
      n -= ret;
    }
  while (n > 0);
  return ret < 0 ? ret : len - n;
}
#ifdef HAVE_SENDFILE
/* Copy LEN bytes starting at offset OFF of FROMFD to TOFD using
   sendfile, restarting after interrupts and continuing after short
   transfers.  Returns the negative result of sendfile if the last
   call failed, otherwise the number of bytes transferred.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t n = len;
  ssize_t ret;

  do
    {
      /* sendfile advances OFF by the number of bytes it copied.  */
      ret = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, n));
      if (ret <= 0)
        break;
      n -= ret;
    }
  while (n > 0);
  return ret < 0 ? ret : len - n;
}
#endif
/* Markers stored in the per-byte usage map which check_use maintains
   while a persistent database is verified.  The low bits tell what
   kind of object occupies a byte, the high bits mark the first and
   last byte of the object.  */
enum usekey
  {
    use_not = 0,
    /* The following three are not really used, they are symbolic constants.  */
    use_first = 16,
    use_begin = 32,
    use_end = 64,

    use_he = 1,
    use_he_begin = use_he | use_begin,
    use_he_end = use_he | use_end,
    use_data = 3,
    use_data_begin = use_data | use_begin,
    use_data_end = use_data | use_end,
    use_data_first = use_data_begin | use_first
  };
324 static int
325 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
326 enum usekey use, ref_t start, size_t len)
328 assert (len >= 2);
330 if (start > first_free || start + len > first_free
331 || (start & BLOCK_ALIGN_M1))
332 return 0;
334 if (usemap[start] == use_not)
336 /* Add the start marker. */
337 usemap[start] = use | use_begin;
338 use &= ~use_first;
340 while (--len > 0)
341 if (usemap[++start] != use_not)
342 return 0;
343 else
344 usemap[start] = use;
346 /* Add the end marker. */
347 usemap[start] = use | use_end;
349 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
351 /* Hash entries can't be shared. */
352 if (use == use_he)
353 return 0;
355 usemap[start] |= (use & use_first);
356 use &= ~use_first;
358 while (--len > 1)
359 if (usemap[++start] != use)
360 return 0;
362 if (usemap[++start] != (use | use_end))
363 return 0;
365 else
366 /* Points to a wrong object or somewhere in the middle. */
367 return 0;
369 return 1;
373 /* Verify data in persistent database. */
374 static int
375 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
377 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb
378 || dbnr == netgrdb);
380 time_t now = time (NULL);
382 struct database_pers_head *head = mem;
383 struct database_pers_head head_copy = *head;
385 /* Check that the header that was read matches the head in the database. */
386 if (memcmp (head, readhead, sizeof (*head)) != 0)
387 return 0;
389 /* First some easy tests: make sure the database header is sane. */
390 if (head->version != DB_VERSION
391 || head->header_size != sizeof (*head)
392 /* We allow a timestamp to be one hour ahead of the current time.
393 This should cover daylight saving time changes. */
394 || head->timestamp > now + 60 * 60 + 60
395 || (head->gc_cycle & 1)
396 || head->module == 0
397 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
398 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
399 || head->first_free < 0
400 || head->first_free > head->data_size
401 || (head->first_free & BLOCK_ALIGN_M1) != 0
402 || head->maxnentries < 0
403 || head->maxnsearched < 0)
404 return 0;
406 uint8_t *usemap = calloc (head->first_free, 1);
407 if (usemap == NULL)
408 return 0;
410 const char *data = (char *) &head->array[roundup (head->module,
411 ALIGN / sizeof (ref_t))];
413 nscd_ssize_t he_cnt = 0;
414 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
416 ref_t trail = head->array[cnt];
417 ref_t work = trail;
418 int tick = 0;
420 while (work != ENDREF)
422 if (! check_use (data, head->first_free, usemap, use_he, work,
423 sizeof (struct hashentry)))
424 goto fail;
426 /* Now we know we can dereference the record. */
427 struct hashentry *here = (struct hashentry *) (data + work);
429 ++he_cnt;
431 /* Make sure the record is for this type of service. */
432 if (here->type >= LASTREQ
433 || reqinfo[here->type].db != &dbs[dbnr])
434 goto fail;
436 /* Validate boolean field value. */
437 if (here->first != false && here->first != true)
438 goto fail;
440 if (here->len < 0)
441 goto fail;
443 /* Now the data. */
444 if (here->packet < 0
445 || here->packet > head->first_free
446 || here->packet + sizeof (struct datahead) > head->first_free)
447 goto fail;
449 struct datahead *dh = (struct datahead *) (data + here->packet);
451 if (! check_use (data, head->first_free, usemap,
452 use_data | (here->first ? use_first : 0),
453 here->packet, dh->allocsize))
454 goto fail;
456 if (dh->allocsize < sizeof (struct datahead)
457 || dh->recsize > dh->allocsize
458 || (dh->notfound != false && dh->notfound != true)
459 || (dh->usable != false && dh->usable != true))
460 goto fail;
462 if (here->key < here->packet + sizeof (struct datahead)
463 || here->key > here->packet + dh->allocsize
464 || here->key + here->len > here->packet + dh->allocsize)
465 goto fail;
467 work = here->next;
469 if (work == trail)
470 /* A circular list, this must not happen. */
471 goto fail;
472 if (tick)
473 trail = ((struct hashentry *) (data + trail))->next;
474 tick = 1 - tick;
478 if (he_cnt != head->nentries)
479 goto fail;
481 /* See if all data and keys had at least one reference from
482 he->first == true hashentry. */
483 for (ref_t idx = 0; idx < head->first_free; ++idx)
485 if (usemap[idx] == use_data_begin)
486 goto fail;
489 /* Finally, make sure the database hasn't changed since the first test. */
490 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
491 goto fail;
493 free (usemap);
494 return 1;
496 fail:
497 free (usemap);
498 return 0;
/* Flags added to every open call for a database file: request
   close-on-exec where the system headers provide O_CLOEXEC.  */
#ifdef O_CLOEXEC
# define EXTRA_O_FLAGS O_CLOEXEC
#else
# define EXTRA_O_FLAGS 0
#endif
509 /* Initialize database information structures. */
510 void
511 nscd_init (void)
513 /* Look up unprivileged uid/gid/groups before we start listening on the
514 socket */
515 if (server_user != NULL)
516 begin_drop_privileges ();
518 if (nthreads == -1)
519 /* No configuration for this value, assume a default. */
520 nthreads = 4;
522 for (size_t cnt = 0; cnt < lastdb; ++cnt)
523 if (dbs[cnt].enabled)
525 pthread_rwlock_init (&dbs[cnt].lock, NULL);
526 pthread_mutex_init (&dbs[cnt].memlock, NULL);
528 if (dbs[cnt].persistent)
530 /* Try to open the appropriate file on disk. */
531 int fd = open (dbs[cnt].db_filename, O_RDWR | EXTRA_O_FLAGS);
532 if (fd != -1)
534 char *msg = NULL;
535 struct stat64 st;
536 void *mem;
537 size_t total;
538 struct database_pers_head head;
539 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
540 sizeof (head)));
541 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
543 fail_db_errno:
544 /* The code is single-threaded at this point so
545 using strerror is just fine. */
546 msg = strerror (errno);
547 fail_db:
548 dbg_log (_("invalid persistent database file \"%s\": %s"),
549 dbs[cnt].db_filename, msg);
550 unlink (dbs[cnt].db_filename);
552 else if (head.module == 0 && head.data_size == 0)
554 /* The file has been created, but the head has not
555 been initialized yet. */
556 msg = _("uninitialized header");
557 goto fail_db;
559 else if (head.header_size != (int) sizeof (head))
561 msg = _("header size does not match");
562 goto fail_db;
564 else if ((total = (sizeof (head)
565 + roundup (head.module * sizeof (ref_t),
566 ALIGN)
567 + head.data_size))
568 > st.st_size
569 || total < sizeof (head))
571 msg = _("file size does not match");
572 goto fail_db;
574 /* Note we map with the maximum size allowed for the
575 database. This is likely much larger than the
576 actual file size. This is OK on most OSes since
577 extensions of the underlying file will
578 automatically translate more pages available for
579 memory access. */
580 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
581 PROT_READ | PROT_WRITE,
582 MAP_SHARED, fd, 0))
583 == MAP_FAILED)
584 goto fail_db_errno;
585 else if (!verify_persistent_db (mem, &head, cnt))
587 munmap (mem, total);
588 msg = _("verification failed");
589 goto fail_db;
591 else
593 /* Success. We have the database. */
594 dbs[cnt].head = mem;
595 dbs[cnt].memsize = total;
596 dbs[cnt].data = (char *)
597 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
598 ALIGN / sizeof (ref_t))];
599 dbs[cnt].mmap_used = true;
601 if (dbs[cnt].suggested_module > head.module)
602 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
603 dbnames[cnt]);
605 dbs[cnt].wr_fd = fd;
606 fd = -1;
607 /* We also need a read-only descriptor. */
608 if (dbs[cnt].shared)
610 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
611 O_RDONLY | EXTRA_O_FLAGS);
612 if (dbs[cnt].ro_fd == -1)
613 dbg_log (_("\
614 cannot create read-only descriptor for \"%s\"; no mmap"),
615 dbs[cnt].db_filename);
618 // XXX Shall we test whether the descriptors actually
619 // XXX point to the same file?
622 /* Close the file descriptors in case something went
623 wrong in which case the variable have not been
624 assigned -1. */
625 if (fd != -1)
626 close (fd);
628 else if (errno == EACCES)
629 do_exit (EXIT_FAILURE, 0, _("cannot access '%s'"),
630 dbs[cnt].db_filename);
633 if (dbs[cnt].head == NULL)
635 /* No database loaded. Allocate the data structure,
636 possibly on disk. */
637 struct database_pers_head head;
638 size_t total = (sizeof (head)
639 + roundup (dbs[cnt].suggested_module
640 * sizeof (ref_t), ALIGN)
641 + (dbs[cnt].suggested_module
642 * DEFAULT_DATASIZE_PER_BUCKET));
644 /* Try to create the database. If we do not need a
645 persistent database create a temporary file. */
646 int fd;
647 int ro_fd = -1;
648 if (dbs[cnt].persistent)
650 fd = open (dbs[cnt].db_filename,
651 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | EXTRA_O_FLAGS,
652 S_IRUSR | S_IWUSR);
653 if (fd != -1 && dbs[cnt].shared)
654 ro_fd = open (dbs[cnt].db_filename,
655 O_RDONLY | EXTRA_O_FLAGS);
657 else
659 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
660 fd = mkostemp (fname, EXTRA_O_FLAGS);
662 /* We do not need the file name anymore after we
663 opened another file descriptor in read-only mode. */
664 if (fd != -1)
666 if (dbs[cnt].shared)
667 ro_fd = open (fname, O_RDONLY | EXTRA_O_FLAGS);
669 unlink (fname);
673 if (fd == -1)
675 if (errno == EEXIST)
677 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
678 dbnames[cnt], dbs[cnt].db_filename);
679 do_exit (1, 0, NULL);
682 if (dbs[cnt].persistent)
683 dbg_log (_("cannot create %s; no persistent database used"),
684 dbs[cnt].db_filename);
685 else
686 dbg_log (_("cannot create %s; no sharing possible"),
687 dbs[cnt].db_filename);
689 dbs[cnt].persistent = 0;
690 // XXX remember: no mmap
692 else
694 /* Tell the user if we could not create the read-only
695 descriptor. */
696 if (ro_fd == -1 && dbs[cnt].shared)
697 dbg_log (_("\
698 cannot create read-only descriptor for \"%s\"; no mmap"),
699 dbs[cnt].db_filename);
701 /* Before we create the header, initialize the hash
702 table. That way if we get interrupted while writing
703 the header we can recognize a partially initialized
704 database. */
705 size_t ps = sysconf (_SC_PAGESIZE);
706 char tmpbuf[ps];
707 assert (~ENDREF == 0);
708 memset (tmpbuf, '\xff', ps);
710 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
711 off_t offset = sizeof (head);
713 size_t towrite;
714 if (offset % ps != 0)
716 towrite = MIN (remaining, ps - (offset % ps));
717 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
718 goto write_fail;
719 offset += towrite;
720 remaining -= towrite;
723 while (remaining > ps)
725 if (pwrite (fd, tmpbuf, ps, offset) == -1)
726 goto write_fail;
727 offset += ps;
728 remaining -= ps;
731 if (remaining > 0
732 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
733 goto write_fail;
735 /* Create the header of the file. */
736 struct database_pers_head head =
738 .version = DB_VERSION,
739 .header_size = sizeof (head),
740 .module = dbs[cnt].suggested_module,
741 .data_size = (dbs[cnt].suggested_module
742 * DEFAULT_DATASIZE_PER_BUCKET),
743 .first_free = 0
745 void *mem;
747 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
748 != sizeof (head))
749 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
750 != 0)
751 || (mem = mmap (NULL, dbs[cnt].max_db_size,
752 PROT_READ | PROT_WRITE,
753 MAP_SHARED, fd, 0)) == MAP_FAILED)
755 write_fail:
756 unlink (dbs[cnt].db_filename);
757 dbg_log (_("cannot write to database file %s: %s"),
758 dbs[cnt].db_filename, strerror (errno));
759 dbs[cnt].persistent = 0;
761 else
763 /* Success. */
764 dbs[cnt].head = mem;
765 dbs[cnt].data = (char *)
766 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
767 ALIGN / sizeof (ref_t))];
768 dbs[cnt].memsize = total;
769 dbs[cnt].mmap_used = true;
771 /* Remember the descriptors. */
772 dbs[cnt].wr_fd = fd;
773 dbs[cnt].ro_fd = ro_fd;
774 fd = -1;
775 ro_fd = -1;
778 if (fd != -1)
779 close (fd);
780 if (ro_fd != -1)
781 close (ro_fd);
785 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
786 /* We do not check here whether the O_CLOEXEC provided to the
787 open call was successful or not. The two fcntl calls are
788 only performed once each per process start-up and therefore
789 is not noticeable at all. */
790 if (paranoia
791 && ((dbs[cnt].wr_fd != -1
792 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
793 || (dbs[cnt].ro_fd != -1
794 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
796 dbg_log (_("\
797 cannot set socket to close on exec: %s; disabling paranoia mode"),
798 strerror (errno));
799 paranoia = 0;
801 #endif
803 if (dbs[cnt].head == NULL)
805 /* We do not use the persistent database. Just
806 create an in-memory data structure. */
807 assert (! dbs[cnt].persistent);
809 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
810 + (dbs[cnt].suggested_module
811 * sizeof (ref_t)));
812 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
813 assert (~ENDREF == 0);
814 memset (dbs[cnt].head->array, '\xff',
815 dbs[cnt].suggested_module * sizeof (ref_t));
816 dbs[cnt].head->module = dbs[cnt].suggested_module;
817 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
818 * dbs[cnt].head->module);
819 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
820 dbs[cnt].head->first_free = 0;
822 dbs[cnt].shared = 0;
823 assert (dbs[cnt].ro_fd == -1);
827 /* Create the socket. */
828 sock = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
829 if (sock < 0)
831 dbg_log (_("cannot open socket: %s"), strerror (errno));
832 do_exit (errno == EACCES ? 4 : 1, 0, NULL);
834 /* Bind a name to the socket. */
835 struct sockaddr_un sock_addr;
836 sock_addr.sun_family = AF_UNIX;
837 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
838 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
840 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
841 do_exit (errno == EACCES ? 4 : 1, 0, NULL);
844 /* Set permissions for the socket. */
845 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
847 /* Set the socket up to accept connections. */
848 if (listen (sock, SOMAXCONN) < 0)
850 dbg_log (_("cannot enable socket to accept connections: %s"),
851 strerror (errno));
852 do_exit (1, 0, NULL);
855 #ifdef HAVE_NETLINK
856 if (dbs[hstdb].enabled)
858 /* Try to open netlink socket to monitor network setting changes. */
859 nl_status_fd = socket (AF_NETLINK,
860 SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK,
861 NETLINK_ROUTE);
862 if (nl_status_fd != -1)
864 struct sockaddr_nl snl;
865 memset (&snl, '\0', sizeof (snl));
866 snl.nl_family = AF_NETLINK;
867 /* XXX Is this the best set to use? */
868 snl.nl_groups = (RTMGRP_IPV4_IFADDR | RTMGRP_TC | RTMGRP_IPV4_MROUTE
869 | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_RULE
870 | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_MROUTE
871 | RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFINFO
872 | RTMGRP_IPV6_PREFIX);
874 if (bind (nl_status_fd, (struct sockaddr *) &snl, sizeof (snl)) != 0)
876 close (nl_status_fd);
877 nl_status_fd = -1;
879 else
881 /* Start the timestamp process. */
882 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
883 = __bump_nl_timestamp ();
887 #endif
889 /* Change to unprivileged uid/gid/groups if specified in config file */
890 if (server_user != NULL)
891 finish_drop_privileges ();
#ifdef HAVE_INOTIFY
#define TRACED_FILE_MASK (IN_DELETE_SELF | IN_CLOSE_WRITE | IN_MOVE_SELF)
#define TRACED_DIR_MASK (IN_DELETE_SELF | IN_CREATE | IN_MOVED_TO | IN_MOVE_SELF)
/* Install the inotify watches for the traced file FINFO: one for the
   file itself and one for its parent directory.  A watch whose
   descriptor is already non-negative is left alone, so the function
   can be called again to re-establish lost watches.  If the file
   watch cannot be installed the directory watch is not attempted.  */
void
install_watches (struct traced_file *finfo)
{
  /* Use inotify support if we have it.  */
  if (finfo->inotify_descr[TRACED_FILE] < 0)
    finfo->inotify_descr[TRACED_FILE] = inotify_add_watch (inotify_fd,
                                                           finfo->fname,
                                                           TRACED_FILE_MASK);
  if (finfo->inotify_descr[TRACED_FILE] < 0)
    {
      dbg_log (_("disabled inotify-based monitoring for file `%s': %s"),
               finfo->fname, strerror (errno));
      return;
    }
  dbg_log (_("monitoring file `%s` (%d)"),
           finfo->fname, finfo->inotify_descr[TRACED_FILE]);
  /* Additionally listen for events in the file's parent directory.
     We do this because the file to be watched might be
     deleted and then added back again.  When it is added back again
     we must re-add the watch.  We must also cover IN_MOVED_TO to
     detect a file being moved into the directory.  */
  if (finfo->inotify_descr[TRACED_DIR] < 0)
    finfo->inotify_descr[TRACED_DIR] = inotify_add_watch (inotify_fd,
                                                          finfo->dname,
                                                          TRACED_DIR_MASK);
  if (finfo->inotify_descr[TRACED_DIR] < 0)
    {
      /* Report the directory name here, not the file name: it is the
         directory watch which failed.  */
      dbg_log (_("disabled inotify-based monitoring for directory `%s': %s"),
               finfo->dname, strerror (errno));
      return;
    }
  dbg_log (_("monitoring directory `%s` (%d)"),
           finfo->dname, finfo->inotify_descr[TRACED_DIR]);
}
#endif
933 /* Register the file in FINFO as a traced file for the database DBS[DBIX].
935 We support registering multiple files per database. Each call to
936 register_traced_file adds to the list of registered files.
938 When we prune the database, either through timeout or a request to
939 invalidate, we will check to see if any of the registered files has changed.
940 When we accept new connections to handle a cache request we will also
941 check to see if any of the registered files has changed.
943 If we have inotify support then we install an inotify fd to notify us of
944 file deletion or modification, both of which will require we invalidate
945 the cache for the database. Without inotify support we stat the file and
946 store st_mtime to determine if the file has been modified. */
947 void
948 register_traced_file (size_t dbidx, struct traced_file *finfo)
950 /* If the database is disabled or file checking is disabled
951 then ignore the registration. */
952 if (! dbs[dbidx].enabled || ! dbs[dbidx].check_file)
953 return;
955 if (__glibc_unlikely (debug_level > 0))
956 dbg_log (_("monitoring file %s for database %s"),
957 finfo->fname, dbnames[dbidx]);
959 #ifdef HAVE_INOTIFY
960 install_watches (finfo);
961 #endif
962 struct stat64 st;
963 if (stat64 (finfo->fname, &st) < 0)
965 /* We cannot stat() the file. Set mtime to zero and try again later. */
966 dbg_log (_("stat failed for file `%s'; will try again later: %s"),
967 finfo->fname, strerror (errno));
968 finfo->mtime = 0;
970 else
971 finfo->mtime = st.st_mtime;
973 /* Queue up the file name. */
974 finfo->next = dbs[dbidx].traced_files;
975 dbs[dbidx].traced_files = finfo;
979 /* Close the connections. */
980 void
981 close_sockets (void)
983 close (sock);
987 static void
988 invalidate_cache (char *key, int fd)
990 dbtype number;
991 int32_t resp;
993 for (number = pwddb; number < lastdb; ++number)
994 if (strcmp (key, dbnames[number]) == 0)
996 struct traced_file *runp = dbs[number].traced_files;
997 while (runp != NULL)
999 /* Make sure we reload from file when checking mtime. */
1000 runp->mtime = 0;
1001 #ifdef HAVE_INOTIFY
1002 /* During an invalidation we try to reload the traced
1003 file watches. This allows the user to re-sync if
1004 inotify events were lost. Similar to what we do during
1005 pruning. */
1006 install_watches (runp);
1007 #endif
1008 if (runp->call_res_init)
1010 res_init ();
1011 break;
1013 runp = runp->next;
1015 break;
1018 if (number == lastdb)
1020 resp = EINVAL;
1021 writeall (fd, &resp, sizeof (resp));
1022 return;
1025 if (dbs[number].enabled)
1027 pthread_mutex_lock (&dbs[number].prune_run_lock);
1028 prune_cache (&dbs[number], LONG_MAX, fd);
1029 pthread_mutex_unlock (&dbs[number].prune_run_lock);
1031 else
1033 resp = 0;
1034 writeall (fd, &resp, sizeof (resp));
1039 #ifdef SCM_RIGHTS
1040 static void
1041 send_ro_fd (struct database_dyn *db, char *key, int fd)
1043 /* If we do not have an read-only file descriptor do nothing. */
1044 if (db->ro_fd == -1)
1045 return;
1047 /* We need to send some data along with the descriptor. */
1048 uint64_t mapsize = (db->head->data_size
1049 + roundup (db->head->module * sizeof (ref_t), ALIGN)
1050 + sizeof (struct database_pers_head));
1051 struct iovec iov[2];
1052 iov[0].iov_base = key;
1053 iov[0].iov_len = strlen (key) + 1;
1054 iov[1].iov_base = &mapsize;
1055 iov[1].iov_len = sizeof (mapsize);
1057 /* Prepare the control message to transfer the descriptor. */
1058 union
1060 struct cmsghdr hdr;
1061 char bytes[CMSG_SPACE (sizeof (int))];
1062 } buf;
1063 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
1064 .msg_control = buf.bytes,
1065 .msg_controllen = sizeof (buf) };
1066 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
1068 cmsg->cmsg_level = SOL_SOCKET;
1069 cmsg->cmsg_type = SCM_RIGHTS;
1070 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
1072 int *ip = (int *) CMSG_DATA (cmsg);
1073 *ip = db->ro_fd;
1075 msg.msg_controllen = cmsg->cmsg_len;
1077 /* Send the control message. We repeat when we are interrupted but
1078 everything else is ignored. */
1079 #ifndef MSG_NOSIGNAL
1080 # define MSG_NOSIGNAL 0
1081 #endif
1082 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
1084 if (__glibc_unlikely (debug_level > 0))
1085 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
1087 #endif /* SCM_RIGHTS */
1090 /* Handle new request. */
1091 static void
1092 handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
1094 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
1096 if (debug_level > 0)
1097 dbg_log (_("\
1098 cannot handle old request version %d; current version is %d"),
1099 req->version, NSCD_VERSION);
1100 return;
1103 /* Perform the SELinux check before we go on to the standard checks. */
1104 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
1106 if (debug_level > 0)
1108 #ifdef SO_PEERCRED
1109 # ifdef PATH_MAX
1110 char buf[PATH_MAX];
1111 # else
1112 char buf[4096];
1113 # endif
1115 snprintf (buf, sizeof (buf), "/proc/%ld/exe", (long int) pid);
1116 ssize_t n = readlink (buf, buf, sizeof (buf) - 1);
1118 if (n <= 0)
1119 dbg_log (_("\
1120 request from %ld not handled due to missing permission"), (long int) pid);
1121 else
1123 buf[n] = '\0';
1124 dbg_log (_("\
1125 request from '%s' [%ld] not handled due to missing permission"),
1126 buf, (long int) pid);
1128 #else
1129 dbg_log (_("request not handled due to missing permission"));
1130 #endif
1132 return;
1135 struct database_dyn *db = reqinfo[req->type].db;
1137 /* See whether we can service the request from the cache. */
1138 if (__builtin_expect (reqinfo[req->type].data_request, true))
1140 if (__builtin_expect (debug_level, 0) > 0)
1142 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
1144 char buf[INET6_ADDRSTRLEN];
1146 dbg_log ("\t%s (%s)", serv2str[req->type],
1147 inet_ntop (req->type == GETHOSTBYADDR
1148 ? AF_INET : AF_INET6,
1149 key, buf, sizeof (buf)));
1151 else
1152 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1155 /* Is this service enabled? */
1156 if (__glibc_unlikely (!db->enabled))
1158 /* No, sent the prepared record. */
1159 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1160 db->disabled_iov->iov_len,
1161 MSG_NOSIGNAL))
1162 != (ssize_t) db->disabled_iov->iov_len
1163 && __builtin_expect (debug_level, 0) > 0)
1165 /* We have problems sending the result. */
1166 char buf[256];
1167 dbg_log (_("cannot write result: %s"),
1168 strerror_r (errno, buf, sizeof (buf)));
1171 return;
1174 /* Be sure we can read the data. */
1175 if (__glibc_unlikely (pthread_rwlock_tryrdlock (&db->lock) != 0))
1177 ++db->head->rdlockdelayed;
1178 pthread_rwlock_rdlock (&db->lock);
1181 /* See whether we can handle it from the cache. */
1182 struct datahead *cached;
1183 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1184 db, uid);
1185 if (cached != NULL)
1187 /* Hurray it's in the cache. */
1188 ssize_t nwritten;
1190 #ifdef HAVE_SENDFILE
1191 if (__glibc_likely (db->mmap_used))
1193 assert (db->wr_fd != -1);
1194 assert ((char *) cached->data > (char *) db->data);
1195 assert ((char *) cached->data - (char *) db->head
1196 + cached->recsize
1197 <= (sizeof (struct database_pers_head)
1198 + db->head->module * sizeof (ref_t)
1199 + db->head->data_size));
1200 nwritten = sendfileall (fd, db->wr_fd,
1201 (char *) cached->data
1202 - (char *) db->head, cached->recsize);
1203 # ifndef __ASSUME_SENDFILE
1204 if (nwritten == -1 && errno == ENOSYS)
1205 goto use_write;
1206 # endif
1208 else
1209 # ifndef __ASSUME_SENDFILE
1210 use_write:
1211 # endif
1212 #endif
1213 nwritten = writeall (fd, cached->data, cached->recsize);
1215 if (nwritten != cached->recsize
1216 && __builtin_expect (debug_level, 0) > 0)
1218 /* We have problems sending the result. */
1219 char buf[256];
1220 dbg_log (_("cannot write result: %s"),
1221 strerror_r (errno, buf, sizeof (buf)));
1224 pthread_rwlock_unlock (&db->lock);
1226 return;
1229 pthread_rwlock_unlock (&db->lock);
1231 else if (__builtin_expect (debug_level, 0) > 0)
1233 if (req->type == INVALIDATE)
1234 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1235 else
1236 dbg_log ("\t%s", serv2str[req->type]);
1239 /* Handle the request. */
1240 switch (req->type)
1242 case GETPWBYNAME:
1243 addpwbyname (db, fd, req, key, uid);
1244 break;
1246 case GETPWBYUID:
1247 addpwbyuid (db, fd, req, key, uid);
1248 break;
1250 case GETGRBYNAME:
1251 addgrbyname (db, fd, req, key, uid);
1252 break;
1254 case GETGRBYGID:
1255 addgrbygid (db, fd, req, key, uid);
1256 break;
1258 case GETHOSTBYNAME:
1259 addhstbyname (db, fd, req, key, uid);
1260 break;
1262 case GETHOSTBYNAMEv6:
1263 addhstbynamev6 (db, fd, req, key, uid);
1264 break;
1266 case GETHOSTBYADDR:
1267 addhstbyaddr (db, fd, req, key, uid);
1268 break;
1270 case GETHOSTBYADDRv6:
1271 addhstbyaddrv6 (db, fd, req, key, uid);
1272 break;
1274 case GETAI:
1275 addhstai (db, fd, req, key, uid);
1276 break;
1278 case INITGROUPS:
1279 addinitgroups (db, fd, req, key, uid);
1280 break;
1282 case GETSERVBYNAME:
1283 addservbyname (db, fd, req, key, uid);
1284 break;
1286 case GETSERVBYPORT:
1287 addservbyport (db, fd, req, key, uid);
1288 break;
1290 case GETNETGRENT:
1291 addgetnetgrent (db, fd, req, key, uid);
1292 break;
1294 case INNETGR:
1295 addinnetgr (db, fd, req, key, uid);
1296 break;
1298 case GETSTAT:
1299 case SHUTDOWN:
1300 case INVALIDATE:
1302 /* Get the callers credentials. */
1303 #ifdef SO_PEERCRED
1304 struct ucred caller;
1305 socklen_t optlen = sizeof (caller);
1307 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1309 char buf[256];
1311 dbg_log (_("error getting caller's id: %s"),
1312 strerror_r (errno, buf, sizeof (buf)));
1313 break;
1316 uid = caller.uid;
1317 #else
1318 /* Some systems have no SO_PEERCRED implementation. They don't
1319 care about security so we don't as well. */
1320 uid = 0;
1321 #endif
1324 /* Accept shutdown, getstat and invalidate only from root. For
1325 the stat call also allow the user specified in the config file. */
1326 if (req->type == GETSTAT)
1328 if (uid == 0 || uid == stat_uid)
1329 send_stats (fd, dbs);
1331 else if (uid == 0)
1333 if (req->type == INVALIDATE)
1334 invalidate_cache (key, fd);
1335 else
1336 termination_handler (0);
1338 break;
1340 case GETFDPW:
1341 case GETFDGR:
1342 case GETFDHST:
1343 case GETFDSERV:
1344 case GETFDNETGR:
1345 #ifdef SCM_RIGHTS
1346 send_ro_fd (reqinfo[req->type].db, key, fd);
1347 #endif
1348 break;
1350 default:
1351 /* Ignore the command, it's nothing we know. */
1352 break;
1357 /* Restart the process. */
1358 static void
1359 restart (void)
1361 /* First determine the parameters. We do not use the parameters
1362 passed to main() since in case nscd is started by running the
1363 dynamic linker this will not work. Yes, this is not the usual
1364 case but nscd is part of glibc and we occasionally do this. */
1365 size_t buflen = 1024;
1366 char *buf = alloca (buflen);
1367 size_t readlen = 0;
1368 int fd = open ("/proc/self/cmdline", O_RDONLY);
1369 if (fd == -1)
1371 dbg_log (_("\
1372 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1373 strerror (errno));
1375 paranoia = 0;
1376 return;
1379 while (1)
1381 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1382 buflen - readlen));
1383 if (n == -1)
1385 dbg_log (_("\
1386 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1387 strerror (errno));
1389 close (fd);
1390 paranoia = 0;
1391 return;
1394 readlen += n;
1396 if (readlen < buflen)
1397 break;
1399 /* We might have to extend the buffer. */
1400 size_t old_buflen = buflen;
1401 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1402 buf = memmove (newp, buf, old_buflen);
1405 close (fd);
1407 /* Parse the command line. Worst case scenario: every two
1408 characters form one parameter (one character plus NUL). */
1409 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1410 int argc = 0;
1412 char *cp = buf;
1413 while (cp < buf + readlen)
1415 argv[argc++] = cp;
1416 cp = (char *) rawmemchr (cp, '\0') + 1;
1418 argv[argc] = NULL;
1420 /* Second, change back to the old user if we changed it. */
1421 if (server_user != NULL)
1423 if (setresuid (old_uid, old_uid, old_uid) != 0)
1425 dbg_log (_("\
1426 cannot change to old UID: %s; disabling paranoia mode"),
1427 strerror (errno));
1429 paranoia = 0;
1430 return;
1433 if (setresgid (old_gid, old_gid, old_gid) != 0)
1435 dbg_log (_("\
1436 cannot change to old GID: %s; disabling paranoia mode"),
1437 strerror (errno));
1439 ignore_value (setuid (server_uid));
1440 paranoia = 0;
1441 return;
1445 /* Next change back to the old working directory. */
1446 if (chdir (oldcwd) == -1)
1448 dbg_log (_("\
1449 cannot change to old working directory: %s; disabling paranoia mode"),
1450 strerror (errno));
1452 if (server_user != NULL)
1454 ignore_value (setuid (server_uid));
1455 ignore_value (setgid (server_gid));
1457 paranoia = 0;
1458 return;
1461 /* Synchronize memory. */
1462 int32_t certainly[lastdb];
1463 for (int cnt = 0; cnt < lastdb; ++cnt)
1464 if (dbs[cnt].enabled)
1466 /* Make sure nobody keeps using the database. */
1467 dbs[cnt].head->timestamp = 0;
1468 certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
1469 dbs[cnt].head->nscd_certainly_running = 0;
1471 if (dbs[cnt].persistent)
1472 // XXX async OK?
1473 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1476 /* The preparations are done. */
1477 #ifdef PATH_MAX
1478 char pathbuf[PATH_MAX];
1479 #else
1480 char pathbuf[256];
1481 #endif
1482 /* Try to exec the real nscd program so the process name (as reported
1483 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1484 if readlink or the exec with the result of the readlink call fails. */
1485 ssize_t n = readlink ("/proc/self/exe", pathbuf, sizeof (pathbuf) - 1);
1486 if (n != -1)
1488 pathbuf[n] = '\0';
1489 execv (pathbuf, argv);
1491 execv ("/proc/self/exe", argv);
1493 /* If we come here, we will never be able to re-exec. */
1494 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1495 strerror (errno));
1497 if (server_user != NULL)
1499 ignore_value (setuid (server_uid));
1500 ignore_value (setgid (server_gid));
1502 if (chdir ("/") != 0)
1503 dbg_log (_("cannot change current working directory to \"/\": %s"),
1504 strerror (errno));
1505 paranoia = 0;
1507 /* Reenable the databases. */
1508 time_t now = time (NULL);
1509 for (int cnt = 0; cnt < lastdb; ++cnt)
1510 if (dbs[cnt].enabled)
1512 dbs[cnt].head->timestamp = now;
1513 dbs[cnt].head->nscd_certainly_running = certainly[cnt];
/* One slot of the descriptor list shared between the main loop and
   the worker threads.  */
struct fdlist
{
  /* Descriptor of the connection stored in this slot.  */
  int fd;
  /* Next slot in the ready list.  fd_ready treats a slot whose NEXT
     is NULL as free.  */
  struct fdlist *next;
};

/* Storage for all the slots of the list.  */
static struct fdlist *fdlist;

/* Descriptors which are currently ready to be read, NULL if there are
   none.  */
static struct fdlist *readylist;
/* Mutex taken around accesses to READYLIST, and the condition
   variable on which the worker threads wait for entries to appear in
   READYLIST.  Both start out with the default static initializers.  */
static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;

/* Clock used to compute the absolute timeouts of timed condvar waits.
   CLOCK_REALTIME unless start_threads selects another one.  */
static clockid_t timeout_clock = CLOCK_REALTIME;

/* How many worker threads are currently waiting for work on the
   READYLIST.  */
static unsigned long int nready;
1542 /* Function for the clean-up threads. */
1543 static void *
1544 __attribute__ ((__noreturn__))
1545 nscd_run_prune (void *p)
1547 const long int my_number = (long int) p;
1548 assert (dbs[my_number].enabled);
1550 int dont_need_update = setup_thread (&dbs[my_number]);
1552 time_t now = time (NULL);
1554 /* We are running. */
1555 dbs[my_number].head->timestamp = now;
1557 struct timespec prune_ts;
1558 if (__glibc_unlikely (clock_gettime (timeout_clock, &prune_ts) == -1))
1559 /* Should never happen. */
1560 abort ();
1562 /* Compute the initial timeout time. Prevent all the timers to go
1563 off at the same time by adding a db-based value. */
1564 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1565 dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;
1567 pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
1568 pthread_mutex_t *prune_run_lock = &dbs[my_number].prune_run_lock;
1569 pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;
1571 pthread_mutex_lock (prune_lock);
1572 while (1)
1574 /* Wait, but not forever. */
1575 int e = 0;
1576 if (! dbs[my_number].clear_cache)
1577 e = pthread_cond_timedwait (prune_cond, prune_lock, &prune_ts);
1578 assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));
1580 time_t next_wait;
1581 now = time (NULL);
1582 if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time
1583 || dbs[my_number].clear_cache)
1585 /* We will determine the new timout values based on the
1586 cache content. Should there be concurrent additions to
1587 the cache which are not accounted for in the cache
1588 pruning we want to know about it. Therefore set the
1589 timeout to the maximum. It will be descreased when adding
1590 new entries to the cache, if necessary. */
1591 dbs[my_number].wakeup_time = MAX_TIMEOUT_VALUE;
1593 /* Unconditionally reset the flag. */
1594 time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
1595 dbs[my_number].clear_cache = 0;
1597 pthread_mutex_unlock (prune_lock);
1599 /* We use a separate lock for running the prune function (instead
1600 of keeping prune_lock locked) because this enables concurrent
1601 invocations of cache_add which might modify the timeout value. */
1602 pthread_mutex_lock (prune_run_lock);
1603 next_wait = prune_cache (&dbs[my_number], prune_now, -1);
1604 pthread_mutex_unlock (prune_run_lock);
1606 next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1607 /* If clients cannot determine for sure whether nscd is running
1608 we need to wake up occasionally to update the timestamp.
1609 Wait 90% of the update period. */
1610 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1611 if (__glibc_unlikely (! dont_need_update))
1613 next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1614 dbs[my_number].head->timestamp = now;
1617 pthread_mutex_lock (prune_lock);
1619 /* Make it known when we will wake up again. */
1620 if (now + next_wait < dbs[my_number].wakeup_time)
1621 dbs[my_number].wakeup_time = now + next_wait;
1622 else
1623 next_wait = dbs[my_number].wakeup_time - now;
1625 else
1626 /* The cache was just pruned. Do not do it again now. Just
1627 use the new timeout value. */
1628 next_wait = dbs[my_number].wakeup_time - now;
1630 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1631 /* Should never happen. */
1632 abort ();
1634 /* Compute next timeout time. */
1635 prune_ts.tv_sec += next_wait;
1640 /* This is the main loop. It is replicated in different threads but
1641 the use of the ready list makes sure only one thread handles an
1642 incoming connection. */
1643 static void *
1644 __attribute__ ((__noreturn__))
1645 nscd_run_worker (void *p)
1647 char buf[256];
1649 /* Initial locking. */
1650 pthread_mutex_lock (&readylist_lock);
1652 /* One more thread available. */
1653 ++nready;
1655 while (1)
1657 while (readylist == NULL)
1658 pthread_cond_wait (&readylist_cond, &readylist_lock);
1660 struct fdlist *it = readylist->next;
1661 if (readylist->next == readylist)
1662 /* Just one entry on the list. */
1663 readylist = NULL;
1664 else
1665 readylist->next = it->next;
1667 /* Extract the information and mark the record ready to be used
1668 again. */
1669 int fd = it->fd;
1670 it->next = NULL;
1672 /* One more thread available. */
1673 --nready;
1675 /* We are done with the list. */
1676 pthread_mutex_unlock (&readylist_lock);
1678 #ifndef __ASSUME_ACCEPT4
1679 if (have_accept4 < 0)
1681 /* We do not want to block on a short read or so. */
1682 int fl = fcntl (fd, F_GETFL);
1683 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1684 goto close_and_out;
1686 #endif
1688 /* Now read the request. */
1689 request_header req;
1690 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1691 != sizeof (req), 0))
1693 /* We failed to read data. Note that this also might mean we
1694 failed because we would have blocked. */
1695 if (debug_level > 0)
1696 dbg_log (_("short read while reading request: %s"),
1697 strerror_r (errno, buf, sizeof (buf)));
1698 goto close_and_out;
1701 /* Check whether this is a valid request type. */
1702 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1703 goto close_and_out;
1705 /* Some systems have no SO_PEERCRED implementation. They don't
1706 care about security so we don't as well. */
1707 uid_t uid = -1;
1708 #ifdef SO_PEERCRED
1709 pid_t pid = 0;
1711 if (__glibc_unlikely (debug_level > 0))
1713 struct ucred caller;
1714 socklen_t optlen = sizeof (caller);
1716 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1717 pid = caller.pid;
1719 #else
1720 const pid_t pid = 0;
1721 #endif
1723 /* It should not be possible to crash the nscd with a silly
1724 request (i.e., a terribly large key). We limit the size to 1kb. */
1725 if (__builtin_expect (req.key_len, 1) < 0
1726 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1728 if (debug_level > 0)
1729 dbg_log (_("key length in request too long: %d"), req.key_len);
1731 else
1733 /* Get the key. */
1734 char keybuf[MAXKEYLEN + 1];
1736 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1737 req.key_len))
1738 != req.key_len, 0))
1740 /* Again, this can also mean we would have blocked. */
1741 if (debug_level > 0)
1742 dbg_log (_("short read while reading request key: %s"),
1743 strerror_r (errno, buf, sizeof (buf)));
1744 goto close_and_out;
1746 keybuf[req.key_len] = '\0';
1748 if (__builtin_expect (debug_level, 0) > 0)
1750 #ifdef SO_PEERCRED
1751 if (pid != 0)
1752 dbg_log (_("\
1753 handle_request: request received (Version = %d) from PID %ld"),
1754 req.version, (long int) pid);
1755 else
1756 #endif
1757 dbg_log (_("\
1758 handle_request: request received (Version = %d)"), req.version);
1761 /* Phew, we got all the data, now process it. */
1762 handle_request (fd, &req, keybuf, uid, pid);
1765 close_and_out:
1766 /* We are done. */
1767 close (fd);
1769 /* Re-locking. */
1770 pthread_mutex_lock (&readylist_lock);
1772 /* One more thread available. */
1773 ++nready;
1775 /* NOTREACHED */
1779 static unsigned int nconns;
1781 static void
1782 fd_ready (int fd)
1784 pthread_mutex_lock (&readylist_lock);
1786 /* Find an empty entry in FDLIST. */
1787 size_t inner;
1788 for (inner = 0; inner < nconns; ++inner)
1789 if (fdlist[inner].next == NULL)
1790 break;
1791 assert (inner < nconns);
1793 fdlist[inner].fd = fd;
1795 if (readylist == NULL)
1796 readylist = fdlist[inner].next = &fdlist[inner];
1797 else
1799 fdlist[inner].next = readylist->next;
1800 readylist = readylist->next = &fdlist[inner];
1803 bool do_signal = true;
1804 if (__glibc_unlikely (nready == 0))
1806 ++client_queued;
1807 do_signal = false;
1809 /* Try to start another thread to help out. */
1810 pthread_t th;
1811 if (nthreads < max_nthreads
1812 && pthread_create (&th, &attr, nscd_run_worker,
1813 (void *) (long int) nthreads) == 0)
1815 /* We got another thread. */
1816 ++nthreads;
1817 /* The new thread might need a kick. */
1818 do_signal = true;
1823 pthread_mutex_unlock (&readylist_lock);
1825 /* Tell one of the worker threads there is work to do. */
1826 if (do_signal)
1827 pthread_cond_signal (&readylist_cond);
1831 /* Check whether restarting should happen. */
1832 static bool
1833 restart_p (time_t now)
1835 return (paranoia && readylist == NULL && nready == nthreads
1836 && now >= restart_time);
/* Times at which the connections were accepted.  The main loops use
   the entries to find connections which have timed out.  */
static time_t *starttime;
1843 #ifdef HAVE_INOTIFY
/* Fallback for systems which do not define PATH_MAX.  */
# ifndef PATH_MAX
#  define PATH_MAX 1024
# endif

/* Buffer for inotify events of a changed file.  I gives access to the
   header of the first event, BUF provides room for the header plus
   PATH_MAX more bytes.  */
union __inev
{
  struct inotify_event i;
  char buf[sizeof (struct inotify_event) + PATH_MAX];
};
1854 /* Returns 0 if the file is there otherwise -1. */
1856 check_file (struct traced_file *finfo)
1858 struct stat64 st;
1859 /* We could check mtime and if different re-add
1860 the watches, and invalidate the database, but we
1861 don't because we are called from inotify_check_files
1862 which should be doing that work. If sufficient inotify
1863 events were lost then the next pruning or invalidation
1864 will do the stat and mtime check. We don't do it here to
1865 keep the logic simple. */
1866 if (stat64 (finfo->fname, &st) < 0)
1867 return -1;
1868 return 0;
1871 /* Process the inotify event in INEV. If the event matches any of the files
1872 registered with a database then mark that database as requiring its cache
1873 to be cleared. We indicate the cache needs clearing by setting
1874 TO_CLEAR[DBCNT] to true for the matching database. */
1875 static void
1876 inotify_check_files (bool *to_clear, union __inev *inev)
1878 /* Check which of the files changed. */
1879 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1881 struct traced_file *finfo = dbs[dbcnt].traced_files;
1883 while (finfo != NULL)
1885 /* The configuration file was moved or deleted.
1886 We stop watching it at that point, and reinitialize. */
1887 if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
1888 && ((inev->i.mask & IN_MOVE_SELF)
1889 || (inev->i.mask & IN_DELETE_SELF)
1890 || (inev->i.mask & IN_IGNORED)))
1892 int ret;
1893 bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
1895 if (check_file (finfo) == 0)
1897 dbg_log (_("ignored inotify event for `%s` (file exists)"),
1898 finfo->fname);
1899 return;
1902 dbg_log (_("monitored file `%s` was %s, removing watch"),
1903 finfo->fname, moved ? "moved" : "deleted");
1904 /* File was moved out, remove the watch. Watches are
1905 automatically removed when the file is deleted. */
1906 if (moved)
1908 ret = inotify_rm_watch (inotify_fd, inev->i.wd);
1909 if (ret < 0)
1910 dbg_log (_("failed to remove file watch `%s`: %s"),
1911 finfo->fname, strerror (errno));
1913 finfo->inotify_descr[TRACED_FILE] = -1;
1914 to_clear[dbcnt] = true;
1915 if (finfo->call_res_init)
1916 res_init ();
1917 return;
1919 /* The configuration file was open for writing and has just closed.
1920 We reset the cache and reinitialize. */
1921 if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
1922 && inev->i.mask & IN_CLOSE_WRITE)
1924 /* Mark cache as needing to be cleared and reinitialize. */
1925 dbg_log (_("monitored file `%s` was written to"), finfo->fname);
1926 to_clear[dbcnt] = true;
1927 if (finfo->call_res_init)
1928 res_init ();
1929 return;
1931 /* The parent directory was moved or deleted. We trigger one last
1932 invalidation. At the next pruning or invalidation we may add
1933 this watch back if the file is present again. */
1934 if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
1935 && ((inev->i.mask & IN_DELETE_SELF)
1936 || (inev->i.mask & IN_MOVE_SELF)
1937 || (inev->i.mask & IN_IGNORED)))
1939 bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
1940 /* The directory watch may have already been removed
1941 but we don't know so we just remove it again and
1942 ignore the error. Then we remove the file watch.
1943 Note: watches are automatically removed for deleted
1944 files. */
1945 if (moved)
1946 inotify_rm_watch (inotify_fd, inev->i.wd);
1947 if (finfo->inotify_descr[TRACED_FILE] != -1)
1949 dbg_log (_("monitored parent directory `%s` was %s, removing watch on `%s`"),
1950 finfo->dname, moved ? "moved" : "deleted", finfo->fname);
1951 if (inotify_rm_watch (inotify_fd, finfo->inotify_descr[TRACED_FILE]) < 0)
1952 dbg_log (_("failed to remove file watch `%s`: %s"),
1953 finfo->dname, strerror (errno));
1955 finfo->inotify_descr[TRACED_FILE] = -1;
1956 finfo->inotify_descr[TRACED_DIR] = -1;
1957 to_clear[dbcnt] = true;
1958 if (finfo->call_res_init)
1959 res_init ();
1960 /* Continue to the next entry since this might be the
1961 parent directory for multiple registered files and
1962 we want to remove watches for all registered files. */
1963 continue;
1965 /* The parent directory had a create or moved to event. */
1966 if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
1967 && ((inev->i.mask & IN_MOVED_TO)
1968 || (inev->i.mask & IN_CREATE))
1969 && strcmp (inev->i.name, finfo->sfname) == 0)
1971 /* We detected a directory change. We look for the creation
1972 of the file we are tracking or the move of the same file
1973 into the directory. */
1974 int ret;
1975 dbg_log (_("monitored file `%s` was %s, adding watch"),
1976 finfo->fname,
1977 inev->i.mask & IN_CREATE ? "created" : "moved into place");
1978 /* File was moved in or created. Regenerate the watch. */
1979 if (finfo->inotify_descr[TRACED_FILE] != -1)
1980 inotify_rm_watch (inotify_fd,
1981 finfo->inotify_descr[TRACED_FILE]);
1983 ret = inotify_add_watch (inotify_fd,
1984 finfo->fname,
1985 TRACED_FILE_MASK);
1986 if (ret < 0)
1987 dbg_log (_("failed to add file watch `%s`: %s"),
1988 finfo->fname, strerror (errno));
1990 finfo->inotify_descr[TRACED_FILE] = ret;
1992 /* The file is new or moved so mark cache as needing to
1993 be cleared and reinitialize. */
1994 to_clear[dbcnt] = true;
1995 if (finfo->call_res_init)
1996 res_init ();
1998 /* Done re-adding the watch. Don't return, we may still
1999 have other files in this same directory, same watch
2000 descriptor, and need to process them. */
2002 /* Other events are ignored, and we move on to the next file. */
2003 finfo = finfo->next;
2008 /* If an entry in the array of booleans TO_CLEAR is TRUE then clear the cache
2009 for the associated database, otherwise do nothing. The TO_CLEAR array must
2010 have LASTDB entries. */
2011 static inline void
2012 clear_db_cache (bool *to_clear)
2014 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
2015 if (to_clear[dbcnt])
2017 pthread_mutex_lock (&dbs[dbcnt].prune_lock);
2018 dbs[dbcnt].clear_cache = 1;
2019 pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
2020 pthread_cond_signal (&dbs[dbcnt].prune_cond);
2025 handle_inotify_events (void)
2027 bool to_clear[lastdb] = { false, };
2028 union __inev inev;
2030 /* Read all inotify events for files registered via
2031 register_traced_file(). */
2032 while (1)
2034 /* Potentially read multiple events into buf. */
2035 ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd,
2036 &inev.buf,
2037 sizeof (inev)));
2038 if (nb < (ssize_t) sizeof (struct inotify_event))
2040 /* Not even 1 event. */
2041 if (__glibc_unlikely (nb == -1 && errno != EAGAIN))
2042 return -1;
2043 /* Done reading events that are ready. */
2044 break;
2046 /* Process all events. The normal inotify interface delivers
2047 complete events on a read and never a partial event. */
2048 char *eptr = &inev.buf[0];
2049 ssize_t count;
2050 while (1)
2052 /* Check which of the files changed. */
2053 inotify_check_files (to_clear, &inev);
2054 count = sizeof (struct inotify_event) + inev.i.len;
2055 eptr += count;
2056 nb -= count;
2057 if (nb >= (ssize_t) sizeof (struct inotify_event))
2058 memcpy (&inev, eptr, nb);
2059 else
2060 break;
2062 continue;
2064 /* Actually perform the cache clearing. */
2065 clear_db_cache (to_clear);
2066 return 0;
2069 #endif
2071 static void
2072 __attribute__ ((__noreturn__))
2073 main_loop_poll (void)
2075 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
2076 * sizeof (conns[0]));
2078 conns[0].fd = sock;
2079 conns[0].events = POLLRDNORM;
2080 size_t nused = 1;
2081 size_t firstfree = 1;
2083 #ifdef HAVE_INOTIFY
2084 if (inotify_fd != -1)
2086 conns[1].fd = inotify_fd;
2087 conns[1].events = POLLRDNORM;
2088 nused = 2;
2089 firstfree = 2;
2091 #endif
2093 #ifdef HAVE_NETLINK
2094 size_t idx_nl_status_fd = 0;
2095 if (nl_status_fd != -1)
2097 idx_nl_status_fd = nused;
2098 conns[nused].fd = nl_status_fd;
2099 conns[nused].events = POLLRDNORM;
2100 ++nused;
2101 firstfree = nused;
2103 #endif
2105 while (1)
2107 /* Wait for any event. We wait at most a couple of seconds so
2108 that we can check whether we should close any of the accepted
2109 connections since we have not received a request. */
2110 #define MAX_ACCEPT_TIMEOUT 30
2111 #define MIN_ACCEPT_TIMEOUT 5
2112 #define MAIN_THREAD_TIMEOUT \
2113 (MAX_ACCEPT_TIMEOUT * 1000 \
2114 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
2116 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
2118 time_t now = time (NULL);
2120 /* If there is a descriptor ready for reading or there is a new
2121 connection, process this now. */
2122 if (n > 0)
2124 if (conns[0].revents != 0)
2126 /* We have a new incoming connection. Accept the connection. */
2127 int fd;
2129 #ifndef __ASSUME_ACCEPT4
2130 fd = -1;
2131 if (have_accept4 >= 0)
2132 #endif
2134 fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
2135 SOCK_NONBLOCK));
2136 #ifndef __ASSUME_ACCEPT4
2137 if (have_accept4 == 0)
2138 have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
2139 #endif
2141 #ifndef __ASSUME_ACCEPT4
2142 if (have_accept4 < 0)
2143 fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
2144 #endif
2146 /* Use the descriptor if we have not reached the limit. */
2147 if (fd >= 0)
2149 if (firstfree < nconns)
2151 conns[firstfree].fd = fd;
2152 conns[firstfree].events = POLLRDNORM;
2153 starttime[firstfree] = now;
2154 if (firstfree >= nused)
2155 nused = firstfree + 1;
2158 ++firstfree;
2159 while (firstfree < nused && conns[firstfree].fd != -1);
2161 else
2162 /* We cannot use the connection so close it. */
2163 close (fd);
2166 --n;
2169 size_t first = 1;
2170 #ifdef HAVE_INOTIFY
2171 if (inotify_fd != -1 && conns[1].fd == inotify_fd)
2173 if (conns[1].revents != 0)
2175 int ret;
2176 ret = handle_inotify_events ();
2177 if (ret == -1)
2179 /* Something went wrong when reading the inotify
2180 data. Better disable inotify. */
2181 dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
2182 conns[1].fd = -1;
2183 firstfree = 1;
2184 if (nused == 2)
2185 nused = 1;
2186 close (inotify_fd);
2187 inotify_fd = -1;
2189 --n;
2192 first = 2;
2194 #endif
2196 #ifdef HAVE_NETLINK
2197 if (idx_nl_status_fd != 0 && conns[idx_nl_status_fd].revents != 0)
2199 char buf[4096];
2200 /* Read all the data. We do not interpret it here. */
2201 while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2202 sizeof (buf))) != -1)
2205 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
2206 = __bump_nl_timestamp ();
2208 #endif
2210 for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
2211 if (conns[cnt].revents != 0)
2213 fd_ready (conns[cnt].fd);
2215 /* Clean up the CONNS array. */
2216 conns[cnt].fd = -1;
2217 if (cnt < firstfree)
2218 firstfree = cnt;
2219 if (cnt == nused - 1)
2221 --nused;
2222 while (conns[nused - 1].fd == -1);
2224 --n;
2228 /* Now find entries which have timed out. */
2229 assert (nused > 0);
2231 /* We make the timeout length depend on the number of file
2232 descriptors currently used. */
2233 #define ACCEPT_TIMEOUT \
2234 (MAX_ACCEPT_TIMEOUT \
2235 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
2236 time_t laststart = now - ACCEPT_TIMEOUT;
2238 for (size_t cnt = nused - 1; cnt > 0; --cnt)
2240 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
2242 /* Remove the entry, it timed out. */
2243 (void) close (conns[cnt].fd);
2244 conns[cnt].fd = -1;
2246 if (cnt < firstfree)
2247 firstfree = cnt;
2248 if (cnt == nused - 1)
2250 --nused;
2251 while (conns[nused - 1].fd == -1);
2255 if (restart_p (now))
2256 restart ();
#ifdef HAVE_EPOLL
/* Main loop of the main thread, based on the epoll descriptor EFD.

   The listening socket and, if available, the inotify and netlink
   descriptors are registered with EFD.  The function returns only if
   one of these registrations fails, i.e., if epoll cannot be used.

   Accepted connections are registered with EFD as well; the time of
   the accept is stored in STARTTIME, indexed by the descriptor.  When
   a connection becomes readable it is removed from EFD and given to
   fd_ready.  Connections which stay silent for longer than
   ACCEPT_TIMEOUT seconds are closed.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  /* NUSED is referenced by the timeout macros.  */
  int nused = 1;
  /* Upper bound for the descriptors with an entry in STARTTIME.  */
  size_t highest = 0;

  /* Register the listening socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* epoll is not usable.  */
    return;

# ifdef HAVE_INOTIFY
  if (inotify_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = inotify_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
        /* epoll is not usable.  */
        return;
      nused = 2;
    }
# endif

# ifdef HAVE_NETLINK
  if (nl_status_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = nl_status_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, nl_status_fd, &ev) == -1)
        /* epoll is not usable.  */
        return;
    }
# endif

  for (;;)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int i = 0; i < n; ++i)
        {
          const int ready = revs[i].data.fd;

          if (ready == sock)
            {
              /* Somebody wants to connect.  */
              int client;

# ifndef __ASSUME_ACCEPT4
              client = -1;
              if (have_accept4 >= 0)
# endif
                {
                  client = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
                                                        SOCK_NONBLOCK));
# ifndef __ASSUME_ACCEPT4
                  if (have_accept4 == 0)
                    have_accept4 = client != -1 || errno != ENOSYS ? 1 : -1;
# endif
                }

# ifndef __ASSUME_ACCEPT4
              if (have_accept4 < 0)
                client = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
# endif

              /* The descriptor is used only if the limit has not been
                 reached.  */
              if (client >= 0)
                {
                  /* Try to register the new descriptor.  */
                  ev.data.fd = client;
                  if (client >= nconns
                      || epoll_ctl (efd, EPOLL_CTL_ADD, client, &ev) == -1)
                    /* Either the descriptor is too large or the
                       registration failed.  Give the connection up.  */
                    close (client);
                  else
                    {
                      /* Record the time of the accept.  */
                      starttime[client] = now;

                      if (client > highest)
                        highest = client;

                      ++nused;
                    }
                }
            }
# ifdef HAVE_INOTIFY
          else if (ready == inotify_fd)
            {
              int ret;
              ret = handle_inotify_events ();
              if (ret == -1)
                {
                  /* Reading the inotify data failed.  Better stop
                     using inotify.  */
                  dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
                  (void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd, NULL);
                  close (inotify_fd);
                  inotify_fd = -1;
                  /* The remaining events of this round are not
                     looked at.  */
                  break;
                }
            }
# endif
# ifdef HAVE_NETLINK
          else if (ready == nl_status_fd)
            {
              char buf[4096];
              /* Drain the descriptor.  The data is not interpreted
                 here.  */
              while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
                                               sizeof (buf))) != -1)
                ;

              __bump_nl_timestamp ();
            }
# endif
          else
            {
              /* A client sent data.  The main thread stops watching
                 the descriptor ...  */
              (void) epoll_ctl (efd, EPOLL_CTL_DEL, ready, NULL);

              /* ... and a worker takes over.  */
              fd_ready (ready);

              /* The connection cannot time out any longer.  */
              starttime[ready] = 0;
              if (ready == highest)
                do
                  --highest;
                while (highest > 0 && starttime[highest] == 0);

              --nused;
            }
        }

      /* Look for accepted connections on which no request has arrived
         for too long.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      assert (starttime[sock] == 0);
# ifdef HAVE_INOTIFY
      assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
# endif
      assert (nl_status_fd == -1 || starttime[nl_status_fd] == 0);
      for (int fdno = highest; fdno > STDERR_FILENO; --fdno)
        {
          if (starttime[fdno] != 0 && starttime[fdno] < laststart)
            {
              /* The client has been silent for too long.  Close the
                 connection.  */
              (void) epoll_ctl (efd, EPOLL_CTL_DEL, fdno, NULL);

              (void) close (fdno);

              starttime[fdno] = 0;
              if (fdno == highest)
                --highest;
            }
          else if (fdno != sock && starttime[fdno] == 0 && fdno == highest)
            --highest;
        }

      if (restart_p (now))
        restart ();
    }
}
#endif
2430 /* Start all the threads we want. The initial process is thread no. 1. */
2431 void
2432 start_threads (void)
2434 /* Initialize the conditional variable we will use. The only
2435 non-standard attribute we might use is the clock selection. */
2436 pthread_condattr_t condattr;
2437 pthread_condattr_init (&condattr);
2439 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2440 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2441 /* Determine whether the monotonous clock is available. */
2442 struct timespec dummy;
2443 # if _POSIX_MONOTONIC_CLOCK == 0
2444 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
2445 # endif
2446 # if _POSIX_CLOCK_SELECTION == 0
2447 if (sysconf (_SC_CLOCK_SELECTION) > 0)
2448 # endif
2449 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
2450 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
2451 timeout_clock = CLOCK_MONOTONIC;
2452 #endif
2454 /* Create the attribute for the threads. They are all created
2455 detached. */
2456 pthread_attr_init (&attr);
2457 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
2458 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2459 pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
2461 /* We allow less than LASTDB threads only for debugging. */
2462 if (debug_level == 0)
2463 nthreads = MAX (nthreads, lastdb);
2465 /* Create the threads which prune the databases. */
2466 // XXX Ideally this work would be done by some of the worker threads.
2467 // XXX But this is problematic since we would need to be able to wake
2468 // XXX them up explicitly as well as part of the group handling the
2469 // XXX ready-list. This requires an operation where we can wait on
2470 // XXX two conditional variables at the same time. This operation
2471 // XXX does not exist (yet).
2472 for (long int i = 0; i < lastdb; ++i)
2474 /* Initialize the conditional variable. */
2475 if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
2477 dbg_log (_("could not initialize conditional variable"));
2478 do_exit (1, 0, NULL);
2481 pthread_t th;
2482 if (dbs[i].enabled
2483 && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
2485 dbg_log (_("could not start clean-up thread; terminating"));
2486 do_exit (1, 0, NULL);
2490 pthread_condattr_destroy (&condattr);
2492 for (long int i = 0; i < nthreads; ++i)
2494 pthread_t th;
2495 if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
2497 if (i == 0)
2499 dbg_log (_("could not start any worker thread; terminating"));
2500 do_exit (1, 0, NULL);
2503 break;
2507 /* Now it is safe to let the parent know that we're doing fine and it can
2508 exit. */
2509 notify_parent (0);
2511 /* Determine how much room for descriptors we should initially
2512 allocate. This might need to change later if we cap the number
2513 with MAXCONN. */
2514 const long int nfds = sysconf (_SC_OPEN_MAX);
2515 #define MINCONN 32
2516 #define MAXCONN 16384
2517 if (nfds == -1 || nfds > MAXCONN)
2518 nconns = MAXCONN;
2519 else if (nfds < MINCONN)
2520 nconns = MINCONN;
2521 else
2522 nconns = nfds;
2524 /* We need memory to pass descriptors on to the worker threads. */
2525 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
2526 /* Array to keep track when connection was accepted. */
2527 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
2529 /* In the main thread we execute the loop which handles incoming
2530 connections. */
2531 #ifdef HAVE_EPOLL
2532 int efd = epoll_create (100);
2533 if (efd != -1)
2535 main_loop_epoll (efd);
2536 close (efd);
2538 #endif
2540 main_loop_poll ();
2544 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2545 this function is called, we are not listening on the nscd socket yet so
2546 we can just use the ordinary lookup functions without causing a lockup */
2547 static void
2548 begin_drop_privileges (void)
2550 struct passwd *pwd = getpwnam (server_user);
2552 if (pwd == NULL)
2554 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2555 do_exit (EXIT_FAILURE, 0,
2556 _("Failed to run nscd as user '%s'"), server_user);
2559 server_uid = pwd->pw_uid;
2560 server_gid = pwd->pw_gid;
2562 /* Save the old UID/GID if we have to change back. */
2563 if (paranoia)
2565 old_uid = getuid ();
2566 old_gid = getgid ();
2569 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
2571 /* This really must never happen. */
2572 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2573 do_exit (EXIT_FAILURE, errno,
2574 _("initial getgrouplist failed"));
2577 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
2579 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
2580 == -1)
2582 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2583 do_exit (EXIT_FAILURE, errno, _("getgrouplist failed"));
2588 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2589 run nscd as the user specified in the configuration file. */
2590 static void
2591 finish_drop_privileges (void)
2593 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2594 /* We need to preserve the capabilities to connect to the audit daemon. */
2595 cap_t new_caps = preserve_capabilities ();
2596 #endif
2598 if (setgroups (server_ngroups, server_groups) == -1)
2600 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2601 do_exit (EXIT_FAILURE, errno, _("setgroups failed"));
2604 int res;
2605 if (paranoia)
2606 res = setresgid (server_gid, server_gid, old_gid);
2607 else
2608 res = setgid (server_gid);
2609 if (res == -1)
2611 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2612 do_exit (4, errno, "setgid");
2615 if (paranoia)
2616 res = setresuid (server_uid, server_uid, old_uid);
2617 else
2618 res = setuid (server_uid);
2619 if (res == -1)
2621 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2622 do_exit (4, errno, "setuid");
2625 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2626 /* Remove the temporary capabilities. */
2627 install_real_capabilities (new_caps);
2628 #endif