iconv mapping of 0xA8 0xEC in CP1258 is non-canonical
[glibc.git] / nscd / connections.c
blobc74199616a0bea843a932ab8d849a3cc542e0e98
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2007, 2008, 2009, 2011 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20 #include <alloca.h>
21 #include <assert.h>
22 #include <atomic.h>
23 #include <error.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <grp.h>
27 #include <ifaddrs.h>
28 #include <libintl.h>
29 #include <pthread.h>
30 #include <pwd.h>
31 #include <resolv.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <unistd.h>
35 #include <arpa/inet.h>
36 #ifdef HAVE_NETLINK
37 # include <linux/netlink.h>
38 # include <linux/rtnetlink.h>
39 #endif
40 #ifdef HAVE_EPOLL
41 # include <sys/epoll.h>
42 #endif
43 #ifdef HAVE_INOTIFY
44 # include <sys/inotify.h>
45 #endif
46 #include <sys/mman.h>
47 #include <sys/param.h>
48 #include <sys/poll.h>
49 #ifdef HAVE_SENDFILE
50 # include <sys/sendfile.h>
51 #endif
52 #include <sys/socket.h>
53 #include <sys/stat.h>
54 #include <sys/un.h>
56 #include "nscd.h"
57 #include "dbg_log.h"
58 #include "selinux.h"
59 #include <resolv/resolv.h>
60 #ifdef HAVE_SENDFILE
61 # include <kernel-features.h>
62 #endif
65 /* Support to run nscd as an unprivileged user.  When server_user is
   non-NULL, nscd_init calls begin_drop_privileges before it sets up the
   databases and the socket, and finish_drop_privileges once it is done.  */
66 const char *server_user;
/* NOTE(review): presumably the uid/gid looked up for server_user; they are
   not assigned in the part of the file reviewed here -- confirm.  */
67 static uid_t server_uid;
68 static gid_t server_gid;
/* NOTE(review): stat_user and stat_uid are not referenced in the part of the
   file reviewed here; presumably they name the user allowed to request
   statistics -- confirm against the configuration parser.  */
69 const char *stat_user;
70 uid_t stat_uid;
71 static gid_t *server_groups;
/* Fallback value for systems whose headers do not define NGROUPS.  */
72 #ifndef NGROUPS
73 # define NGROUPS 32
74 #endif
75 static int server_ngroups;
77 static pthread_attr_t attr;
/* Helpers for dropping privileges; both are called from nscd_init.  */
79 static void begin_drop_privileges (void);
80 static void finish_drop_privileges (void);
82 /* Map request type to a string. */
83 const char *const serv2str[LASTREQ] =
85 [GETPWBYNAME] = "GETPWBYNAME",
86 [GETPWBYUID] = "GETPWBYUID",
87 [GETGRBYNAME] = "GETGRBYNAME",
88 [GETGRBYGID] = "GETGRBYGID",
89 [GETHOSTBYNAME] = "GETHOSTBYNAME",
90 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
91 [GETHOSTBYADDR] = "GETHOSTBYADDR",
92 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
93 [SHUTDOWN] = "SHUTDOWN",
94 [GETSTAT] = "GETSTAT",
95 [INVALIDATE] = "INVALIDATE",
96 [GETFDPW] = "GETFDPW",
97 [GETFDGR] = "GETFDGR",
98 [GETFDHST] = "GETFDHST",
99 [GETAI] = "GETAI",
100 [INITGROUPS] = "INITGROUPS",
101 [GETSERVBYNAME] = "GETSERVBYNAME",
102 [GETSERVBYPORT] = "GETSERVBYPORT",
103 [GETFDSERV] = "GETFDSERV",
104 [GETNETGRENT] = "GETNETGRENT",
105 [INNETGR] = "INNETGR",
106 [GETFDNETGR] = "GETFDNETGR"
109 /* The control data structures for the services. */
110 struct database_dyn dbs[lastdb] =
112 [pwddb] = {
113 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
114 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
115 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
116 .enabled = 0,
117 .check_file = 1,
118 .persistent = 0,
119 .propagate = 1,
120 .shared = 0,
121 .max_db_size = DEFAULT_MAX_DB_SIZE,
122 .suggested_module = DEFAULT_SUGGESTED_MODULE,
123 .db_filename = _PATH_NSCD_PASSWD_DB,
124 .disabled_iov = &pwd_iov_disabled,
125 .postimeout = 3600,
126 .negtimeout = 20,
127 .wr_fd = -1,
128 .ro_fd = -1,
129 .mmap_used = false
131 [grpdb] = {
132 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
133 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
134 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
135 .enabled = 0,
136 .check_file = 1,
137 .persistent = 0,
138 .propagate = 1,
139 .shared = 0,
140 .max_db_size = DEFAULT_MAX_DB_SIZE,
141 .suggested_module = DEFAULT_SUGGESTED_MODULE,
142 .db_filename = _PATH_NSCD_GROUP_DB,
143 .disabled_iov = &grp_iov_disabled,
144 .postimeout = 3600,
145 .negtimeout = 60,
146 .wr_fd = -1,
147 .ro_fd = -1,
148 .mmap_used = false
150 [hstdb] = {
151 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
152 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
153 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
154 .enabled = 0,
155 .check_file = 1,
156 .persistent = 0,
157 .propagate = 0, /* Not used. */
158 .shared = 0,
159 .max_db_size = DEFAULT_MAX_DB_SIZE,
160 .suggested_module = DEFAULT_SUGGESTED_MODULE,
161 .db_filename = _PATH_NSCD_HOSTS_DB,
162 .disabled_iov = &hst_iov_disabled,
163 .postimeout = 3600,
164 .negtimeout = 20,
165 .wr_fd = -1,
166 .ro_fd = -1,
167 .mmap_used = false
169 [servdb] = {
170 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
171 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
172 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
173 .enabled = 0,
174 .check_file = 1,
175 .persistent = 0,
176 .propagate = 0, /* Not used. */
177 .shared = 0,
178 .max_db_size = DEFAULT_MAX_DB_SIZE,
179 .suggested_module = DEFAULT_SUGGESTED_MODULE,
180 .db_filename = _PATH_NSCD_SERVICES_DB,
181 .disabled_iov = &serv_iov_disabled,
182 .postimeout = 28800,
183 .negtimeout = 20,
184 .wr_fd = -1,
185 .ro_fd = -1,
186 .mmap_used = false
188 [netgrdb] = {
189 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
190 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
191 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
192 .enabled = 0,
193 .check_file = 1,
194 .persistent = 0,
195 .propagate = 0, /* Not used. */
196 .shared = 0,
197 .max_db_size = DEFAULT_MAX_DB_SIZE,
198 .suggested_module = DEFAULT_SUGGESTED_MODULE,
199 .db_filename = _PATH_NSCD_NETGROUP_DB,
200 .disabled_iov = &netgroup_iov_disabled,
201 .postimeout = 28800,
202 .negtimeout = 20,
203 .wr_fd = -1,
204 .ro_fd = -1,
205 .mmap_used = false
210 /* Mapping of request type to database. */
211 static struct
213 bool data_request;
214 struct database_dyn *db;
215 } const reqinfo[LASTREQ] =
217 [GETPWBYNAME] = { true, &dbs[pwddb] },
218 [GETPWBYUID] = { true, &dbs[pwddb] },
219 [GETGRBYNAME] = { true, &dbs[grpdb] },
220 [GETGRBYGID] = { true, &dbs[grpdb] },
221 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
222 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
223 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
224 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
225 [SHUTDOWN] = { false, NULL },
226 [GETSTAT] = { false, NULL },
227 [SHUTDOWN] = { false, NULL },
228 [GETFDPW] = { false, &dbs[pwddb] },
229 [GETFDGR] = { false, &dbs[grpdb] },
230 [GETFDHST] = { false, &dbs[hstdb] },
231 [GETAI] = { true, &dbs[hstdb] },
232 [INITGROUPS] = { true, &dbs[grpdb] },
233 [GETSERVBYNAME] = { true, &dbs[servdb] },
234 [GETSERVBYPORT] = { true, &dbs[servdb] },
235 [GETFDSERV] = { false, &dbs[servdb] },
236 [GETNETGRENT] = { true, &dbs[netgrdb] },
237 [INNETGR] = { true, &dbs[netgrdb] },
238 [GETFDNETGR] = { false, &dbs[netgrdb] }
242 /* Initial number of threads to use.  -1 means "not configured";
   nscd_init replaces it with the default of 4.  */
243 int nthreads = -1;
244 /* Maximum number of threads to use. */
245 int max_nthreads = 32;
247 /* Socket for incoming connections.  Created, bound and put into
   listening state by nscd_init; closed by close_sockets.  */
248 static int sock;
250 #ifdef HAVE_INOTIFY
251 /* Inotify descriptor.  register_traced_file only tries to add a watch
   when this is not negative.  */
252 int inotify_fd = -1;
253 #endif
255 #ifdef HAVE_NETLINK
256 /* Descriptor for netlink status updates.  Opened by nscd_init when the
   hosts database is enabled; -1 if unavailable.  */
257 static int nl_status_fd = -1;
258 #endif
260 #ifndef __ASSUME_SOCK_CLOEXEC
261 /* Negative if SOCK_CLOEXEC is not supported, positive if it is, zero
262 before we know the result. */
263 static int have_sock_cloexec;
264 #endif
265 #ifndef __ASSUME_ACCEPT4
/* NOTE(review): presumably follows the same negative/zero/positive
   convention as have_sock_cloexec, for accept4 -- confirm at its users.  */
266 static int have_accept4;
267 #endif
269 /* Number of times clients had to wait. */
270 unsigned long int client_queued;
/* Send the LEN bytes at BUF over the socket FD, retrying after partial
   sends and after interruption by a signal.

   Returns the result of the failing send call if one fails (even when some
   bytes were already sent), otherwise the number of bytes sent.  That
   number is smaller than LEN only if send reported that it transferred
   nothing.  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cursor = buf;
  size_t pending = len;

  for (;;)
    {
      ssize_t sent = TEMP_FAILURE_RETRY (send (fd, cursor, pending,
					       MSG_NOSIGNAL));
      if (sent < 0)
	return sent;
      if (sent == 0)
	/* No progress; report what went out so far.  */
	break;

      cursor += sent;
      pending -= sent;
      if (pending == 0)
	break;
    }

  return len - pending;
}
#ifdef HAVE_SENDFILE
/* Copy LEN bytes starting at offset OFF of FROMFD to TOFD with sendfile,
   retrying after partial transfers and after interruption by a signal.

   Returns the result of the failing sendfile call if one fails, otherwise
   the number of bytes transferred, which is smaller than LEN only if
   sendfile reported that it transferred nothing.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t pending = len;

  for (;;)
    {
      /* sendfile advances OFF by the amount it transferred.  */
      ssize_t moved = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off,
						    pending));
      if (moved < 0)
	return moved;
      if (moved == 0)
	break;

      pending -= moved;
      if (pending <= 0)
	break;
    }

  return len - pending;
}
#endif
/* Values stored in the per-byte usage map that check_use maintains while
   a persistent database is verified.  The low bits name the kind of object
   occupying a byte; the flag bits mark the first and last byte of an
   object and whether a "first" hash entry refers to it.  */
enum usekey
  {
    /* Byte not claimed by any object yet.  */
    use_not = 0,

    /* Flag bits.  They are only ever combined with one of the kinds
       below and never stored on their own.  */
    use_first = 1 << 4,
    use_begin = 1 << 5,
    use_end = 1 << 6,

    /* Hash entry.  */
    use_he = 1,
    use_he_begin = use_he | use_begin,
    use_he_end = use_he | use_end,
#if SEPARATE_KEY
    /* Key stored outside of its data record.  */
    use_key = 2,
    use_key_begin = use_key | use_begin,
    use_key_end = use_key | use_end,
    use_key_first = use_key_begin | use_first,
#endif
    /* Data record.  */
    use_data = 3,
    use_data_begin = use_data | use_begin,
    use_data_end = use_data | use_end,
    use_data_first = use_data_begin | use_first
  };
335 static int
336 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
337 enum usekey use, ref_t start, size_t len)
339 assert (len >= 2);
341 if (start > first_free || start + len > first_free
342 || (start & BLOCK_ALIGN_M1))
343 return 0;
345 if (usemap[start] == use_not)
347 /* Add the start marker. */
348 usemap[start] = use | use_begin;
349 use &= ~use_first;
351 while (--len > 0)
352 if (usemap[++start] != use_not)
353 return 0;
354 else
355 usemap[start] = use;
357 /* Add the end marker. */
358 usemap[start] = use | use_end;
360 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
362 /* Hash entries can't be shared. */
363 if (use == use_he)
364 return 0;
366 usemap[start] |= (use & use_first);
367 use &= ~use_first;
369 while (--len > 1)
370 if (usemap[++start] != use)
371 return 0;
373 if (usemap[++start] != (use | use_end))
374 return 0;
376 else
377 /* Points to a wrong object or somewhere in the middle. */
378 return 0;
380 return 1;
384 /* Verify data in persistent database. */
385 static int
386 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
388 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb
389 || dbnr == netgrdb);
391 time_t now = time (NULL);
393 struct database_pers_head *head = mem;
394 struct database_pers_head head_copy = *head;
396 /* Check that the header that was read matches the head in the database. */
397 if (memcmp (head, readhead, sizeof (*head)) != 0)
398 return 0;
400 /* First some easy tests: make sure the database header is sane. */
401 if (head->version != DB_VERSION
402 || head->header_size != sizeof (*head)
403 /* We allow a timestamp to be one hour ahead of the current time.
404 This should cover daylight saving time changes. */
405 || head->timestamp > now + 60 * 60 + 60
406 || (head->gc_cycle & 1)
407 || head->module == 0
408 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
409 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
410 || head->first_free < 0
411 || head->first_free > head->data_size
412 || (head->first_free & BLOCK_ALIGN_M1) != 0
413 || head->maxnentries < 0
414 || head->maxnsearched < 0)
415 return 0;
417 uint8_t *usemap = calloc (head->first_free, 1);
418 if (usemap == NULL)
419 return 0;
421 const char *data = (char *) &head->array[roundup (head->module,
422 ALIGN / sizeof (ref_t))];
424 nscd_ssize_t he_cnt = 0;
425 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
427 ref_t trail = head->array[cnt];
428 ref_t work = trail;
429 int tick = 0;
431 while (work != ENDREF)
433 if (! check_use (data, head->first_free, usemap, use_he, work,
434 sizeof (struct hashentry)))
435 goto fail;
437 /* Now we know we can dereference the record. */
438 struct hashentry *here = (struct hashentry *) (data + work);
440 ++he_cnt;
442 /* Make sure the record is for this type of service. */
443 if (here->type >= LASTREQ
444 || reqinfo[here->type].db != &dbs[dbnr])
445 goto fail;
447 /* Validate boolean field value. */
448 if (here->first != false && here->first != true)
449 goto fail;
451 if (here->len < 0)
452 goto fail;
454 /* Now the data. */
455 if (here->packet < 0
456 || here->packet > head->first_free
457 || here->packet + sizeof (struct datahead) > head->first_free)
458 goto fail;
460 struct datahead *dh = (struct datahead *) (data + here->packet);
462 if (! check_use (data, head->first_free, usemap,
463 use_data | (here->first ? use_first : 0),
464 here->packet, dh->allocsize))
465 goto fail;
467 if (dh->allocsize < sizeof (struct datahead)
468 || dh->recsize > dh->allocsize
469 || (dh->notfound != false && dh->notfound != true)
470 || (dh->usable != false && dh->usable != true))
471 goto fail;
473 if (here->key < here->packet + sizeof (struct datahead)
474 || here->key > here->packet + dh->allocsize
475 || here->key + here->len > here->packet + dh->allocsize)
477 #if SEPARATE_KEY
478 /* If keys can appear outside of data, this should be done
479 instead. But gc doesn't mark the data in that case. */
480 if (! check_use (data, head->first_free, usemap,
481 use_key | (here->first ? use_first : 0),
482 here->key, here->len))
483 #endif
484 goto fail;
487 work = here->next;
489 if (work == trail)
490 /* A circular list, this must not happen. */
491 goto fail;
492 if (tick)
493 trail = ((struct hashentry *) (data + trail))->next;
494 tick = 1 - tick;
498 if (he_cnt != head->nentries)
499 goto fail;
501 /* See if all data and keys had at least one reference from
502 he->first == true hashentry. */
503 for (ref_t idx = 0; idx < head->first_free; ++idx)
505 #if SEPARATE_KEY
506 if (usemap[idx] == use_key_begin)
507 goto fail;
508 #endif
509 if (usemap[idx] == use_data_begin)
510 goto fail;
513 /* Finally, make sure the database hasn't changed since the first test. */
514 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
515 goto fail;
517 free (usemap);
518 return 1;
520 fail:
521 free (usemap);
522 return 0;
/* Extra flag passed by nscd_init to open and mkostemp when it opens the
   database files: O_CLOEXEC where the headers define it, nothing
   otherwise.  */
526 #ifdef O_CLOEXEC
527 # define EXTRA_O_FLAGS O_CLOEXEC
528 #else
529 # define EXTRA_O_FLAGS 0
530 #endif
533 /* Initialize database information structures. */
534 void
535 nscd_init (void)
537 /* Look up unprivileged uid/gid/groups before we start listening on the
538 socket */
539 if (server_user != NULL)
540 begin_drop_privileges ();
542 if (nthreads == -1)
543 /* No configuration for this value, assume a default. */
544 nthreads = 4;
546 for (size_t cnt = 0; cnt < lastdb; ++cnt)
547 if (dbs[cnt].enabled)
549 pthread_rwlock_init (&dbs[cnt].lock, NULL);
550 pthread_mutex_init (&dbs[cnt].memlock, NULL);
552 if (dbs[cnt].persistent)
554 /* Try to open the appropriate file on disk. */
555 int fd = open (dbs[cnt].db_filename, O_RDWR | EXTRA_O_FLAGS);
556 if (fd != -1)
558 char *msg = NULL;
559 struct stat64 st;
560 void *mem;
561 size_t total;
562 struct database_pers_head head;
563 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
564 sizeof (head)));
565 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
567 fail_db_errno:
568 /* The code is single-threaded at this point so
569 using strerror is just fine. */
570 msg = strerror (errno);
571 fail_db:
572 dbg_log (_("invalid persistent database file \"%s\": %s"),
573 dbs[cnt].db_filename, msg);
574 unlink (dbs[cnt].db_filename);
576 else if (head.module == 0 && head.data_size == 0)
578 /* The file has been created, but the head has not
579 been initialized yet. */
580 msg = _("uninitialized header");
581 goto fail_db;
583 else if (head.header_size != (int) sizeof (head))
585 msg = _("header size does not match");
586 goto fail_db;
588 else if ((total = (sizeof (head)
589 + roundup (head.module * sizeof (ref_t),
590 ALIGN)
591 + head.data_size))
592 > st.st_size
593 || total < sizeof (head))
595 msg = _("file size does not match");
596 goto fail_db;
598 /* Note we map with the maximum size allowed for the
599 database. This is likely much larger than the
600 actual file size. This is OK on most OSes since
601 extensions of the underlying file will
602 automatically translate more pages available for
603 memory access. */
604 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
605 PROT_READ | PROT_WRITE,
606 MAP_SHARED, fd, 0))
607 == MAP_FAILED)
608 goto fail_db_errno;
609 else if (!verify_persistent_db (mem, &head, cnt))
611 munmap (mem, total);
612 msg = _("verification failed");
613 goto fail_db;
615 else
617 /* Success. We have the database. */
618 dbs[cnt].head = mem;
619 dbs[cnt].memsize = total;
620 dbs[cnt].data = (char *)
621 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
622 ALIGN / sizeof (ref_t))];
623 dbs[cnt].mmap_used = true;
625 if (dbs[cnt].suggested_module > head.module)
626 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
627 dbnames[cnt]);
629 dbs[cnt].wr_fd = fd;
630 fd = -1;
631 /* We also need a read-only descriptor. */
632 if (dbs[cnt].shared)
634 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
635 O_RDONLY | EXTRA_O_FLAGS);
636 if (dbs[cnt].ro_fd == -1)
637 dbg_log (_("\
638 cannot create read-only descriptor for \"%s\"; no mmap"),
639 dbs[cnt].db_filename);
642 // XXX Shall we test whether the descriptors actually
643 // XXX point to the same file?
646 /* Close the file descriptors in case something went
647 wrong in which case the variable have not been
648 assigned -1. */
649 if (fd != -1)
650 close (fd);
652 else if (errno == EACCES)
653 error (EXIT_FAILURE, 0, _("cannot access '%s'"),
654 dbs[cnt].db_filename);
657 if (dbs[cnt].head == NULL)
659 /* No database loaded. Allocate the data structure,
660 possibly on disk. */
661 struct database_pers_head head;
662 size_t total = (sizeof (head)
663 + roundup (dbs[cnt].suggested_module
664 * sizeof (ref_t), ALIGN)
665 + (dbs[cnt].suggested_module
666 * DEFAULT_DATASIZE_PER_BUCKET));
668 /* Try to create the database. If we do not need a
669 persistent database create a temporary file. */
670 int fd;
671 int ro_fd = -1;
672 if (dbs[cnt].persistent)
674 fd = open (dbs[cnt].db_filename,
675 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | EXTRA_O_FLAGS,
676 S_IRUSR | S_IWUSR);
677 if (fd != -1 && dbs[cnt].shared)
678 ro_fd = open (dbs[cnt].db_filename,
679 O_RDONLY | EXTRA_O_FLAGS);
681 else
683 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
684 fd = mkostemp (fname, EXTRA_O_FLAGS);
686 /* We do not need the file name anymore after we
687 opened another file descriptor in read-only mode. */
688 if (fd != -1)
690 if (dbs[cnt].shared)
691 ro_fd = open (fname, O_RDONLY | EXTRA_O_FLAGS);
693 unlink (fname);
697 if (fd == -1)
699 if (errno == EEXIST)
701 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
702 dbnames[cnt], dbs[cnt].db_filename);
703 // XXX Correct way to terminate?
704 exit (1);
707 if (dbs[cnt].persistent)
708 dbg_log (_("cannot create %s; no persistent database used"),
709 dbs[cnt].db_filename);
710 else
711 dbg_log (_("cannot create %s; no sharing possible"),
712 dbs[cnt].db_filename);
714 dbs[cnt].persistent = 0;
715 // XXX remember: no mmap
717 else
719 /* Tell the user if we could not create the read-only
720 descriptor. */
721 if (ro_fd == -1 && dbs[cnt].shared)
722 dbg_log (_("\
723 cannot create read-only descriptor for \"%s\"; no mmap"),
724 dbs[cnt].db_filename);
726 /* Before we create the header, initialiye the hash
727 table. So that if we get interrupted if writing
728 the header we can recognize a partially initialized
729 database. */
730 size_t ps = sysconf (_SC_PAGESIZE);
731 char tmpbuf[ps];
732 assert (~ENDREF == 0);
733 memset (tmpbuf, '\xff', ps);
735 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
736 off_t offset = sizeof (head);
738 size_t towrite;
739 if (offset % ps != 0)
741 towrite = MIN (remaining, ps - (offset % ps));
742 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
743 goto write_fail;
744 offset += towrite;
745 remaining -= towrite;
748 while (remaining > ps)
750 if (pwrite (fd, tmpbuf, ps, offset) == -1)
751 goto write_fail;
752 offset += ps;
753 remaining -= ps;
756 if (remaining > 0
757 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
758 goto write_fail;
760 /* Create the header of the file. */
761 struct database_pers_head head =
763 .version = DB_VERSION,
764 .header_size = sizeof (head),
765 .module = dbs[cnt].suggested_module,
766 .data_size = (dbs[cnt].suggested_module
767 * DEFAULT_DATASIZE_PER_BUCKET),
768 .first_free = 0
770 void *mem;
772 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
773 != sizeof (head))
774 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
775 != 0)
776 || (mem = mmap (NULL, dbs[cnt].max_db_size,
777 PROT_READ | PROT_WRITE,
778 MAP_SHARED, fd, 0)) == MAP_FAILED)
780 write_fail:
781 unlink (dbs[cnt].db_filename);
782 dbg_log (_("cannot write to database file %s: %s"),
783 dbs[cnt].db_filename, strerror (errno));
784 dbs[cnt].persistent = 0;
786 else
788 /* Success. */
789 dbs[cnt].head = mem;
790 dbs[cnt].data = (char *)
791 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
792 ALIGN / sizeof (ref_t))];
793 dbs[cnt].memsize = total;
794 dbs[cnt].mmap_used = true;
796 /* Remember the descriptors. */
797 dbs[cnt].wr_fd = fd;
798 dbs[cnt].ro_fd = ro_fd;
799 fd = -1;
800 ro_fd = -1;
803 if (fd != -1)
804 close (fd);
805 if (ro_fd != -1)
806 close (ro_fd);
810 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
811 /* We do not check here whether the O_CLOEXEC provided to the
812 open call was successful or not. The two fcntl calls are
813 only performed once each per process start-up and therefore
814 is not noticeable at all. */
815 if (paranoia
816 && ((dbs[cnt].wr_fd != -1
817 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
818 || (dbs[cnt].ro_fd != -1
819 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
821 dbg_log (_("\
822 cannot set socket to close on exec: %s; disabling paranoia mode"),
823 strerror (errno));
824 paranoia = 0;
826 #endif
828 if (dbs[cnt].head == NULL)
830 /* We do not use the persistent database. Just
831 create an in-memory data structure. */
832 assert (! dbs[cnt].persistent);
834 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
835 + (dbs[cnt].suggested_module
836 * sizeof (ref_t)));
837 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
838 assert (~ENDREF == 0);
839 memset (dbs[cnt].head->array, '\xff',
840 dbs[cnt].suggested_module * sizeof (ref_t));
841 dbs[cnt].head->module = dbs[cnt].suggested_module;
842 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
843 * dbs[cnt].head->module);
844 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
845 dbs[cnt].head->first_free = 0;
847 dbs[cnt].shared = 0;
848 assert (dbs[cnt].ro_fd == -1);
852 /* Create the socket. */
853 #ifndef __ASSUME_SOCK_CLOEXEC
854 sock = -1;
855 if (have_sock_cloexec >= 0)
856 #endif
858 sock = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
859 #ifndef __ASSUME_SOCK_CLOEXEC
860 if (have_sock_cloexec == 0)
861 have_sock_cloexec = sock != -1 || errno != EINVAL ? 1 : -1;
862 #endif
864 #ifndef __ASSUME_SOCK_CLOEXEC
865 if (have_sock_cloexec < 0)
866 sock = socket (AF_UNIX, SOCK_STREAM, 0);
867 #endif
868 if (sock < 0)
870 dbg_log (_("cannot open socket: %s"), strerror (errno));
871 exit (errno == EACCES ? 4 : 1);
873 /* Bind a name to the socket. */
874 struct sockaddr_un sock_addr;
875 sock_addr.sun_family = AF_UNIX;
876 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
877 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
879 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
880 exit (errno == EACCES ? 4 : 1);
883 #ifndef __ASSUME_SOCK_CLOEXEC
884 if (have_sock_cloexec < 0)
886 /* We don't want to get stuck on accept. */
887 int fl = fcntl (sock, F_GETFL);
888 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
890 dbg_log (_("cannot change socket to nonblocking mode: %s"),
891 strerror (errno));
892 exit (1);
895 /* The descriptor needs to be closed on exec. */
896 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
898 dbg_log (_("cannot set socket to close on exec: %s"),
899 strerror (errno));
900 exit (1);
903 #endif
905 /* Set permissions for the socket. */
906 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
908 /* Set the socket up to accept connections. */
909 if (listen (sock, SOMAXCONN) < 0)
911 dbg_log (_("cannot enable socket to accept connections: %s"),
912 strerror (errno));
913 exit (1);
916 #ifdef HAVE_NETLINK
917 if (dbs[hstdb].enabled)
919 /* Try to open netlink socket to monitor network setting changes. */
920 nl_status_fd = socket (AF_NETLINK,
921 SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK,
922 NETLINK_ROUTE);
923 if (nl_status_fd != -1)
925 struct sockaddr_nl snl;
926 memset (&snl, '\0', sizeof (snl));
927 snl.nl_family = AF_NETLINK;
928 /* XXX Is this the best set to use? */
929 snl.nl_groups = (RTMGRP_IPV4_IFADDR | RTMGRP_TC | RTMGRP_IPV4_MROUTE
930 | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_RULE
931 | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_MROUTE
932 | RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFINFO
933 | RTMGRP_IPV6_PREFIX);
935 if (bind (nl_status_fd, (struct sockaddr *) &snl, sizeof (snl)) != 0)
937 close (nl_status_fd);
938 nl_status_fd = -1;
940 else
942 /* Start the timestamp process. */
943 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
944 = __bump_nl_timestamp ();
946 # ifndef __ASSUME_SOCK_CLOEXEC
947 if (have_sock_cloexec < 0)
949 /* We don't want to get stuck on accept. */
950 int fl = fcntl (nl_status_fd, F_GETFL);
951 if (fl == -1
952 || fcntl (nl_status_fd, F_SETFL, fl | O_NONBLOCK) == -1)
954 dbg_log (_("\
955 cannot change socket to nonblocking mode: %s"),
956 strerror (errno));
957 exit (1);
960 /* The descriptor needs to be closed on exec. */
961 if (paranoia
962 && fcntl (nl_status_fd, F_SETFD, FD_CLOEXEC) == -1)
964 dbg_log (_("cannot set socket to close on exec: %s"),
965 strerror (errno));
966 exit (1);
969 # endif
973 #endif
975 /* Change to unprivileged uid/gid/groups if specified in config file */
976 if (server_user != NULL)
977 finish_drop_privileges ();
981 void
982 register_traced_file (size_t dbidx, struct traced_file *finfo)
984 if (! dbs[dbidx].enabled || ! dbs[dbidx].check_file)
985 return;
987 if (__builtin_expect (debug_level > 0, 0))
988 dbg_log (_("register trace file %s for database %s"),
989 finfo->fname, dbnames[dbidx]);
991 #ifdef HAVE_INOTIFY
992 if (inotify_fd < 0
993 || (finfo->inotify_descr = inotify_add_watch (inotify_fd, finfo->fname,
994 IN_DELETE_SELF
995 | IN_MODIFY)) < 0)
996 #endif
998 /* We need the modification date of the file. */
999 struct stat64 st;
1001 if (stat64 (finfo->fname, &st) < 0)
1003 /* We cannot stat() the file, disable file checking. */
1004 dbg_log (_("cannot stat() file `%s': %s"),
1005 finfo->fname, strerror (errno));
1006 return;
1009 finfo->inotify_descr = -1;
1010 finfo->mtime = st.st_mtime;
1013 /* Queue up the file name. */
1014 finfo->next = dbs[dbidx].traced_files;
1015 dbs[dbidx].traced_files = finfo;
1019 /* Close the connections. */
1020 void
1021 close_sockets (void)
1023 close (sock);
1027 static void
1028 invalidate_cache (char *key, int fd)
1030 dbtype number;
1031 int32_t resp;
1033 for (number = pwddb; number < lastdb; ++number)
1034 if (strcmp (key, dbnames[number]) == 0)
1036 if (number == hstdb)
1038 struct traced_file *runp = dbs[hstdb].traced_files;
1039 while (runp != NULL)
1040 if (runp->call_res_init)
1042 res_init ();
1043 break;
1045 else
1046 runp = runp->next;
1048 break;
1051 if (number == lastdb)
1053 resp = EINVAL;
1054 writeall (fd, &resp, sizeof (resp));
1055 return;
1058 if (dbs[number].enabled)
1060 pthread_mutex_lock (&dbs[number].prune_run_lock);
1061 prune_cache (&dbs[number], LONG_MAX, fd);
1062 pthread_mutex_unlock (&dbs[number].prune_run_lock);
1064 else
1066 resp = 0;
1067 writeall (fd, &resp, sizeof (resp));
1072 #ifdef SCM_RIGHTS
1073 static void
1074 send_ro_fd (struct database_dyn *db, char *key, int fd)
1076 /* If we do not have an read-only file descriptor do nothing. */
1077 if (db->ro_fd == -1)
1078 return;
1080 /* We need to send some data along with the descriptor. */
1081 uint64_t mapsize = (db->head->data_size
1082 + roundup (db->head->module * sizeof (ref_t), ALIGN)
1083 + sizeof (struct database_pers_head));
1084 struct iovec iov[2];
1085 iov[0].iov_base = key;
1086 iov[0].iov_len = strlen (key) + 1;
1087 iov[1].iov_base = &mapsize;
1088 iov[1].iov_len = sizeof (mapsize);
1090 /* Prepare the control message to transfer the descriptor. */
1091 union
1093 struct cmsghdr hdr;
1094 char bytes[CMSG_SPACE (sizeof (int))];
1095 } buf;
1096 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
1097 .msg_control = buf.bytes,
1098 .msg_controllen = sizeof (buf) };
1099 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
1101 cmsg->cmsg_level = SOL_SOCKET;
1102 cmsg->cmsg_type = SCM_RIGHTS;
1103 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
1105 int *ip = (int *) CMSG_DATA (cmsg);
1106 *ip = db->ro_fd;
1108 msg.msg_controllen = cmsg->cmsg_len;
1110 /* Send the control message. We repeat when we are interrupted but
1111 everything else is ignored. */
1112 #ifndef MSG_NOSIGNAL
1113 # define MSG_NOSIGNAL 0
1114 #endif
1115 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
1117 if (__builtin_expect (debug_level > 0, 0))
1118 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
1120 #endif /* SCM_RIGHTS */
1123 /* Handle new request. */
1124 static void
1125 handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
1127 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
1129 if (debug_level > 0)
1130 dbg_log (_("\
1131 cannot handle old request version %d; current version is %d"),
1132 req->version, NSCD_VERSION);
1133 return;
1136 /* Perform the SELinux check before we go on to the standard checks. */
1137 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
1139 if (debug_level > 0)
1141 #ifdef SO_PEERCRED
1142 # ifdef PATH_MAX
1143 char buf[PATH_MAX];
1144 # else
1145 char buf[4096];
1146 # endif
1148 snprintf (buf, sizeof (buf), "/proc/%ld/exe", (long int) pid);
1149 ssize_t n = readlink (buf, buf, sizeof (buf) - 1);
1151 if (n <= 0)
1152 dbg_log (_("\
1153 request from %ld not handled due to missing permission"), (long int) pid);
1154 else
1156 buf[n] = '\0';
1157 dbg_log (_("\
1158 request from '%s' [%ld] not handled due to missing permission"),
1159 buf, (long int) pid);
1161 #else
1162 dbg_log (_("request not handled due to missing permission"));
1163 #endif
1165 return;
1168 struct database_dyn *db = reqinfo[req->type].db;
1170 /* See whether we can service the request from the cache. */
1171 if (__builtin_expect (reqinfo[req->type].data_request, true))
1173 if (__builtin_expect (debug_level, 0) > 0)
1175 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
1177 char buf[INET6_ADDRSTRLEN];
1179 dbg_log ("\t%s (%s)", serv2str[req->type],
1180 inet_ntop (req->type == GETHOSTBYADDR
1181 ? AF_INET : AF_INET6,
1182 key, buf, sizeof (buf)));
1184 else
1185 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1188 /* Is this service enabled? */
1189 if (__builtin_expect (!db->enabled, 0))
1191 /* No, sent the prepared record. */
1192 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1193 db->disabled_iov->iov_len,
1194 MSG_NOSIGNAL))
1195 != (ssize_t) db->disabled_iov->iov_len
1196 && __builtin_expect (debug_level, 0) > 0)
1198 /* We have problems sending the result. */
1199 char buf[256];
1200 dbg_log (_("cannot write result: %s"),
1201 strerror_r (errno, buf, sizeof (buf)));
1204 return;
1207 /* Be sure we can read the data. */
1208 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
1210 ++db->head->rdlockdelayed;
1211 pthread_rwlock_rdlock (&db->lock);
1214 /* See whether we can handle it from the cache. */
1215 struct datahead *cached;
1216 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1217 db, uid);
1218 if (cached != NULL)
1220 /* Hurray it's in the cache. */
1221 ssize_t nwritten;
1223 #ifdef HAVE_SENDFILE
1224 if (__builtin_expect (db->mmap_used, 1))
1226 assert (db->wr_fd != -1);
1227 assert ((char *) cached->data > (char *) db->data);
1228 assert ((char *) cached->data - (char *) db->head
1229 + cached->recsize
1230 <= (sizeof (struct database_pers_head)
1231 + db->head->module * sizeof (ref_t)
1232 + db->head->data_size));
1233 nwritten = sendfileall (fd, db->wr_fd,
1234 (char *) cached->data
1235 - (char *) db->head, cached->recsize);
1236 # ifndef __ASSUME_SENDFILE
1237 if (nwritten == -1 && errno == ENOSYS)
1238 goto use_write;
1239 # endif
1241 else
1242 # ifndef __ASSUME_SENDFILE
1243 use_write:
1244 # endif
1245 #endif
1246 nwritten = writeall (fd, cached->data, cached->recsize);
1248 if (nwritten != cached->recsize
1249 && __builtin_expect (debug_level, 0) > 0)
1251 /* We have problems sending the result. */
1252 char buf[256];
1253 dbg_log (_("cannot write result: %s"),
1254 strerror_r (errno, buf, sizeof (buf)));
1257 pthread_rwlock_unlock (&db->lock);
1259 return;
1262 pthread_rwlock_unlock (&db->lock);
1264 else if (__builtin_expect (debug_level, 0) > 0)
1266 if (req->type == INVALIDATE)
1267 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1268 else
1269 dbg_log ("\t%s", serv2str[req->type]);
1272 /* Handle the request. */
1273 switch (req->type)
1275 case GETPWBYNAME:
1276 addpwbyname (db, fd, req, key, uid);
1277 break;
1279 case GETPWBYUID:
1280 addpwbyuid (db, fd, req, key, uid);
1281 break;
1283 case GETGRBYNAME:
1284 addgrbyname (db, fd, req, key, uid);
1285 break;
1287 case GETGRBYGID:
1288 addgrbygid (db, fd, req, key, uid);
1289 break;
1291 case GETHOSTBYNAME:
1292 addhstbyname (db, fd, req, key, uid);
1293 break;
1295 case GETHOSTBYNAMEv6:
1296 addhstbynamev6 (db, fd, req, key, uid);
1297 break;
1299 case GETHOSTBYADDR:
1300 addhstbyaddr (db, fd, req, key, uid);
1301 break;
1303 case GETHOSTBYADDRv6:
1304 addhstbyaddrv6 (db, fd, req, key, uid);
1305 break;
1307 case GETAI:
1308 addhstai (db, fd, req, key, uid);
1309 break;
1311 case INITGROUPS:
1312 addinitgroups (db, fd, req, key, uid);
1313 break;
1315 case GETSERVBYNAME:
1316 addservbyname (db, fd, req, key, uid);
1317 break;
1319 case GETSERVBYPORT:
1320 addservbyport (db, fd, req, key, uid);
1321 break;
1323 case GETNETGRENT:
1324 addgetnetgrent (db, fd, req, key, uid);
1325 break;
1327 case INNETGR:
1328 addinnetgr (db, fd, req, key, uid);
1329 break;
1331 case GETSTAT:
1332 case SHUTDOWN:
1333 case INVALIDATE:
1335 /* Get the callers credentials. */
1336 #ifdef SO_PEERCRED
1337 struct ucred caller;
1338 socklen_t optlen = sizeof (caller);
1340 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1342 char buf[256];
1344 dbg_log (_("error getting caller's id: %s"),
1345 strerror_r (errno, buf, sizeof (buf)));
1346 break;
1349 uid = caller.uid;
1350 #else
1351 /* Some systems have no SO_PEERCRED implementation. They don't
1352 care about security so we don't as well. */
1353 uid = 0;
1354 #endif
1357 /* Accept shutdown, getstat and invalidate only from root. For
1358 the stat call also allow the user specified in the config file. */
1359 if (req->type == GETSTAT)
1361 if (uid == 0 || uid == stat_uid)
1362 send_stats (fd, dbs);
1364 else if (uid == 0)
1366 if (req->type == INVALIDATE)
1367 invalidate_cache (key, fd);
1368 else
1369 termination_handler (0);
1371 break;
1373 case GETFDPW:
1374 case GETFDGR:
1375 case GETFDHST:
1376 case GETFDSERV:
1377 case GETFDNETGR:
1378 #ifdef SCM_RIGHTS
1379 send_ro_fd (reqinfo[req->type].db, key, fd);
1380 #endif
1381 break;
1383 default:
1384 /* Ignore the command, it's nothing we know. */
1385 break;
1390 /* Restart the process. */
1391 static void
1392 restart (void)
1394 /* First determine the parameters. We do not use the parameters
1395 passed to main() since in case nscd is started by running the
1396 dynamic linker this will not work. Yes, this is not the usual
1397 case but nscd is part of glibc and we occasionally do this. */
1398 size_t buflen = 1024;
1399 char *buf = alloca (buflen);
1400 size_t readlen = 0;
1401 int fd = open ("/proc/self/cmdline", O_RDONLY);
1402 if (fd == -1)
1404 dbg_log (_("\
1405 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1406 strerror (errno));
1408 paranoia = 0;
1409 return;
1412 while (1)
1414 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1415 buflen - readlen));
1416 if (n == -1)
1418 dbg_log (_("\
1419 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1420 strerror (errno));
1422 close (fd);
1423 paranoia = 0;
1424 return;
1427 readlen += n;
1429 if (readlen < buflen)
1430 break;
1432 /* We might have to extend the buffer. */
1433 size_t old_buflen = buflen;
1434 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1435 buf = memmove (newp, buf, old_buflen);
1438 close (fd);
1440 /* Parse the command line. Worst case scenario: every two
1441 characters form one parameter (one character plus NUL). */
1442 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1443 int argc = 0;
1445 char *cp = buf;
1446 while (cp < buf + readlen)
1448 argv[argc++] = cp;
1449 cp = (char *) rawmemchr (cp, '\0') + 1;
1451 argv[argc] = NULL;
1453 /* Second, change back to the old user if we changed it. */
1454 if (server_user != NULL)
1456 if (setresuid (old_uid, old_uid, old_uid) != 0)
1458 dbg_log (_("\
1459 cannot change to old UID: %s; disabling paranoia mode"),
1460 strerror (errno));
1462 paranoia = 0;
1463 return;
1466 if (setresgid (old_gid, old_gid, old_gid) != 0)
1468 dbg_log (_("\
1469 cannot change to old GID: %s; disabling paranoia mode"),
1470 strerror (errno));
1472 setuid (server_uid);
1473 paranoia = 0;
1474 return;
1478 /* Next change back to the old working directory. */
1479 if (chdir (oldcwd) == -1)
1481 dbg_log (_("\
1482 cannot change to old working directory: %s; disabling paranoia mode"),
1483 strerror (errno));
1485 if (server_user != NULL)
1487 setuid (server_uid);
1488 setgid (server_gid);
1490 paranoia = 0;
1491 return;
1494 /* Synchronize memory. */
1495 int32_t certainly[lastdb];
1496 for (int cnt = 0; cnt < lastdb; ++cnt)
1497 if (dbs[cnt].enabled)
1499 /* Make sure nobody keeps using the database. */
1500 dbs[cnt].head->timestamp = 0;
1501 certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
1502 dbs[cnt].head->nscd_certainly_running = 0;
1504 if (dbs[cnt].persistent)
1505 // XXX async OK?
1506 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1509 /* The preparations are done. */
1510 #ifdef PATH_MAX
1511 char pathbuf[PATH_MAX];
1512 #else
1513 char pathbuf[256];
1514 #endif
1515 /* Try to exec the real nscd program so the process name (as reported
1516 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1517 if readlink or the exec with the result of the readlink call fails. */
1518 ssize_t n = readlink ("/proc/self/exe", pathbuf, sizeof (pathbuf) - 1);
1519 if (n != -1)
1521 pathbuf[n] = '\0';
1522 execv (pathbuf, argv);
1524 execv ("/proc/self/exe", argv);
1526 /* If we come here, we will never be able to re-exec. */
1527 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1528 strerror (errno));
1530 if (server_user != NULL)
1532 setuid (server_uid);
1533 setgid (server_gid);
1535 if (chdir ("/") != 0)
1536 dbg_log (_("cannot change current working directory to \"/\": %s"),
1537 strerror (errno));
1538 paranoia = 0;
1540 /* Reenable the databases. */
1541 time_t now = time (NULL);
1542 for (int cnt = 0; cnt < lastdb; ++cnt)
1543 if (dbs[cnt].enabled)
1545 dbs[cnt].head->timestamp = now;
1546 dbs[cnt].head->nscd_certainly_running = certainly[cnt];
/* One slot of the descriptor hand-off list.  A slot whose NEXT member
   is NULL is unused; queued slots are linked into a ring through
   NEXT.  */
struct fdlist
{
  /* The descriptor waiting to be served.  */
  int fd;
  /* Successor in the ring, or NULL while the slot is free.  */
  struct fdlist *next;
};
1557 /* Memory allocated for the list. */
1558 static struct fdlist *fdlist;
1559 /* List of currently ready-to-read file descriptors. */
1560 static struct fdlist *readylist;
1562 /* Conditional variable and mutex to signal availability of entries in
1563 READYLIST. The condvar is initialized dynamically since we might
1564 use a different clock depending on availability. */
1565 static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
1566 static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1568 /* The clock to use with the condvar. */
1569 static clockid_t timeout_clock = CLOCK_REALTIME;
1571 /* Number of threads ready to handle the READYLIST. */
1572 static unsigned long int nready;
1575 /* Function for the clean-up threads. */
1576 static void *
1577 __attribute__ ((__noreturn__))
1578 nscd_run_prune (void *p)
1580 const long int my_number = (long int) p;
1581 assert (dbs[my_number].enabled);
1583 int dont_need_update = setup_thread (&dbs[my_number]);
1585 time_t now = time (NULL);
1587 /* We are running. */
1588 dbs[my_number].head->timestamp = now;
1590 struct timespec prune_ts;
1591 if (__builtin_expect (clock_gettime (timeout_clock, &prune_ts) == -1, 0))
1592 /* Should never happen. */
1593 abort ();
1595 /* Compute the initial timeout time. Prevent all the timers to go
1596 off at the same time by adding a db-based value. */
1597 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1598 dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;
1600 pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
1601 pthread_mutex_t *prune_run_lock = &dbs[my_number].prune_run_lock;
1602 pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;
1604 pthread_mutex_lock (prune_lock);
1605 while (1)
1607 /* Wait, but not forever. */
1608 int e = 0;
1609 if (! dbs[my_number].clear_cache)
1610 e = pthread_cond_timedwait (prune_cond, prune_lock, &prune_ts);
1611 assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));
1613 time_t next_wait;
1614 now = time (NULL);
1615 if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time
1616 || dbs[my_number].clear_cache)
1618 /* We will determine the new timout values based on the
1619 cache content. Should there be concurrent additions to
1620 the cache which are not accounted for in the cache
1621 pruning we want to know about it. Therefore set the
1622 timeout to the maximum. It will be descreased when adding
1623 new entries to the cache, if necessary. */
1624 dbs[my_number].wakeup_time = MAX_TIMEOUT_VALUE;
1626 /* Unconditionally reset the flag. */
1627 time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
1628 dbs[my_number].clear_cache = 0;
1630 pthread_mutex_unlock (prune_lock);
1632 /* We use a separate lock for running the prune function (instead
1633 of keeping prune_lock locked) because this enables concurrent
1634 invocations of cache_add which might modify the timeout value. */
1635 pthread_mutex_lock (prune_run_lock);
1636 next_wait = prune_cache (&dbs[my_number], prune_now, -1);
1637 pthread_mutex_unlock (prune_run_lock);
1639 next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1640 /* If clients cannot determine for sure whether nscd is running
1641 we need to wake up occasionally to update the timestamp.
1642 Wait 90% of the update period. */
1643 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1644 if (__builtin_expect (! dont_need_update, 0))
1646 next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1647 dbs[my_number].head->timestamp = now;
1650 pthread_mutex_lock (prune_lock);
1652 /* Make it known when we will wake up again. */
1653 if (now + next_wait < dbs[my_number].wakeup_time)
1654 dbs[my_number].wakeup_time = now + next_wait;
1655 else
1656 next_wait = dbs[my_number].wakeup_time - now;
1658 else
1659 /* The cache was just pruned. Do not do it again now. Just
1660 use the new timeout value. */
1661 next_wait = dbs[my_number].wakeup_time - now;
1663 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1664 /* Should never happen. */
1665 abort ();
1667 /* Compute next timeout time. */
1668 prune_ts.tv_sec += next_wait;
1673 /* This is the main loop. It is replicated in different threads but
1674 the use of the ready list makes sure only one thread handles an
1675 incoming connection. */
1676 static void *
1677 __attribute__ ((__noreturn__))
1678 nscd_run_worker (void *p)
1680 char buf[256];
1682 /* Initial locking. */
1683 pthread_mutex_lock (&readylist_lock);
1685 /* One more thread available. */
1686 ++nready;
1688 while (1)
1690 while (readylist == NULL)
1691 pthread_cond_wait (&readylist_cond, &readylist_lock);
1693 struct fdlist *it = readylist->next;
1694 if (readylist->next == readylist)
1695 /* Just one entry on the list. */
1696 readylist = NULL;
1697 else
1698 readylist->next = it->next;
1700 /* Extract the information and mark the record ready to be used
1701 again. */
1702 int fd = it->fd;
1703 it->next = NULL;
1705 /* One more thread available. */
1706 --nready;
1708 /* We are done with the list. */
1709 pthread_mutex_unlock (&readylist_lock);
1711 #ifndef __ASSUME_ACCEPT4
1712 if (have_accept4 < 0)
1714 /* We do not want to block on a short read or so. */
1715 int fl = fcntl (fd, F_GETFL);
1716 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1717 goto close_and_out;
1719 #endif
1721 /* Now read the request. */
1722 request_header req;
1723 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1724 != sizeof (req), 0))
1726 /* We failed to read data. Note that this also might mean we
1727 failed because we would have blocked. */
1728 if (debug_level > 0)
1729 dbg_log (_("short read while reading request: %s"),
1730 strerror_r (errno, buf, sizeof (buf)));
1731 goto close_and_out;
1734 /* Check whether this is a valid request type. */
1735 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1736 goto close_and_out;
1738 /* Some systems have no SO_PEERCRED implementation. They don't
1739 care about security so we don't as well. */
1740 uid_t uid = -1;
1741 #ifdef SO_PEERCRED
1742 pid_t pid = 0;
1744 if (__builtin_expect (debug_level > 0, 0))
1746 struct ucred caller;
1747 socklen_t optlen = sizeof (caller);
1749 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1750 pid = caller.pid;
1752 #else
1753 const pid_t pid = 0;
1754 #endif
1756 /* It should not be possible to crash the nscd with a silly
1757 request (i.e., a terribly large key). We limit the size to 1kb. */
1758 if (__builtin_expect (req.key_len, 1) < 0
1759 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1761 if (debug_level > 0)
1762 dbg_log (_("key length in request too long: %d"), req.key_len);
1764 else
1766 /* Get the key. */
1767 char keybuf[MAXKEYLEN];
1769 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1770 req.key_len))
1771 != req.key_len, 0))
1773 /* Again, this can also mean we would have blocked. */
1774 if (debug_level > 0)
1775 dbg_log (_("short read while reading request key: %s"),
1776 strerror_r (errno, buf, sizeof (buf)));
1777 goto close_and_out;
1780 if (__builtin_expect (debug_level, 0) > 0)
1782 #ifdef SO_PEERCRED
1783 if (pid != 0)
1784 dbg_log (_("\
1785 handle_request: request received (Version = %d) from PID %ld"),
1786 req.version, (long int) pid);
1787 else
1788 #endif
1789 dbg_log (_("\
1790 handle_request: request received (Version = %d)"), req.version);
1793 /* Phew, we got all the data, now process it. */
1794 handle_request (fd, &req, keybuf, uid, pid);
1797 close_and_out:
1798 /* We are done. */
1799 close (fd);
1801 /* Re-locking. */
1802 pthread_mutex_lock (&readylist_lock);
1804 /* One more thread available. */
1805 ++nready;
1807 /* NOTREACHED */
1811 static unsigned int nconns;
1813 static void
1814 fd_ready (int fd)
1816 pthread_mutex_lock (&readylist_lock);
1818 /* Find an empty entry in FDLIST. */
1819 size_t inner;
1820 for (inner = 0; inner < nconns; ++inner)
1821 if (fdlist[inner].next == NULL)
1822 break;
1823 assert (inner < nconns);
1825 fdlist[inner].fd = fd;
1827 if (readylist == NULL)
1828 readylist = fdlist[inner].next = &fdlist[inner];
1829 else
1831 fdlist[inner].next = readylist->next;
1832 readylist = readylist->next = &fdlist[inner];
1835 bool do_signal = true;
1836 if (__builtin_expect (nready == 0, 0))
1838 ++client_queued;
1839 do_signal = false;
1841 /* Try to start another thread to help out. */
1842 pthread_t th;
1843 if (nthreads < max_nthreads
1844 && pthread_create (&th, &attr, nscd_run_worker,
1845 (void *) (long int) nthreads) == 0)
1847 /* We got another thread. */
1848 ++nthreads;
1849 /* The new thread might need a kick. */
1850 do_signal = true;
1855 pthread_mutex_unlock (&readylist_lock);
1857 /* Tell one of the worker threads there is work to do. */
1858 if (do_signal)
1859 pthread_cond_signal (&readylist_cond);
1863 /* Check whether restarting should happen. */
1864 static inline int
1865 restart_p (time_t now)
1867 return (paranoia && readylist == NULL && nready == nthreads
1868 && now >= restart_time);
1872 /* Array for times a connection was accepted. */
1873 static time_t *starttime;
1876 static void
1877 __attribute__ ((__noreturn__))
1878 main_loop_poll (void)
1880 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1881 * sizeof (conns[0]));
1883 conns[0].fd = sock;
1884 conns[0].events = POLLRDNORM;
1885 size_t nused = 1;
1886 size_t firstfree = 1;
1888 #ifdef HAVE_INOTIFY
1889 if (inotify_fd != -1)
1891 conns[1].fd = inotify_fd;
1892 conns[1].events = POLLRDNORM;
1893 nused = 2;
1894 firstfree = 2;
1896 #endif
1898 #ifdef HAVE_NETLINK
1899 size_t idx_nl_status_fd = 0;
1900 if (nl_status_fd != -1)
1902 idx_nl_status_fd = nused;
1903 conns[nused].fd = nl_status_fd;
1904 conns[nused].events = POLLRDNORM;
1905 ++nused;
1906 firstfree = nused;
1908 #endif
1910 while (1)
1912 /* Wait for any event. We wait at most a couple of seconds so
1913 that we can check whether we should close any of the accepted
1914 connections since we have not received a request. */
1915 #define MAX_ACCEPT_TIMEOUT 30
1916 #define MIN_ACCEPT_TIMEOUT 5
1917 #define MAIN_THREAD_TIMEOUT \
1918 (MAX_ACCEPT_TIMEOUT * 1000 \
1919 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1921 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1923 time_t now = time (NULL);
1925 /* If there is a descriptor ready for reading or there is a new
1926 connection, process this now. */
1927 if (n > 0)
1929 if (conns[0].revents != 0)
1931 /* We have a new incoming connection. Accept the connection. */
1932 int fd;
1934 #ifndef __ASSUME_ACCEPT4
1935 fd = -1;
1936 if (have_accept4 >= 0)
1937 #endif
1939 fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
1940 SOCK_NONBLOCK));
1941 #ifndef __ASSUME_ACCEPT4
1942 if (have_accept4 == 0)
1943 have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
1944 #endif
1946 #ifndef __ASSUME_ACCEPT4
1947 if (have_accept4 < 0)
1948 fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
1949 #endif
1951 /* Use the descriptor if we have not reached the limit. */
1952 if (fd >= 0)
1954 if (firstfree < nconns)
1956 conns[firstfree].fd = fd;
1957 conns[firstfree].events = POLLRDNORM;
1958 starttime[firstfree] = now;
1959 if (firstfree >= nused)
1960 nused = firstfree + 1;
1963 ++firstfree;
1964 while (firstfree < nused && conns[firstfree].fd != -1);
1966 else
1967 /* We cannot use the connection so close it. */
1968 close (fd);
1971 --n;
1974 size_t first = 1;
1975 #ifdef HAVE_INOTIFY
1976 if (inotify_fd != -1 && conns[1].fd == inotify_fd)
1978 if (conns[1].revents != 0)
1980 bool to_clear[lastdb] = { false, };
1981 union
1983 # ifndef PATH_MAX
1984 # define PATH_MAX 1024
1985 # endif
1986 struct inotify_event i;
1987 char buf[sizeof (struct inotify_event) + PATH_MAX];
1988 } inev;
1990 while (1)
1992 ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
1993 sizeof (inev)));
1994 if (nb < (ssize_t) sizeof (struct inotify_event))
1996 if (__builtin_expect (nb == -1 && errno != EAGAIN,
1999 /* Something went wrong when reading the inotify
2000 data. Better disable inotify. */
2001 dbg_log (_("\
2002 disabled inotify after read error %d"),
2003 errno);
2004 conns[1].fd = -1;
2005 firstfree = 1;
2006 if (nused == 2)
2007 nused = 1;
2008 close (inotify_fd);
2009 inotify_fd = -1;
2011 break;
2014 /* Check which of the files changed. */
2015 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
2017 struct traced_file *finfo = dbs[dbcnt].traced_files;
2019 while (finfo != NULL)
2021 if (finfo->inotify_descr == inev.i.wd)
2023 to_clear[dbcnt] = true;
2024 if (finfo->call_res_init)
2025 res_init ();
2026 goto next;
2029 finfo = finfo->next;
2032 next:;
2035 /* Actually perform the cache clearing. */
2036 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
2037 if (to_clear[dbcnt])
2039 pthread_mutex_lock (&dbs[dbcnt].prune_lock);
2040 dbs[dbcnt].clear_cache = 1;
2041 pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
2042 pthread_cond_signal (&dbs[dbcnt].prune_cond);
2045 --n;
2048 first = 2;
2050 #endif
2052 #ifdef HAVE_NETLINK
2053 if (idx_nl_status_fd != 0 && conns[idx_nl_status_fd].revents != 0)
2055 char buf[4096];
2056 /* Read all the data. We do not interpret it here. */
2057 while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2058 sizeof (buf))) != -1)
2061 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
2062 = __bump_nl_timestamp ();
2064 #endif
2066 for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
2067 if (conns[cnt].revents != 0)
2069 fd_ready (conns[cnt].fd);
2071 /* Clean up the CONNS array. */
2072 conns[cnt].fd = -1;
2073 if (cnt < firstfree)
2074 firstfree = cnt;
2075 if (cnt == nused - 1)
2077 --nused;
2078 while (conns[nused - 1].fd == -1);
2080 --n;
2084 /* Now find entries which have timed out. */
2085 assert (nused > 0);
2087 /* We make the timeout length depend on the number of file
2088 descriptors currently used. */
2089 #define ACCEPT_TIMEOUT \
2090 (MAX_ACCEPT_TIMEOUT \
2091 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
2092 time_t laststart = now - ACCEPT_TIMEOUT;
2094 for (size_t cnt = nused - 1; cnt > 0; --cnt)
2096 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
2098 /* Remove the entry, it timed out. */
2099 (void) close (conns[cnt].fd);
2100 conns[cnt].fd = -1;
2102 if (cnt < firstfree)
2103 firstfree = cnt;
2104 if (cnt == nused - 1)
2106 --nused;
2107 while (conns[nused - 1].fd == -1);
2111 if (restart_p (now))
2112 restart ();
#ifdef HAVE_EPOLL
/* Connection handling loop based on epoll, using the epoll descriptor
   EFD.  The function only returns if one of the permanent descriptors
   (listening socket, inotify, netlink) cannot be registered with EFD;
   once the loop is entered it runs forever.

   Accepted connections are registered with EFD and handed to fd_ready
   as soon as data arrives; connections without a request for more
   than ACCEPT_TIMEOUT seconds are closed.  STARTTIME is indexed by
   descriptor number in this function, a zero entry means the
   descriptor is not a pending connection.

   This function uses the macros MAIN_THREAD_TIMEOUT, ACCEPT_TIMEOUT
   and PATH_MAX as set up in main_loop_poll.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  /* Number of descriptors in use; feeds the timeout computations.  */
  int nused = 1;
  /* Upper bound for the descriptor numbers of pending connections.  */
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

# ifdef HAVE_INOTIFY
  if (inotify_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = inotify_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
        /* We cannot use epoll.  */
        return;
      nused = 2;
    }
# endif

# ifdef HAVE_NETLINK
  if (nl_status_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = nl_status_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, nl_status_fd, &ev) == -1)
        /* We cannot use epoll.  */
        return;
    }
# endif

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd;

# ifndef __ASSUME_ACCEPT4
            fd = -1;
            if (have_accept4 >= 0)
# endif
              {
                fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
                                                  SOCK_NONBLOCK));
# ifndef __ASSUME_ACCEPT4
                if (have_accept4 == 0)
                  have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
# endif
              }
# ifndef __ASSUME_ACCEPT4
            if (have_accept4 < 0)
              fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
# endif

            /* Use the descriptor if we have not reached the limit.  */
            if (fd >= 0)
              {
                /* Try to add the new descriptor.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
# ifdef HAVE_INOTIFY
        else if (revs[cnt].data.fd == inotify_fd)
          {
            bool to_clear[lastdb] = { false, };
            union
            {
              struct inotify_event i;
              char buf[sizeof (struct inotify_event) + PATH_MAX];
            } inev;

            while (1)
              {
                ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
                                                       sizeof (inev)));
                if (nb < (ssize_t) sizeof (struct inotify_event))
                  {
                    if (__builtin_expect (nb == -1 && errno != EAGAIN, 0))
                      {
                        /* Something went wrong when reading the inotify
                           data.  Better disable inotify.  */
                        dbg_log (_("disabled inotify after read error %d"),
                                 errno);
                        (void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd,
                                          NULL);
                        close (inotify_fd);
                        inotify_fd = -1;
                      }

                    break;
                  }

                /* One read can deliver several events back to back.
                   Look at each of them; the header is copied out so
                   that no alignment assumption is made about the
                   position inside the buffer.  */
                size_t off = 0;
                while (off + sizeof (struct inotify_event) <= (size_t) nb)
                  {
                    struct inotify_event one;
                    memcpy (&one, inev.buf + off, sizeof (one));
                    off += sizeof (one) + one.len;

                    /* Check which of the files changed.  */
                    for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
                      {
                        struct traced_file *finfo = dbs[dbcnt].traced_files;

                        while (finfo != NULL
                               && finfo->inotify_descr != one.wd)
                          finfo = finfo->next;

                        if (finfo != NULL)
                          {
                            to_clear[dbcnt] = true;
                            if (finfo->call_res_init)
                              res_init ();
                          }
                      }
                  }
              }

            /* Actually perform the cache clearing.  */
            for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
              if (to_clear[dbcnt])
                {
                  pthread_mutex_lock (&dbs[dbcnt].prune_lock);
                  dbs[dbcnt].clear_cache = 1;
                  pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
                  pthread_cond_signal (&dbs[dbcnt].prune_cond);
                }
          }
# endif
# ifdef HAVE_NETLINK
        else if (revs[cnt].data.fd == nl_status_fd)
          {
            char buf[4096];
            /* Read all the data.  We do not interpret it here.  */
            while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
                                             sizeof (buf))) != -1)
              ;

            __bump_nl_timestamp ();
          }
# endif
        else
          {
            /* Remove the descriptor from the epoll descriptor.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      assert (starttime[sock] == 0);
      assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
      assert (nl_status_fd == -1 || starttime[nl_status_fd] == 0);
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            /* The connection is gone, so it must not be counted any
               longer.  Otherwise NUSED only ever grows and distorts
               the timeouts derived from it.  */
            --nused;
            if (cnt == highest)
              --highest;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
2329 /* Start all the threads we want. The initial process is thread no. 1. */
2330 void
2331 start_threads (void)
2333 /* Initialize the conditional variable we will use. The only
2334 non-standard attribute we might use is the clock selection. */
2335 pthread_condattr_t condattr;
2336 pthread_condattr_init (&condattr);
2338 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2339 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2340 /* Determine whether the monotonous clock is available. */
2341 struct timespec dummy;
2342 # if _POSIX_MONOTONIC_CLOCK == 0
2343 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
2344 # endif
2345 # if _POSIX_CLOCK_SELECTION == 0
2346 if (sysconf (_SC_CLOCK_SELECTION) > 0)
2347 # endif
2348 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
2349 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
2350 timeout_clock = CLOCK_MONOTONIC;
2351 #endif
2353 /* Create the attribute for the threads. They are all created
2354 detached. */
2355 pthread_attr_init (&attr);
2356 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
2357 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2358 pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
2360 /* We allow less than LASTDB threads only for debugging. */
2361 if (debug_level == 0)
2362 nthreads = MAX (nthreads, lastdb);
2364 /* Create the threads which prune the databases. */
2365 // XXX Ideally this work would be done by some of the worker threads.
2366 // XXX But this is problematic since we would need to be able to wake
2367 // XXX them up explicitly as well as part of the group handling the
2368 // XXX ready-list. This requires an operation where we can wait on
2369 // XXX two conditional variables at the same time. This operation
2370 // XXX does not exist (yet).
2371 for (long int i = 0; i < lastdb; ++i)
2373 /* Initialize the conditional variable. */
2374 if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
2376 dbg_log (_("could not initialize conditional variable"));
2377 exit (1);
2380 pthread_t th;
2381 if (dbs[i].enabled
2382 && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
2384 dbg_log (_("could not start clean-up thread; terminating"));
2385 exit (1);
2389 pthread_condattr_destroy (&condattr);
2391 for (long int i = 0; i < nthreads; ++i)
2393 pthread_t th;
2394 if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
2396 if (i == 0)
2398 dbg_log (_("could not start any worker thread; terminating"));
2399 exit (1);
2402 break;
2406 /* Determine how much room for descriptors we should initially
2407 allocate. This might need to change later if we cap the number
2408 with MAXCONN. */
2409 const long int nfds = sysconf (_SC_OPEN_MAX);
2410 #define MINCONN 32
2411 #define MAXCONN 16384
2412 if (nfds == -1 || nfds > MAXCONN)
2413 nconns = MAXCONN;
2414 else if (nfds < MINCONN)
2415 nconns = MINCONN;
2416 else
2417 nconns = nfds;
2419 /* We need memory to pass descriptors on to the worker threads. */
2420 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
2421 /* Array to keep track when connection was accepted. */
2422 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
2424 /* In the main thread we execute the loop which handles incoming
2425 connections. */
2426 #ifdef HAVE_EPOLL
2427 int efd = epoll_create (100);
2428 if (efd != -1)
2430 main_loop_epoll (efd);
2431 close (efd);
2433 #endif
2435 main_loop_poll ();
2439 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2440 this function is called, we are not listening on the nscd socket yet so
2441 we can just use the ordinary lookup functions without causing a lockup */
2442 static void
2443 begin_drop_privileges (void)
2445 struct passwd *pwd = getpwnam (server_user);
2447 if (pwd == NULL)
2449 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2450 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
2451 server_user);
2454 server_uid = pwd->pw_uid;
2455 server_gid = pwd->pw_gid;
2457 /* Save the old UID/GID if we have to change back. */
2458 if (paranoia)
2460 old_uid = getuid ();
2461 old_gid = getgid ();
2464 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
2466 /* This really must never happen. */
2467 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2468 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
2471 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
2473 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
2474 == -1)
2476 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2477 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
2482 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2483 run nscd as the user specified in the configuration file. */
2484 static void
2485 finish_drop_privileges (void)
2487 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2488 /* We need to preserve the capabilities to connect to the audit daemon. */
2489 cap_t new_caps = preserve_capabilities ();
2490 #endif
2492 if (setgroups (server_ngroups, server_groups) == -1)
2494 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2495 error (EXIT_FAILURE, errno, _("setgroups failed"));
2498 int res;
2499 if (paranoia)
2500 res = setresgid (server_gid, server_gid, old_gid);
2501 else
2502 res = setgid (server_gid);
2503 if (res == -1)
2505 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2506 perror ("setgid");
2507 exit (4);
2510 if (paranoia)
2511 res = setresuid (server_uid, server_uid, old_uid);
2512 else
2513 res = setuid (server_uid);
2514 if (res == -1)
2516 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2517 perror ("setuid");
2518 exit (4);
2521 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2522 /* Remove the temporary capabilities. */
2523 install_real_capabilities (new_caps);
2524 #endif