* nscd/connections.c (restart): If we want to switch back to the
[glibc.git] / nscd / connections.c
blob0de79452350d85cf659e01fbf365e462e9b4e76d
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2003, 2004, 2005, 2006 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License version 2 as
8 published by the Free Software Foundation.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
19 #include <alloca.h>
20 #include <assert.h>
21 #include <atomic.h>
22 #include <error.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <grp.h>
26 #include <libintl.h>
27 #include <pthread.h>
28 #include <pwd.h>
29 #include <resolv.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <unistd.h>
33 #include <arpa/inet.h>
34 #ifdef HAVE_EPOLL
35 # include <sys/epoll.h>
36 #endif
37 #include <sys/mman.h>
38 #include <sys/param.h>
39 #include <sys/poll.h>
40 #ifdef HAVE_SENDFILE
41 # include <sys/sendfile.h>
42 #endif
43 #include <sys/socket.h>
44 #include <sys/stat.h>
45 #include <sys/un.h>
47 #include "nscd.h"
48 #include "dbg_log.h"
49 #include "selinux.h"
50 #ifdef HAVE_SENDFILE
51 # include <kernel-features.h>
52 #endif
/* Allocation wrappers with built-in error checking; the definitions
   live outside this file.  */
extern void *xmalloc (size_t n);
extern void *xcalloc (size_t n, size_t s);
extern void *xrealloc (void *o, size_t n);
/* State used to run nscd as an unprivileged user.  SERVER_USER being
   non-NULL is what makes nscd_init call the privilege-dropping helpers;
   STAT_UID is additionally allowed to issue GETSTAT requests.  */
const char *server_user;
static uid_t server_uid;
static gid_t server_gid;
const char *stat_user;
uid_t stat_uid;
static gid_t *server_groups;
#ifndef NGROUPS
# define NGROUPS 32
#endif
static int server_ngroups;

static pthread_attr_t attr;

/* Defined further down in this file.  */
static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
77 /* Map request type to a string. */
78 const char *serv2str[LASTREQ] =
80 [GETPWBYNAME] = "GETPWBYNAME",
81 [GETPWBYUID] = "GETPWBYUID",
82 [GETGRBYNAME] = "GETGRBYNAME",
83 [GETGRBYGID] = "GETGRBYGID",
84 [GETHOSTBYNAME] = "GETHOSTBYNAME",
85 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
86 [GETHOSTBYADDR] = "GETHOSTBYADDR",
87 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
88 [SHUTDOWN] = "SHUTDOWN",
89 [GETSTAT] = "GETSTAT",
90 [INVALIDATE] = "INVALIDATE",
91 [GETFDPW] = "GETFDPW",
92 [GETFDGR] = "GETFDGR",
93 [GETFDHST] = "GETFDHST",
94 [GETAI] = "GETAI",
95 [INITGROUPS] = "INITGROUPS"
98 /* The control data structures for the services. */
99 struct database_dyn dbs[lastdb] =
101 [pwddb] = {
102 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
103 .enabled = 0,
104 .check_file = 1,
105 .persistent = 0,
106 .propagate = 1,
107 .shared = 0,
108 .max_db_size = DEFAULT_MAX_DB_SIZE,
109 .filename = "/etc/passwd",
110 .db_filename = _PATH_NSCD_PASSWD_DB,
111 .disabled_iov = &pwd_iov_disabled,
112 .postimeout = 3600,
113 .negtimeout = 20,
114 .wr_fd = -1,
115 .ro_fd = -1,
116 .mmap_used = false
118 [grpdb] = {
119 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
120 .enabled = 0,
121 .check_file = 1,
122 .persistent = 0,
123 .propagate = 1,
124 .shared = 0,
125 .max_db_size = DEFAULT_MAX_DB_SIZE,
126 .filename = "/etc/group",
127 .db_filename = _PATH_NSCD_GROUP_DB,
128 .disabled_iov = &grp_iov_disabled,
129 .postimeout = 3600,
130 .negtimeout = 60,
131 .wr_fd = -1,
132 .ro_fd = -1,
133 .mmap_used = false
135 [hstdb] = {
136 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
137 .enabled = 0,
138 .check_file = 1,
139 .persistent = 0,
140 .propagate = 0, /* Not used. */
141 .shared = 0,
142 .max_db_size = DEFAULT_MAX_DB_SIZE,
143 .filename = "/etc/hosts",
144 .db_filename = _PATH_NSCD_HOSTS_DB,
145 .disabled_iov = &hst_iov_disabled,
146 .postimeout = 3600,
147 .negtimeout = 20,
148 .wr_fd = -1,
149 .ro_fd = -1,
150 .mmap_used = false
155 /* Mapping of request type to database. */
156 static struct database_dyn *const serv2db[LASTREQ] =
158 [GETPWBYNAME] = &dbs[pwddb],
159 [GETPWBYUID] = &dbs[pwddb],
160 [GETGRBYNAME] = &dbs[grpdb],
161 [GETGRBYGID] = &dbs[grpdb],
162 [GETHOSTBYNAME] = &dbs[hstdb],
163 [GETHOSTBYNAMEv6] = &dbs[hstdb],
164 [GETHOSTBYADDR] = &dbs[hstdb],
165 [GETHOSTBYADDRv6] = &dbs[hstdb],
166 [GETFDPW] = &dbs[pwddb],
167 [GETFDGR] = &dbs[grpdb],
168 [GETFDHST] = &dbs[hstdb],
169 [GETAI] = &dbs[hstdb],
170 [INITGROUPS] = &dbs[grpdb]
/* Seconds between two consecutive cache pruning runs.  */
#define CACHE_PRUNE_INTERVAL	15


/* Initial number of worker threads; -1 means "not configured", in which
   case nscd_init picks a default.  */
int nthreads = -1;
/* Upper bound on the number of worker threads.  */
int max_nthreads = 32;

/* Listening socket for incoming connections.  */
static int sock;

/* How many times clients had to wait.  */
unsigned long int client_queued;
/* Send all LEN bytes of BUF on socket FD, restarting after interrupts and
   continuing after short sends.  Returns the negative result of send() if
   it fails, otherwise the number of bytes actually sent (less than LEN
   only if send() reported 0).  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cursor = buf;
  size_t left = len;

  for (;;)
    {
      ssize_t sent = TEMP_FAILURE_RETRY (send (fd, cursor, left,
                                               MSG_NOSIGNAL));
      if (sent < 0)
        return sent;
      if (sent == 0)
        break;

      cursor += sent;
      left -= sent;
      if (left == 0)
        break;
    }

  return len - left;
}
#ifdef HAVE_SENDFILE
/* Copy LEN bytes starting at offset OFF of FROMFD to TOFD with sendfile(),
   restarting after interrupts and continuing after short transfers.
   Returns the negative result of sendfile() if it fails, otherwise the
   number of bytes transferred.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t left = len;

  for (;;)
    {
      /* sendfile() advances OFF itself.  */
      ssize_t moved = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off,
                                                    left));
      if (moved < 0)
        return moved;
      if (moved == 0)
        break;

      left -= moved;
      if (left <= 0)
        break;
    }

  return len - left;
}
#endif
/* Markers stored in the per-byte use map while a persistent database is
   verified.  The low bits name the kind of object occupying a byte, the
   high bits flag the first/last byte of that object.  */
enum usekey
  {
    /* Byte not claimed by any object yet.  */
    use_not = 0,

    /* The following three are not really used, they are symbolic
       constants.  */
    use_first = 16,
    use_begin = 32,
    use_end = 64,

    /* Hash entry.  */
    use_he = 1,
    use_he_begin = use_he | use_begin,
    use_he_end = use_he | use_end,

#if SEPARATE_KEY
    /* Key stored apart from its data record.  */
    use_key = 2,
    use_key_begin = use_key | use_begin,
    use_key_end = use_key | use_end,
    use_key_first = use_key_begin | use_first,
#endif

    /* Data record.  */
    use_data = 3,
    use_data_begin = use_data | use_begin,
    use_data_end = use_data | use_end,
    use_data_first = use_data_begin | use_first
  };
252 static int
253 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
254 enum usekey use, ref_t start, size_t len)
256 assert (len >= 2);
258 if (start > first_free || start + len > first_free
259 || (start & BLOCK_ALIGN_M1))
260 return 0;
262 if (usemap[start] == use_not)
264 /* Add the start marker. */
265 usemap[start] = use | use_begin;
266 use &= ~use_first;
268 while (--len > 0)
269 if (usemap[++start] != use_not)
270 return 0;
271 else
272 usemap[start] = use;
274 /* Add the end marker. */
275 usemap[start] = use | use_end;
277 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
279 /* Hash entries can't be shared. */
280 if (use == use_he)
281 return 0;
283 usemap[start] |= (use & use_first);
284 use &= ~use_first;
286 while (--len > 1)
287 if (usemap[++start] != use)
288 return 0;
290 if (usemap[++start] != (use | use_end))
291 return 0;
293 else
294 /* Points to a wrong object or somewhere in the middle. */
295 return 0;
297 return 1;
301 /* Verify data in persistent database. */
302 static int
303 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
305 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb);
307 time_t now = time (NULL);
309 struct database_pers_head *head = mem;
310 struct database_pers_head head_copy = *head;
312 /* Check that the header that was read matches the head in the database. */
313 if (readhead != NULL && memcmp (head, readhead, sizeof (*head)) != 0)
314 return 0;
316 /* First some easy tests: make sure the database header is sane. */
317 if (head->version != DB_VERSION
318 || head->header_size != sizeof (*head)
319 /* We allow a timestamp to be one hour ahead of the current time.
320 This should cover daylight saving time changes. */
321 || head->timestamp > now + 60 * 60 + 60
322 || (head->gc_cycle & 1)
323 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
324 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
325 || head->first_free < 0
326 || head->first_free > head->data_size
327 || (head->first_free & BLOCK_ALIGN_M1) != 0
328 || head->maxnentries < 0
329 || head->maxnsearched < 0)
330 return 0;
332 uint8_t *usemap = calloc (head->first_free, 1);
333 if (usemap == NULL)
334 return 0;
336 const char *data = (char *) &head->array[roundup (head->module,
337 ALIGN / sizeof (ref_t))];
339 nscd_ssize_t he_cnt = 0;
340 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
342 ref_t work = head->array[cnt];
344 while (work != ENDREF)
346 if (! check_use (data, head->first_free, usemap, use_he, work,
347 sizeof (struct hashentry)))
348 goto fail;
350 /* Now we know we can dereference the record. */
351 struct hashentry *here = (struct hashentry *) (data + work);
353 ++he_cnt;
355 /* Make sure the record is for this type of service. */
356 if (here->type >= LASTREQ
357 || serv2db[here->type] != &dbs[dbnr])
358 goto fail;
360 /* Validate boolean field value. */
361 if (here->first != false && here->first != true)
362 goto fail;
364 if (here->len < 0)
365 goto fail;
367 /* Now the data. */
368 if (here->packet < 0
369 || here->packet > head->first_free
370 || here->packet + sizeof (struct datahead) > head->first_free)
371 goto fail;
373 struct datahead *dh = (struct datahead *) (data + here->packet);
375 if (! check_use (data, head->first_free, usemap,
376 use_data | (here->first ? use_first : 0),
377 here->packet, dh->allocsize))
378 goto fail;
380 if (dh->allocsize < sizeof (struct datahead)
381 || dh->recsize > dh->allocsize
382 || (dh->notfound != false && dh->notfound != true)
383 || (dh->usable != false && dh->usable != true))
384 goto fail;
386 if (here->key < here->packet + sizeof (struct datahead)
387 || here->key > here->packet + dh->allocsize
388 || here->key + here->len > here->packet + dh->allocsize)
390 #if SEPARATE_KEY
391 /* If keys can appear outside of data, this should be done
392 instead. But gc doesn't mark the data in that case. */
393 if (! check_use (data, head->first_free, usemap,
394 use_key | (here->first ? use_first : 0),
395 here->key, here->len))
396 #endif
397 goto fail;
400 work = here->next;
404 if (he_cnt != head->nentries)
405 goto fail;
407 /* See if all data and keys had at least one reference from
408 he->first == true hashentry. */
409 for (ref_t idx = 0; idx < head->first_free; ++idx)
411 #if SEPARATE_KEY
412 if (usemap[idx] == use_key_begin)
413 goto fail;
414 #endif
415 if (usemap[idx] == use_data_begin)
416 goto fail;
419 /* Finally, make sure the database hasn't changed since the first test. */
420 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
421 goto fail;
423 free (usemap);
424 return 1;
426 fail:
427 free (usemap);
428 return 0;
432 /* Initialize database information structures. */
433 void
434 nscd_init (void)
436 /* Look up unprivileged uid/gid/groups before we start listening on the
437 socket */
438 if (server_user != NULL)
439 begin_drop_privileges ();
441 if (nthreads == -1)
442 /* No configuration for this value, assume a default. */
443 nthreads = 2 * lastdb;
445 for (size_t cnt = 0; cnt < lastdb; ++cnt)
446 if (dbs[cnt].enabled)
448 pthread_rwlock_init (&dbs[cnt].lock, NULL);
449 pthread_mutex_init (&dbs[cnt].memlock, NULL);
451 if (dbs[cnt].persistent)
453 /* Try to open the appropriate file on disk. */
454 int fd = open (dbs[cnt].db_filename, O_RDWR);
455 if (fd != -1)
457 struct stat64 st;
458 void *mem;
459 size_t total;
460 struct database_pers_head head;
461 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
462 sizeof (head)));
463 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
465 fail_db:
466 dbg_log (_("invalid persistent database file \"%s\": %s"),
467 dbs[cnt].db_filename, strerror (errno));
468 unlink (dbs[cnt].db_filename);
470 else if (head.module == 0 && head.data_size == 0)
472 /* The file has been created, but the head has not been
473 initialized yet. Remove the old file. */
474 unlink (dbs[cnt].db_filename);
476 else if (head.header_size != (int) sizeof (head))
478 dbg_log (_("invalid persistent database file \"%s\": %s"),
479 dbs[cnt].db_filename,
480 _("header size does not match"));
481 unlink (dbs[cnt].db_filename);
483 else if ((total = (sizeof (head)
484 + roundup (head.module * sizeof (ref_t),
485 ALIGN)
486 + head.data_size))
487 > st.st_size
488 || total < sizeof (head))
490 dbg_log (_("invalid persistent database file \"%s\": %s"),
491 dbs[cnt].db_filename,
492 _("file size does not match"));
493 unlink (dbs[cnt].db_filename);
495 /* Note we map with the maximum size allowed for the
496 database. This is likely much larger than the
497 actual file size. This is OK on most OSes since
498 extensions of the underlying file will
499 automatically translate more pages available for
500 memory access. */
501 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
502 PROT_READ | PROT_WRITE,
503 MAP_SHARED, fd, 0))
504 == MAP_FAILED)
505 goto fail_db;
506 else if (!verify_persistent_db (mem, &head, cnt))
508 munmap (mem, total);
509 dbg_log (_("invalid persistent database file \"%s\": %s"),
510 dbs[cnt].db_filename,
511 _("verification failed"));
512 unlink (dbs[cnt].db_filename);
514 else
516 /* Success. We have the database. */
517 dbs[cnt].head = mem;
518 dbs[cnt].memsize = total;
519 dbs[cnt].data = (char *)
520 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
521 ALIGN / sizeof (ref_t))];
522 dbs[cnt].mmap_used = true;
524 if (dbs[cnt].suggested_module > head.module)
525 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
526 dbnames[cnt]);
528 dbs[cnt].wr_fd = fd;
529 fd = -1;
530 /* We also need a read-only descriptor. */
531 if (dbs[cnt].shared)
533 dbs[cnt].ro_fd = open (dbs[cnt].db_filename, O_RDONLY);
534 if (dbs[cnt].ro_fd == -1)
535 dbg_log (_("\
536 cannot create read-only descriptor for \"%s\"; no mmap"),
537 dbs[cnt].db_filename);
540 // XXX Shall we test whether the descriptors actually
541 // XXX point to the same file?
544 /* Close the file descriptors in case something went
545 wrong in which case the variable have not been
546 assigned -1. */
547 if (fd != -1)
548 close (fd);
552 if (dbs[cnt].head == NULL)
554 /* No database loaded. Allocate the data structure,
555 possibly on disk. */
556 struct database_pers_head head;
557 size_t total = (sizeof (head)
558 + roundup (dbs[cnt].suggested_module
559 * sizeof (ref_t), ALIGN)
560 + (dbs[cnt].suggested_module
561 * DEFAULT_DATASIZE_PER_BUCKET));
563 /* Try to create the database. If we do not need a
564 persistent database create a temporary file. */
565 int fd;
566 int ro_fd = -1;
567 if (dbs[cnt].persistent)
569 fd = open (dbs[cnt].db_filename,
570 O_RDWR | O_CREAT | O_EXCL | O_TRUNC,
571 S_IRUSR | S_IWUSR);
572 if (fd != -1 && dbs[cnt].shared)
573 ro_fd = open (dbs[cnt].db_filename, O_RDONLY);
575 else
577 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
578 fd = mkstemp (fname);
580 /* We do not need the file name anymore after we
581 opened another file descriptor in read-only mode. */
582 if (fd != -1)
584 if (dbs[cnt].shared)
585 ro_fd = open (fname, O_RDONLY);
587 unlink (fname);
591 if (fd == -1)
593 if (errno == EEXIST)
595 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
596 dbnames[cnt], dbs[cnt].db_filename);
597 // XXX Correct way to terminate?
598 exit (1);
601 if (dbs[cnt].persistent)
602 dbg_log (_("cannot create %s; no persistent database used"),
603 dbs[cnt].db_filename);
604 else
605 dbg_log (_("cannot create %s; no sharing possible"),
606 dbs[cnt].db_filename);
608 dbs[cnt].persistent = 0;
609 // XXX remember: no mmap
611 else
613 /* Tell the user if we could not create the read-only
614 descriptor. */
615 if (ro_fd == -1 && dbs[cnt].shared)
616 dbg_log (_("\
617 cannot create read-only descriptor for \"%s\"; no mmap"),
618 dbs[cnt].db_filename);
620 /* Before we create the header, initialiye the hash
621 table. So that if we get interrupted if writing
622 the header we can recognize a partially initialized
623 database. */
624 size_t ps = sysconf (_SC_PAGESIZE);
625 char tmpbuf[ps];
626 assert (~ENDREF == 0);
627 memset (tmpbuf, '\xff', ps);
629 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
630 off_t offset = sizeof (head);
632 size_t towrite;
633 if (offset % ps != 0)
635 towrite = MIN (remaining, ps - (offset % ps));
636 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
637 goto write_fail;
638 offset += towrite;
639 remaining -= towrite;
642 while (remaining > ps)
644 if (pwrite (fd, tmpbuf, ps, offset) == -1)
645 goto write_fail;
646 offset += ps;
647 remaining -= ps;
650 if (remaining > 0
651 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
652 goto write_fail;
654 /* Create the header of the file. */
655 struct database_pers_head head =
657 .version = DB_VERSION,
658 .header_size = sizeof (head),
659 .module = dbs[cnt].suggested_module,
660 .data_size = (dbs[cnt].suggested_module
661 * DEFAULT_DATASIZE_PER_BUCKET),
662 .first_free = 0
664 void *mem;
666 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
667 != sizeof (head))
668 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
669 != 0)
670 || (mem = mmap (NULL, dbs[cnt].max_db_size,
671 PROT_READ | PROT_WRITE,
672 MAP_SHARED, fd, 0)) == MAP_FAILED)
674 write_fail:
675 unlink (dbs[cnt].db_filename);
676 dbg_log (_("cannot write to database file %s: %s"),
677 dbs[cnt].db_filename, strerror (errno));
678 dbs[cnt].persistent = 0;
680 else
682 /* Success. */
683 dbs[cnt].head = mem;
684 dbs[cnt].data = (char *)
685 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
686 ALIGN / sizeof (ref_t))];
687 dbs[cnt].memsize = total;
688 dbs[cnt].mmap_used = true;
690 /* Remember the descriptors. */
691 dbs[cnt].wr_fd = fd;
692 dbs[cnt].ro_fd = ro_fd;
693 fd = -1;
694 ro_fd = -1;
697 if (fd != -1)
698 close (fd);
699 if (ro_fd != -1)
700 close (ro_fd);
704 if (paranoia
705 && ((dbs[cnt].wr_fd != -1
706 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
707 || (dbs[cnt].ro_fd != -1
708 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
710 dbg_log (_("\
711 cannot set socket to close on exec: %s; disabling paranoia mode"),
712 strerror (errno));
713 paranoia = 0;
716 if (dbs[cnt].head == NULL)
718 /* We do not use the persistent database. Just
719 create an in-memory data structure. */
720 assert (! dbs[cnt].persistent);
722 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
723 + (dbs[cnt].suggested_module
724 * sizeof (ref_t)));
725 memset (dbs[cnt].head, '\0', sizeof (dbs[cnt].head));
726 assert (~ENDREF == 0);
727 memset (dbs[cnt].head->array, '\xff',
728 dbs[cnt].suggested_module * sizeof (ref_t));
729 dbs[cnt].head->module = dbs[cnt].suggested_module;
730 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
731 * dbs[cnt].head->module);
732 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
733 dbs[cnt].head->first_free = 0;
735 dbs[cnt].shared = 0;
736 assert (dbs[cnt].ro_fd == -1);
739 if (dbs[cnt].check_file)
741 /* We need the modification date of the file. */
742 struct stat64 st;
744 if (stat64 (dbs[cnt].filename, &st) < 0)
746 /* We cannot stat() the file, disable file checking. */
747 dbg_log (_("cannot stat() file `%s': %s"),
748 dbs[cnt].filename, strerror (errno));
749 dbs[cnt].check_file = 0;
751 else
752 dbs[cnt].file_mtime = st.st_mtime;
756 /* Create the socket. */
757 sock = socket (AF_UNIX, SOCK_STREAM, 0);
758 if (sock < 0)
760 dbg_log (_("cannot open socket: %s"), strerror (errno));
761 exit (errno == EACCES ? 4 : 1);
763 /* Bind a name to the socket. */
764 struct sockaddr_un sock_addr;
765 sock_addr.sun_family = AF_UNIX;
766 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
767 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
769 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
770 exit (errno == EACCES ? 4 : 1);
773 /* We don't want to get stuck on accept. */
774 int fl = fcntl (sock, F_GETFL);
775 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
777 dbg_log (_("cannot change socket to nonblocking mode: %s"),
778 strerror (errno));
779 exit (1);
782 /* The descriptor needs to be closed on exec. */
783 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
785 dbg_log (_("cannot set socket to close on exec: %s"),
786 strerror (errno));
787 exit (1);
790 /* Set permissions for the socket. */
791 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
793 /* Set the socket up to accept connections. */
794 if (listen (sock, SOMAXCONN) < 0)
796 dbg_log (_("cannot enable socket to accept connections: %s"),
797 strerror (errno));
798 exit (1);
801 /* Change to unprivileged uid/gid/groups if specifed in config file */
802 if (server_user != NULL)
803 finish_drop_privileges ();
807 /* Close the connections. */
808 void
809 close_sockets (void)
811 close (sock);
815 static void
816 invalidate_cache (char *key)
818 dbtype number;
820 if (strcmp (key, "passwd") == 0)
821 number = pwddb;
822 else if (strcmp (key, "group") == 0)
823 number = grpdb;
824 else if (__builtin_expect (strcmp (key, "hosts"), 0) == 0)
826 number = hstdb;
828 /* Re-initialize the resolver. resolv.conf might have changed. */
829 res_init ();
831 else
832 return;
834 if (dbs[number].enabled)
835 prune_cache (&dbs[number], LONG_MAX);
839 #ifdef SCM_RIGHTS
840 static void
841 send_ro_fd (struct database_dyn *db, char *key, int fd)
843 /* If we do not have an read-only file descriptor do nothing. */
844 if (db->ro_fd == -1)
845 return;
847 /* We need to send some data along with the descriptor. */
848 struct iovec iov[1];
849 iov[0].iov_base = key;
850 iov[0].iov_len = strlen (key) + 1;
852 /* Prepare the control message to transfer the descriptor. */
853 union
855 struct cmsghdr hdr;
856 char bytes[CMSG_SPACE (sizeof (int))];
857 } buf;
858 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 1,
859 .msg_control = buf.bytes,
860 .msg_controllen = sizeof (buf) };
861 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
863 cmsg->cmsg_level = SOL_SOCKET;
864 cmsg->cmsg_type = SCM_RIGHTS;
865 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
867 *(int *) CMSG_DATA (cmsg) = db->ro_fd;
869 msg.msg_controllen = cmsg->cmsg_len;
871 /* Send the control message. We repeat when we are interrupted but
872 everything else is ignored. */
873 #ifndef MSG_NOSIGNAL
874 # define MSG_NOSIGNAL 0
875 #endif
876 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
878 if (__builtin_expect (debug_level > 0, 0))
879 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
881 #endif /* SCM_RIGHTS */
884 /* Handle new request. */
885 static void
886 handle_request (int fd, request_header *req, void *key, uid_t uid)
888 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
890 if (debug_level > 0)
891 dbg_log (_("\
892 cannot handle old request version %d; current version is %d"),
893 req->version, NSCD_VERSION);
894 return;
897 /* Make the SELinux check before we go on to the standard checks. We
898 need to verify that the request type is valid, since it has not
899 yet been checked at this point. */
900 if (selinux_enabled
901 && __builtin_expect (req->type, GETPWBYNAME) >= GETPWBYNAME
902 && __builtin_expect (req->type, LASTREQ) < LASTREQ
903 && nscd_request_avc_has_perm (fd, req->type) != 0)
904 return;
906 struct database_dyn *db = serv2db[req->type];
908 // XXX Clean up so that each new command need not introduce a
909 // XXX new conditional.
910 if ((__builtin_expect (req->type, GETPWBYNAME) >= GETPWBYNAME
911 && __builtin_expect (req->type, LASTDBREQ) <= LASTDBREQ)
912 || req->type == GETAI || req->type == INITGROUPS)
914 if (__builtin_expect (debug_level, 0) > 0)
916 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
918 char buf[INET6_ADDRSTRLEN];
920 dbg_log ("\t%s (%s)", serv2str[req->type],
921 inet_ntop (req->type == GETHOSTBYADDR
922 ? AF_INET : AF_INET6,
923 key, buf, sizeof (buf)));
925 else
926 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
929 /* Is this service enabled? */
930 if (!db->enabled)
932 /* No, sent the prepared record. */
933 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
934 db->disabled_iov->iov_len,
935 MSG_NOSIGNAL))
936 != (ssize_t) db->disabled_iov->iov_len
937 && __builtin_expect (debug_level, 0) > 0)
939 /* We have problems sending the result. */
940 char buf[256];
941 dbg_log (_("cannot write result: %s"),
942 strerror_r (errno, buf, sizeof (buf)));
945 return;
948 /* Be sure we can read the data. */
949 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
951 ++db->head->rdlockdelayed;
952 pthread_rwlock_rdlock (&db->lock);
955 /* See whether we can handle it from the cache. */
956 struct datahead *cached;
957 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
958 db, uid);
959 if (cached != NULL)
961 /* Hurray it's in the cache. */
962 ssize_t nwritten;
964 #ifdef HAVE_SENDFILE
965 if (db->mmap_used || !cached->notfound)
967 assert (db->wr_fd != -1);
968 assert ((char *) cached->data > (char *) db->data);
969 assert ((char *) cached->data - (char *) db->head
970 + cached->recsize
971 <= (sizeof (struct database_pers_head)
972 + db->head->module * sizeof (ref_t)
973 + db->head->data_size));
974 nwritten = sendfileall (fd, db->wr_fd,
975 (char *) cached->data
976 - (char *) db->head, cached->recsize);
977 # ifndef __ASSUME_SENDFILE
978 if (nwritten == -1 && errno == ENOSYS)
979 goto use_write;
980 # endif
982 else
983 # ifndef __ASSUME_SENDFILE
984 use_write:
985 # endif
986 #endif
987 nwritten = writeall (fd, cached->data, cached->recsize);
989 if (nwritten != cached->recsize
990 && __builtin_expect (debug_level, 0) > 0)
992 /* We have problems sending the result. */
993 char buf[256];
994 dbg_log (_("cannot write result: %s"),
995 strerror_r (errno, buf, sizeof (buf)));
998 pthread_rwlock_unlock (&db->lock);
1000 return;
1003 pthread_rwlock_unlock (&db->lock);
1005 else if (__builtin_expect (debug_level, 0) > 0)
1007 if (req->type == INVALIDATE)
1008 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1009 else
1010 dbg_log ("\t%s", serv2str[req->type]);
1013 /* Handle the request. */
1014 switch (req->type)
1016 case GETPWBYNAME:
1017 addpwbyname (db, fd, req, key, uid);
1018 break;
1020 case GETPWBYUID:
1021 addpwbyuid (db, fd, req, key, uid);
1022 break;
1024 case GETGRBYNAME:
1025 addgrbyname (db, fd, req, key, uid);
1026 break;
1028 case GETGRBYGID:
1029 addgrbygid (db, fd, req, key, uid);
1030 break;
1032 case GETHOSTBYNAME:
1033 addhstbyname (db, fd, req, key, uid);
1034 break;
1036 case GETHOSTBYNAMEv6:
1037 addhstbynamev6 (db, fd, req, key, uid);
1038 break;
1040 case GETHOSTBYADDR:
1041 addhstbyaddr (db, fd, req, key, uid);
1042 break;
1044 case GETHOSTBYADDRv6:
1045 addhstbyaddrv6 (db, fd, req, key, uid);
1046 break;
1048 case GETAI:
1049 addhstai (db, fd, req, key, uid);
1050 break;
1052 case INITGROUPS:
1053 addinitgroups (db, fd, req, key, uid);
1054 break;
1056 case GETSTAT:
1057 case SHUTDOWN:
1058 case INVALIDATE:
1060 /* Get the callers credentials. */
1061 #ifdef SO_PEERCRED
1062 struct ucred caller;
1063 socklen_t optlen = sizeof (caller);
1065 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1067 char buf[256];
1069 dbg_log (_("error getting callers id: %s"),
1070 strerror_r (errno, buf, sizeof (buf)));
1071 break;
1074 uid = caller.uid;
1075 #else
1076 /* Some systems have no SO_PEERCRED implementation. They don't
1077 care about security so we don't as well. */
1078 uid = 0;
1079 #endif
1082 /* Accept shutdown, getstat and invalidate only from root. For
1083 the stat call also allow the user specified in the config file. */
1084 if (req->type == GETSTAT)
1086 if (uid == 0 || uid == stat_uid)
1087 send_stats (fd, dbs);
1089 else if (uid == 0)
1091 if (req->type == INVALIDATE)
1092 invalidate_cache (key);
1093 else
1094 termination_handler (0);
1096 break;
1098 case GETFDPW:
1099 case GETFDGR:
1100 case GETFDHST:
1101 #ifdef SCM_RIGHTS
1102 send_ro_fd (serv2db[req->type], key, fd);
1103 #endif
1104 break;
1106 default:
1107 /* Ignore the command, it's nothing we know. */
1108 break;
1113 /* Restart the process. */
1114 static void
1115 restart (void)
1117 /* First determine the parameters. We do not use the parameters
1118 passed to main() since in case nscd is started by running the
1119 dynamic linker this will not work. Yes, this is not the usual
1120 case but nscd is part of glibc and we occasionally do this. */
1121 size_t buflen = 1024;
1122 char *buf = alloca (buflen);
1123 size_t readlen = 0;
1124 int fd = open ("/proc/self/cmdline", O_RDONLY);
1125 if (fd == -1)
1127 dbg_log (_("\
1128 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1129 strerror (errno));
1131 paranoia = 0;
1132 return;
1135 while (1)
1137 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1138 buflen - readlen));
1139 if (n == -1)
1141 dbg_log (_("\
1142 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1143 strerror (errno));
1145 close (fd);
1146 paranoia = 0;
1147 return;
1150 readlen += n;
1152 if (readlen < buflen)
1153 break;
1155 /* We might have to extend the buffer. */
1156 size_t old_buflen = buflen;
1157 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1158 buf = memmove (newp, buf, old_buflen);
1161 close (fd);
1163 /* Parse the command line. Worst case scenario: every two
1164 characters form one parameter (one character plus NUL). */
1165 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1166 int argc = 0;
1168 char *cp = buf;
1169 while (cp < buf + readlen)
1171 argv[argc++] = cp;
1172 cp = (char *) rawmemchr (cp, '\0') + 1;
1174 argv[argc] = NULL;
1176 /* Second, change back to the old user if we changed it. */
1177 if (server_user != NULL)
1179 if (setresuid (old_uid, old_uid, old_uid) != 0)
1181 dbg_log (_("\
1182 cannot change to old UID: %s; disabling paranoia mode"),
1183 strerror (errno));
1185 paranoia = 0;
1186 return;
1189 if (setresgid (old_gid, old_gid, old_gid) != 0)
1191 dbg_log (_("\
1192 cannot change to old GID: %s; disabling paranoia mode"),
1193 strerror (errno));
1195 setuid (server_uid);
1196 paranoia = 0;
1197 return;
1201 /* Next change back to the old working directory. */
1202 if (chdir (oldcwd) == -1)
1204 dbg_log (_("\
1205 cannot change to old working directory: %s; disabling paranoia mode"),
1206 strerror (errno));
1208 if (server_user != NULL)
1210 setuid (server_uid);
1211 setgid (server_gid);
1213 paranoia = 0;
1214 return;
1217 /* Synchronize memory. */
1218 for (int cnt = 0; cnt < lastdb; ++cnt)
1220 /* Make sure nobody keeps using the database. */
1221 dbs[cnt].head->timestamp = 0;
1223 if (dbs[cnt].persistent)
1224 // XXX async OK?
1225 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1228 /* The preparations are done. */
1229 execv ("/proc/self/exe", argv);
1231 /* If we come here, we will never be able to re-exec. */
1232 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1233 strerror (errno));
1235 if (server_user != NULL)
1237 setuid (server_uid);
1238 setgid (server_gid);
1240 if (chdir ("/") != 0)
1241 dbg_log (_("cannot change current working directory to \"/\": %s"),
1242 strerror (errno));
1243 paranoia = 0;
1247 /* List of file descriptors. */
1248 struct fdlist
1250 int fd;
1251 struct fdlist *next;
1253 /* Memory allocated for the list. */
1254 static struct fdlist *fdlist;
1255 /* List of currently ready-to-read file descriptors. */
1256 static struct fdlist *readylist;
1258 /* Conditional variable and mutex to signal availability of entries in
1259 READYLIST. The condvar is initialized dynamically since we might
1260 use a different clock depending on availability. */
1261 static pthread_cond_t readylist_cond;
1262 static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1264 /* The clock to use with the condvar. */
1265 static clockid_t timeout_clock = CLOCK_REALTIME;
1267 /* Number of threads ready to handle the READYLIST. */
1268 static unsigned long int nready;
1271 /* This is the main loop. It is replicated in different threads but the
1272 `poll' call makes sure only one thread handles an incoming connection. */
1273 static void *
1274 __attribute__ ((__noreturn__))
1275 nscd_run (void *p)
1277 const long int my_number = (long int) p;
1278 const int run_prune = my_number < lastdb && dbs[my_number].enabled;
1279 struct timespec prune_ts;
1280 int to = 0;
1281 char buf[256];
1283 if (run_prune)
1285 setup_thread (&dbs[my_number]);
1287 /* We are running. */
1288 dbs[my_number].head->timestamp = time (NULL);
1290 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1291 /* Should never happen. */
1292 abort ();
1294 /* Compute timeout time. */
1295 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1298 /* Initial locking. */
1299 pthread_mutex_lock (&readylist_lock);
1301 /* One more thread available. */
1302 ++nready;
1304 while (1)
1306 while (readylist == NULL)
1308 if (run_prune)
1310 /* Wait, but not forever. */
1311 to = pthread_cond_timedwait (&readylist_cond, &readylist_lock,
1312 &prune_ts);
1314 /* If we were woken and there is no work to be done,
1315 just start pruning. */
1316 if (readylist == NULL && to == ETIMEDOUT)
1318 --nready;
1319 pthread_mutex_unlock (&readylist_lock);
1320 goto only_prune;
1323 else
1324 /* No need to timeout. */
1325 pthread_cond_wait (&readylist_cond, &readylist_lock);
1328 struct fdlist *it = readylist->next;
1329 if (readylist->next == readylist)
1330 /* Just one entry on the list. */
1331 readylist = NULL;
1332 else
1333 readylist->next = it->next;
1335 /* Extract the information and mark the record ready to be used
1336 again. */
1337 int fd = it->fd;
1338 it->next = NULL;
1340 /* One more thread available. */
1341 --nready;
1343 /* We are done with the list. */
1344 pthread_mutex_unlock (&readylist_lock);
1346 /* We do not want to block on a short read or so. */
1347 int fl = fcntl (fd, F_GETFL);
1348 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1349 goto close_and_out;
1351 /* Now read the request. */
1352 request_header req;
1353 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1354 != sizeof (req), 0))
1356 /* We failed to read data. Note that this also might mean we
1357 failed because we would have blocked. */
1358 if (debug_level > 0)
1359 dbg_log (_("short read while reading request: %s"),
1360 strerror_r (errno, buf, sizeof (buf)));
1361 goto close_and_out;
1364 /* Check whether this is a valid request type. */
1365 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1366 goto close_and_out;
1368 /* Some systems have no SO_PEERCRED implementation. They don't
1369 care about security so we don't as well. */
1370 uid_t uid = -1;
1371 #ifdef SO_PEERCRED
1372 pid_t pid = 0;
1374 if (__builtin_expect (debug_level > 0, 0))
1376 struct ucred caller;
1377 socklen_t optlen = sizeof (caller);
1379 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1380 pid = caller.pid;
1382 #endif
1384 /* It should not be possible to crash the nscd with a silly
1385 request (i.e., a terribly large key). We limit the size to 1kb. */
1386 #define MAXKEYLEN 1024
1387 if (__builtin_expect (req.key_len, 1) < 0
1388 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1390 if (debug_level > 0)
1391 dbg_log (_("key length in request too long: %d"), req.key_len);
1393 else
1395 /* Get the key. */
1396 char keybuf[MAXKEYLEN];
1398 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1399 req.key_len))
1400 != req.key_len, 0))
1402 /* Again, this can also mean we would have blocked. */
1403 if (debug_level > 0)
1404 dbg_log (_("short read while reading request key: %s"),
1405 strerror_r (errno, buf, sizeof (buf)));
1406 goto close_and_out;
1409 if (__builtin_expect (debug_level, 0) > 0)
1411 #ifdef SO_PEERCRED
1412 if (pid != 0)
1413 dbg_log (_("\
1414 handle_request: request received (Version = %d) from PID %ld"),
1415 req.version, (long int) pid);
1416 else
1417 #endif
1418 dbg_log (_("\
1419 handle_request: request received (Version = %d)"), req.version);
1422 /* Phew, we got all the data, now process it. */
1423 handle_request (fd, &req, keybuf, uid);
1426 close_and_out:
1427 /* We are done. */
1428 close (fd);
1430 /* Check whether we should be pruning the cache. */
1431 assert (run_prune || to == 0);
1432 if (to == ETIMEDOUT)
1434 only_prune:
1435 /* The pthread_cond_timedwait() call timed out. It is time
1436 to clean up the cache. */
1437 assert (my_number < lastdb);
1438 prune_cache (&dbs[my_number], time (NULL));
1440 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1441 /* Should never happen. */
1442 abort ();
1444 /* Compute next timeout time. */
1445 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1447 /* In case the list is emtpy we do not want to run the prune
1448 code right away again. */
1449 to = 0;
1452 /* Re-locking. */
1453 pthread_mutex_lock (&readylist_lock);
1455 /* One more thread available. */
1456 ++nready;
1461 static unsigned int nconns;
1463 static void
1464 fd_ready (int fd)
1466 pthread_mutex_lock (&readylist_lock);
1468 /* Find an empty entry in FDLIST. */
1469 size_t inner;
1470 for (inner = 0; inner < nconns; ++inner)
1471 if (fdlist[inner].next == NULL)
1472 break;
1473 assert (inner < nconns);
1475 fdlist[inner].fd = fd;
1477 if (readylist == NULL)
1478 readylist = fdlist[inner].next = &fdlist[inner];
1479 else
1481 fdlist[inner].next = readylist->next;
1482 readylist = readylist->next = &fdlist[inner];
1485 bool do_signal = true;
1486 if (__builtin_expect (nready == 0, 0))
1488 ++client_queued;
1489 do_signal = false;
1491 /* Try to start another thread to help out. */
1492 pthread_t th;
1493 if (nthreads < max_nthreads
1494 && pthread_create (&th, &attr, nscd_run,
1495 (void *) (long int) nthreads) == 0)
1497 /* We got another thread. */
1498 ++nthreads;
1499 /* The new thread might need a kick. */
1500 do_signal = true;
1505 pthread_mutex_unlock (&readylist_lock);
1507 /* Tell one of the worker threads there is work to do. */
1508 if (do_signal)
1509 pthread_cond_signal (&readylist_cond);
1513 /* Check whether restarting should happen. */
1514 static inline int
1515 restart_p (time_t now)
1517 return (paranoia && readylist == NULL && nready == nthreads
1518 && now >= restart_time);
1522 /* Array for times a connection was accepted. */
1523 static time_t *starttime;
1526 static void
1527 __attribute__ ((__noreturn__))
1528 main_loop_poll (void)
1530 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1531 * sizeof (conns[0]));
1533 conns[0].fd = sock;
1534 conns[0].events = POLLRDNORM;
1535 size_t nused = 1;
1536 size_t firstfree = 1;
1538 while (1)
1540 /* Wait for any event. We wait at most a couple of seconds so
1541 that we can check whether we should close any of the accepted
1542 connections since we have not received a request. */
1543 #define MAX_ACCEPT_TIMEOUT 30
1544 #define MIN_ACCEPT_TIMEOUT 5
1545 #define MAIN_THREAD_TIMEOUT \
1546 (MAX_ACCEPT_TIMEOUT * 1000 \
1547 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1549 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1551 time_t now = time (NULL);
1553 /* If there is a descriptor ready for reading or there is a new
1554 connection, process this now. */
1555 if (n > 0)
1557 if (conns[0].revents != 0)
1559 /* We have a new incoming connection. Accept the connection. */
1560 int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
1562 /* Use the descriptor if we have not reached the limit. */
1563 if (fd >= 0)
1565 if (firstfree < nconns)
1567 conns[firstfree].fd = fd;
1568 conns[firstfree].events = POLLRDNORM;
1569 starttime[firstfree] = now;
1570 if (firstfree >= nused)
1571 nused = firstfree + 1;
1574 ++firstfree;
1575 while (firstfree < nused && conns[firstfree].fd != -1);
1577 else
1578 /* We cannot use the connection so close it. */
1579 close (fd);
1582 --n;
1585 for (size_t cnt = 1; cnt < nused && n > 0; ++cnt)
1586 if (conns[cnt].revents != 0)
1588 fd_ready (conns[cnt].fd);
1590 /* Clean up the CONNS array. */
1591 conns[cnt].fd = -1;
1592 if (cnt < firstfree)
1593 firstfree = cnt;
1594 if (cnt == nused - 1)
1596 --nused;
1597 while (conns[nused - 1].fd == -1);
1599 --n;
1603 /* Now find entries which have timed out. */
1604 assert (nused > 0);
1606 /* We make the timeout length depend on the number of file
1607 descriptors currently used. */
1608 #define ACCEPT_TIMEOUT \
1609 (MAX_ACCEPT_TIMEOUT \
1610 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
1611 time_t laststart = now - ACCEPT_TIMEOUT;
1613 for (size_t cnt = nused - 1; cnt > 0; --cnt)
1615 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
1617 /* Remove the entry, it timed out. */
1618 (void) close (conns[cnt].fd);
1619 conns[cnt].fd = -1;
1621 if (cnt < firstfree)
1622 firstfree = cnt;
1623 if (cnt == nused - 1)
1625 --nused;
1626 while (conns[nused - 1].fd == -1);
1630 if (restart_p (now))
1631 restart ();
#ifdef HAVE_EPOLL
/* Main loop of the initial thread, based on epoll.  EFD is the epoll
   descriptor to use.  The function accepts connections on SOCK, passes
   descriptors with pending input to the worker threads, and closes
   connections which stay silent for too long.  It returns only if the
   listening socket cannot be registered with EFD; the caller then has
   to use another implementation.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  /* Number of descriptors currently registered, including SOCK.  The
     timeout macros below depend on it.  */
  int nused = 1;
  /* Upper bound for the descriptors having a STARTTIME entry set.  */
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));

            if (fd >= 0)
              {
                /* Try to add the new descriptor.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
        else
          {
            /* Remove the descriptor from the epoll descriptor.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (cnt != sock && starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;

            /* The descriptor is no longer registered.  Without this
               NUSED would only ever grow when connections time out
               and the timeouts computed from it would end up wrong.  */
            --nused;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
1730 /* Start all the threads we want. The initial process is thread no. 1. */
1731 void
1732 start_threads (void)
1734 /* Initialize the conditional variable we will use. The only
1735 non-standard attribute we might use is the clock selection. */
1736 pthread_condattr_t condattr;
1737 pthread_condattr_init (&condattr);
1739 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
1740 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
1741 /* Determine whether the monotonous clock is available. */
1742 struct timespec dummy;
1743 # if _POSIX_MONOTONIC_CLOCK == 0
1744 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
1745 # endif
1746 # if _POSIX_CLOCK_SELECTION == 0
1747 if (sysconf (_SC_CLOCK_SELECTION) > 0)
1748 # endif
1749 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
1750 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
1751 timeout_clock = CLOCK_MONOTONIC;
1752 #endif
1754 pthread_cond_init (&readylist_cond, &condattr);
1755 pthread_condattr_destroy (&condattr);
1758 /* Create the attribute for the threads. They are all created
1759 detached. */
1760 pthread_attr_init (&attr);
1761 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
1762 /* Use 1MB stacks, twice as much for 64-bit architectures. */
1763 pthread_attr_setstacksize (&attr, 1024 * 1024 * (sizeof (void *) / 4));
1765 /* We allow less than LASTDB threads only for debugging. */
1766 if (debug_level == 0)
1767 nthreads = MAX (nthreads, lastdb);
1769 int nfailed = 0;
1770 for (long int i = 0; i < nthreads; ++i)
1772 pthread_t th;
1773 if (pthread_create (&th, &attr, nscd_run, (void *) (i - nfailed)) != 0)
1774 ++nfailed;
1776 if (nthreads - nfailed < lastdb)
1778 /* We could not start enough threads. */
1779 dbg_log (_("could only start %d threads; terminating"),
1780 nthreads - nfailed);
1781 exit (1);
1784 /* Determine how much room for descriptors we should initially
1785 allocate. This might need to change later if we cap the number
1786 with MAXCONN. */
1787 const long int nfds = sysconf (_SC_OPEN_MAX);
1788 #define MINCONN 32
1789 #define MAXCONN 16384
1790 if (nfds == -1 || nfds > MAXCONN)
1791 nconns = MAXCONN;
1792 else if (nfds < MINCONN)
1793 nconns = MINCONN;
1794 else
1795 nconns = nfds;
1797 /* We need memory to pass descriptors on to the worker threads. */
1798 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
1799 /* Array to keep track when connection was accepted. */
1800 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
1802 /* In the main thread we execute the loop which handles incoming
1803 connections. */
1804 #ifdef HAVE_EPOLL
1805 int efd = epoll_create (100);
1806 if (efd != -1)
1808 main_loop_epoll (efd);
1809 close (efd);
1811 #endif
1813 main_loop_poll ();
1817 /* Look up the uid, gid, and supplementary groups to run nscd as. When
1818 this function is called, we are not listening on the nscd socket yet so
1819 we can just use the ordinary lookup functions without causing a lockup */
1820 static void
1821 begin_drop_privileges (void)
1823 struct passwd *pwd = getpwnam (server_user);
1825 if (pwd == NULL)
1827 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1828 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
1829 server_user);
1832 server_uid = pwd->pw_uid;
1833 server_gid = pwd->pw_gid;
1835 /* Save the old UID/GID if we have to change back. */
1836 if (paranoia)
1838 old_uid = getuid ();
1839 old_gid = getgid ();
1842 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
1844 /* This really must never happen. */
1845 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1846 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
1849 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
1851 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
1852 == -1)
1854 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1855 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
1860 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
1861 run nscd as the user specified in the configuration file. */
1862 static void
1863 finish_drop_privileges (void)
1865 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
1866 /* We need to preserve the capabilities to connect to the audit daemon. */
1867 cap_t new_caps = preserve_capabilities ();
1868 #endif
1870 if (setgroups (server_ngroups, server_groups) == -1)
1872 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1873 error (EXIT_FAILURE, errno, _("setgroups failed"));
1876 if (setresgid (server_gid, server_gid, old_gid) == -1)
1878 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1879 perror ("setgid");
1880 exit (4);
1883 if (setresuid (server_uid, server_uid, old_uid) == -1)
1885 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1886 perror ("setuid");
1887 exit (4);
1890 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
1891 /* Remove the temporary capabilities. */
1892 install_real_capabilities (new_caps);
1893 #endif