file kaio_write64.c was initially added on branch fedora-branch.
[glibc/history.git] / nscd / connections.c
blobc4269ce548d9fd8db6c27f59eda14944c4eecc1c
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2003, 2004, 2005, 2006 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License version 2 as
8 published by the Free Software Foundation.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
19 #include <alloca.h>
20 #include <assert.h>
21 #include <atomic.h>
22 #include <error.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <grp.h>
26 #include <libintl.h>
27 #include <pthread.h>
28 #include <pwd.h>
29 #include <resolv.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <unistd.h>
33 #include <arpa/inet.h>
34 #ifdef HAVE_EPOLL
35 # include <sys/epoll.h>
36 #endif
37 #include <sys/mman.h>
38 #include <sys/param.h>
39 #include <sys/poll.h>
40 #ifdef HAVE_SENDFILE
41 # include <sys/sendfile.h>
42 #endif
43 #include <sys/socket.h>
44 #include <sys/stat.h>
45 #include <sys/un.h>
47 #include "nscd.h"
48 #include "dbg_log.h"
49 #include "selinux.h"
50 #ifdef HAVE_SENDFILE
51 # include <kernel-features.h>
52 #endif
/* Allocation wrappers that perform their own error checking; they are
   defined outside this file.  */
extern void *xmalloc (size_t n);
extern void *xcalloc (size_t n, size_t s);
extern void *xrealloc (void *o, size_t n);

/* State used to run nscd as an unprivileged user.  SERVER_USER and
   STAT_USER are names; the numeric IDs belonging to them are kept next
   to them.  */
const char *server_user;
static uid_t server_uid;
static gid_t server_gid;
const char *stat_user;
uid_t stat_uid;
static gid_t *server_groups;
#ifndef NGROUPS
# define NGROUPS 32
#endif
static int server_ngroups;

/* Thread attributes shared by code in this file.  */
static pthread_attr_t attr;

/* The two phases of dropping privileges; nscd_init calls the first
   before the socket is set up and the second afterwards.  */
static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
77 /* Map request type to a string. */
78 const char *serv2str[LASTREQ] =
80 [GETPWBYNAME] = "GETPWBYNAME",
81 [GETPWBYUID] = "GETPWBYUID",
82 [GETGRBYNAME] = "GETGRBYNAME",
83 [GETGRBYGID] = "GETGRBYGID",
84 [GETHOSTBYNAME] = "GETHOSTBYNAME",
85 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
86 [GETHOSTBYADDR] = "GETHOSTBYADDR",
87 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
88 [SHUTDOWN] = "SHUTDOWN",
89 [GETSTAT] = "GETSTAT",
90 [INVALIDATE] = "INVALIDATE",
91 [GETFDPW] = "GETFDPW",
92 [GETFDGR] = "GETFDGR",
93 [GETFDHST] = "GETFDHST",
94 [GETAI] = "GETAI",
95 [INITGROUPS] = "INITGROUPS"
98 /* The control data structures for the services. */
99 struct database_dyn dbs[lastdb] =
101 [pwddb] = {
102 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
103 .prunelock = PTHREAD_MUTEX_INITIALIZER,
104 .enabled = 0,
105 .check_file = 1,
106 .persistent = 0,
107 .propagate = 1,
108 .shared = 0,
109 .max_db_size = DEFAULT_MAX_DB_SIZE,
110 .filename = "/etc/passwd",
111 .db_filename = _PATH_NSCD_PASSWD_DB,
112 .disabled_iov = &pwd_iov_disabled,
113 .postimeout = 3600,
114 .negtimeout = 20,
115 .wr_fd = -1,
116 .ro_fd = -1,
117 .mmap_used = false
119 [grpdb] = {
120 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
121 .prunelock = PTHREAD_MUTEX_INITIALIZER,
122 .enabled = 0,
123 .check_file = 1,
124 .persistent = 0,
125 .propagate = 1,
126 .shared = 0,
127 .max_db_size = DEFAULT_MAX_DB_SIZE,
128 .filename = "/etc/group",
129 .db_filename = _PATH_NSCD_GROUP_DB,
130 .disabled_iov = &grp_iov_disabled,
131 .postimeout = 3600,
132 .negtimeout = 60,
133 .wr_fd = -1,
134 .ro_fd = -1,
135 .mmap_used = false
137 [hstdb] = {
138 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
139 .prunelock = PTHREAD_MUTEX_INITIALIZER,
140 .enabled = 0,
141 .check_file = 1,
142 .persistent = 0,
143 .propagate = 0, /* Not used. */
144 .shared = 0,
145 .max_db_size = DEFAULT_MAX_DB_SIZE,
146 .filename = "/etc/hosts",
147 .db_filename = _PATH_NSCD_HOSTS_DB,
148 .disabled_iov = &hst_iov_disabled,
149 .postimeout = 3600,
150 .negtimeout = 20,
151 .wr_fd = -1,
152 .ro_fd = -1,
153 .mmap_used = false
158 /* Mapping of request type to database. */
159 static struct database_dyn *const serv2db[LASTREQ] =
161 [GETPWBYNAME] = &dbs[pwddb],
162 [GETPWBYUID] = &dbs[pwddb],
163 [GETGRBYNAME] = &dbs[grpdb],
164 [GETGRBYGID] = &dbs[grpdb],
165 [GETHOSTBYNAME] = &dbs[hstdb],
166 [GETHOSTBYNAMEv6] = &dbs[hstdb],
167 [GETHOSTBYADDR] = &dbs[hstdb],
168 [GETHOSTBYADDRv6] = &dbs[hstdb],
169 [GETFDPW] = &dbs[pwddb],
170 [GETFDGR] = &dbs[grpdb],
171 [GETFDHST] = &dbs[hstdb],
172 [GETAI] = &dbs[hstdb],
173 [INITGROUPS] = &dbs[grpdb]
/* Seconds that elapse between two cache pruning runs.  */
#define CACHE_PRUNE_INTERVAL 15


/* Number of threads to start with; -1 means "not configured", in
   which case nscd_init picks a default.  */
int nthreads = -1;
/* Upper limit for the number of threads.  */
int max_nthreads = 32;

/* The socket on which incoming connections arrive.  */
static int sock;

/* How often clients had to wait.  */
unsigned long int client_queued;
/* Send the LEN bytes at BUF over socket FD, retrying after partial
   sends and after interruption by a signal.  Returns the negative
   result of send if it failed, otherwise the number of bytes that
   were actually sent (less than LEN if send reported 0).  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cursor = buf;
  size_t left = len;
  ssize_t sent;

  for (;;)
    {
      sent = TEMP_FAILURE_RETRY (send (fd, cursor, left, MSG_NOSIGNAL));
      if (sent <= 0)
        break;

      cursor += sent;
      left -= sent;
      if (left == 0)
        break;
    }

  if (sent < 0)
    return sent;
  return len - left;
}
#ifdef HAVE_SENDFILE
/* Copy LEN bytes, starting at offset OFF of FROMFD, to TOFD using
   sendfile, retrying after partial transfers and after interruption
   by a signal.  Returns the negative result of sendfile if it failed,
   otherwise the number of bytes transferred.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t left = len;
  ssize_t moved;

  for (;;)
    {
      moved = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, left));
      if (moved <= 0)
        break;

      left -= moved;
      if (left <= 0)
        break;
    }

  if (moved < 0)
    return moved;
  return len - left;
}
#endif
/* Values stored in the usage map that verify_persistent_db builds.
   The low bits say what kind of object occupies a byte; the flag bits
   mark the first and the last byte of an object and whether a
   "first" reference to it has been seen.  */
enum usekey
{
  use_not = 0,

  /* Flag bits.  They are only ever combined with one of the kinds
     below, never stored on their own.  */
  use_first = 16,
  use_begin = 32,
  use_end = 64,

  /* Hash entries.  */
  use_he = 1,
  use_he_begin = use_he | use_begin,
  use_he_end = use_he | use_end,

#if SEPARATE_KEY
  /* Keys stored outside of the data record.  */
  use_key = 2,
  use_key_begin = use_key | use_begin,
  use_key_end = use_key | use_end,
  use_key_first = use_key_begin | use_first,
#endif

  /* Data records.  */
  use_data = 3,
  use_data_begin = use_data | use_begin,
  use_data_end = use_data | use_end,
  use_data_first = use_data_begin | use_first
};
255 static int
256 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
257 enum usekey use, ref_t start, size_t len)
259 assert (len >= 2);
261 if (start > first_free || start + len > first_free
262 || (start & BLOCK_ALIGN_M1))
263 return 0;
265 if (usemap[start] == use_not)
267 /* Add the start marker. */
268 usemap[start] = use | use_begin;
269 use &= ~use_first;
271 while (--len > 0)
272 if (usemap[++start] != use_not)
273 return 0;
274 else
275 usemap[start] = use;
277 /* Add the end marker. */
278 usemap[start] = use | use_end;
280 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
282 /* Hash entries can't be shared. */
283 if (use == use_he)
284 return 0;
286 usemap[start] |= (use & use_first);
287 use &= ~use_first;
289 while (--len > 1)
290 if (usemap[++start] != use)
291 return 0;
293 if (usemap[++start] != (use | use_end))
294 return 0;
296 else
297 /* Points to a wrong object or somewhere in the middle. */
298 return 0;
300 return 1;
304 /* Verify data in persistent database. */
305 static int
306 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
308 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb);
310 time_t now = time (NULL);
312 struct database_pers_head *head = mem;
313 struct database_pers_head head_copy = *head;
315 /* Check that the header that was read matches the head in the database. */
316 if (readhead != NULL && memcmp (head, readhead, sizeof (*head)) != 0)
317 return 0;
319 /* First some easy tests: make sure the database header is sane. */
320 if (head->version != DB_VERSION
321 || head->header_size != sizeof (*head)
322 /* We allow a timestamp to be one hour ahead of the current time.
323 This should cover daylight saving time changes. */
324 || head->timestamp > now + 60 * 60 + 60
325 || (head->gc_cycle & 1)
326 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
327 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
328 || head->first_free < 0
329 || head->first_free > head->data_size
330 || (head->first_free & BLOCK_ALIGN_M1) != 0
331 || head->maxnentries < 0
332 || head->maxnsearched < 0)
333 return 0;
335 uint8_t *usemap = calloc (head->first_free, 1);
336 if (usemap == NULL)
337 return 0;
339 const char *data = (char *) &head->array[roundup (head->module,
340 ALIGN / sizeof (ref_t))];
342 nscd_ssize_t he_cnt = 0;
343 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
345 ref_t work = head->array[cnt];
347 while (work != ENDREF)
349 if (! check_use (data, head->first_free, usemap, use_he, work,
350 sizeof (struct hashentry)))
351 goto fail;
353 /* Now we know we can dereference the record. */
354 struct hashentry *here = (struct hashentry *) (data + work);
356 ++he_cnt;
358 /* Make sure the record is for this type of service. */
359 if (here->type >= LASTREQ
360 || serv2db[here->type] != &dbs[dbnr])
361 goto fail;
363 /* Validate boolean field value. */
364 if (here->first != false && here->first != true)
365 goto fail;
367 if (here->len < 0)
368 goto fail;
370 /* Now the data. */
371 if (here->packet < 0
372 || here->packet > head->first_free
373 || here->packet + sizeof (struct datahead) > head->first_free)
374 goto fail;
376 struct datahead *dh = (struct datahead *) (data + here->packet);
378 if (! check_use (data, head->first_free, usemap,
379 use_data | (here->first ? use_first : 0),
380 here->packet, dh->allocsize))
381 goto fail;
383 if (dh->allocsize < sizeof (struct datahead)
384 || dh->recsize > dh->allocsize
385 || (dh->notfound != false && dh->notfound != true)
386 || (dh->usable != false && dh->usable != true))
387 goto fail;
389 if (here->key < here->packet + sizeof (struct datahead)
390 || here->key > here->packet + dh->allocsize
391 || here->key + here->len > here->packet + dh->allocsize)
393 #if SEPARATE_KEY
394 /* If keys can appear outside of data, this should be done
395 instead. But gc doesn't mark the data in that case. */
396 if (! check_use (data, head->first_free, usemap,
397 use_key | (here->first ? use_first : 0),
398 here->key, here->len))
399 #endif
400 goto fail;
403 work = here->next;
407 if (he_cnt != head->nentries)
408 goto fail;
410 /* See if all data and keys had at least one reference from
411 he->first == true hashentry. */
412 for (ref_t idx = 0; idx < head->first_free; ++idx)
414 #if SEPARATE_KEY
415 if (usemap[idx] == use_key_begin)
416 goto fail;
417 #endif
418 if (usemap[idx] == use_data_begin)
419 goto fail;
422 /* Finally, make sure the database hasn't changed since the first test. */
423 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
424 goto fail;
426 free (usemap);
427 return 1;
429 fail:
430 free (usemap);
431 return 0;
435 /* Initialize database information structures. */
436 void
437 nscd_init (void)
439 /* Look up unprivileged uid/gid/groups before we start listening on the
440 socket */
441 if (server_user != NULL)
442 begin_drop_privileges ();
444 if (nthreads == -1)
445 /* No configuration for this value, assume a default. */
446 nthreads = 2 * lastdb;
448 for (size_t cnt = 0; cnt < lastdb; ++cnt)
449 if (dbs[cnt].enabled)
451 pthread_rwlock_init (&dbs[cnt].lock, NULL);
452 pthread_mutex_init (&dbs[cnt].memlock, NULL);
454 if (dbs[cnt].persistent)
456 /* Try to open the appropriate file on disk. */
457 int fd = open (dbs[cnt].db_filename, O_RDWR);
458 if (fd != -1)
460 struct stat64 st;
461 void *mem;
462 size_t total;
463 struct database_pers_head head;
464 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
465 sizeof (head)));
466 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
468 fail_db:
469 dbg_log (_("invalid persistent database file \"%s\": %s"),
470 dbs[cnt].db_filename, strerror (errno));
471 unlink (dbs[cnt].db_filename);
473 else if (head.module == 0 && head.data_size == 0)
475 /* The file has been created, but the head has not been
476 initialized yet. Remove the old file. */
477 unlink (dbs[cnt].db_filename);
479 else if (head.header_size != (int) sizeof (head))
481 dbg_log (_("invalid persistent database file \"%s\": %s"),
482 dbs[cnt].db_filename,
483 _("header size does not match"));
484 unlink (dbs[cnt].db_filename);
486 else if ((total = (sizeof (head)
487 + roundup (head.module * sizeof (ref_t),
488 ALIGN)
489 + head.data_size))
490 > st.st_size
491 || total < sizeof (head))
493 dbg_log (_("invalid persistent database file \"%s\": %s"),
494 dbs[cnt].db_filename,
495 _("file size does not match"));
496 unlink (dbs[cnt].db_filename);
498 /* Note we map with the maximum size allowed for the
499 database. This is likely much larger than the
500 actual file size. This is OK on most OSes since
501 extensions of the underlying file will
502 automatically translate more pages available for
503 memory access. */
504 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
505 PROT_READ | PROT_WRITE,
506 MAP_SHARED, fd, 0))
507 == MAP_FAILED)
508 goto fail_db;
509 else if (!verify_persistent_db (mem, &head, cnt))
511 munmap (mem, total);
512 dbg_log (_("invalid persistent database file \"%s\": %s"),
513 dbs[cnt].db_filename,
514 _("verification failed"));
515 unlink (dbs[cnt].db_filename);
517 else
519 /* Success. We have the database. */
520 dbs[cnt].head = mem;
521 dbs[cnt].memsize = total;
522 dbs[cnt].data = (char *)
523 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
524 ALIGN / sizeof (ref_t))];
525 dbs[cnt].mmap_used = true;
527 if (dbs[cnt].suggested_module > head.module)
528 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
529 dbnames[cnt]);
531 dbs[cnt].wr_fd = fd;
532 fd = -1;
533 /* We also need a read-only descriptor. */
534 if (dbs[cnt].shared)
536 dbs[cnt].ro_fd = open (dbs[cnt].db_filename, O_RDONLY);
537 if (dbs[cnt].ro_fd == -1)
538 dbg_log (_("\
539 cannot create read-only descriptor for \"%s\"; no mmap"),
540 dbs[cnt].db_filename);
543 // XXX Shall we test whether the descriptors actually
544 // XXX point to the same file?
547 /* Close the file descriptors in case something went
548 wrong in which case the variable have not been
549 assigned -1. */
550 if (fd != -1)
551 close (fd);
555 if (dbs[cnt].head == NULL)
557 /* No database loaded. Allocate the data structure,
558 possibly on disk. */
559 struct database_pers_head head;
560 size_t total = (sizeof (head)
561 + roundup (dbs[cnt].suggested_module
562 * sizeof (ref_t), ALIGN)
563 + (dbs[cnt].suggested_module
564 * DEFAULT_DATASIZE_PER_BUCKET));
566 /* Try to create the database. If we do not need a
567 persistent database create a temporary file. */
568 int fd;
569 int ro_fd = -1;
570 if (dbs[cnt].persistent)
572 fd = open (dbs[cnt].db_filename,
573 O_RDWR | O_CREAT | O_EXCL | O_TRUNC,
574 S_IRUSR | S_IWUSR);
575 if (fd != -1 && dbs[cnt].shared)
576 ro_fd = open (dbs[cnt].db_filename, O_RDONLY);
578 else
580 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
581 fd = mkstemp (fname);
583 /* We do not need the file name anymore after we
584 opened another file descriptor in read-only mode. */
585 if (fd != -1)
587 if (dbs[cnt].shared)
588 ro_fd = open (fname, O_RDONLY);
590 unlink (fname);
594 if (fd == -1)
596 if (errno == EEXIST)
598 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
599 dbnames[cnt], dbs[cnt].db_filename);
600 // XXX Correct way to terminate?
601 exit (1);
604 if (dbs[cnt].persistent)
605 dbg_log (_("cannot create %s; no persistent database used"),
606 dbs[cnt].db_filename);
607 else
608 dbg_log (_("cannot create %s; no sharing possible"),
609 dbs[cnt].db_filename);
611 dbs[cnt].persistent = 0;
612 // XXX remember: no mmap
614 else
616 /* Tell the user if we could not create the read-only
617 descriptor. */
618 if (ro_fd == -1 && dbs[cnt].shared)
619 dbg_log (_("\
620 cannot create read-only descriptor for \"%s\"; no mmap"),
621 dbs[cnt].db_filename);
623 /* Before we create the header, initialiye the hash
624 table. So that if we get interrupted if writing
625 the header we can recognize a partially initialized
626 database. */
627 size_t ps = sysconf (_SC_PAGESIZE);
628 char tmpbuf[ps];
629 assert (~ENDREF == 0);
630 memset (tmpbuf, '\xff', ps);
632 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
633 off_t offset = sizeof (head);
635 size_t towrite;
636 if (offset % ps != 0)
638 towrite = MIN (remaining, ps - (offset % ps));
639 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
640 goto write_fail;
641 offset += towrite;
642 remaining -= towrite;
645 while (remaining > ps)
647 if (pwrite (fd, tmpbuf, ps, offset) == -1)
648 goto write_fail;
649 offset += ps;
650 remaining -= ps;
653 if (remaining > 0
654 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
655 goto write_fail;
657 /* Create the header of the file. */
658 struct database_pers_head head =
660 .version = DB_VERSION,
661 .header_size = sizeof (head),
662 .module = dbs[cnt].suggested_module,
663 .data_size = (dbs[cnt].suggested_module
664 * DEFAULT_DATASIZE_PER_BUCKET),
665 .first_free = 0
667 void *mem;
669 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
670 != sizeof (head))
671 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
672 != 0)
673 || (mem = mmap (NULL, dbs[cnt].max_db_size,
674 PROT_READ | PROT_WRITE,
675 MAP_SHARED, fd, 0)) == MAP_FAILED)
677 write_fail:
678 unlink (dbs[cnt].db_filename);
679 dbg_log (_("cannot write to database file %s: %s"),
680 dbs[cnt].db_filename, strerror (errno));
681 dbs[cnt].persistent = 0;
683 else
685 /* Success. */
686 dbs[cnt].head = mem;
687 dbs[cnt].data = (char *)
688 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
689 ALIGN / sizeof (ref_t))];
690 dbs[cnt].memsize = total;
691 dbs[cnt].mmap_used = true;
693 /* Remember the descriptors. */
694 dbs[cnt].wr_fd = fd;
695 dbs[cnt].ro_fd = ro_fd;
696 fd = -1;
697 ro_fd = -1;
700 if (fd != -1)
701 close (fd);
702 if (ro_fd != -1)
703 close (ro_fd);
707 if (paranoia
708 && ((dbs[cnt].wr_fd != -1
709 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
710 || (dbs[cnt].ro_fd != -1
711 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
713 dbg_log (_("\
714 cannot set socket to close on exec: %s; disabling paranoia mode"),
715 strerror (errno));
716 paranoia = 0;
719 if (dbs[cnt].head == NULL)
721 /* We do not use the persistent database. Just
722 create an in-memory data structure. */
723 assert (! dbs[cnt].persistent);
725 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
726 + (dbs[cnt].suggested_module
727 * sizeof (ref_t)));
728 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
729 assert (~ENDREF == 0);
730 memset (dbs[cnt].head->array, '\xff',
731 dbs[cnt].suggested_module * sizeof (ref_t));
732 dbs[cnt].head->module = dbs[cnt].suggested_module;
733 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
734 * dbs[cnt].head->module);
735 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
736 dbs[cnt].head->first_free = 0;
738 dbs[cnt].shared = 0;
739 assert (dbs[cnt].ro_fd == -1);
742 if (dbs[cnt].check_file)
744 /* We need the modification date of the file. */
745 struct stat64 st;
747 if (stat64 (dbs[cnt].filename, &st) < 0)
749 /* We cannot stat() the file, disable file checking. */
750 dbg_log (_("cannot stat() file `%s': %s"),
751 dbs[cnt].filename, strerror (errno));
752 dbs[cnt].check_file = 0;
754 else
755 dbs[cnt].file_mtime = st.st_mtime;
759 /* Create the socket. */
760 sock = socket (AF_UNIX, SOCK_STREAM, 0);
761 if (sock < 0)
763 dbg_log (_("cannot open socket: %s"), strerror (errno));
764 exit (errno == EACCES ? 4 : 1);
766 /* Bind a name to the socket. */
767 struct sockaddr_un sock_addr;
768 sock_addr.sun_family = AF_UNIX;
769 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
770 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
772 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
773 exit (errno == EACCES ? 4 : 1);
776 /* We don't want to get stuck on accept. */
777 int fl = fcntl (sock, F_GETFL);
778 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
780 dbg_log (_("cannot change socket to nonblocking mode: %s"),
781 strerror (errno));
782 exit (1);
785 /* The descriptor needs to be closed on exec. */
786 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
788 dbg_log (_("cannot set socket to close on exec: %s"),
789 strerror (errno));
790 exit (1);
793 /* Set permissions for the socket. */
794 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
796 /* Set the socket up to accept connections. */
797 if (listen (sock, SOMAXCONN) < 0)
799 dbg_log (_("cannot enable socket to accept connections: %s"),
800 strerror (errno));
801 exit (1);
804 /* Change to unprivileged uid/gid/groups if specifed in config file */
805 if (server_user != NULL)
806 finish_drop_privileges ();
810 /* Close the connections. */
811 void
812 close_sockets (void)
814 close (sock);
818 static void
819 invalidate_cache (char *key, int fd)
821 dbtype number;
822 int32_t resp;
824 if (strcmp (key, "passwd") == 0)
825 number = pwddb;
826 else if (strcmp (key, "group") == 0)
827 number = grpdb;
828 else if (__builtin_expect (strcmp (key, "hosts"), 0) == 0)
830 number = hstdb;
832 /* Re-initialize the resolver. resolv.conf might have changed. */
833 res_init ();
835 else
837 resp = EINVAL;
838 writeall (fd, &resp, sizeof (resp));
839 return;
842 if (dbs[number].enabled)
843 prune_cache (&dbs[number], LONG_MAX, fd);
844 else
846 resp = 0;
847 writeall (fd, &resp, sizeof (resp));
852 #ifdef SCM_RIGHTS
853 static void
854 send_ro_fd (struct database_dyn *db, char *key, int fd)
856 /* If we do not have an read-only file descriptor do nothing. */
857 if (db->ro_fd == -1)
858 return;
860 /* We need to send some data along with the descriptor. */
861 struct iovec iov[1];
862 iov[0].iov_base = key;
863 iov[0].iov_len = strlen (key) + 1;
865 /* Prepare the control message to transfer the descriptor. */
866 union
868 struct cmsghdr hdr;
869 char bytes[CMSG_SPACE (sizeof (int))];
870 } buf;
871 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 1,
872 .msg_control = buf.bytes,
873 .msg_controllen = sizeof (buf) };
874 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
876 cmsg->cmsg_level = SOL_SOCKET;
877 cmsg->cmsg_type = SCM_RIGHTS;
878 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
880 *(int *) CMSG_DATA (cmsg) = db->ro_fd;
882 msg.msg_controllen = cmsg->cmsg_len;
884 /* Send the control message. We repeat when we are interrupted but
885 everything else is ignored. */
886 #ifndef MSG_NOSIGNAL
887 # define MSG_NOSIGNAL 0
888 #endif
889 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
891 if (__builtin_expect (debug_level > 0, 0))
892 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
894 #endif /* SCM_RIGHTS */
897 /* Handle new request. */
898 static void
899 handle_request (int fd, request_header *req, void *key, uid_t uid)
901 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
903 if (debug_level > 0)
904 dbg_log (_("\
905 cannot handle old request version %d; current version is %d"),
906 req->version, NSCD_VERSION);
907 return;
910 /* Make the SELinux check before we go on to the standard checks. We
911 need to verify that the request type is valid, since it has not
912 yet been checked at this point. */
913 if (selinux_enabled
914 && __builtin_expect (req->type, GETPWBYNAME) >= GETPWBYNAME
915 && __builtin_expect (req->type, LASTREQ) < LASTREQ
916 && nscd_request_avc_has_perm (fd, req->type) != 0)
917 return;
919 struct database_dyn *db = serv2db[req->type];
921 // XXX Clean up so that each new command need not introduce a
922 // XXX new conditional.
923 if ((__builtin_expect (req->type, GETPWBYNAME) >= GETPWBYNAME
924 && __builtin_expect (req->type, LASTDBREQ) <= LASTDBREQ)
925 || req->type == GETAI || req->type == INITGROUPS)
927 if (__builtin_expect (debug_level, 0) > 0)
929 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
931 char buf[INET6_ADDRSTRLEN];
933 dbg_log ("\t%s (%s)", serv2str[req->type],
934 inet_ntop (req->type == GETHOSTBYADDR
935 ? AF_INET : AF_INET6,
936 key, buf, sizeof (buf)));
938 else
939 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
942 /* Is this service enabled? */
943 if (!db->enabled)
945 /* No, sent the prepared record. */
946 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
947 db->disabled_iov->iov_len,
948 MSG_NOSIGNAL))
949 != (ssize_t) db->disabled_iov->iov_len
950 && __builtin_expect (debug_level, 0) > 0)
952 /* We have problems sending the result. */
953 char buf[256];
954 dbg_log (_("cannot write result: %s"),
955 strerror_r (errno, buf, sizeof (buf)));
958 return;
961 /* Be sure we can read the data. */
962 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
964 ++db->head->rdlockdelayed;
965 pthread_rwlock_rdlock (&db->lock);
968 /* See whether we can handle it from the cache. */
969 struct datahead *cached;
970 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
971 db, uid);
972 if (cached != NULL)
974 /* Hurray it's in the cache. */
975 ssize_t nwritten;
977 #ifdef HAVE_SENDFILE
978 if (db->mmap_used || !cached->notfound)
980 assert (db->wr_fd != -1);
981 assert ((char *) cached->data > (char *) db->data);
982 assert ((char *) cached->data - (char *) db->head
983 + cached->recsize
984 <= (sizeof (struct database_pers_head)
985 + db->head->module * sizeof (ref_t)
986 + db->head->data_size));
987 nwritten = sendfileall (fd, db->wr_fd,
988 (char *) cached->data
989 - (char *) db->head, cached->recsize);
990 # ifndef __ASSUME_SENDFILE
991 if (nwritten == -1 && errno == ENOSYS)
992 goto use_write;
993 # endif
995 else
996 # ifndef __ASSUME_SENDFILE
997 use_write:
998 # endif
999 #endif
1000 nwritten = writeall (fd, cached->data, cached->recsize);
1002 if (nwritten != cached->recsize
1003 && __builtin_expect (debug_level, 0) > 0)
1005 /* We have problems sending the result. */
1006 char buf[256];
1007 dbg_log (_("cannot write result: %s"),
1008 strerror_r (errno, buf, sizeof (buf)));
1011 pthread_rwlock_unlock (&db->lock);
1013 return;
1016 pthread_rwlock_unlock (&db->lock);
1018 else if (__builtin_expect (debug_level, 0) > 0)
1020 if (req->type == INVALIDATE)
1021 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1022 else
1023 dbg_log ("\t%s", serv2str[req->type]);
1026 /* Handle the request. */
1027 switch (req->type)
1029 case GETPWBYNAME:
1030 addpwbyname (db, fd, req, key, uid);
1031 break;
1033 case GETPWBYUID:
1034 addpwbyuid (db, fd, req, key, uid);
1035 break;
1037 case GETGRBYNAME:
1038 addgrbyname (db, fd, req, key, uid);
1039 break;
1041 case GETGRBYGID:
1042 addgrbygid (db, fd, req, key, uid);
1043 break;
1045 case GETHOSTBYNAME:
1046 addhstbyname (db, fd, req, key, uid);
1047 break;
1049 case GETHOSTBYNAMEv6:
1050 addhstbynamev6 (db, fd, req, key, uid);
1051 break;
1053 case GETHOSTBYADDR:
1054 addhstbyaddr (db, fd, req, key, uid);
1055 break;
1057 case GETHOSTBYADDRv6:
1058 addhstbyaddrv6 (db, fd, req, key, uid);
1059 break;
1061 case GETAI:
1062 addhstai (db, fd, req, key, uid);
1063 break;
1065 case INITGROUPS:
1066 addinitgroups (db, fd, req, key, uid);
1067 break;
1069 case GETSTAT:
1070 case SHUTDOWN:
1071 case INVALIDATE:
1073 /* Get the callers credentials. */
1074 #ifdef SO_PEERCRED
1075 struct ucred caller;
1076 socklen_t optlen = sizeof (caller);
1078 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1080 char buf[256];
1082 dbg_log (_("error getting callers id: %s"),
1083 strerror_r (errno, buf, sizeof (buf)));
1084 break;
1087 uid = caller.uid;
1088 #else
1089 /* Some systems have no SO_PEERCRED implementation. They don't
1090 care about security so we don't as well. */
1091 uid = 0;
1092 #endif
1095 /* Accept shutdown, getstat and invalidate only from root. For
1096 the stat call also allow the user specified in the config file. */
1097 if (req->type == GETSTAT)
1099 if (uid == 0 || uid == stat_uid)
1100 send_stats (fd, dbs);
1102 else if (uid == 0)
1104 if (req->type == INVALIDATE)
1105 invalidate_cache (key, fd);
1106 else
1107 termination_handler (0);
1109 break;
1111 case GETFDPW:
1112 case GETFDGR:
1113 case GETFDHST:
1114 #ifdef SCM_RIGHTS
1115 send_ro_fd (serv2db[req->type], key, fd);
1116 #endif
1117 break;
1119 default:
1120 /* Ignore the command, it's nothing we know. */
1121 break;
1126 /* Restart the process. */
1127 static void
1128 restart (void)
1130 /* First determine the parameters. We do not use the parameters
1131 passed to main() since in case nscd is started by running the
1132 dynamic linker this will not work. Yes, this is not the usual
1133 case but nscd is part of glibc and we occasionally do this. */
1134 size_t buflen = 1024;
1135 char *buf = alloca (buflen);
1136 size_t readlen = 0;
1137 int fd = open ("/proc/self/cmdline", O_RDONLY);
1138 if (fd == -1)
1140 dbg_log (_("\
1141 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1142 strerror (errno));
1144 paranoia = 0;
1145 return;
1148 while (1)
1150 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1151 buflen - readlen));
1152 if (n == -1)
1154 dbg_log (_("\
1155 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1156 strerror (errno));
1158 close (fd);
1159 paranoia = 0;
1160 return;
1163 readlen += n;
1165 if (readlen < buflen)
1166 break;
1168 /* We might have to extend the buffer. */
1169 size_t old_buflen = buflen;
1170 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1171 buf = memmove (newp, buf, old_buflen);
1174 close (fd);
1176 /* Parse the command line. Worst case scenario: every two
1177 characters form one parameter (one character plus NUL). */
1178 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1179 int argc = 0;
1181 char *cp = buf;
1182 while (cp < buf + readlen)
1184 argv[argc++] = cp;
1185 cp = (char *) rawmemchr (cp, '\0') + 1;
1187 argv[argc] = NULL;
1189 /* Second, change back to the old user if we changed it. */
1190 if (server_user != NULL)
1192 if (setresuid (old_uid, old_uid, old_uid) != 0)
1194 dbg_log (_("\
1195 cannot change to old UID: %s; disabling paranoia mode"),
1196 strerror (errno));
1198 paranoia = 0;
1199 return;
1202 if (setresgid (old_gid, old_gid, old_gid) != 0)
1204 dbg_log (_("\
1205 cannot change to old GID: %s; disabling paranoia mode"),
1206 strerror (errno));
1208 setuid (server_uid);
1209 paranoia = 0;
1210 return;
1214 /* Next change back to the old working directory. */
1215 if (chdir (oldcwd) == -1)
1217 dbg_log (_("\
1218 cannot change to old working directory: %s; disabling paranoia mode"),
1219 strerror (errno));
1221 if (server_user != NULL)
1223 setuid (server_uid);
1224 setgid (server_gid);
1226 paranoia = 0;
1227 return;
1230 /* Synchronize memory. */
1231 for (int cnt = 0; cnt < lastdb; ++cnt)
1233 /* Make sure nobody keeps using the database. */
1234 dbs[cnt].head->timestamp = 0;
1236 if (dbs[cnt].persistent)
1237 // XXX async OK?
1238 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1241 /* The preparations are done. */
1242 execv ("/proc/self/exe", argv);
1244 /* If we come here, we will never be able to re-exec. */
1245 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1246 strerror (errno));
1248 if (server_user != NULL)
1250 setuid (server_uid);
1251 setgid (server_gid);
1253 if (chdir ("/") != 0)
1254 dbg_log (_("cannot change current working directory to \"/\": %s"),
1255 strerror (errno));
1256 paranoia = 0;
/* Element of a singly linked list of file descriptors.  */
struct fdlist
{
  int fd;
  struct fdlist *next;
};

/* Memory allocated for the list.  */
static struct fdlist *fdlist;
/* The descriptors that are currently ready to be read from.  */
static struct fdlist *readylist;

/* Mutex protecting READYLIST and the condition variable used to
   announce new entries in it.  The condition variable is initialized
   at run time because the clock it uses is chosen depending on what
   is available.  */
static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t readylist_cond;

/* Clock used for timed waits on the condition variable.  */
static clockid_t timeout_clock = CLOCK_REALTIME;

/* How many threads are ready to work on READYLIST.  */
static unsigned long int nready;
1284 /* This is the main loop. It is replicated in different threads but the
1285 `poll' call makes sure only one thread handles an incoming connection. */
1286 static void *
1287 __attribute__ ((__noreturn__))
1288 nscd_run (void *p)
1290 const long int my_number = (long int) p;
1291 const int run_prune = my_number < lastdb && dbs[my_number].enabled;
1292 struct timespec prune_ts;
1293 int to = 0;
1294 char buf[256];
1296 if (run_prune)
1298 setup_thread (&dbs[my_number]);
1300 /* We are running. */
1301 dbs[my_number].head->timestamp = time (NULL);
1303 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1304 /* Should never happen. */
1305 abort ();
1307 /* Compute timeout time. */
1308 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1311 /* Initial locking. */
1312 pthread_mutex_lock (&readylist_lock);
1314 /* One more thread available. */
1315 ++nready;
1317 while (1)
1319 while (readylist == NULL)
1321 if (run_prune)
1323 /* Wait, but not forever. */
1324 to = pthread_cond_timedwait (&readylist_cond, &readylist_lock,
1325 &prune_ts);
1327 /* If we were woken and there is no work to be done,
1328 just start pruning. */
1329 if (readylist == NULL && to == ETIMEDOUT)
1331 --nready;
1332 pthread_mutex_unlock (&readylist_lock);
1333 goto only_prune;
1336 else
1337 /* No need to timeout. */
1338 pthread_cond_wait (&readylist_cond, &readylist_lock);
1341 struct fdlist *it = readylist->next;
1342 if (readylist->next == readylist)
1343 /* Just one entry on the list. */
1344 readylist = NULL;
1345 else
1346 readylist->next = it->next;
1348 /* Extract the information and mark the record ready to be used
1349 again. */
1350 int fd = it->fd;
1351 it->next = NULL;
1353 /* One more thread available. */
1354 --nready;
1356 /* We are done with the list. */
1357 pthread_mutex_unlock (&readylist_lock);
1359 /* We do not want to block on a short read or so. */
1360 int fl = fcntl (fd, F_GETFL);
1361 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1362 goto close_and_out;
1364 /* Now read the request. */
1365 request_header req;
1366 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1367 != sizeof (req), 0))
1369 /* We failed to read data. Note that this also might mean we
1370 failed because we would have blocked. */
1371 if (debug_level > 0)
1372 dbg_log (_("short read while reading request: %s"),
1373 strerror_r (errno, buf, sizeof (buf)));
1374 goto close_and_out;
1377 /* Check whether this is a valid request type. */
1378 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1379 goto close_and_out;
1381 /* Some systems have no SO_PEERCRED implementation. They don't
1382 care about security so we don't as well. */
1383 uid_t uid = -1;
1384 #ifdef SO_PEERCRED
1385 pid_t pid = 0;
1387 if (__builtin_expect (debug_level > 0, 0))
1389 struct ucred caller;
1390 socklen_t optlen = sizeof (caller);
1392 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1393 pid = caller.pid;
1395 #endif
1397 /* It should not be possible to crash the nscd with a silly
1398 request (i.e., a terribly large key). We limit the size to 1kb. */
1399 #define MAXKEYLEN 1024
1400 if (__builtin_expect (req.key_len, 1) < 0
1401 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1403 if (debug_level > 0)
1404 dbg_log (_("key length in request too long: %d"), req.key_len);
1406 else
1408 /* Get the key. */
1409 char keybuf[MAXKEYLEN];
1411 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1412 req.key_len))
1413 != req.key_len, 0))
1415 /* Again, this can also mean we would have blocked. */
1416 if (debug_level > 0)
1417 dbg_log (_("short read while reading request key: %s"),
1418 strerror_r (errno, buf, sizeof (buf)));
1419 goto close_and_out;
1422 if (__builtin_expect (debug_level, 0) > 0)
1424 #ifdef SO_PEERCRED
1425 if (pid != 0)
1426 dbg_log (_("\
1427 handle_request: request received (Version = %d) from PID %ld"),
1428 req.version, (long int) pid);
1429 else
1430 #endif
1431 dbg_log (_("\
1432 handle_request: request received (Version = %d)"), req.version);
1435 /* Phew, we got all the data, now process it. */
1436 handle_request (fd, &req, keybuf, uid);
1439 close_and_out:
1440 /* We are done. */
1441 close (fd);
1443 /* Check whether we should be pruning the cache. */
1444 assert (run_prune || to == 0);
1445 if (to == ETIMEDOUT)
1447 only_prune:
1448 /* The pthread_cond_timedwait() call timed out. It is time
1449 to clean up the cache. */
1450 assert (my_number < lastdb);
1451 prune_cache (&dbs[my_number], time (NULL), -1);
1453 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1454 /* Should never happen. */
1455 abort ();
1457 /* Compute next timeout time. */
1458 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1460 /* In case the list is emtpy we do not want to run the prune
1461 code right away again. */
1462 to = 0;
1465 /* Re-locking. */
1466 pthread_mutex_lock (&readylist_lock);
1468 /* One more thread available. */
1469 ++nready;
/* Number of entries in the FDLIST and STARTTIME arrays; set in
   start_threads before the arrays are allocated.  */
static unsigned int nconns;
1476 static void
1477 fd_ready (int fd)
1479 pthread_mutex_lock (&readylist_lock);
1481 /* Find an empty entry in FDLIST. */
1482 size_t inner;
1483 for (inner = 0; inner < nconns; ++inner)
1484 if (fdlist[inner].next == NULL)
1485 break;
1486 assert (inner < nconns);
1488 fdlist[inner].fd = fd;
1490 if (readylist == NULL)
1491 readylist = fdlist[inner].next = &fdlist[inner];
1492 else
1494 fdlist[inner].next = readylist->next;
1495 readylist = readylist->next = &fdlist[inner];
1498 bool do_signal = true;
1499 if (__builtin_expect (nready == 0, 0))
1501 ++client_queued;
1502 do_signal = false;
1504 /* Try to start another thread to help out. */
1505 pthread_t th;
1506 if (nthreads < max_nthreads
1507 && pthread_create (&th, &attr, nscd_run,
1508 (void *) (long int) nthreads) == 0)
1510 /* We got another thread. */
1511 ++nthreads;
1512 /* The new thread might need a kick. */
1513 do_signal = true;
1518 pthread_mutex_unlock (&readylist_lock);
1520 /* Tell one of the worker threads there is work to do. */
1521 if (do_signal)
1522 pthread_cond_signal (&readylist_cond);
1526 /* Check whether restarting should happen. */
1527 static inline int
1528 restart_p (time_t now)
1530 return (paranoia && readylist == NULL && nready == nthreads
1531 && now >= restart_time);
/* Array for times a connection was accepted.  It is allocated with
   NCONNS entries in start_threads.  */
static time_t *starttime;
1539 static void
1540 __attribute__ ((__noreturn__))
1541 main_loop_poll (void)
1543 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1544 * sizeof (conns[0]));
1546 conns[0].fd = sock;
1547 conns[0].events = POLLRDNORM;
1548 size_t nused = 1;
1549 size_t firstfree = 1;
1551 while (1)
1553 /* Wait for any event. We wait at most a couple of seconds so
1554 that we can check whether we should close any of the accepted
1555 connections since we have not received a request. */
1556 #define MAX_ACCEPT_TIMEOUT 30
1557 #define MIN_ACCEPT_TIMEOUT 5
1558 #define MAIN_THREAD_TIMEOUT \
1559 (MAX_ACCEPT_TIMEOUT * 1000 \
1560 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1562 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1564 time_t now = time (NULL);
1566 /* If there is a descriptor ready for reading or there is a new
1567 connection, process this now. */
1568 if (n > 0)
1570 if (conns[0].revents != 0)
1572 /* We have a new incoming connection. Accept the connection. */
1573 int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
1575 /* Use the descriptor if we have not reached the limit. */
1576 if (fd >= 0)
1578 if (firstfree < nconns)
1580 conns[firstfree].fd = fd;
1581 conns[firstfree].events = POLLRDNORM;
1582 starttime[firstfree] = now;
1583 if (firstfree >= nused)
1584 nused = firstfree + 1;
1587 ++firstfree;
1588 while (firstfree < nused && conns[firstfree].fd != -1);
1590 else
1591 /* We cannot use the connection so close it. */
1592 close (fd);
1595 --n;
1598 for (size_t cnt = 1; cnt < nused && n > 0; ++cnt)
1599 if (conns[cnt].revents != 0)
1601 fd_ready (conns[cnt].fd);
1603 /* Clean up the CONNS array. */
1604 conns[cnt].fd = -1;
1605 if (cnt < firstfree)
1606 firstfree = cnt;
1607 if (cnt == nused - 1)
1609 --nused;
1610 while (conns[nused - 1].fd == -1);
1612 --n;
1616 /* Now find entries which have timed out. */
1617 assert (nused > 0);
1619 /* We make the timeout length depend on the number of file
1620 descriptors currently used. */
1621 #define ACCEPT_TIMEOUT \
1622 (MAX_ACCEPT_TIMEOUT \
1623 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
1624 time_t laststart = now - ACCEPT_TIMEOUT;
1626 for (size_t cnt = nused - 1; cnt > 0; --cnt)
1628 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
1630 /* Remove the entry, it timed out. */
1631 (void) close (conns[cnt].fd);
1632 conns[cnt].fd = -1;
1634 if (cnt < firstfree)
1635 firstfree = cnt;
1636 if (cnt == nused - 1)
1638 --nused;
1639 while (conns[nused - 1].fd == -1);
1643 if (restart_p (now))
1644 restart ();
#ifdef HAVE_EPOLL
/* Connection loop of the calling thread based on epoll.  EFD is the
   epoll descriptor to use.  The function returns only if the
   listening socket SOCK cannot be registered with EFD; otherwise it
   loops forever.  STARTTIME is indexed by descriptor number here: a
   nonzero entry is the time the connection was accepted, zero means
   the descriptor is not being waited for.  HIGHEST tracks the largest
   descriptor that may have a nonzero entry, NUSED the number of
   descriptors waited for including SOCK.

   Uses the MAIN_THREAD_TIMEOUT and ACCEPT_TIMEOUT macros, which expand
   to expressions involving the local NUSED and the global NCONNS.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  int nused = 1;
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));

            if (fd >= 0)
              {
                /* Try to add the new descriptor.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
        else
          {
            /* Remove the descriptor from the epoll descriptor.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (cnt != sock && starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;

            /* The connection is gone, so it must no longer be counted.
               Without this NUSED only ever grows on timeouts and the
               timeouts computed from it shrink and can eventually
               turn negative.  */
            --nused;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
1743 /* Start all the threads we want. The initial process is thread no. 1. */
1744 void
1745 start_threads (void)
1747 /* Initialize the conditional variable we will use. The only
1748 non-standard attribute we might use is the clock selection. */
1749 pthread_condattr_t condattr;
1750 pthread_condattr_init (&condattr);
1752 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
1753 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
1754 /* Determine whether the monotonous clock is available. */
1755 struct timespec dummy;
1756 # if _POSIX_MONOTONIC_CLOCK == 0
1757 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
1758 # endif
1759 # if _POSIX_CLOCK_SELECTION == 0
1760 if (sysconf (_SC_CLOCK_SELECTION) > 0)
1761 # endif
1762 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
1763 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
1764 timeout_clock = CLOCK_MONOTONIC;
1765 #endif
1767 pthread_cond_init (&readylist_cond, &condattr);
1768 pthread_condattr_destroy (&condattr);
1771 /* Create the attribute for the threads. They are all created
1772 detached. */
1773 pthread_attr_init (&attr);
1774 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
1775 /* Use 1MB stacks, twice as much for 64-bit architectures. */
1776 pthread_attr_setstacksize (&attr, 1024 * 1024 * (sizeof (void *) / 4));
1778 /* We allow less than LASTDB threads only for debugging. */
1779 if (debug_level == 0)
1780 nthreads = MAX (nthreads, lastdb);
1782 int nfailed = 0;
1783 for (long int i = 0; i < nthreads; ++i)
1785 pthread_t th;
1786 if (pthread_create (&th, &attr, nscd_run, (void *) (i - nfailed)) != 0)
1787 ++nfailed;
1789 if (nthreads - nfailed < lastdb)
1791 /* We could not start enough threads. */
1792 dbg_log (_("could only start %d threads; terminating"),
1793 nthreads - nfailed);
1794 exit (1);
1797 /* Determine how much room for descriptors we should initially
1798 allocate. This might need to change later if we cap the number
1799 with MAXCONN. */
1800 const long int nfds = sysconf (_SC_OPEN_MAX);
1801 #define MINCONN 32
1802 #define MAXCONN 16384
1803 if (nfds == -1 || nfds > MAXCONN)
1804 nconns = MAXCONN;
1805 else if (nfds < MINCONN)
1806 nconns = MINCONN;
1807 else
1808 nconns = nfds;
1810 /* We need memory to pass descriptors on to the worker threads. */
1811 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
1812 /* Array to keep track when connection was accepted. */
1813 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
1815 /* In the main thread we execute the loop which handles incoming
1816 connections. */
1817 #ifdef HAVE_EPOLL
1818 int efd = epoll_create (100);
1819 if (efd != -1)
1821 main_loop_epoll (efd);
1822 close (efd);
1824 #endif
1826 main_loop_poll ();
1830 /* Look up the uid, gid, and supplementary groups to run nscd as. When
1831 this function is called, we are not listening on the nscd socket yet so
1832 we can just use the ordinary lookup functions without causing a lockup */
1833 static void
1834 begin_drop_privileges (void)
1836 struct passwd *pwd = getpwnam (server_user);
1838 if (pwd == NULL)
1840 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1841 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
1842 server_user);
1845 server_uid = pwd->pw_uid;
1846 server_gid = pwd->pw_gid;
1848 /* Save the old UID/GID if we have to change back. */
1849 if (paranoia)
1851 old_uid = getuid ();
1852 old_gid = getgid ();
1855 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
1857 /* This really must never happen. */
1858 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1859 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
1862 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
1864 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
1865 == -1)
1867 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1868 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
1873 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
1874 run nscd as the user specified in the configuration file. */
1875 static void
1876 finish_drop_privileges (void)
1878 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
1879 /* We need to preserve the capabilities to connect to the audit daemon. */
1880 cap_t new_caps = preserve_capabilities ();
1881 #endif
1883 if (setgroups (server_ngroups, server_groups) == -1)
1885 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1886 error (EXIT_FAILURE, errno, _("setgroups failed"));
1889 int res;
1890 if (paranoia)
1891 res = setresgid (server_gid, server_gid, old_gid);
1892 else
1893 res = setgid (server_gid);
1894 if (res == -1)
1896 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1897 perror ("setgid");
1898 exit (4);
1901 if (paranoia)
1902 res = setresuid (server_uid, server_uid, old_uid);
1903 else
1904 res = setuid (server_uid);
1905 if (res == -1)
1907 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1908 perror ("setuid");
1909 exit (4);
1912 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
1913 /* Remove the temporary capabilities. */
1914 install_real_capabilities (new_caps);
1915 #endif