* nscd/connections.c (dbs): Initialize .prunelock.
[glibc.git] / nscd / connections.c
blobda837b5e8af5813276da74531b2eff59cd4769e1
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2003, 2004, 2005, 2006 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License version 2 as
8 published by the Free Software Foundation.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
19 #include <alloca.h>
20 #include <assert.h>
21 #include <atomic.h>
22 #include <error.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <grp.h>
26 #include <libintl.h>
27 #include <pthread.h>
28 #include <pwd.h>
29 #include <resolv.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <unistd.h>
33 #include <arpa/inet.h>
34 #ifdef HAVE_EPOLL
35 # include <sys/epoll.h>
36 #endif
37 #include <sys/mman.h>
38 #include <sys/param.h>
39 #include <sys/poll.h>
40 #ifdef HAVE_SENDFILE
41 # include <sys/sendfile.h>
42 #endif
43 #include <sys/socket.h>
44 #include <sys/stat.h>
45 #include <sys/un.h>
47 #include "nscd.h"
48 #include "dbg_log.h"
49 #include "selinux.h"
50 #ifdef HAVE_SENDFILE
51 # include <kernel-features.h>
52 #endif
/* Allocation wrappers with built-in error checking; they are defined
   outside this file.  */
extern void *xmalloc (size_t size);
extern void *xcalloc (size_t nmemb, size_t size);
extern void *xrealloc (void *old, size_t size);

/* State needed to run nscd as an unprivileged user.  */

/* Name of the account the server should run as; NULL means that no
   privilege change is requested.  */
const char *server_user;
/* Numeric IDs belonging to SERVER_USER.  */
static uid_t server_uid;
static gid_t server_gid;

/* Name and UID of the additional user who may request statistics.  */
const char *stat_user;
uid_t stat_uid;

/* Supplementary groups of SERVER_USER and how many there are.  */
static gid_t *server_groups;
#ifndef NGROUPS
# define NGROUPS 32
#endif
static int server_ngroups;

/* Thread attribute object shared by code in this file.  */
static pthread_attr_t attr;

/* The two halves of the privilege drop; nscd_init calls the first
   before the socket is created and the second once it is listening.  */
static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
77 /* Map request type to a string. */
78 const char *serv2str[LASTREQ] =
80 [GETPWBYNAME] = "GETPWBYNAME",
81 [GETPWBYUID] = "GETPWBYUID",
82 [GETGRBYNAME] = "GETGRBYNAME",
83 [GETGRBYGID] = "GETGRBYGID",
84 [GETHOSTBYNAME] = "GETHOSTBYNAME",
85 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
86 [GETHOSTBYADDR] = "GETHOSTBYADDR",
87 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
88 [SHUTDOWN] = "SHUTDOWN",
89 [GETSTAT] = "GETSTAT",
90 [INVALIDATE] = "INVALIDATE",
91 [GETFDPW] = "GETFDPW",
92 [GETFDGR] = "GETFDGR",
93 [GETFDHST] = "GETFDHST",
94 [GETAI] = "GETAI",
95 [INITGROUPS] = "INITGROUPS"
98 /* The control data structures for the services. */
99 struct database_dyn dbs[lastdb] =
101 [pwddb] = {
102 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
103 .prunelock = PTHREAD_MUTEX_INITIALIZER,
104 .enabled = 0,
105 .check_file = 1,
106 .persistent = 0,
107 .propagate = 1,
108 .shared = 0,
109 .max_db_size = DEFAULT_MAX_DB_SIZE,
110 .filename = "/etc/passwd",
111 .db_filename = _PATH_NSCD_PASSWD_DB,
112 .disabled_iov = &pwd_iov_disabled,
113 .postimeout = 3600,
114 .negtimeout = 20,
115 .wr_fd = -1,
116 .ro_fd = -1,
117 .mmap_used = false
119 [grpdb] = {
120 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
121 .prunelock = PTHREAD_MUTEX_INITIALIZER,
122 .enabled = 0,
123 .check_file = 1,
124 .persistent = 0,
125 .propagate = 1,
126 .shared = 0,
127 .max_db_size = DEFAULT_MAX_DB_SIZE,
128 .filename = "/etc/group",
129 .db_filename = _PATH_NSCD_GROUP_DB,
130 .disabled_iov = &grp_iov_disabled,
131 .postimeout = 3600,
132 .negtimeout = 60,
133 .wr_fd = -1,
134 .ro_fd = -1,
135 .mmap_used = false
137 [hstdb] = {
138 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
139 .prunelock = PTHREAD_MUTEX_INITIALIZER,
140 .enabled = 0,
141 .check_file = 1,
142 .persistent = 0,
143 .propagate = 0, /* Not used. */
144 .shared = 0,
145 .max_db_size = DEFAULT_MAX_DB_SIZE,
146 .filename = "/etc/hosts",
147 .db_filename = _PATH_NSCD_HOSTS_DB,
148 .disabled_iov = &hst_iov_disabled,
149 .postimeout = 3600,
150 .negtimeout = 20,
151 .wr_fd = -1,
152 .ro_fd = -1,
153 .mmap_used = false
158 /* Mapping of request type to database. */
159 static struct database_dyn *const serv2db[LASTREQ] =
161 [GETPWBYNAME] = &dbs[pwddb],
162 [GETPWBYUID] = &dbs[pwddb],
163 [GETGRBYNAME] = &dbs[grpdb],
164 [GETGRBYGID] = &dbs[grpdb],
165 [GETHOSTBYNAME] = &dbs[hstdb],
166 [GETHOSTBYNAMEv6] = &dbs[hstdb],
167 [GETHOSTBYADDR] = &dbs[hstdb],
168 [GETHOSTBYADDRv6] = &dbs[hstdb],
169 [GETFDPW] = &dbs[pwddb],
170 [GETFDGR] = &dbs[grpdb],
171 [GETFDHST] = &dbs[hstdb],
172 [GETAI] = &dbs[hstdb],
173 [INITGROUPS] = &dbs[grpdb]
/* Seconds that pass between two pruning runs over a cache.  */
#define CACHE_PRUNE_INTERVAL	15


/* Number of threads started initially.  -1 means "not configured";
   nscd_init replaces it with a default.  */
int nthreads = -1;
/* Upper limit for the number of threads.  */
int max_nthreads = 32;

/* The listening socket on which clients connect.  */
static int sock;

/* Counts how often a client had to wait.  */
unsigned long int client_queued;
/* Send all LEN bytes starting at BUF over the socket FD, continuing
   after partial sends and restarting sends interrupted by a signal.
   Returns the negative result of send() if a send fails, otherwise the
   number of bytes actually sent (less than LEN only if send()
   reported zero bytes).  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cursor = buf;
  size_t pending = len;

  for (;;)
    {
      ssize_t sent;

      /* Restart the call for as long as it is interrupted.  */
      do
	sent = send (fd, cursor, pending, MSG_NOSIGNAL);
      while (sent == -1 && errno == EINTR);

      if (sent < 0)
	return sent;
      if (sent == 0)
	break;

      cursor += sent;
      pending -= sent;
      if (pending == 0)
	break;
    }

  return len - pending;
}
211 #ifdef HAVE_SENDFILE
/* Copy LEN bytes of FROMFD, starting at file offset OFF, to TOFD with
   sendfile(), continuing after partial transfers and restarting calls
   interrupted by a signal.  Returns the negative result of sendfile()
   on failure, otherwise the number of bytes transferred (less than
   LEN only if sendfile() reported zero bytes).  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t pending = len;

  for (;;)
    {
      ssize_t moved;

      /* sendfile() advances OFF itself, so the retry simply reuses it.  */
      do
	moved = sendfile (tofd, fromfd, &off, pending);
      while (moved == -1 && errno == EINTR);

      if (moved < 0)
	return moved;
      if (moved == 0)
	break;

      pending -= moved;
      if (pending <= 0)
	break;
    }

  return len - pending;
}
228 #endif
/* Values stored in the per-byte usage map that verify_persistent_db
   builds.  The low bits say what kind of object occupies a byte, the
   high bits flag special positions inside that object.  */
enum usekey
  {
    /* Byte not claimed by any object.  */
    use_not = 0,

    /* Flag bits.  These three are symbolic constants which are only
       ever used in combination with one of the kinds below.  */
    use_first = 1 << 4,
    use_begin = 1 << 5,
    use_end = 1 << 6,

    /* Hash entry.  */
    use_he = 1,
    use_he_begin = use_he | use_begin,
    use_he_end = use_he | use_end,
#if SEPARATE_KEY
    /* Key stored apart from its data record.  */
    use_key = 2,
    use_key_begin = use_key | use_begin,
    use_key_end = use_key | use_end,
    use_key_first = use_key_begin | use_first,
#endif
    /* Data record.  */
    use_data = 3,
    use_data_begin = use_data | use_begin,
    use_data_end = use_data | use_end,
    use_data_first = use_data_begin | use_first
  };
255 static int
256 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
257 enum usekey use, ref_t start, size_t len)
259 assert (len >= 2);
261 if (start > first_free || start + len > first_free
262 || (start & BLOCK_ALIGN_M1))
263 return 0;
265 if (usemap[start] == use_not)
267 /* Add the start marker. */
268 usemap[start] = use | use_begin;
269 use &= ~use_first;
271 while (--len > 0)
272 if (usemap[++start] != use_not)
273 return 0;
274 else
275 usemap[start] = use;
277 /* Add the end marker. */
278 usemap[start] = use | use_end;
280 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
282 /* Hash entries can't be shared. */
283 if (use == use_he)
284 return 0;
286 usemap[start] |= (use & use_first);
287 use &= ~use_first;
289 while (--len > 1)
290 if (usemap[++start] != use)
291 return 0;
293 if (usemap[++start] != (use | use_end))
294 return 0;
296 else
297 /* Points to a wrong object or somewhere in the middle. */
298 return 0;
300 return 1;
304 /* Verify data in persistent database. */
305 static int
306 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
308 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb);
310 time_t now = time (NULL);
312 struct database_pers_head *head = mem;
313 struct database_pers_head head_copy = *head;
315 /* Check that the header that was read matches the head in the database. */
316 if (readhead != NULL && memcmp (head, readhead, sizeof (*head)) != 0)
317 return 0;
319 /* First some easy tests: make sure the database header is sane. */
320 if (head->version != DB_VERSION
321 || head->header_size != sizeof (*head)
322 /* We allow a timestamp to be one hour ahead of the current time.
323 This should cover daylight saving time changes. */
324 || head->timestamp > now + 60 * 60 + 60
325 || (head->gc_cycle & 1)
326 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
327 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
328 || head->first_free < 0
329 || head->first_free > head->data_size
330 || (head->first_free & BLOCK_ALIGN_M1) != 0
331 || head->maxnentries < 0
332 || head->maxnsearched < 0)
333 return 0;
335 uint8_t *usemap = calloc (head->first_free, 1);
336 if (usemap == NULL)
337 return 0;
339 const char *data = (char *) &head->array[roundup (head->module,
340 ALIGN / sizeof (ref_t))];
342 nscd_ssize_t he_cnt = 0;
343 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
345 ref_t work = head->array[cnt];
347 while (work != ENDREF)
349 if (! check_use (data, head->first_free, usemap, use_he, work,
350 sizeof (struct hashentry)))
351 goto fail;
353 /* Now we know we can dereference the record. */
354 struct hashentry *here = (struct hashentry *) (data + work);
356 ++he_cnt;
358 /* Make sure the record is for this type of service. */
359 if (here->type >= LASTREQ
360 || serv2db[here->type] != &dbs[dbnr])
361 goto fail;
363 /* Validate boolean field value. */
364 if (here->first != false && here->first != true)
365 goto fail;
367 if (here->len < 0)
368 goto fail;
370 /* Now the data. */
371 if (here->packet < 0
372 || here->packet > head->first_free
373 || here->packet + sizeof (struct datahead) > head->first_free)
374 goto fail;
376 struct datahead *dh = (struct datahead *) (data + here->packet);
378 if (! check_use (data, head->first_free, usemap,
379 use_data | (here->first ? use_first : 0),
380 here->packet, dh->allocsize))
381 goto fail;
383 if (dh->allocsize < sizeof (struct datahead)
384 || dh->recsize > dh->allocsize
385 || (dh->notfound != false && dh->notfound != true)
386 || (dh->usable != false && dh->usable != true))
387 goto fail;
389 if (here->key < here->packet + sizeof (struct datahead)
390 || here->key > here->packet + dh->allocsize
391 || here->key + here->len > here->packet + dh->allocsize)
393 #if SEPARATE_KEY
394 /* If keys can appear outside of data, this should be done
395 instead. But gc doesn't mark the data in that case. */
396 if (! check_use (data, head->first_free, usemap,
397 use_key | (here->first ? use_first : 0),
398 here->key, here->len))
399 #endif
400 goto fail;
403 work = here->next;
407 if (he_cnt != head->nentries)
408 goto fail;
410 /* See if all data and keys had at least one reference from
411 he->first == true hashentry. */
412 for (ref_t idx = 0; idx < head->first_free; ++idx)
414 #if SEPARATE_KEY
415 if (usemap[idx] == use_key_begin)
416 goto fail;
417 #endif
418 if (usemap[idx] == use_data_begin)
419 goto fail;
422 /* Finally, make sure the database hasn't changed since the first test. */
423 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
424 goto fail;
426 free (usemap);
427 return 1;
429 fail:
430 free (usemap);
431 return 0;
435 /* Initialize database information structures. */
436 void
437 nscd_init (void)
439 /* Look up unprivileged uid/gid/groups before we start listening on the
440 socket */
441 if (server_user != NULL)
442 begin_drop_privileges ();
444 if (nthreads == -1)
445 /* No configuration for this value, assume a default. */
446 nthreads = 2 * lastdb;
448 for (size_t cnt = 0; cnt < lastdb; ++cnt)
449 if (dbs[cnt].enabled)
451 pthread_rwlock_init (&dbs[cnt].lock, NULL);
452 pthread_mutex_init (&dbs[cnt].memlock, NULL);
454 if (dbs[cnt].persistent)
456 /* Try to open the appropriate file on disk. */
457 int fd = open (dbs[cnt].db_filename, O_RDWR);
458 if (fd != -1)
460 struct stat64 st;
461 void *mem;
462 size_t total;
463 struct database_pers_head head;
464 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
465 sizeof (head)));
466 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
468 fail_db:
469 dbg_log (_("invalid persistent database file \"%s\": %s"),
470 dbs[cnt].db_filename, strerror (errno));
471 unlink (dbs[cnt].db_filename);
473 else if (head.module == 0 && head.data_size == 0)
475 /* The file has been created, but the head has not been
476 initialized yet. Remove the old file. */
477 unlink (dbs[cnt].db_filename);
479 else if (head.header_size != (int) sizeof (head))
481 dbg_log (_("invalid persistent database file \"%s\": %s"),
482 dbs[cnt].db_filename,
483 _("header size does not match"));
484 unlink (dbs[cnt].db_filename);
486 else if ((total = (sizeof (head)
487 + roundup (head.module * sizeof (ref_t),
488 ALIGN)
489 + head.data_size))
490 > st.st_size
491 || total < sizeof (head))
493 dbg_log (_("invalid persistent database file \"%s\": %s"),
494 dbs[cnt].db_filename,
495 _("file size does not match"));
496 unlink (dbs[cnt].db_filename);
498 /* Note we map with the maximum size allowed for the
499 database. This is likely much larger than the
500 actual file size. This is OK on most OSes since
501 extensions of the underlying file will
502 automatically translate more pages available for
503 memory access. */
504 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
505 PROT_READ | PROT_WRITE,
506 MAP_SHARED, fd, 0))
507 == MAP_FAILED)
508 goto fail_db;
509 else if (!verify_persistent_db (mem, &head, cnt))
511 munmap (mem, total);
512 dbg_log (_("invalid persistent database file \"%s\": %s"),
513 dbs[cnt].db_filename,
514 _("verification failed"));
515 unlink (dbs[cnt].db_filename);
517 else
519 /* Success. We have the database. */
520 dbs[cnt].head = mem;
521 dbs[cnt].memsize = total;
522 dbs[cnt].data = (char *)
523 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
524 ALIGN / sizeof (ref_t))];
525 dbs[cnt].mmap_used = true;
527 if (dbs[cnt].suggested_module > head.module)
528 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
529 dbnames[cnt]);
531 dbs[cnt].wr_fd = fd;
532 fd = -1;
533 /* We also need a read-only descriptor. */
534 if (dbs[cnt].shared)
536 dbs[cnt].ro_fd = open (dbs[cnt].db_filename, O_RDONLY);
537 if (dbs[cnt].ro_fd == -1)
538 dbg_log (_("\
539 cannot create read-only descriptor for \"%s\"; no mmap"),
540 dbs[cnt].db_filename);
543 // XXX Shall we test whether the descriptors actually
544 // XXX point to the same file?
547 /* Close the file descriptors in case something went
548 wrong in which case the variable have not been
549 assigned -1. */
550 if (fd != -1)
551 close (fd);
555 if (dbs[cnt].head == NULL)
557 /* No database loaded. Allocate the data structure,
558 possibly on disk. */
559 struct database_pers_head head;
560 size_t total = (sizeof (head)
561 + roundup (dbs[cnt].suggested_module
562 * sizeof (ref_t), ALIGN)
563 + (dbs[cnt].suggested_module
564 * DEFAULT_DATASIZE_PER_BUCKET));
566 /* Try to create the database. If we do not need a
567 persistent database create a temporary file. */
568 int fd;
569 int ro_fd = -1;
570 if (dbs[cnt].persistent)
572 fd = open (dbs[cnt].db_filename,
573 O_RDWR | O_CREAT | O_EXCL | O_TRUNC,
574 S_IRUSR | S_IWUSR);
575 if (fd != -1 && dbs[cnt].shared)
576 ro_fd = open (dbs[cnt].db_filename, O_RDONLY);
578 else
580 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
581 fd = mkstemp (fname);
583 /* We do not need the file name anymore after we
584 opened another file descriptor in read-only mode. */
585 if (fd != -1)
587 if (dbs[cnt].shared)
588 ro_fd = open (fname, O_RDONLY);
590 unlink (fname);
594 if (fd == -1)
596 if (errno == EEXIST)
598 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
599 dbnames[cnt], dbs[cnt].db_filename);
600 // XXX Correct way to terminate?
601 exit (1);
604 if (dbs[cnt].persistent)
605 dbg_log (_("cannot create %s; no persistent database used"),
606 dbs[cnt].db_filename);
607 else
608 dbg_log (_("cannot create %s; no sharing possible"),
609 dbs[cnt].db_filename);
611 dbs[cnt].persistent = 0;
612 // XXX remember: no mmap
614 else
616 /* Tell the user if we could not create the read-only
617 descriptor. */
618 if (ro_fd == -1 && dbs[cnt].shared)
619 dbg_log (_("\
620 cannot create read-only descriptor for \"%s\"; no mmap"),
621 dbs[cnt].db_filename);
623 /* Before we create the header, initialiye the hash
624 table. So that if we get interrupted if writing
625 the header we can recognize a partially initialized
626 database. */
627 size_t ps = sysconf (_SC_PAGESIZE);
628 char tmpbuf[ps];
629 assert (~ENDREF == 0);
630 memset (tmpbuf, '\xff', ps);
632 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
633 off_t offset = sizeof (head);
635 size_t towrite;
636 if (offset % ps != 0)
638 towrite = MIN (remaining, ps - (offset % ps));
639 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
640 goto write_fail;
641 offset += towrite;
642 remaining -= towrite;
645 while (remaining > ps)
647 if (pwrite (fd, tmpbuf, ps, offset) == -1)
648 goto write_fail;
649 offset += ps;
650 remaining -= ps;
653 if (remaining > 0
654 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
655 goto write_fail;
657 /* Create the header of the file. */
658 struct database_pers_head head =
660 .version = DB_VERSION,
661 .header_size = sizeof (head),
662 .module = dbs[cnt].suggested_module,
663 .data_size = (dbs[cnt].suggested_module
664 * DEFAULT_DATASIZE_PER_BUCKET),
665 .first_free = 0
667 void *mem;
669 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
670 != sizeof (head))
671 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
672 != 0)
673 || (mem = mmap (NULL, dbs[cnt].max_db_size,
674 PROT_READ | PROT_WRITE,
675 MAP_SHARED, fd, 0)) == MAP_FAILED)
677 write_fail:
678 unlink (dbs[cnt].db_filename);
679 dbg_log (_("cannot write to database file %s: %s"),
680 dbs[cnt].db_filename, strerror (errno));
681 dbs[cnt].persistent = 0;
683 else
685 /* Success. */
686 dbs[cnt].head = mem;
687 dbs[cnt].data = (char *)
688 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
689 ALIGN / sizeof (ref_t))];
690 dbs[cnt].memsize = total;
691 dbs[cnt].mmap_used = true;
693 /* Remember the descriptors. */
694 dbs[cnt].wr_fd = fd;
695 dbs[cnt].ro_fd = ro_fd;
696 fd = -1;
697 ro_fd = -1;
700 if (fd != -1)
701 close (fd);
702 if (ro_fd != -1)
703 close (ro_fd);
707 if (paranoia
708 && ((dbs[cnt].wr_fd != -1
709 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
710 || (dbs[cnt].ro_fd != -1
711 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
713 dbg_log (_("\
714 cannot set socket to close on exec: %s; disabling paranoia mode"),
715 strerror (errno));
716 paranoia = 0;
719 if (dbs[cnt].head == NULL)
721 /* We do not use the persistent database. Just
722 create an in-memory data structure. */
723 assert (! dbs[cnt].persistent);
725 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
726 + (dbs[cnt].suggested_module
727 * sizeof (ref_t)));
728 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
729 assert (~ENDREF == 0);
730 memset (dbs[cnt].head->array, '\xff',
731 dbs[cnt].suggested_module * sizeof (ref_t));
732 dbs[cnt].head->module = dbs[cnt].suggested_module;
733 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
734 * dbs[cnt].head->module);
735 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
736 dbs[cnt].head->first_free = 0;
738 dbs[cnt].shared = 0;
739 assert (dbs[cnt].ro_fd == -1);
742 if (dbs[cnt].check_file)
744 /* We need the modification date of the file. */
745 struct stat64 st;
747 if (stat64 (dbs[cnt].filename, &st) < 0)
749 /* We cannot stat() the file, disable file checking. */
750 dbg_log (_("cannot stat() file `%s': %s"),
751 dbs[cnt].filename, strerror (errno));
752 dbs[cnt].check_file = 0;
754 else
755 dbs[cnt].file_mtime = st.st_mtime;
759 /* Create the socket. */
760 sock = socket (AF_UNIX, SOCK_STREAM, 0);
761 if (sock < 0)
763 dbg_log (_("cannot open socket: %s"), strerror (errno));
764 exit (errno == EACCES ? 4 : 1);
766 /* Bind a name to the socket. */
767 struct sockaddr_un sock_addr;
768 sock_addr.sun_family = AF_UNIX;
769 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
770 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
772 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
773 exit (errno == EACCES ? 4 : 1);
776 /* We don't want to get stuck on accept. */
777 int fl = fcntl (sock, F_GETFL);
778 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
780 dbg_log (_("cannot change socket to nonblocking mode: %s"),
781 strerror (errno));
782 exit (1);
785 /* The descriptor needs to be closed on exec. */
786 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
788 dbg_log (_("cannot set socket to close on exec: %s"),
789 strerror (errno));
790 exit (1);
793 /* Set permissions for the socket. */
794 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
796 /* Set the socket up to accept connections. */
797 if (listen (sock, SOMAXCONN) < 0)
799 dbg_log (_("cannot enable socket to accept connections: %s"),
800 strerror (errno));
801 exit (1);
804 /* Change to unprivileged uid/gid/groups if specifed in config file */
805 if (server_user != NULL)
806 finish_drop_privileges ();
810 /* Close the connections. */
811 void
812 close_sockets (void)
814 close (sock);
818 static void
819 invalidate_cache (char *key)
821 dbtype number;
823 if (strcmp (key, "passwd") == 0)
824 number = pwddb;
825 else if (strcmp (key, "group") == 0)
826 number = grpdb;
827 else if (__builtin_expect (strcmp (key, "hosts"), 0) == 0)
829 number = hstdb;
831 /* Re-initialize the resolver. resolv.conf might have changed. */
832 res_init ();
834 else
835 return;
837 if (dbs[number].enabled)
838 prune_cache (&dbs[number], LONG_MAX);
842 #ifdef SCM_RIGHTS
843 static void
844 send_ro_fd (struct database_dyn *db, char *key, int fd)
846 /* If we do not have an read-only file descriptor do nothing. */
847 if (db->ro_fd == -1)
848 return;
850 /* We need to send some data along with the descriptor. */
851 struct iovec iov[1];
852 iov[0].iov_base = key;
853 iov[0].iov_len = strlen (key) + 1;
855 /* Prepare the control message to transfer the descriptor. */
856 union
858 struct cmsghdr hdr;
859 char bytes[CMSG_SPACE (sizeof (int))];
860 } buf;
861 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 1,
862 .msg_control = buf.bytes,
863 .msg_controllen = sizeof (buf) };
864 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
866 cmsg->cmsg_level = SOL_SOCKET;
867 cmsg->cmsg_type = SCM_RIGHTS;
868 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
870 *(int *) CMSG_DATA (cmsg) = db->ro_fd;
872 msg.msg_controllen = cmsg->cmsg_len;
874 /* Send the control message. We repeat when we are interrupted but
875 everything else is ignored. */
876 #ifndef MSG_NOSIGNAL
877 # define MSG_NOSIGNAL 0
878 #endif
879 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
881 if (__builtin_expect (debug_level > 0, 0))
882 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
884 #endif /* SCM_RIGHTS */
887 /* Handle new request. */
888 static void
889 handle_request (int fd, request_header *req, void *key, uid_t uid)
891 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
893 if (debug_level > 0)
894 dbg_log (_("\
895 cannot handle old request version %d; current version is %d"),
896 req->version, NSCD_VERSION);
897 return;
900 /* Make the SELinux check before we go on to the standard checks. We
901 need to verify that the request type is valid, since it has not
902 yet been checked at this point. */
903 if (selinux_enabled
904 && __builtin_expect (req->type, GETPWBYNAME) >= GETPWBYNAME
905 && __builtin_expect (req->type, LASTREQ) < LASTREQ
906 && nscd_request_avc_has_perm (fd, req->type) != 0)
907 return;
909 struct database_dyn *db = serv2db[req->type];
911 // XXX Clean up so that each new command need not introduce a
912 // XXX new conditional.
913 if ((__builtin_expect (req->type, GETPWBYNAME) >= GETPWBYNAME
914 && __builtin_expect (req->type, LASTDBREQ) <= LASTDBREQ)
915 || req->type == GETAI || req->type == INITGROUPS)
917 if (__builtin_expect (debug_level, 0) > 0)
919 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
921 char buf[INET6_ADDRSTRLEN];
923 dbg_log ("\t%s (%s)", serv2str[req->type],
924 inet_ntop (req->type == GETHOSTBYADDR
925 ? AF_INET : AF_INET6,
926 key, buf, sizeof (buf)));
928 else
929 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
932 /* Is this service enabled? */
933 if (!db->enabled)
935 /* No, sent the prepared record. */
936 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
937 db->disabled_iov->iov_len,
938 MSG_NOSIGNAL))
939 != (ssize_t) db->disabled_iov->iov_len
940 && __builtin_expect (debug_level, 0) > 0)
942 /* We have problems sending the result. */
943 char buf[256];
944 dbg_log (_("cannot write result: %s"),
945 strerror_r (errno, buf, sizeof (buf)));
948 return;
951 /* Be sure we can read the data. */
952 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
954 ++db->head->rdlockdelayed;
955 pthread_rwlock_rdlock (&db->lock);
958 /* See whether we can handle it from the cache. */
959 struct datahead *cached;
960 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
961 db, uid);
962 if (cached != NULL)
964 /* Hurray it's in the cache. */
965 ssize_t nwritten;
967 #ifdef HAVE_SENDFILE
968 if (db->mmap_used || !cached->notfound)
970 assert (db->wr_fd != -1);
971 assert ((char *) cached->data > (char *) db->data);
972 assert ((char *) cached->data - (char *) db->head
973 + cached->recsize
974 <= (sizeof (struct database_pers_head)
975 + db->head->module * sizeof (ref_t)
976 + db->head->data_size));
977 nwritten = sendfileall (fd, db->wr_fd,
978 (char *) cached->data
979 - (char *) db->head, cached->recsize);
980 # ifndef __ASSUME_SENDFILE
981 if (nwritten == -1 && errno == ENOSYS)
982 goto use_write;
983 # endif
985 else
986 # ifndef __ASSUME_SENDFILE
987 use_write:
988 # endif
989 #endif
990 nwritten = writeall (fd, cached->data, cached->recsize);
992 if (nwritten != cached->recsize
993 && __builtin_expect (debug_level, 0) > 0)
995 /* We have problems sending the result. */
996 char buf[256];
997 dbg_log (_("cannot write result: %s"),
998 strerror_r (errno, buf, sizeof (buf)));
1001 pthread_rwlock_unlock (&db->lock);
1003 return;
1006 pthread_rwlock_unlock (&db->lock);
1008 else if (__builtin_expect (debug_level, 0) > 0)
1010 if (req->type == INVALIDATE)
1011 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1012 else
1013 dbg_log ("\t%s", serv2str[req->type]);
1016 /* Handle the request. */
1017 switch (req->type)
1019 case GETPWBYNAME:
1020 addpwbyname (db, fd, req, key, uid);
1021 break;
1023 case GETPWBYUID:
1024 addpwbyuid (db, fd, req, key, uid);
1025 break;
1027 case GETGRBYNAME:
1028 addgrbyname (db, fd, req, key, uid);
1029 break;
1031 case GETGRBYGID:
1032 addgrbygid (db, fd, req, key, uid);
1033 break;
1035 case GETHOSTBYNAME:
1036 addhstbyname (db, fd, req, key, uid);
1037 break;
1039 case GETHOSTBYNAMEv6:
1040 addhstbynamev6 (db, fd, req, key, uid);
1041 break;
1043 case GETHOSTBYADDR:
1044 addhstbyaddr (db, fd, req, key, uid);
1045 break;
1047 case GETHOSTBYADDRv6:
1048 addhstbyaddrv6 (db, fd, req, key, uid);
1049 break;
1051 case GETAI:
1052 addhstai (db, fd, req, key, uid);
1053 break;
1055 case INITGROUPS:
1056 addinitgroups (db, fd, req, key, uid);
1057 break;
1059 case GETSTAT:
1060 case SHUTDOWN:
1061 case INVALIDATE:
1063 /* Get the callers credentials. */
1064 #ifdef SO_PEERCRED
1065 struct ucred caller;
1066 socklen_t optlen = sizeof (caller);
1068 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1070 char buf[256];
1072 dbg_log (_("error getting callers id: %s"),
1073 strerror_r (errno, buf, sizeof (buf)));
1074 break;
1077 uid = caller.uid;
1078 #else
1079 /* Some systems have no SO_PEERCRED implementation. They don't
1080 care about security so we don't as well. */
1081 uid = 0;
1082 #endif
1085 /* Accept shutdown, getstat and invalidate only from root. For
1086 the stat call also allow the user specified in the config file. */
1087 if (req->type == GETSTAT)
1089 if (uid == 0 || uid == stat_uid)
1090 send_stats (fd, dbs);
1092 else if (uid == 0)
1094 if (req->type == INVALIDATE)
1095 invalidate_cache (key);
1096 else
1097 termination_handler (0);
1099 break;
1101 case GETFDPW:
1102 case GETFDGR:
1103 case GETFDHST:
1104 #ifdef SCM_RIGHTS
1105 send_ro_fd (serv2db[req->type], key, fd);
1106 #endif
1107 break;
1109 default:
1110 /* Ignore the command, it's nothing we know. */
1111 break;
1116 /* Restart the process. */
1117 static void
1118 restart (void)
1120 /* First determine the parameters. We do not use the parameters
1121 passed to main() since in case nscd is started by running the
1122 dynamic linker this will not work. Yes, this is not the usual
1123 case but nscd is part of glibc and we occasionally do this. */
1124 size_t buflen = 1024;
1125 char *buf = alloca (buflen);
1126 size_t readlen = 0;
1127 int fd = open ("/proc/self/cmdline", O_RDONLY);
1128 if (fd == -1)
1130 dbg_log (_("\
1131 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1132 strerror (errno));
1134 paranoia = 0;
1135 return;
1138 while (1)
1140 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1141 buflen - readlen));
1142 if (n == -1)
1144 dbg_log (_("\
1145 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1146 strerror (errno));
1148 close (fd);
1149 paranoia = 0;
1150 return;
1153 readlen += n;
1155 if (readlen < buflen)
1156 break;
1158 /* We might have to extend the buffer. */
1159 size_t old_buflen = buflen;
1160 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1161 buf = memmove (newp, buf, old_buflen);
1164 close (fd);
1166 /* Parse the command line. Worst case scenario: every two
1167 characters form one parameter (one character plus NUL). */
1168 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1169 int argc = 0;
1171 char *cp = buf;
1172 while (cp < buf + readlen)
1174 argv[argc++] = cp;
1175 cp = (char *) rawmemchr (cp, '\0') + 1;
1177 argv[argc] = NULL;
1179 /* Second, change back to the old user if we changed it. */
1180 if (server_user != NULL)
1182 if (setresuid (old_uid, old_uid, old_uid) != 0)
1184 dbg_log (_("\
1185 cannot change to old UID: %s; disabling paranoia mode"),
1186 strerror (errno));
1188 paranoia = 0;
1189 return;
1192 if (setresgid (old_gid, old_gid, old_gid) != 0)
1194 dbg_log (_("\
1195 cannot change to old GID: %s; disabling paranoia mode"),
1196 strerror (errno));
1198 setuid (server_uid);
1199 paranoia = 0;
1200 return;
1204 /* Next change back to the old working directory. */
1205 if (chdir (oldcwd) == -1)
1207 dbg_log (_("\
1208 cannot change to old working directory: %s; disabling paranoia mode"),
1209 strerror (errno));
1211 if (server_user != NULL)
1213 setuid (server_uid);
1214 setgid (server_gid);
1216 paranoia = 0;
1217 return;
1220 /* Synchronize memory. */
1221 for (int cnt = 0; cnt < lastdb; ++cnt)
1223 /* Make sure nobody keeps using the database. */
1224 dbs[cnt].head->timestamp = 0;
1226 if (dbs[cnt].persistent)
1227 // XXX async OK?
1228 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1231 /* The preparations are done. */
1232 execv ("/proc/self/exe", argv);
1234 /* If we come here, we will never be able to re-exec. */
1235 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1236 strerror (errno));
1238 if (server_user != NULL)
1240 setuid (server_uid);
1241 setgid (server_gid);
1243 if (chdir ("/") != 0)
1244 dbg_log (_("cannot change current working directory to \"/\": %s"),
1245 strerror (errno));
1246 paranoia = 0;
/* Element of a singly linked list of file descriptors.  */
struct fdlist
{
  /* The descriptor this element stands for.  */
  int fd;
  /* The element following this one.  */
  struct fdlist *next;
};
1256 /* Memory allocated for the list. */
1257 static struct fdlist *fdlist;
1258 /* List of currently ready-to-read file descriptors. */
1259 static struct fdlist *readylist;
1261 /* Conditional variable and mutex to signal availability of entries in
1262 READYLIST. The condvar is initialized dynamically since we might
1263 use a different clock depending on availability. */
1264 static pthread_cond_t readylist_cond;
1265 static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1267 /* The clock to use with the condvar. */
1268 static clockid_t timeout_clock = CLOCK_REALTIME;
1270 /* Number of threads ready to handle the READYLIST. */
1271 static unsigned long int nready;
1274 /* This is the main loop. It is replicated in different threads but the
1275 `poll' call makes sure only one thread handles an incoming connection. */
1276 static void *
1277 __attribute__ ((__noreturn__))
1278 nscd_run (void *p)
1280 const long int my_number = (long int) p;
1281 const int run_prune = my_number < lastdb && dbs[my_number].enabled;
1282 struct timespec prune_ts;
1283 int to = 0;
1284 char buf[256];
1286 if (run_prune)
1288 setup_thread (&dbs[my_number]);
1290 /* We are running. */
1291 dbs[my_number].head->timestamp = time (NULL);
1293 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1294 /* Should never happen. */
1295 abort ();
1297 /* Compute timeout time. */
1298 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1301 /* Initial locking. */
1302 pthread_mutex_lock (&readylist_lock);
1304 /* One more thread available. */
1305 ++nready;
1307 while (1)
1309 while (readylist == NULL)
1311 if (run_prune)
1313 /* Wait, but not forever. */
1314 to = pthread_cond_timedwait (&readylist_cond, &readylist_lock,
1315 &prune_ts);
1317 /* If we were woken and there is no work to be done,
1318 just start pruning. */
1319 if (readylist == NULL && to == ETIMEDOUT)
1321 --nready;
1322 pthread_mutex_unlock (&readylist_lock);
1323 goto only_prune;
1326 else
1327 /* No need to timeout. */
1328 pthread_cond_wait (&readylist_cond, &readylist_lock);
1331 struct fdlist *it = readylist->next;
1332 if (readylist->next == readylist)
1333 /* Just one entry on the list. */
1334 readylist = NULL;
1335 else
1336 readylist->next = it->next;
1338 /* Extract the information and mark the record ready to be used
1339 again. */
1340 int fd = it->fd;
1341 it->next = NULL;
1343 /* One more thread available. */
1344 --nready;
1346 /* We are done with the list. */
1347 pthread_mutex_unlock (&readylist_lock);
1349 /* We do not want to block on a short read or so. */
1350 int fl = fcntl (fd, F_GETFL);
1351 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1352 goto close_and_out;
1354 /* Now read the request. */
1355 request_header req;
1356 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1357 != sizeof (req), 0))
1359 /* We failed to read data. Note that this also might mean we
1360 failed because we would have blocked. */
1361 if (debug_level > 0)
1362 dbg_log (_("short read while reading request: %s"),
1363 strerror_r (errno, buf, sizeof (buf)));
1364 goto close_and_out;
1367 /* Check whether this is a valid request type. */
1368 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1369 goto close_and_out;
1371 /* Some systems have no SO_PEERCRED implementation. They don't
1372 care about security so we don't as well. */
1373 uid_t uid = -1;
1374 #ifdef SO_PEERCRED
1375 pid_t pid = 0;
1377 if (__builtin_expect (debug_level > 0, 0))
1379 struct ucred caller;
1380 socklen_t optlen = sizeof (caller);
1382 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1383 pid = caller.pid;
1385 #endif
1387 /* It should not be possible to crash the nscd with a silly
1388 request (i.e., a terribly large key). We limit the size to 1kb. */
1389 #define MAXKEYLEN 1024
1390 if (__builtin_expect (req.key_len, 1) < 0
1391 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1393 if (debug_level > 0)
1394 dbg_log (_("key length in request too long: %d"), req.key_len);
1396 else
1398 /* Get the key. */
1399 char keybuf[MAXKEYLEN];
1401 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1402 req.key_len))
1403 != req.key_len, 0))
1405 /* Again, this can also mean we would have blocked. */
1406 if (debug_level > 0)
1407 dbg_log (_("short read while reading request key: %s"),
1408 strerror_r (errno, buf, sizeof (buf)));
1409 goto close_and_out;
1412 if (__builtin_expect (debug_level, 0) > 0)
1414 #ifdef SO_PEERCRED
1415 if (pid != 0)
1416 dbg_log (_("\
1417 handle_request: request received (Version = %d) from PID %ld"),
1418 req.version, (long int) pid);
1419 else
1420 #endif
1421 dbg_log (_("\
1422 handle_request: request received (Version = %d)"), req.version);
1425 /* Phew, we got all the data, now process it. */
1426 handle_request (fd, &req, keybuf, uid);
1429 close_and_out:
1430 /* We are done. */
1431 close (fd);
1433 /* Check whether we should be pruning the cache. */
1434 assert (run_prune || to == 0);
1435 if (to == ETIMEDOUT)
1437 only_prune:
1438 /* The pthread_cond_timedwait() call timed out. It is time
1439 to clean up the cache. */
1440 assert (my_number < lastdb);
1441 prune_cache (&dbs[my_number], time (NULL));
1443 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1444 /* Should never happen. */
1445 abort ();
1447 /* Compute next timeout time. */
1448 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1450 /* In case the list is emtpy we do not want to run the prune
1451 code right away again. */
1452 to = 0;
1455 /* Re-locking. */
1456 pthread_mutex_lock (&readylist_lock);
1458 /* One more thread available. */
1459 ++nready;
/* Number of elements in the descriptor tables (FDLIST, STARTTIME and
   the array handed to poll); assigned in start_threads.  */
static unsigned int nconns;
1466 static void
1467 fd_ready (int fd)
1469 pthread_mutex_lock (&readylist_lock);
1471 /* Find an empty entry in FDLIST. */
1472 size_t inner;
1473 for (inner = 0; inner < nconns; ++inner)
1474 if (fdlist[inner].next == NULL)
1475 break;
1476 assert (inner < nconns);
1478 fdlist[inner].fd = fd;
1480 if (readylist == NULL)
1481 readylist = fdlist[inner].next = &fdlist[inner];
1482 else
1484 fdlist[inner].next = readylist->next;
1485 readylist = readylist->next = &fdlist[inner];
1488 bool do_signal = true;
1489 if (__builtin_expect (nready == 0, 0))
1491 ++client_queued;
1492 do_signal = false;
1494 /* Try to start another thread to help out. */
1495 pthread_t th;
1496 if (nthreads < max_nthreads
1497 && pthread_create (&th, &attr, nscd_run,
1498 (void *) (long int) nthreads) == 0)
1500 /* We got another thread. */
1501 ++nthreads;
1502 /* The new thread might need a kick. */
1503 do_signal = true;
1508 pthread_mutex_unlock (&readylist_lock);
1510 /* Tell one of the worker threads there is work to do. */
1511 if (do_signal)
1512 pthread_cond_signal (&readylist_cond);
1516 /* Check whether restarting should happen. */
1517 static inline int
1518 restart_p (time_t now)
1520 return (paranoia && readylist == NULL && nready == nthreads
1521 && now >= restart_time);
/* Times at which connections were accepted.  main_loop_poll indexes the
   array by slot in its poll array, main_loop_epoll by descriptor number
   (where a zero entry stands for "no connection waiting").  */
static time_t *starttime;
1529 static void
1530 __attribute__ ((__noreturn__))
1531 main_loop_poll (void)
1533 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1534 * sizeof (conns[0]));
1536 conns[0].fd = sock;
1537 conns[0].events = POLLRDNORM;
1538 size_t nused = 1;
1539 size_t firstfree = 1;
1541 while (1)
1543 /* Wait for any event. We wait at most a couple of seconds so
1544 that we can check whether we should close any of the accepted
1545 connections since we have not received a request. */
1546 #define MAX_ACCEPT_TIMEOUT 30
1547 #define MIN_ACCEPT_TIMEOUT 5
1548 #define MAIN_THREAD_TIMEOUT \
1549 (MAX_ACCEPT_TIMEOUT * 1000 \
1550 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1552 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1554 time_t now = time (NULL);
1556 /* If there is a descriptor ready for reading or there is a new
1557 connection, process this now. */
1558 if (n > 0)
1560 if (conns[0].revents != 0)
1562 /* We have a new incoming connection. Accept the connection. */
1563 int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
1565 /* Use the descriptor if we have not reached the limit. */
1566 if (fd >= 0)
1568 if (firstfree < nconns)
1570 conns[firstfree].fd = fd;
1571 conns[firstfree].events = POLLRDNORM;
1572 starttime[firstfree] = now;
1573 if (firstfree >= nused)
1574 nused = firstfree + 1;
1577 ++firstfree;
1578 while (firstfree < nused && conns[firstfree].fd != -1);
1580 else
1581 /* We cannot use the connection so close it. */
1582 close (fd);
1585 --n;
1588 for (size_t cnt = 1; cnt < nused && n > 0; ++cnt)
1589 if (conns[cnt].revents != 0)
1591 fd_ready (conns[cnt].fd);
1593 /* Clean up the CONNS array. */
1594 conns[cnt].fd = -1;
1595 if (cnt < firstfree)
1596 firstfree = cnt;
1597 if (cnt == nused - 1)
1599 --nused;
1600 while (conns[nused - 1].fd == -1);
1602 --n;
1606 /* Now find entries which have timed out. */
1607 assert (nused > 0);
1609 /* We make the timeout length depend on the number of file
1610 descriptors currently used. */
1611 #define ACCEPT_TIMEOUT \
1612 (MAX_ACCEPT_TIMEOUT \
1613 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
1614 time_t laststart = now - ACCEPT_TIMEOUT;
1616 for (size_t cnt = nused - 1; cnt > 0; --cnt)
1618 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
1620 /* Remove the entry, it timed out. */
1621 (void) close (conns[cnt].fd);
1622 conns[cnt].fd = -1;
1624 if (cnt < firstfree)
1625 firstfree = cnt;
1626 if (cnt == nused - 1)
1628 --nused;
1629 while (conns[nused - 1].fd == -1);
1633 if (restart_p (now))
1634 restart ();
#ifdef HAVE_EPOLL
/* Connection dispatcher based on epoll, using the epoll descriptor EFD.
   Accepts new connections on SOCK, passes descriptors with pending data
   to fd_ready, closes connections which stayed silent for too long, and
   re-executes the daemon when restart_p says so.

   The function returns only if SOCK cannot be registered with EFD;
   otherwise it loops forever.  STARTTIME is indexed by descriptor
   number here, a zero entry meaning that no connection is waiting on
   that descriptor.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  /* NUSED is read by the MAIN_THREAD_TIMEOUT and ACCEPT_TIMEOUT
     macros.  */
  int nused = 1;
  /* Upper bound for the descriptor numbers which need to be checked
     for a timeout.  */
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));

            if (fd >= 0)
              {
                /* Try to add the new descriptor.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
        else
          {
            /* Remove the descriptor from the epoll descriptor.  The
               event argument is ignored for EPOLL_CTL_DEL, but kernels
               before 2.6.9 reject a NULL pointer, which would leave the
               descriptor registered and let it be reported again.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, &ev);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         not sent a request for too long.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (cnt != sock && starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.
               A non-NULL event pointer is passed for the same reason
               as above.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, &ev);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          /* Nothing is waiting on the highest descriptor any more.  */
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
1733 /* Start all the threads we want. The initial process is thread no. 1. */
1734 void
1735 start_threads (void)
1737 /* Initialize the conditional variable we will use. The only
1738 non-standard attribute we might use is the clock selection. */
1739 pthread_condattr_t condattr;
1740 pthread_condattr_init (&condattr);
1742 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
1743 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
1744 /* Determine whether the monotonous clock is available. */
1745 struct timespec dummy;
1746 # if _POSIX_MONOTONIC_CLOCK == 0
1747 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
1748 # endif
1749 # if _POSIX_CLOCK_SELECTION == 0
1750 if (sysconf (_SC_CLOCK_SELECTION) > 0)
1751 # endif
1752 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
1753 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
1754 timeout_clock = CLOCK_MONOTONIC;
1755 #endif
1757 pthread_cond_init (&readylist_cond, &condattr);
1758 pthread_condattr_destroy (&condattr);
1761 /* Create the attribute for the threads. They are all created
1762 detached. */
1763 pthread_attr_init (&attr);
1764 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
1765 /* Use 1MB stacks, twice as much for 64-bit architectures. */
1766 pthread_attr_setstacksize (&attr, 1024 * 1024 * (sizeof (void *) / 4));
1768 /* We allow less than LASTDB threads only for debugging. */
1769 if (debug_level == 0)
1770 nthreads = MAX (nthreads, lastdb);
1772 int nfailed = 0;
1773 for (long int i = 0; i < nthreads; ++i)
1775 pthread_t th;
1776 if (pthread_create (&th, &attr, nscd_run, (void *) (i - nfailed)) != 0)
1777 ++nfailed;
1779 if (nthreads - nfailed < lastdb)
1781 /* We could not start enough threads. */
1782 dbg_log (_("could only start %d threads; terminating"),
1783 nthreads - nfailed);
1784 exit (1);
1787 /* Determine how much room for descriptors we should initially
1788 allocate. This might need to change later if we cap the number
1789 with MAXCONN. */
1790 const long int nfds = sysconf (_SC_OPEN_MAX);
1791 #define MINCONN 32
1792 #define MAXCONN 16384
1793 if (nfds == -1 || nfds > MAXCONN)
1794 nconns = MAXCONN;
1795 else if (nfds < MINCONN)
1796 nconns = MINCONN;
1797 else
1798 nconns = nfds;
1800 /* We need memory to pass descriptors on to the worker threads. */
1801 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
1802 /* Array to keep track when connection was accepted. */
1803 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
1805 /* In the main thread we execute the loop which handles incoming
1806 connections. */
1807 #ifdef HAVE_EPOLL
1808 int efd = epoll_create (100);
1809 if (efd != -1)
1811 main_loop_epoll (efd);
1812 close (efd);
1814 #endif
1816 main_loop_poll ();
1820 /* Look up the uid, gid, and supplementary groups to run nscd as. When
1821 this function is called, we are not listening on the nscd socket yet so
1822 we can just use the ordinary lookup functions without causing a lockup */
1823 static void
1824 begin_drop_privileges (void)
1826 struct passwd *pwd = getpwnam (server_user);
1828 if (pwd == NULL)
1830 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1831 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
1832 server_user);
1835 server_uid = pwd->pw_uid;
1836 server_gid = pwd->pw_gid;
1838 /* Save the old UID/GID if we have to change back. */
1839 if (paranoia)
1841 old_uid = getuid ();
1842 old_gid = getgid ();
1845 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
1847 /* This really must never happen. */
1848 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1849 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
1852 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
1854 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
1855 == -1)
1857 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1858 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
1863 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
1864 run nscd as the user specified in the configuration file. */
1865 static void
1866 finish_drop_privileges (void)
1868 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
1869 /* We need to preserve the capabilities to connect to the audit daemon. */
1870 cap_t new_caps = preserve_capabilities ();
1871 #endif
1873 if (setgroups (server_ngroups, server_groups) == -1)
1875 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1876 error (EXIT_FAILURE, errno, _("setgroups failed"));
1879 int res;
1880 if (paranoia)
1881 res = setresgid (server_gid, server_gid, old_gid);
1882 else
1883 res = setgid (server_gid);
1884 if (res == -1)
1886 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1887 perror ("setgid");
1888 exit (4);
1891 if (paranoia)
1892 res = setresuid (server_uid, server_uid, old_uid);
1893 else
1894 res = setuid (server_uid);
1895 if (res == -1)
1897 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1898 perror ("setuid");
1899 exit (4);
1902 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
1903 /* Remove the temporary capabilities. */
1904 install_real_capabilities (new_caps);
1905 #endif