* login/login_tty.c (login_tty): The Linux kernel can return EBUSY
[glibc.git] / nscd / connections.c
blob11fbc0349833c801106dd322a743e512ad5e4c8e
/* Inner loops of cache daemon.
   Copyright (C) 1998-2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
20 #include <alloca.h>
21 #include <assert.h>
22 #include <atomic.h>
23 #include <error.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <grp.h>
27 #include <libintl.h>
28 #include <pthread.h>
29 #include <pwd.h>
30 #include <resolv.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <arpa/inet.h>
35 #ifdef HAVE_EPOLL
36 # include <sys/epoll.h>
37 #endif
38 #include <sys/mman.h>
39 #include <sys/param.h>
40 #include <sys/poll.h>
41 #ifdef HAVE_SENDFILE
42 # include <sys/sendfile.h>
43 #endif
44 #include <sys/socket.h>
45 #include <sys/stat.h>
46 #include <sys/un.h>
48 #include "nscd.h"
49 #include "dbg_log.h"
50 #include "selinux.h"
51 #ifdef HAVE_SENDFILE
52 # include <kernel-features.h>
53 #endif
/* Wrapper functions with error checking for standard functions.  */
extern void *xmalloc (size_t n);
extern void *xcalloc (size_t n, size_t s);
extern void *xrealloc (void *o, size_t n);

/* Support to run nscd as an unprivileged user.  */
const char *server_user;
static uid_t server_uid;
static gid_t server_gid;
const char *stat_user;
uid_t stat_uid;
static gid_t *server_groups;
#ifndef NGROUPS
# define NGROUPS 32
#endif
static int server_ngroups;

static pthread_attr_t attr;

static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
78 /* Map request type to a string. */
79 const char *const serv2str[LASTREQ] =
81 [GETPWBYNAME] = "GETPWBYNAME",
82 [GETPWBYUID] = "GETPWBYUID",
83 [GETGRBYNAME] = "GETGRBYNAME",
84 [GETGRBYGID] = "GETGRBYGID",
85 [GETHOSTBYNAME] = "GETHOSTBYNAME",
86 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
87 [GETHOSTBYADDR] = "GETHOSTBYADDR",
88 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
89 [SHUTDOWN] = "SHUTDOWN",
90 [GETSTAT] = "GETSTAT",
91 [INVALIDATE] = "INVALIDATE",
92 [GETFDPW] = "GETFDPW",
93 [GETFDGR] = "GETFDGR",
94 [GETFDHST] = "GETFDHST",
95 [GETAI] = "GETAI",
96 [INITGROUPS] = "INITGROUPS",
97 [GETSERVBYNAME] = "GETSERVBYNAME",
98 [GETSERVBYPORT] = "GETSERVBYPORT",
99 [GETFDSERV] = "GETFDSERV"
102 /* The control data structures for the services. */
103 struct database_dyn dbs[lastdb] =
105 [pwddb] = {
106 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
107 .prunelock = PTHREAD_MUTEX_INITIALIZER,
108 .enabled = 0,
109 .check_file = 1,
110 .persistent = 0,
111 .propagate = 1,
112 .shared = 0,
113 .max_db_size = DEFAULT_MAX_DB_SIZE,
114 .reset_res = 0,
115 .filename = "/etc/passwd",
116 .db_filename = _PATH_NSCD_PASSWD_DB,
117 .disabled_iov = &pwd_iov_disabled,
118 .postimeout = 3600,
119 .negtimeout = 20,
120 .wr_fd = -1,
121 .ro_fd = -1,
122 .mmap_used = false
124 [grpdb] = {
125 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
126 .prunelock = PTHREAD_MUTEX_INITIALIZER,
127 .enabled = 0,
128 .check_file = 1,
129 .persistent = 0,
130 .propagate = 1,
131 .shared = 0,
132 .max_db_size = DEFAULT_MAX_DB_SIZE,
133 .reset_res = 0,
134 .filename = "/etc/group",
135 .db_filename = _PATH_NSCD_GROUP_DB,
136 .disabled_iov = &grp_iov_disabled,
137 .postimeout = 3600,
138 .negtimeout = 60,
139 .wr_fd = -1,
140 .ro_fd = -1,
141 .mmap_used = false
143 [hstdb] = {
144 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
145 .prunelock = PTHREAD_MUTEX_INITIALIZER,
146 .enabled = 0,
147 .check_file = 1,
148 .persistent = 0,
149 .propagate = 0, /* Not used. */
150 .shared = 0,
151 .max_db_size = DEFAULT_MAX_DB_SIZE,
152 .reset_res = 1,
153 .filename = "/etc/hosts",
154 .db_filename = _PATH_NSCD_HOSTS_DB,
155 .disabled_iov = &hst_iov_disabled,
156 .postimeout = 3600,
157 .negtimeout = 20,
158 .wr_fd = -1,
159 .ro_fd = -1,
160 .mmap_used = false
162 [servdb] = {
163 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
164 .prunelock = PTHREAD_MUTEX_INITIALIZER,
165 .enabled = 0,
166 .check_file = 1,
167 .persistent = 0,
168 .propagate = 0, /* Not used. */
169 .shared = 0,
170 .max_db_size = DEFAULT_MAX_DB_SIZE,
171 .reset_res = 0,
172 .filename = "/etc/services",
173 .db_filename = _PATH_NSCD_SERVICES_DB,
174 .disabled_iov = &serv_iov_disabled,
175 .postimeout = 28800,
176 .negtimeout = 20,
177 .wr_fd = -1,
178 .ro_fd = -1,
179 .mmap_used = false
184 /* Mapping of request type to database. */
185 static struct
187 bool data_request;
188 struct database_dyn *db;
189 } const reqinfo[LASTREQ] =
191 [GETPWBYNAME] = { true, &dbs[pwddb] },
192 [GETPWBYUID] = { true, &dbs[pwddb] },
193 [GETGRBYNAME] = { true, &dbs[grpdb] },
194 [GETGRBYGID] = { true, &dbs[grpdb] },
195 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
196 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
197 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
198 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
199 [SHUTDOWN] = { false, NULL },
200 [GETSTAT] = { false, NULL },
201 [SHUTDOWN] = { false, NULL },
202 [GETFDPW] = { false, &dbs[pwddb] },
203 [GETFDGR] = { false, &dbs[grpdb] },
204 [GETFDHST] = { false, &dbs[hstdb] },
205 [GETAI] = { true, &dbs[hstdb] },
206 [INITGROUPS] = { true, &dbs[grpdb] },
207 [GETSERVBYNAME] = { true, &dbs[servdb] },
208 [GETSERVBYPORT] = { true, &dbs[servdb] },
209 [GETFDSERV] = { false, &dbs[servdb] }
/* Number of seconds between two cache pruning runs.  */
#define CACHE_PRUNE_INTERVAL    15


/* Initial number of threads to use.  -1 means "not configured";
   nscd_init then picks a default.  */
int nthreads = -1;
/* Maximum number of threads to use.  */
int max_nthreads = 32;

/* Socket for incoming connections.  */
static int sock;

/* Number of times clients had to wait.  */
unsigned long int client_queued;
/* The fallback is needed here as well: this is the first use of
   MSG_NOSIGNAL in the file.  */
#ifndef MSG_NOSIGNAL
# define MSG_NOSIGNAL 0
#endif

/* Send all LEN bytes starting at BUF over the socket FD, restarting
   after signal interruptions and continuing after short writes.
   Returns the negative result of send if it reports an error,
   otherwise the number of bytes actually sent (which is less than LEN
   only if send returned zero).  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  size_t n = len;
  ssize_t ret;

  do
    {
      ret = TEMP_FAILURE_RETRY (send (fd, buf, n, MSG_NOSIGNAL));
      if (ret <= 0)
        break;
      buf = (const char *) buf + ret;
      n -= ret;
    }
  while (n > 0);

  return ret < 0 ? ret : len - n;
}
247 #ifdef HAVE_SENDFILE
/* Copy LEN bytes starting at offset OFF of FROMFD to TOFD using
   sendfile, restarting after signal interruptions and continuing
   after partial transfers.  Returns the negative result of sendfile
   if it reports an error, otherwise the number of bytes transferred
   (less than LEN only if sendfile returned zero).  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t n = len;
  ssize_t ret;

  do
    {
      /* sendfile advances OFF by the number of bytes it copied.  */
      ret = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, n));
      if (ret <= 0)
        break;
      n -= ret;
    }
  while (n > 0);

  return ret < 0 ? ret : len - n;
}
264 #endif
/* Marker values stored in the use map built while verifying a
   persistent database.  The low bits name the kind of object covering
   a byte; use_begin/use_end flag the first/last byte of an object and
   use_first records a reference from a "first" hash entry.  */
enum usekey
  {
    use_not = 0,
    /* The following three are not really used, they are symbolic constants.  */
    use_first = 16,
    use_begin = 32,
    use_end = 64,

    use_he = 1,
    use_he_begin = use_he | use_begin,
    use_he_end = use_he | use_end,
#if SEPARATE_KEY
    use_key = 2,
    use_key_begin = use_key | use_begin,
    use_key_end = use_key | use_end,
    use_key_first = use_key_begin | use_first,
#endif
    use_data = 3,
    use_data_begin = use_data | use_begin,
    use_data_end = use_data | use_end,
    use_data_first = use_data_begin | use_first
  };
291 static int
292 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
293 enum usekey use, ref_t start, size_t len)
295 assert (len >= 2);
297 if (start > first_free || start + len > first_free
298 || (start & BLOCK_ALIGN_M1))
299 return 0;
301 if (usemap[start] == use_not)
303 /* Add the start marker. */
304 usemap[start] = use | use_begin;
305 use &= ~use_first;
307 while (--len > 0)
308 if (usemap[++start] != use_not)
309 return 0;
310 else
311 usemap[start] = use;
313 /* Add the end marker. */
314 usemap[start] = use | use_end;
316 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
318 /* Hash entries can't be shared. */
319 if (use == use_he)
320 return 0;
322 usemap[start] |= (use & use_first);
323 use &= ~use_first;
325 while (--len > 1)
326 if (usemap[++start] != use)
327 return 0;
329 if (usemap[++start] != (use | use_end))
330 return 0;
332 else
333 /* Points to a wrong object or somewhere in the middle. */
334 return 0;
336 return 1;
340 /* Verify data in persistent database. */
341 static int
342 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
344 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb);
346 time_t now = time (NULL);
348 struct database_pers_head *head = mem;
349 struct database_pers_head head_copy = *head;
351 /* Check that the header that was read matches the head in the database. */
352 if (readhead != NULL && memcmp (head, readhead, sizeof (*head)) != 0)
353 return 0;
355 /* First some easy tests: make sure the database header is sane. */
356 if (head->version != DB_VERSION
357 || head->header_size != sizeof (*head)
358 /* We allow a timestamp to be one hour ahead of the current time.
359 This should cover daylight saving time changes. */
360 || head->timestamp > now + 60 * 60 + 60
361 || (head->gc_cycle & 1)
362 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
363 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
364 || head->first_free < 0
365 || head->first_free > head->data_size
366 || (head->first_free & BLOCK_ALIGN_M1) != 0
367 || head->maxnentries < 0
368 || head->maxnsearched < 0)
369 return 0;
371 uint8_t *usemap = calloc (head->first_free, 1);
372 if (usemap == NULL)
373 return 0;
375 const char *data = (char *) &head->array[roundup (head->module,
376 ALIGN / sizeof (ref_t))];
378 nscd_ssize_t he_cnt = 0;
379 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
381 ref_t work = head->array[cnt];
383 while (work != ENDREF)
385 if (! check_use (data, head->first_free, usemap, use_he, work,
386 sizeof (struct hashentry)))
387 goto fail;
389 /* Now we know we can dereference the record. */
390 struct hashentry *here = (struct hashentry *) (data + work);
392 ++he_cnt;
394 /* Make sure the record is for this type of service. */
395 if (here->type >= LASTREQ
396 || reqinfo[here->type].db != &dbs[dbnr])
397 goto fail;
399 /* Validate boolean field value. */
400 if (here->first != false && here->first != true)
401 goto fail;
403 if (here->len < 0)
404 goto fail;
406 /* Now the data. */
407 if (here->packet < 0
408 || here->packet > head->first_free
409 || here->packet + sizeof (struct datahead) > head->first_free)
410 goto fail;
412 struct datahead *dh = (struct datahead *) (data + here->packet);
414 if (! check_use (data, head->first_free, usemap,
415 use_data | (here->first ? use_first : 0),
416 here->packet, dh->allocsize))
417 goto fail;
419 if (dh->allocsize < sizeof (struct datahead)
420 || dh->recsize > dh->allocsize
421 || (dh->notfound != false && dh->notfound != true)
422 || (dh->usable != false && dh->usable != true))
423 goto fail;
425 if (here->key < here->packet + sizeof (struct datahead)
426 || here->key > here->packet + dh->allocsize
427 || here->key + here->len > here->packet + dh->allocsize)
429 #if SEPARATE_KEY
430 /* If keys can appear outside of data, this should be done
431 instead. But gc doesn't mark the data in that case. */
432 if (! check_use (data, head->first_free, usemap,
433 use_key | (here->first ? use_first : 0),
434 here->key, here->len))
435 #endif
436 goto fail;
439 work = here->next;
443 if (he_cnt != head->nentries)
444 goto fail;
446 /* See if all data and keys had at least one reference from
447 he->first == true hashentry. */
448 for (ref_t idx = 0; idx < head->first_free; ++idx)
450 #if SEPARATE_KEY
451 if (usemap[idx] == use_key_begin)
452 goto fail;
453 #endif
454 if (usemap[idx] == use_data_begin)
455 goto fail;
458 /* Finally, make sure the database hasn't changed since the first test. */
459 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
460 goto fail;
462 free (usemap);
463 return 1;
465 fail:
466 free (usemap);
467 return 0;
/* Flags added to every open call for the database files: request
   close-on-exec at open time where the flag exists.  */
#ifdef O_CLOEXEC
# define EXTRA_O_FLAGS O_CLOEXEC
#else
# define EXTRA_O_FLAGS 0
#endif
478 /* Initialize database information structures. */
479 void
480 nscd_init (void)
482 /* Look up unprivileged uid/gid/groups before we start listening on the
483 socket */
484 if (server_user != NULL)
485 begin_drop_privileges ();
487 if (nthreads == -1)
488 /* No configuration for this value, assume a default. */
489 nthreads = 2 * lastdb;
491 for (size_t cnt = 0; cnt < lastdb; ++cnt)
492 if (dbs[cnt].enabled)
494 pthread_rwlock_init (&dbs[cnt].lock, NULL);
495 pthread_mutex_init (&dbs[cnt].memlock, NULL);
497 if (dbs[cnt].persistent)
499 /* Try to open the appropriate file on disk. */
500 int fd = open (dbs[cnt].db_filename, O_RDWR | EXTRA_O_FLAGS);
501 if (fd != -1)
503 struct stat64 st;
504 void *mem;
505 size_t total;
506 struct database_pers_head head;
507 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
508 sizeof (head)));
509 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
511 fail_db:
512 dbg_log (_("invalid persistent database file \"%s\": %s"),
513 dbs[cnt].db_filename, strerror (errno));
514 unlink (dbs[cnt].db_filename);
516 else if (head.module == 0 && head.data_size == 0)
518 /* The file has been created, but the head has not been
519 initialized yet. Remove the old file. */
520 unlink (dbs[cnt].db_filename);
522 else if (head.header_size != (int) sizeof (head))
524 dbg_log (_("invalid persistent database file \"%s\": %s"),
525 dbs[cnt].db_filename,
526 _("header size does not match"));
527 unlink (dbs[cnt].db_filename);
529 else if ((total = (sizeof (head)
530 + roundup (head.module * sizeof (ref_t),
531 ALIGN)
532 + head.data_size))
533 > st.st_size
534 || total < sizeof (head))
536 dbg_log (_("invalid persistent database file \"%s\": %s"),
537 dbs[cnt].db_filename,
538 _("file size does not match"));
539 unlink (dbs[cnt].db_filename);
541 /* Note we map with the maximum size allowed for the
542 database. This is likely much larger than the
543 actual file size. This is OK on most OSes since
544 extensions of the underlying file will
545 automatically translate more pages available for
546 memory access. */
547 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
548 PROT_READ | PROT_WRITE,
549 MAP_SHARED, fd, 0))
550 == MAP_FAILED)
551 goto fail_db;
552 else if (!verify_persistent_db (mem, &head, cnt))
554 munmap (mem, total);
555 dbg_log (_("invalid persistent database file \"%s\": %s"),
556 dbs[cnt].db_filename,
557 _("verification failed"));
558 unlink (dbs[cnt].db_filename);
560 else
562 /* Success. We have the database. */
563 dbs[cnt].head = mem;
564 dbs[cnt].memsize = total;
565 dbs[cnt].data = (char *)
566 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
567 ALIGN / sizeof (ref_t))];
568 dbs[cnt].mmap_used = true;
570 if (dbs[cnt].suggested_module > head.module)
571 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
572 dbnames[cnt]);
574 dbs[cnt].wr_fd = fd;
575 fd = -1;
576 /* We also need a read-only descriptor. */
577 if (dbs[cnt].shared)
579 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
580 O_RDONLY | EXTRA_O_FLAGS);
581 if (dbs[cnt].ro_fd == -1)
582 dbg_log (_("\
583 cannot create read-only descriptor for \"%s\"; no mmap"),
584 dbs[cnt].db_filename);
587 // XXX Shall we test whether the descriptors actually
588 // XXX point to the same file?
591 /* Close the file descriptors in case something went
592 wrong in which case the variable have not been
593 assigned -1. */
594 if (fd != -1)
595 close (fd);
599 if (dbs[cnt].head == NULL)
601 /* No database loaded. Allocate the data structure,
602 possibly on disk. */
603 struct database_pers_head head;
604 size_t total = (sizeof (head)
605 + roundup (dbs[cnt].suggested_module
606 * sizeof (ref_t), ALIGN)
607 + (dbs[cnt].suggested_module
608 * DEFAULT_DATASIZE_PER_BUCKET));
610 /* Try to create the database. If we do not need a
611 persistent database create a temporary file. */
612 int fd;
613 int ro_fd = -1;
614 if (dbs[cnt].persistent)
616 fd = open (dbs[cnt].db_filename,
617 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | EXTRA_O_FLAGS,
618 S_IRUSR | S_IWUSR);
619 if (fd != -1 && dbs[cnt].shared)
620 ro_fd = open (dbs[cnt].db_filename,
621 O_RDONLY | EXTRA_O_FLAGS);
623 else
625 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
626 fd = mkostemp (fname, EXTRA_O_FLAGS);
628 /* We do not need the file name anymore after we
629 opened another file descriptor in read-only mode. */
630 if (fd != -1)
632 if (dbs[cnt].shared)
633 ro_fd = open (fname, O_RDONLY | EXTRA_O_FLAGS);
635 unlink (fname);
639 if (fd == -1)
641 if (errno == EEXIST)
643 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
644 dbnames[cnt], dbs[cnt].db_filename);
645 // XXX Correct way to terminate?
646 exit (1);
649 if (dbs[cnt].persistent)
650 dbg_log (_("cannot create %s; no persistent database used"),
651 dbs[cnt].db_filename);
652 else
653 dbg_log (_("cannot create %s; no sharing possible"),
654 dbs[cnt].db_filename);
656 dbs[cnt].persistent = 0;
657 // XXX remember: no mmap
659 else
661 /* Tell the user if we could not create the read-only
662 descriptor. */
663 if (ro_fd == -1 && dbs[cnt].shared)
664 dbg_log (_("\
665 cannot create read-only descriptor for \"%s\"; no mmap"),
666 dbs[cnt].db_filename);
668 /* Before we create the header, initialiye the hash
669 table. So that if we get interrupted if writing
670 the header we can recognize a partially initialized
671 database. */
672 size_t ps = sysconf (_SC_PAGESIZE);
673 char tmpbuf[ps];
674 assert (~ENDREF == 0);
675 memset (tmpbuf, '\xff', ps);
677 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
678 off_t offset = sizeof (head);
680 size_t towrite;
681 if (offset % ps != 0)
683 towrite = MIN (remaining, ps - (offset % ps));
684 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
685 goto write_fail;
686 offset += towrite;
687 remaining -= towrite;
690 while (remaining > ps)
692 if (pwrite (fd, tmpbuf, ps, offset) == -1)
693 goto write_fail;
694 offset += ps;
695 remaining -= ps;
698 if (remaining > 0
699 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
700 goto write_fail;
702 /* Create the header of the file. */
703 struct database_pers_head head =
705 .version = DB_VERSION,
706 .header_size = sizeof (head),
707 .module = dbs[cnt].suggested_module,
708 .data_size = (dbs[cnt].suggested_module
709 * DEFAULT_DATASIZE_PER_BUCKET),
710 .first_free = 0
712 void *mem;
714 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
715 != sizeof (head))
716 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
717 != 0)
718 || (mem = mmap (NULL, dbs[cnt].max_db_size,
719 PROT_READ | PROT_WRITE,
720 MAP_SHARED, fd, 0)) == MAP_FAILED)
722 write_fail:
723 unlink (dbs[cnt].db_filename);
724 dbg_log (_("cannot write to database file %s: %s"),
725 dbs[cnt].db_filename, strerror (errno));
726 dbs[cnt].persistent = 0;
728 else
730 /* Success. */
731 dbs[cnt].head = mem;
732 dbs[cnt].data = (char *)
733 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
734 ALIGN / sizeof (ref_t))];
735 dbs[cnt].memsize = total;
736 dbs[cnt].mmap_used = true;
738 /* Remember the descriptors. */
739 dbs[cnt].wr_fd = fd;
740 dbs[cnt].ro_fd = ro_fd;
741 fd = -1;
742 ro_fd = -1;
745 if (fd != -1)
746 close (fd);
747 if (ro_fd != -1)
748 close (ro_fd);
752 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
753 /* We do not check here whether the O_CLOEXEC provided to the
754 open call was successful or not. The two fcntl calls are
755 only performed once each per process start-up and therefore
756 is not noticeable at all. */
757 if (paranoia
758 && ((dbs[cnt].wr_fd != -1
759 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
760 || (dbs[cnt].ro_fd != -1
761 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
763 dbg_log (_("\
764 cannot set socket to close on exec: %s; disabling paranoia mode"),
765 strerror (errno));
766 paranoia = 0;
768 #endif
770 if (dbs[cnt].head == NULL)
772 /* We do not use the persistent database. Just
773 create an in-memory data structure. */
774 assert (! dbs[cnt].persistent);
776 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
777 + (dbs[cnt].suggested_module
778 * sizeof (ref_t)));
779 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
780 assert (~ENDREF == 0);
781 memset (dbs[cnt].head->array, '\xff',
782 dbs[cnt].suggested_module * sizeof (ref_t));
783 dbs[cnt].head->module = dbs[cnt].suggested_module;
784 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
785 * dbs[cnt].head->module);
786 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
787 dbs[cnt].head->first_free = 0;
789 dbs[cnt].shared = 0;
790 assert (dbs[cnt].ro_fd == -1);
793 if (dbs[cnt].check_file)
795 /* We need the modification date of the file. */
796 struct stat64 st;
798 if (stat64 (dbs[cnt].filename, &st) < 0)
800 /* We cannot stat() the file, disable file checking. */
801 dbg_log (_("cannot stat() file `%s': %s"),
802 dbs[cnt].filename, strerror (errno));
803 dbs[cnt].check_file = 0;
805 else
806 dbs[cnt].file_mtime = st.st_mtime;
810 /* Create the socket. */
811 sock = socket (AF_UNIX, SOCK_STREAM, 0);
812 if (sock < 0)
814 dbg_log (_("cannot open socket: %s"), strerror (errno));
815 exit (errno == EACCES ? 4 : 1);
817 /* Bind a name to the socket. */
818 struct sockaddr_un sock_addr;
819 sock_addr.sun_family = AF_UNIX;
820 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
821 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
823 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
824 exit (errno == EACCES ? 4 : 1);
827 /* We don't want to get stuck on accept. */
828 int fl = fcntl (sock, F_GETFL);
829 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
831 dbg_log (_("cannot change socket to nonblocking mode: %s"),
832 strerror (errno));
833 exit (1);
836 /* The descriptor needs to be closed on exec. */
837 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
839 dbg_log (_("cannot set socket to close on exec: %s"),
840 strerror (errno));
841 exit (1);
844 /* Set permissions for the socket. */
845 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
847 /* Set the socket up to accept connections. */
848 if (listen (sock, SOMAXCONN) < 0)
850 dbg_log (_("cannot enable socket to accept connections: %s"),
851 strerror (errno));
852 exit (1);
855 /* Change to unprivileged uid/gid/groups if specifed in config file */
856 if (server_user != NULL)
857 finish_drop_privileges ();
861 /* Close the connections. */
862 void
863 close_sockets (void)
865 close (sock);
869 static void
870 invalidate_cache (char *key, int fd)
872 dbtype number;
873 int32_t resp;
875 for (number = pwddb; number < lastdb; ++number)
876 if (strcmp (key, dbnames[number]) == 0)
878 if (dbs[number].reset_res)
879 res_init ();
881 break;
884 if (number == lastdb)
886 resp = EINVAL;
887 writeall (fd, &resp, sizeof (resp));
888 return;
891 if (dbs[number].enabled)
892 prune_cache (&dbs[number], LONG_MAX, fd);
893 else
895 resp = 0;
896 writeall (fd, &resp, sizeof (resp));
901 #ifdef SCM_RIGHTS
902 static void
903 send_ro_fd (struct database_dyn *db, char *key, int fd)
905 /* If we do not have an read-only file descriptor do nothing. */
906 if (db->ro_fd == -1)
907 return;
909 /* We need to send some data along with the descriptor. */
910 uint64_t mapsize = (db->head->data_size
911 + roundup (db->head->module * sizeof (ref_t), ALIGN)
912 + sizeof (struct database_pers_head));
913 struct iovec iov[2];
914 iov[0].iov_base = key;
915 iov[0].iov_len = strlen (key) + 1;
916 iov[1].iov_base = &mapsize;
917 iov[1].iov_len = sizeof (mapsize);
919 /* Prepare the control message to transfer the descriptor. */
920 union
922 struct cmsghdr hdr;
923 char bytes[CMSG_SPACE (sizeof (int))];
924 } buf;
925 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
926 .msg_control = buf.bytes,
927 .msg_controllen = sizeof (buf) };
928 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
930 cmsg->cmsg_level = SOL_SOCKET;
931 cmsg->cmsg_type = SCM_RIGHTS;
932 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
934 *(int *) CMSG_DATA (cmsg) = db->ro_fd;
936 msg.msg_controllen = cmsg->cmsg_len;
938 /* Send the control message. We repeat when we are interrupted but
939 everything else is ignored. */
940 #ifndef MSG_NOSIGNAL
941 # define MSG_NOSIGNAL 0
942 #endif
943 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
945 if (__builtin_expect (debug_level > 0, 0))
946 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
948 #endif /* SCM_RIGHTS */
951 /* Handle new request. */
952 static void
953 handle_request (int fd, request_header *req, void *key, uid_t uid)
955 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
957 if (debug_level > 0)
958 dbg_log (_("\
959 cannot handle old request version %d; current version is %d"),
960 req->version, NSCD_VERSION);
961 return;
964 /* Make the SELinux check before we go on to the standard checks. */
965 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
966 return;
968 struct database_dyn *db = reqinfo[req->type].db;
970 /* See whether we can service the request from the cache. */
971 if (__builtin_expect (reqinfo[req->type].data_request, true))
973 if (__builtin_expect (debug_level, 0) > 0)
975 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
977 char buf[INET6_ADDRSTRLEN];
979 dbg_log ("\t%s (%s)", serv2str[req->type],
980 inet_ntop (req->type == GETHOSTBYADDR
981 ? AF_INET : AF_INET6,
982 key, buf, sizeof (buf)));
984 else
985 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
988 /* Is this service enabled? */
989 if (__builtin_expect (!db->enabled, 0))
991 /* No, sent the prepared record. */
992 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
993 db->disabled_iov->iov_len,
994 MSG_NOSIGNAL))
995 != (ssize_t) db->disabled_iov->iov_len
996 && __builtin_expect (debug_level, 0) > 0)
998 /* We have problems sending the result. */
999 char buf[256];
1000 dbg_log (_("cannot write result: %s"),
1001 strerror_r (errno, buf, sizeof (buf)));
1004 return;
1007 /* Be sure we can read the data. */
1008 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
1010 ++db->head->rdlockdelayed;
1011 pthread_rwlock_rdlock (&db->lock);
1014 /* See whether we can handle it from the cache. */
1015 struct datahead *cached;
1016 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1017 db, uid);
1018 if (cached != NULL)
1020 /* Hurray it's in the cache. */
1021 ssize_t nwritten;
1023 #ifdef HAVE_SENDFILE
1024 if (__builtin_expect (db->mmap_used, 1))
1026 assert (db->wr_fd != -1);
1027 assert ((char *) cached->data > (char *) db->data);
1028 assert ((char *) cached->data - (char *) db->head
1029 + cached->recsize
1030 <= (sizeof (struct database_pers_head)
1031 + db->head->module * sizeof (ref_t)
1032 + db->head->data_size));
1033 nwritten = sendfileall (fd, db->wr_fd,
1034 (char *) cached->data
1035 - (char *) db->head, cached->recsize);
1036 # ifndef __ASSUME_SENDFILE
1037 if (nwritten == -1 && errno == ENOSYS)
1038 goto use_write;
1039 # endif
1041 else
1042 # ifndef __ASSUME_SENDFILE
1043 use_write:
1044 # endif
1045 #endif
1046 nwritten = writeall (fd, cached->data, cached->recsize);
1048 if (nwritten != cached->recsize
1049 && __builtin_expect (debug_level, 0) > 0)
1051 /* We have problems sending the result. */
1052 char buf[256];
1053 dbg_log (_("cannot write result: %s"),
1054 strerror_r (errno, buf, sizeof (buf)));
1057 pthread_rwlock_unlock (&db->lock);
1059 return;
1062 pthread_rwlock_unlock (&db->lock);
1064 else if (__builtin_expect (debug_level, 0) > 0)
1066 if (req->type == INVALIDATE)
1067 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1068 else
1069 dbg_log ("\t%s", serv2str[req->type]);
1072 /* Handle the request. */
1073 switch (req->type)
1075 case GETPWBYNAME:
1076 addpwbyname (db, fd, req, key, uid);
1077 break;
1079 case GETPWBYUID:
1080 addpwbyuid (db, fd, req, key, uid);
1081 break;
1083 case GETGRBYNAME:
1084 addgrbyname (db, fd, req, key, uid);
1085 break;
1087 case GETGRBYGID:
1088 addgrbygid (db, fd, req, key, uid);
1089 break;
1091 case GETHOSTBYNAME:
1092 addhstbyname (db, fd, req, key, uid);
1093 break;
1095 case GETHOSTBYNAMEv6:
1096 addhstbynamev6 (db, fd, req, key, uid);
1097 break;
1099 case GETHOSTBYADDR:
1100 addhstbyaddr (db, fd, req, key, uid);
1101 break;
1103 case GETHOSTBYADDRv6:
1104 addhstbyaddrv6 (db, fd, req, key, uid);
1105 break;
1107 case GETAI:
1108 addhstai (db, fd, req, key, uid);
1109 break;
1111 case INITGROUPS:
1112 addinitgroups (db, fd, req, key, uid);
1113 break;
1115 case GETSERVBYNAME:
1116 addservbyname (db, fd, req, key, uid);
1117 break;
1119 case GETSERVBYPORT:
1120 addservbyport (db, fd, req, key, uid);
1121 break;
1123 case GETSTAT:
1124 case SHUTDOWN:
1125 case INVALIDATE:
1127 /* Get the callers credentials. */
1128 #ifdef SO_PEERCRED
1129 struct ucred caller;
1130 socklen_t optlen = sizeof (caller);
1132 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1134 char buf[256];
1136 dbg_log (_("error getting caller's id: %s"),
1137 strerror_r (errno, buf, sizeof (buf)));
1138 break;
1141 uid = caller.uid;
1142 #else
1143 /* Some systems have no SO_PEERCRED implementation. They don't
1144 care about security so we don't as well. */
1145 uid = 0;
1146 #endif
1149 /* Accept shutdown, getstat and invalidate only from root. For
1150 the stat call also allow the user specified in the config file. */
1151 if (req->type == GETSTAT)
1153 if (uid == 0 || uid == stat_uid)
1154 send_stats (fd, dbs);
1156 else if (uid == 0)
1158 if (req->type == INVALIDATE)
1159 invalidate_cache (key, fd);
1160 else
1161 termination_handler (0);
1163 break;
1165 case GETFDPW:
1166 case GETFDGR:
1167 case GETFDHST:
1168 case GETFDSERV:
1169 #ifdef SCM_RIGHTS
1170 send_ro_fd (reqinfo[req->type].db, key, fd);
1171 #endif
1172 break;
1174 default:
1175 /* Ignore the command, it's nothing we know. */
1176 break;
1181 /* Restart the process. */
1182 static void
1183 restart (void)
1185 /* First determine the parameters. We do not use the parameters
1186 passed to main() since in case nscd is started by running the
1187 dynamic linker this will not work. Yes, this is not the usual
1188 case but nscd is part of glibc and we occasionally do this. */
1189 size_t buflen = 1024;
1190 char *buf = alloca (buflen);
1191 size_t readlen = 0;
1192 int fd = open ("/proc/self/cmdline", O_RDONLY);
1193 if (fd == -1)
1195 dbg_log (_("\
1196 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1197 strerror (errno));
1199 paranoia = 0;
1200 return;
1203 while (1)
1205 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1206 buflen - readlen));
1207 if (n == -1)
1209 dbg_log (_("\
1210 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1211 strerror (errno));
1213 close (fd);
1214 paranoia = 0;
1215 return;
1218 readlen += n;
1220 if (readlen < buflen)
1221 break;
1223 /* We might have to extend the buffer. */
1224 size_t old_buflen = buflen;
1225 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1226 buf = memmove (newp, buf, old_buflen);
1229 close (fd);
1231 /* Parse the command line. Worst case scenario: every two
1232 characters form one parameter (one character plus NUL). */
1233 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1234 int argc = 0;
1236 char *cp = buf;
1237 while (cp < buf + readlen)
1239 argv[argc++] = cp;
1240 cp = (char *) rawmemchr (cp, '\0') + 1;
1242 argv[argc] = NULL;
1244 /* Second, change back to the old user if we changed it. */
1245 if (server_user != NULL)
1247 if (setresuid (old_uid, old_uid, old_uid) != 0)
1249 dbg_log (_("\
1250 cannot change to old UID: %s; disabling paranoia mode"),
1251 strerror (errno));
1253 paranoia = 0;
1254 return;
1257 if (setresgid (old_gid, old_gid, old_gid) != 0)
1259 dbg_log (_("\
1260 cannot change to old GID: %s; disabling paranoia mode"),
1261 strerror (errno));
1263 setuid (server_uid);
1264 paranoia = 0;
1265 return;
1269 /* Next change back to the old working directory. */
1270 if (chdir (oldcwd) == -1)
1272 dbg_log (_("\
1273 cannot change to old working directory: %s; disabling paranoia mode"),
1274 strerror (errno));
1276 if (server_user != NULL)
1278 setuid (server_uid);
1279 setgid (server_gid);
1281 paranoia = 0;
1282 return;
1285 /* Synchronize memory. */
1286 for (int cnt = 0; cnt < lastdb; ++cnt)
1288 /* Make sure nobody keeps using the database. */
1289 dbs[cnt].head->timestamp = 0;
1291 if (dbs[cnt].persistent)
1292 // XXX async OK?
1293 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1296 /* The preparations are done. */
1297 execv ("/proc/self/exe", argv);
1299 /* If we come here, we will never be able to re-exec. */
1300 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1301 strerror (errno));
1303 if (server_user != NULL)
1305 setuid (server_uid);
1306 setgid (server_gid);
1308 if (chdir ("/") != 0)
1309 dbg_log (_("cannot change current working directory to \"/\": %s"),
1310 strerror (errno));
1311 paranoia = 0;
/* One slot used to hand an accepted descriptor to a worker thread.
   A slot whose NEXT member is NULL is free.  */
struct fdlist
{
  int fd;
  struct fdlist *next;
};

/* Backing storage for all slots.  */
static struct fdlist *fdlist;

/* Descriptors which are ready to be read and still wait for a worker.
   NULL when nothing is queued.  */
static struct fdlist *readylist;

/* Condition variable and mutex guarding READYLIST.  The condition
   variable is set up at run time because the clock it uses is only
   chosen then.  */
static pthread_cond_t readylist_cond;
static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;

/* Clock the condition variable's timeouts are measured with.  */
static clockid_t timeout_clock = CLOCK_REALTIME;

/* Count of worker threads currently available to serve READYLIST.  */
static unsigned long int nready;
1339 /* This is the main loop. It is replicated in different threads but the
1340 `poll' call makes sure only one thread handles an incoming connection. */
1341 static void *
1342 __attribute__ ((__noreturn__))
1343 nscd_run (void *p)
1345 const long int my_number = (long int) p;
1346 const int run_prune = my_number < lastdb && dbs[my_number].enabled;
1347 struct timespec prune_ts;
1348 int to = 0;
1349 char buf[256];
1351 if (run_prune)
1353 setup_thread (&dbs[my_number]);
1355 /* We are running. */
1356 dbs[my_number].head->timestamp = time (NULL);
1358 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1359 /* Should never happen. */
1360 abort ();
1362 /* Compute timeout time. */
1363 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1366 /* Initial locking. */
1367 pthread_mutex_lock (&readylist_lock);
1369 /* One more thread available. */
1370 ++nready;
1372 while (1)
1374 while (readylist == NULL)
1376 if (run_prune)
1378 /* Wait, but not forever. */
1379 to = pthread_cond_timedwait (&readylist_cond, &readylist_lock,
1380 &prune_ts);
1382 /* If we were woken and there is no work to be done,
1383 just start pruning. */
1384 if (readylist == NULL && to == ETIMEDOUT)
1386 --nready;
1387 pthread_mutex_unlock (&readylist_lock);
1388 goto only_prune;
1391 else
1392 /* No need to timeout. */
1393 pthread_cond_wait (&readylist_cond, &readylist_lock);
1396 struct fdlist *it = readylist->next;
1397 if (readylist->next == readylist)
1398 /* Just one entry on the list. */
1399 readylist = NULL;
1400 else
1401 readylist->next = it->next;
1403 /* Extract the information and mark the record ready to be used
1404 again. */
1405 int fd = it->fd;
1406 it->next = NULL;
1408 /* One more thread available. */
1409 --nready;
1411 /* We are done with the list. */
1412 pthread_mutex_unlock (&readylist_lock);
1414 /* We do not want to block on a short read or so. */
1415 int fl = fcntl (fd, F_GETFL);
1416 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1417 goto close_and_out;
1419 /* Now read the request. */
1420 request_header req;
1421 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1422 != sizeof (req), 0))
1424 /* We failed to read data. Note that this also might mean we
1425 failed because we would have blocked. */
1426 if (debug_level > 0)
1427 dbg_log (_("short read while reading request: %s"),
1428 strerror_r (errno, buf, sizeof (buf)));
1429 goto close_and_out;
1432 /* Check whether this is a valid request type. */
1433 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1434 goto close_and_out;
1436 /* Some systems have no SO_PEERCRED implementation. They don't
1437 care about security so we don't as well. */
1438 uid_t uid = -1;
1439 #ifdef SO_PEERCRED
1440 pid_t pid = 0;
1442 if (__builtin_expect (debug_level > 0, 0))
1444 struct ucred caller;
1445 socklen_t optlen = sizeof (caller);
1447 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1448 pid = caller.pid;
1450 #endif
1452 /* It should not be possible to crash the nscd with a silly
1453 request (i.e., a terribly large key). We limit the size to 1kb. */
1454 if (__builtin_expect (req.key_len, 1) < 0
1455 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1457 if (debug_level > 0)
1458 dbg_log (_("key length in request too long: %d"), req.key_len);
1460 else
1462 /* Get the key. */
1463 char keybuf[MAXKEYLEN];
1465 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1466 req.key_len))
1467 != req.key_len, 0))
1469 /* Again, this can also mean we would have blocked. */
1470 if (debug_level > 0)
1471 dbg_log (_("short read while reading request key: %s"),
1472 strerror_r (errno, buf, sizeof (buf)));
1473 goto close_and_out;
1476 if (__builtin_expect (debug_level, 0) > 0)
1478 #ifdef SO_PEERCRED
1479 if (pid != 0)
1480 dbg_log (_("\
1481 handle_request: request received (Version = %d) from PID %ld"),
1482 req.version, (long int) pid);
1483 else
1484 #endif
1485 dbg_log (_("\
1486 handle_request: request received (Version = %d)"), req.version);
1489 /* Phew, we got all the data, now process it. */
1490 handle_request (fd, &req, keybuf, uid);
1493 close_and_out:
1494 /* We are done. */
1495 close (fd);
1497 /* Check whether we should be pruning the cache. */
1498 assert (run_prune || to == 0);
1499 if (to == ETIMEDOUT)
1501 only_prune:
1502 /* The pthread_cond_timedwait() call timed out. It is time
1503 to clean up the cache. */
1504 assert (my_number < lastdb);
1505 prune_cache (&dbs[my_number], time (NULL), -1);
1507 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1508 /* Should never happen. */
1509 abort ();
1511 /* Compute next timeout time. */
1512 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1514 /* In case the list is emtpy we do not want to run the prune
1515 code right away again. */
1516 to = 0;
1519 /* Re-locking. */
1520 pthread_mutex_lock (&readylist_lock);
1522 /* One more thread available. */
1523 ++nready;
1528 static unsigned int nconns;
1530 static void
1531 fd_ready (int fd)
1533 pthread_mutex_lock (&readylist_lock);
1535 /* Find an empty entry in FDLIST. */
1536 size_t inner;
1537 for (inner = 0; inner < nconns; ++inner)
1538 if (fdlist[inner].next == NULL)
1539 break;
1540 assert (inner < nconns);
1542 fdlist[inner].fd = fd;
1544 if (readylist == NULL)
1545 readylist = fdlist[inner].next = &fdlist[inner];
1546 else
1548 fdlist[inner].next = readylist->next;
1549 readylist = readylist->next = &fdlist[inner];
1552 bool do_signal = true;
1553 if (__builtin_expect (nready == 0, 0))
1555 ++client_queued;
1556 do_signal = false;
1558 /* Try to start another thread to help out. */
1559 pthread_t th;
1560 if (nthreads < max_nthreads
1561 && pthread_create (&th, &attr, nscd_run,
1562 (void *) (long int) nthreads) == 0)
1564 /* We got another thread. */
1565 ++nthreads;
1566 /* The new thread might need a kick. */
1567 do_signal = true;
1572 pthread_mutex_unlock (&readylist_lock);
1574 /* Tell one of the worker threads there is work to do. */
1575 if (do_signal)
1576 pthread_cond_signal (&readylist_cond);
1580 /* Check whether restarting should happen. */
1581 static inline int
1582 restart_p (time_t now)
1584 return (paranoia && readylist == NULL && nready == nthreads
1585 && now >= restart_time);
1589 /* Array for times a connection was accepted. */
1590 static time_t *starttime;
1593 static void
1594 __attribute__ ((__noreturn__))
1595 main_loop_poll (void)
1597 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1598 * sizeof (conns[0]));
1600 conns[0].fd = sock;
1601 conns[0].events = POLLRDNORM;
1602 size_t nused = 1;
1603 size_t firstfree = 1;
1605 while (1)
1607 /* Wait for any event. We wait at most a couple of seconds so
1608 that we can check whether we should close any of the accepted
1609 connections since we have not received a request. */
1610 #define MAX_ACCEPT_TIMEOUT 30
1611 #define MIN_ACCEPT_TIMEOUT 5
1612 #define MAIN_THREAD_TIMEOUT \
1613 (MAX_ACCEPT_TIMEOUT * 1000 \
1614 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1616 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1618 time_t now = time (NULL);
1620 /* If there is a descriptor ready for reading or there is a new
1621 connection, process this now. */
1622 if (n > 0)
1624 if (conns[0].revents != 0)
1626 /* We have a new incoming connection. Accept the connection. */
1627 int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
1629 /* Use the descriptor if we have not reached the limit. */
1630 if (fd >= 0)
1632 if (firstfree < nconns)
1634 conns[firstfree].fd = fd;
1635 conns[firstfree].events = POLLRDNORM;
1636 starttime[firstfree] = now;
1637 if (firstfree >= nused)
1638 nused = firstfree + 1;
1641 ++firstfree;
1642 while (firstfree < nused && conns[firstfree].fd != -1);
1644 else
1645 /* We cannot use the connection so close it. */
1646 close (fd);
1649 --n;
1652 for (size_t cnt = 1; cnt < nused && n > 0; ++cnt)
1653 if (conns[cnt].revents != 0)
1655 fd_ready (conns[cnt].fd);
1657 /* Clean up the CONNS array. */
1658 conns[cnt].fd = -1;
1659 if (cnt < firstfree)
1660 firstfree = cnt;
1661 if (cnt == nused - 1)
1663 --nused;
1664 while (conns[nused - 1].fd == -1);
1666 --n;
1670 /* Now find entries which have timed out. */
1671 assert (nused > 0);
1673 /* We make the timeout length depend on the number of file
1674 descriptors currently used. */
1675 #define ACCEPT_TIMEOUT \
1676 (MAX_ACCEPT_TIMEOUT \
1677 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
1678 time_t laststart = now - ACCEPT_TIMEOUT;
1680 for (size_t cnt = nused - 1; cnt > 0; --cnt)
1682 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
1684 /* Remove the entry, it timed out. */
1685 (void) close (conns[cnt].fd);
1686 conns[cnt].fd = -1;
1688 if (cnt < firstfree)
1689 firstfree = cnt;
1690 if (cnt == nused - 1)
1692 --nused;
1693 while (conns[nused - 1].fd == -1);
1697 if (restart_p (now))
1698 restart ();
#ifdef HAVE_EPOLL
/* Main loop based on the epoll descriptor EFD.  It returns only if
   SOCK cannot be registered with EFD.  Otherwise it loops forever:
   new connections are accepted and registered, descriptors with
   pending input are unregistered and passed to fd_ready, connections
   on which no request arrived in time are closed, and the process is
   restarted when restart_p says so.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  int nused = 1;
  size_t highest = 0;

  /* Register the listening socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* epoll is not usable.  */
    return;

  for (;;)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int i = 0; i < n; ++i)
        {
          const int ready_fd = revs[i].data.fd;

          if (ready_fd == sock)
            {
              /* A new connection is pending.  */
              int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
              if (fd < 0)
                continue;

              /* Try to register the new descriptor.  */
              ev.data.fd = fd;
              if (fd >= nconns
                  || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                {
                  /* Either the descriptor is too large or the
                     registration failed.  Give the connection up.  */
                  close (fd);
                  continue;
                }

              /* Note when the connection was accepted.  */
              starttime[fd] = now;

              if (fd > highest)
                highest = fd;

              ++nused;
              continue;
            }

          /* Input on an accepted connection.  Unregister the
             descriptor first.  */
          (void) epoll_ctl (efd, EPOLL_CTL_DEL, ready_fd, NULL);

          /* Let a worker thread process the request.  */
          fd_ready (ready_fd);

          /* The connection is no longer waiting.  */
          starttime[ready_fd] = 0;
          if (ready_fd == highest)
            {
              --highest;
              while (highest > 0 && starttime[highest] == 0)
                --highest;
            }

          --nused;
        }

      /* Now look for accepted connections on which no request arrived
         for too long.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        {
          if (cnt == sock)
            continue;

          if (starttime[cnt] == 0)
            {
              /* Not a waiting connection; lower the upper bound if
                 this is the topmost descriptor.  */
              if (cnt == highest)
                --highest;
              continue;
            }

          if (starttime[cnt] < laststart)
            {
              /* The wait took too long, close the connection.  */
              (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

              (void) close (cnt);

              starttime[cnt] = 0;
              if (cnt == highest)
                --highest;
            }
        }

      if (restart_p (now))
        restart ();
    }
}
#endif
1797 /* Start all the threads we want. The initial process is thread no. 1. */
1798 void
1799 start_threads (void)
1801 /* Initialize the conditional variable we will use. The only
1802 non-standard attribute we might use is the clock selection. */
1803 pthread_condattr_t condattr;
1804 pthread_condattr_init (&condattr);
1806 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
1807 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
1808 /* Determine whether the monotonous clock is available. */
1809 struct timespec dummy;
1810 # if _POSIX_MONOTONIC_CLOCK == 0
1811 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
1812 # endif
1813 # if _POSIX_CLOCK_SELECTION == 0
1814 if (sysconf (_SC_CLOCK_SELECTION) > 0)
1815 # endif
1816 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
1817 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
1818 timeout_clock = CLOCK_MONOTONIC;
1819 #endif
1821 pthread_cond_init (&readylist_cond, &condattr);
1822 pthread_condattr_destroy (&condattr);
1825 /* Create the attribute for the threads. They are all created
1826 detached. */
1827 pthread_attr_init (&attr);
1828 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
1829 /* Use 1MB stacks, twice as much for 64-bit architectures. */
1830 pthread_attr_setstacksize (&attr, 1024 * 1024 * (sizeof (void *) / 4));
1832 /* We allow less than LASTDB threads only for debugging. */
1833 if (debug_level == 0)
1834 nthreads = MAX (nthreads, lastdb);
1836 int nfailed = 0;
1837 for (long int i = 0; i < nthreads; ++i)
1839 pthread_t th;
1840 if (pthread_create (&th, &attr, nscd_run, (void *) (i - nfailed)) != 0)
1841 ++nfailed;
1843 if (nthreads - nfailed < lastdb)
1845 /* We could not start enough threads. */
1846 dbg_log (_("could only start %d threads; terminating"),
1847 nthreads - nfailed);
1848 exit (1);
1851 /* Determine how much room for descriptors we should initially
1852 allocate. This might need to change later if we cap the number
1853 with MAXCONN. */
1854 const long int nfds = sysconf (_SC_OPEN_MAX);
1855 #define MINCONN 32
1856 #define MAXCONN 16384
1857 if (nfds == -1 || nfds > MAXCONN)
1858 nconns = MAXCONN;
1859 else if (nfds < MINCONN)
1860 nconns = MINCONN;
1861 else
1862 nconns = nfds;
1864 /* We need memory to pass descriptors on to the worker threads. */
1865 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
1866 /* Array to keep track when connection was accepted. */
1867 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
1869 /* In the main thread we execute the loop which handles incoming
1870 connections. */
1871 #ifdef HAVE_EPOLL
1872 int efd = epoll_create (100);
1873 if (efd != -1)
1875 main_loop_epoll (efd);
1876 close (efd);
1878 #endif
1880 main_loop_poll ();
1884 /* Look up the uid, gid, and supplementary groups to run nscd as. When
1885 this function is called, we are not listening on the nscd socket yet so
1886 we can just use the ordinary lookup functions without causing a lockup */
1887 static void
1888 begin_drop_privileges (void)
1890 struct passwd *pwd = getpwnam (server_user);
1892 if (pwd == NULL)
1894 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1895 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
1896 server_user);
1899 server_uid = pwd->pw_uid;
1900 server_gid = pwd->pw_gid;
1902 /* Save the old UID/GID if we have to change back. */
1903 if (paranoia)
1905 old_uid = getuid ();
1906 old_gid = getgid ();
1909 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
1911 /* This really must never happen. */
1912 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1913 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
1916 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
1918 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
1919 == -1)
1921 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1922 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
1927 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
1928 run nscd as the user specified in the configuration file. */
1929 static void
1930 finish_drop_privileges (void)
1932 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
1933 /* We need to preserve the capabilities to connect to the audit daemon. */
1934 cap_t new_caps = preserve_capabilities ();
1935 #endif
1937 if (setgroups (server_ngroups, server_groups) == -1)
1939 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1940 error (EXIT_FAILURE, errno, _("setgroups failed"));
1943 int res;
1944 if (paranoia)
1945 res = setresgid (server_gid, server_gid, old_gid);
1946 else
1947 res = setgid (server_gid);
1948 if (res == -1)
1950 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1951 perror ("setgid");
1952 exit (4);
1955 if (paranoia)
1956 res = setresuid (server_uid, server_uid, old_uid);
1957 else
1958 res = setuid (server_uid);
1959 if (res == -1)
1961 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1962 perror ("setuid");
1963 exit (4);
1966 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
1967 /* Remove the temporary capabilities. */
1968 install_real_capabilities (new_caps);
1969 #endif