* sysdeps/unix/sysv/linux/s390/s390-64/syscalls.list: Add open and
[glibc.git] / nscd / connections.c
blobabd84b9b631a57e371f5efcd0bbb2f5bc306b9da
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20 #include <alloca.h>
21 #include <assert.h>
22 #include <atomic.h>
23 #include <error.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <grp.h>
27 #include <libintl.h>
28 #include <pthread.h>
29 #include <pwd.h>
30 #include <resolv.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <arpa/inet.h>
35 #ifdef HAVE_EPOLL
36 # include <sys/epoll.h>
37 #endif
38 #include <sys/mman.h>
39 #include <sys/param.h>
40 #include <sys/poll.h>
41 #ifdef HAVE_SENDFILE
42 # include <sys/sendfile.h>
43 #endif
44 #include <sys/socket.h>
45 #include <sys/stat.h>
46 #include <sys/un.h>
48 #include "nscd.h"
49 #include "dbg_log.h"
50 #include "selinux.h"
51 #ifdef HAVE_SENDFILE
52 # include <kernel-features.h>
53 #endif
/* Allocation wrappers with built-in error checking; they are defined
   outside this file.  */
extern void *xmalloc (size_t n);
extern void *xcalloc (size_t n, size_t s);
extern void *xrealloc (void *o, size_t n);

/* Fallback for the number of supplementary groups if the system headers
   do not provide a value.  */
#ifndef NGROUPS
# define NGROUPS 32
#endif

/* State needed to run nscd as an unprivileged user: the configured user
   name plus the IDs and supplementary groups belonging to it.  */
const char *server_user;
static uid_t server_uid;
static gid_t server_gid;
static gid_t *server_groups;
static int server_ngroups;

/* User who, besides root, may request statistics (see the GETSTAT
   handling in handle_request).  */
const char *stat_user;
uid_t stat_uid;

/* Thread attributes object for this file.  */
static pthread_attr_t attr;

/* The two phases of dropping privileges; nscd_init calls the first one
   before the socket is set up and the second one afterwards.  */
static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
78 /* Map request type to a string. */
79 const char *const serv2str[LASTREQ] =
81 [GETPWBYNAME] = "GETPWBYNAME",
82 [GETPWBYUID] = "GETPWBYUID",
83 [GETGRBYNAME] = "GETGRBYNAME",
84 [GETGRBYGID] = "GETGRBYGID",
85 [GETHOSTBYNAME] = "GETHOSTBYNAME",
86 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
87 [GETHOSTBYADDR] = "GETHOSTBYADDR",
88 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
89 [SHUTDOWN] = "SHUTDOWN",
90 [GETSTAT] = "GETSTAT",
91 [INVALIDATE] = "INVALIDATE",
92 [GETFDPW] = "GETFDPW",
93 [GETFDGR] = "GETFDGR",
94 [GETFDHST] = "GETFDHST",
95 [GETAI] = "GETAI",
96 [INITGROUPS] = "INITGROUPS",
97 [GETSERVBYNAME] = "GETSERVBYNAME",
98 [GETSERVBYPORT] = "GETSERVBYPORT",
99 [GETFDSERV] = "GETFDSERV"
102 /* The control data structures for the services. */
103 struct database_dyn dbs[lastdb] =
105 [pwddb] = {
106 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
107 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
108 .enabled = 0,
109 .check_file = 1,
110 .persistent = 0,
111 .propagate = 1,
112 .shared = 0,
113 .max_db_size = DEFAULT_MAX_DB_SIZE,
114 .reset_res = 0,
115 .filename = "/etc/passwd",
116 .db_filename = _PATH_NSCD_PASSWD_DB,
117 .disabled_iov = &pwd_iov_disabled,
118 .postimeout = 3600,
119 .negtimeout = 20,
120 .wr_fd = -1,
121 .ro_fd = -1,
122 .mmap_used = false
124 [grpdb] = {
125 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
126 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
127 .enabled = 0,
128 .check_file = 1,
129 .persistent = 0,
130 .propagate = 1,
131 .shared = 0,
132 .max_db_size = DEFAULT_MAX_DB_SIZE,
133 .reset_res = 0,
134 .filename = "/etc/group",
135 .db_filename = _PATH_NSCD_GROUP_DB,
136 .disabled_iov = &grp_iov_disabled,
137 .postimeout = 3600,
138 .negtimeout = 60,
139 .wr_fd = -1,
140 .ro_fd = -1,
141 .mmap_used = false
143 [hstdb] = {
144 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
145 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
146 .enabled = 0,
147 .check_file = 1,
148 .persistent = 0,
149 .propagate = 0, /* Not used. */
150 .shared = 0,
151 .max_db_size = DEFAULT_MAX_DB_SIZE,
152 .reset_res = 1,
153 .filename = "/etc/hosts",
154 .db_filename = _PATH_NSCD_HOSTS_DB,
155 .disabled_iov = &hst_iov_disabled,
156 .postimeout = 3600,
157 .negtimeout = 20,
158 .wr_fd = -1,
159 .ro_fd = -1,
160 .mmap_used = false
162 [servdb] = {
163 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
164 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
165 .enabled = 0,
166 .check_file = 1,
167 .persistent = 0,
168 .propagate = 0, /* Not used. */
169 .shared = 0,
170 .max_db_size = DEFAULT_MAX_DB_SIZE,
171 .reset_res = 0,
172 .filename = "/etc/services",
173 .db_filename = _PATH_NSCD_SERVICES_DB,
174 .disabled_iov = &serv_iov_disabled,
175 .postimeout = 28800,
176 .negtimeout = 20,
177 .wr_fd = -1,
178 .ro_fd = -1,
179 .mmap_used = false
184 /* Mapping of request type to database. */
185 static struct
187 bool data_request;
188 struct database_dyn *db;
189 } const reqinfo[LASTREQ] =
191 [GETPWBYNAME] = { true, &dbs[pwddb] },
192 [GETPWBYUID] = { true, &dbs[pwddb] },
193 [GETGRBYNAME] = { true, &dbs[grpdb] },
194 [GETGRBYGID] = { true, &dbs[grpdb] },
195 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
196 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
197 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
198 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
199 [SHUTDOWN] = { false, NULL },
200 [GETSTAT] = { false, NULL },
201 [SHUTDOWN] = { false, NULL },
202 [GETFDPW] = { false, &dbs[pwddb] },
203 [GETFDGR] = { false, &dbs[grpdb] },
204 [GETFDHST] = { false, &dbs[hstdb] },
205 [GETAI] = { true, &dbs[hstdb] },
206 [INITGROUPS] = { true, &dbs[grpdb] },
207 [GETSERVBYNAME] = { true, &dbs[servdb] },
208 [GETSERVBYPORT] = { true, &dbs[servdb] },
209 [GETFDSERV] = { false, &dbs[servdb] }
/* Number of threads to start with.  The value -1 means "not configured";
   nscd_init replaces it with a default.  */
int nthreads = -1;

/* Upper limit for the number of threads.  */
int max_nthreads = 32;

/* The socket on which incoming connections arrive.  */
static int sock;

/* Counts how often a client had to wait.  */
unsigned long client_queued;
/* Send all LEN bytes starting at BUF over the socket FD, retrying after
   signal interruptions and continuing after partial sends.  Returns the
   negative result of send if a send call fails, otherwise the number of
   bytes actually sent (less than LEN only if send reported zero bytes).  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cursor = buf;
  size_t left = len;

  for (;;)
    {
      ssize_t sent = TEMP_FAILURE_RETRY (send (fd, cursor, left,
                                               MSG_NOSIGNAL));
      if (sent < 0)
        return sent;
      if (sent == 0)
        break;

      cursor += sent;
      left -= sent;
      if (left == 0)
        break;
    }

  return len - left;
}
#ifdef HAVE_SENDFILE
/* Transfer LEN bytes of FROMFD, starting at offset OFF, to TOFD using
   sendfile, retrying after signal interruptions and continuing after
   partial transfers.  Returns the negative result of sendfile if a call
   fails, otherwise the number of bytes transferred.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t left = len;

  for (;;)
    {
      /* sendfile advances OFF itself, only the count needs updating.  */
      ssize_t sent = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, left));
      if (sent < 0)
        return sent;
      if (sent == 0)
        break;

      left -= sent;
      if (left <= 0)
        break;
    }

  return len - left;
}
#endif
/* Values stored in the per-byte usage map built by check_use while a
   persistent database is verified.  The low bits say what kind of object
   occupies a byte, the flag bits mark special positions.  */
enum usekey
{
  /* Byte not claimed by any object so far.  */
  use_not = 0,

  /* Kinds of objects.  */
  use_he = 1,
#if SEPARATE_KEY
  use_key = 2,
#endif
  use_data = 3,

  /* Flag bits.  They are never stored on their own, only combined with
     one of the kinds above.  */
  use_first = 16,
  use_begin = 32,
  use_end = 64,

  /* The combinations which actually show up in the map.  */
  use_he_begin = use_he | use_begin,
  use_he_end = use_he | use_end,
#if SEPARATE_KEY
  use_key_begin = use_key | use_begin,
  use_key_end = use_key | use_end,
  use_key_first = use_key_begin | use_first,
#endif
  use_data_begin = use_data | use_begin,
  use_data_end = use_data | use_end,
  use_data_first = use_data_begin | use_first
};
287 static int
288 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
289 enum usekey use, ref_t start, size_t len)
291 assert (len >= 2);
293 if (start > first_free || start + len > first_free
294 || (start & BLOCK_ALIGN_M1))
295 return 0;
297 if (usemap[start] == use_not)
299 /* Add the start marker. */
300 usemap[start] = use | use_begin;
301 use &= ~use_first;
303 while (--len > 0)
304 if (usemap[++start] != use_not)
305 return 0;
306 else
307 usemap[start] = use;
309 /* Add the end marker. */
310 usemap[start] = use | use_end;
312 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
314 /* Hash entries can't be shared. */
315 if (use == use_he)
316 return 0;
318 usemap[start] |= (use & use_first);
319 use &= ~use_first;
321 while (--len > 1)
322 if (usemap[++start] != use)
323 return 0;
325 if (usemap[++start] != (use | use_end))
326 return 0;
328 else
329 /* Points to a wrong object or somewhere in the middle. */
330 return 0;
332 return 1;
336 /* Verify data in persistent database. */
337 static int
338 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
340 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb);
342 time_t now = time (NULL);
344 struct database_pers_head *head = mem;
345 struct database_pers_head head_copy = *head;
347 /* Check that the header that was read matches the head in the database. */
348 if (readhead != NULL && memcmp (head, readhead, sizeof (*head)) != 0)
349 return 0;
351 /* First some easy tests: make sure the database header is sane. */
352 if (head->version != DB_VERSION
353 || head->header_size != sizeof (*head)
354 /* We allow a timestamp to be one hour ahead of the current time.
355 This should cover daylight saving time changes. */
356 || head->timestamp > now + 60 * 60 + 60
357 || (head->gc_cycle & 1)
358 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
359 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
360 || head->first_free < 0
361 || head->first_free > head->data_size
362 || (head->first_free & BLOCK_ALIGN_M1) != 0
363 || head->maxnentries < 0
364 || head->maxnsearched < 0)
365 return 0;
367 uint8_t *usemap = calloc (head->first_free, 1);
368 if (usemap == NULL)
369 return 0;
371 const char *data = (char *) &head->array[roundup (head->module,
372 ALIGN / sizeof (ref_t))];
374 nscd_ssize_t he_cnt = 0;
375 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
377 ref_t trail = head->array[cnt];
378 ref_t work = trail;
379 int tick = 0;
381 while (work != ENDREF)
383 if (! check_use (data, head->first_free, usemap, use_he, work,
384 sizeof (struct hashentry)))
385 goto fail;
387 /* Now we know we can dereference the record. */
388 struct hashentry *here = (struct hashentry *) (data + work);
390 ++he_cnt;
392 /* Make sure the record is for this type of service. */
393 if (here->type >= LASTREQ
394 || reqinfo[here->type].db != &dbs[dbnr])
395 goto fail;
397 /* Validate boolean field value. */
398 if (here->first != false && here->first != true)
399 goto fail;
401 if (here->len < 0)
402 goto fail;
404 /* Now the data. */
405 if (here->packet < 0
406 || here->packet > head->first_free
407 || here->packet + sizeof (struct datahead) > head->first_free)
408 goto fail;
410 struct datahead *dh = (struct datahead *) (data + here->packet);
412 if (! check_use (data, head->first_free, usemap,
413 use_data | (here->first ? use_first : 0),
414 here->packet, dh->allocsize))
415 goto fail;
417 if (dh->allocsize < sizeof (struct datahead)
418 || dh->recsize > dh->allocsize
419 || (dh->notfound != false && dh->notfound != true)
420 || (dh->usable != false && dh->usable != true))
421 goto fail;
423 if (here->key < here->packet + sizeof (struct datahead)
424 || here->key > here->packet + dh->allocsize
425 || here->key + here->len > here->packet + dh->allocsize)
427 #if SEPARATE_KEY
428 /* If keys can appear outside of data, this should be done
429 instead. But gc doesn't mark the data in that case. */
430 if (! check_use (data, head->first_free, usemap,
431 use_key | (here->first ? use_first : 0),
432 here->key, here->len))
433 #endif
434 goto fail;
437 work = here->next;
439 if (work == trail)
440 /* A circular list, this must not happen. */
441 goto fail;
442 if (tick)
443 trail = ((struct hashentry *) (data + trail))->next;
444 tick = 1 - tick;
448 if (he_cnt != head->nentries)
449 goto fail;
451 /* See if all data and keys had at least one reference from
452 he->first == true hashentry. */
453 for (ref_t idx = 0; idx < head->first_free; ++idx)
455 #if SEPARATE_KEY
456 if (usemap[idx] == use_key_begin)
457 goto fail;
458 #endif
459 if (usemap[idx] == use_data_begin)
460 goto fail;
463 /* Finally, make sure the database hasn't changed since the first test. */
464 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
465 goto fail;
467 free (usemap);
468 return 1;
470 fail:
471 free (usemap);
472 return 0;
/* Additional flags for the open calls in this file: request
   close-on-exec where the system headers define O_CLOEXEC, nothing
   otherwise.  */
#ifndef O_CLOEXEC
# define EXTRA_O_FLAGS 0
#else
# define EXTRA_O_FLAGS O_CLOEXEC
#endif
483 /* Initialize database information structures. */
484 void
485 nscd_init (void)
487 /* Look up unprivileged uid/gid/groups before we start listening on the
488 socket */
489 if (server_user != NULL)
490 begin_drop_privileges ();
492 if (nthreads == -1)
493 /* No configuration for this value, assume a default. */
494 nthreads = 4;
496 for (size_t cnt = 0; cnt < lastdb; ++cnt)
497 if (dbs[cnt].enabled)
499 pthread_rwlock_init (&dbs[cnt].lock, NULL);
500 pthread_mutex_init (&dbs[cnt].memlock, NULL);
502 if (dbs[cnt].persistent)
504 /* Try to open the appropriate file on disk. */
505 int fd = open (dbs[cnt].db_filename, O_RDWR | EXTRA_O_FLAGS);
506 if (fd != -1)
508 struct stat64 st;
509 void *mem;
510 size_t total;
511 struct database_pers_head head;
512 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
513 sizeof (head)));
514 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
516 fail_db:
517 dbg_log (_("invalid persistent database file \"%s\": %s"),
518 dbs[cnt].db_filename, strerror (errno));
519 unlink (dbs[cnt].db_filename);
521 else if (head.module == 0 && head.data_size == 0)
523 /* The file has been created, but the head has not been
524 initialized yet. Remove the old file. */
525 unlink (dbs[cnt].db_filename);
527 else if (head.header_size != (int) sizeof (head))
529 dbg_log (_("invalid persistent database file \"%s\": %s"),
530 dbs[cnt].db_filename,
531 _("header size does not match"));
532 unlink (dbs[cnt].db_filename);
534 else if ((total = (sizeof (head)
535 + roundup (head.module * sizeof (ref_t),
536 ALIGN)
537 + head.data_size))
538 > st.st_size
539 || total < sizeof (head))
541 dbg_log (_("invalid persistent database file \"%s\": %s"),
542 dbs[cnt].db_filename,
543 _("file size does not match"));
544 unlink (dbs[cnt].db_filename);
546 /* Note we map with the maximum size allowed for the
547 database. This is likely much larger than the
548 actual file size. This is OK on most OSes since
549 extensions of the underlying file will
550 automatically translate more pages available for
551 memory access. */
552 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
553 PROT_READ | PROT_WRITE,
554 MAP_SHARED, fd, 0))
555 == MAP_FAILED)
556 goto fail_db;
557 else if (!verify_persistent_db (mem, &head, cnt))
559 munmap (mem, total);
560 dbg_log (_("invalid persistent database file \"%s\": %s"),
561 dbs[cnt].db_filename,
562 _("verification failed"));
563 unlink (dbs[cnt].db_filename);
565 else
567 /* Success. We have the database. */
568 dbs[cnt].head = mem;
569 dbs[cnt].memsize = total;
570 dbs[cnt].data = (char *)
571 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
572 ALIGN / sizeof (ref_t))];
573 dbs[cnt].mmap_used = true;
575 if (dbs[cnt].suggested_module > head.module)
576 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
577 dbnames[cnt]);
579 dbs[cnt].wr_fd = fd;
580 fd = -1;
581 /* We also need a read-only descriptor. */
582 if (dbs[cnt].shared)
584 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
585 O_RDONLY | EXTRA_O_FLAGS);
586 if (dbs[cnt].ro_fd == -1)
587 dbg_log (_("\
588 cannot create read-only descriptor for \"%s\"; no mmap"),
589 dbs[cnt].db_filename);
592 // XXX Shall we test whether the descriptors actually
593 // XXX point to the same file?
596 /* Close the file descriptors in case something went
597 wrong in which case the variable have not been
598 assigned -1. */
599 if (fd != -1)
600 close (fd);
604 if (dbs[cnt].head == NULL)
606 /* No database loaded. Allocate the data structure,
607 possibly on disk. */
608 struct database_pers_head head;
609 size_t total = (sizeof (head)
610 + roundup (dbs[cnt].suggested_module
611 * sizeof (ref_t), ALIGN)
612 + (dbs[cnt].suggested_module
613 * DEFAULT_DATASIZE_PER_BUCKET));
615 /* Try to create the database. If we do not need a
616 persistent database create a temporary file. */
617 int fd;
618 int ro_fd = -1;
619 if (dbs[cnt].persistent)
621 fd = open (dbs[cnt].db_filename,
622 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | EXTRA_O_FLAGS,
623 S_IRUSR | S_IWUSR);
624 if (fd != -1 && dbs[cnt].shared)
625 ro_fd = open (dbs[cnt].db_filename,
626 O_RDONLY | EXTRA_O_FLAGS);
628 else
630 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
631 fd = mkostemp (fname, EXTRA_O_FLAGS);
633 /* We do not need the file name anymore after we
634 opened another file descriptor in read-only mode. */
635 if (fd != -1)
637 if (dbs[cnt].shared)
638 ro_fd = open (fname, O_RDONLY | EXTRA_O_FLAGS);
640 unlink (fname);
644 if (fd == -1)
646 if (errno == EEXIST)
648 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
649 dbnames[cnt], dbs[cnt].db_filename);
650 // XXX Correct way to terminate?
651 exit (1);
654 if (dbs[cnt].persistent)
655 dbg_log (_("cannot create %s; no persistent database used"),
656 dbs[cnt].db_filename);
657 else
658 dbg_log (_("cannot create %s; no sharing possible"),
659 dbs[cnt].db_filename);
661 dbs[cnt].persistent = 0;
662 // XXX remember: no mmap
664 else
666 /* Tell the user if we could not create the read-only
667 descriptor. */
668 if (ro_fd == -1 && dbs[cnt].shared)
669 dbg_log (_("\
670 cannot create read-only descriptor for \"%s\"; no mmap"),
671 dbs[cnt].db_filename);
673 /* Before we create the header, initialiye the hash
674 table. So that if we get interrupted if writing
675 the header we can recognize a partially initialized
676 database. */
677 size_t ps = sysconf (_SC_PAGESIZE);
678 char tmpbuf[ps];
679 assert (~ENDREF == 0);
680 memset (tmpbuf, '\xff', ps);
682 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
683 off_t offset = sizeof (head);
685 size_t towrite;
686 if (offset % ps != 0)
688 towrite = MIN (remaining, ps - (offset % ps));
689 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
690 goto write_fail;
691 offset += towrite;
692 remaining -= towrite;
695 while (remaining > ps)
697 if (pwrite (fd, tmpbuf, ps, offset) == -1)
698 goto write_fail;
699 offset += ps;
700 remaining -= ps;
703 if (remaining > 0
704 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
705 goto write_fail;
707 /* Create the header of the file. */
708 struct database_pers_head head =
710 .version = DB_VERSION,
711 .header_size = sizeof (head),
712 .module = dbs[cnt].suggested_module,
713 .data_size = (dbs[cnt].suggested_module
714 * DEFAULT_DATASIZE_PER_BUCKET),
715 .first_free = 0
717 void *mem;
719 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
720 != sizeof (head))
721 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
722 != 0)
723 || (mem = mmap (NULL, dbs[cnt].max_db_size,
724 PROT_READ | PROT_WRITE,
725 MAP_SHARED, fd, 0)) == MAP_FAILED)
727 write_fail:
728 unlink (dbs[cnt].db_filename);
729 dbg_log (_("cannot write to database file %s: %s"),
730 dbs[cnt].db_filename, strerror (errno));
731 dbs[cnt].persistent = 0;
733 else
735 /* Success. */
736 dbs[cnt].head = mem;
737 dbs[cnt].data = (char *)
738 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
739 ALIGN / sizeof (ref_t))];
740 dbs[cnt].memsize = total;
741 dbs[cnt].mmap_used = true;
743 /* Remember the descriptors. */
744 dbs[cnt].wr_fd = fd;
745 dbs[cnt].ro_fd = ro_fd;
746 fd = -1;
747 ro_fd = -1;
750 if (fd != -1)
751 close (fd);
752 if (ro_fd != -1)
753 close (ro_fd);
757 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
758 /* We do not check here whether the O_CLOEXEC provided to the
759 open call was successful or not. The two fcntl calls are
760 only performed once each per process start-up and therefore
761 is not noticeable at all. */
762 if (paranoia
763 && ((dbs[cnt].wr_fd != -1
764 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
765 || (dbs[cnt].ro_fd != -1
766 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
768 dbg_log (_("\
769 cannot set socket to close on exec: %s; disabling paranoia mode"),
770 strerror (errno));
771 paranoia = 0;
773 #endif
775 if (dbs[cnt].head == NULL)
777 /* We do not use the persistent database. Just
778 create an in-memory data structure. */
779 assert (! dbs[cnt].persistent);
781 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
782 + (dbs[cnt].suggested_module
783 * sizeof (ref_t)));
784 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
785 assert (~ENDREF == 0);
786 memset (dbs[cnt].head->array, '\xff',
787 dbs[cnt].suggested_module * sizeof (ref_t));
788 dbs[cnt].head->module = dbs[cnt].suggested_module;
789 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
790 * dbs[cnt].head->module);
791 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
792 dbs[cnt].head->first_free = 0;
794 dbs[cnt].shared = 0;
795 assert (dbs[cnt].ro_fd == -1);
798 if (dbs[cnt].check_file)
800 /* We need the modification date of the file. */
801 struct stat64 st;
803 if (stat64 (dbs[cnt].filename, &st) < 0)
805 /* We cannot stat() the file, disable file checking. */
806 dbg_log (_("cannot stat() file `%s': %s"),
807 dbs[cnt].filename, strerror (errno));
808 dbs[cnt].check_file = 0;
810 else
811 dbs[cnt].file_mtime = st.st_mtime;
815 /* Create the socket. */
816 sock = socket (AF_UNIX, SOCK_STREAM, 0);
817 if (sock < 0)
819 dbg_log (_("cannot open socket: %s"), strerror (errno));
820 exit (errno == EACCES ? 4 : 1);
822 /* Bind a name to the socket. */
823 struct sockaddr_un sock_addr;
824 sock_addr.sun_family = AF_UNIX;
825 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
826 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
828 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
829 exit (errno == EACCES ? 4 : 1);
832 /* We don't want to get stuck on accept. */
833 int fl = fcntl (sock, F_GETFL);
834 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
836 dbg_log (_("cannot change socket to nonblocking mode: %s"),
837 strerror (errno));
838 exit (1);
841 /* The descriptor needs to be closed on exec. */
842 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
844 dbg_log (_("cannot set socket to close on exec: %s"),
845 strerror (errno));
846 exit (1);
849 /* Set permissions for the socket. */
850 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
852 /* Set the socket up to accept connections. */
853 if (listen (sock, SOMAXCONN) < 0)
855 dbg_log (_("cannot enable socket to accept connections: %s"),
856 strerror (errno));
857 exit (1);
860 /* Change to unprivileged uid/gid/groups if specifed in config file */
861 if (server_user != NULL)
862 finish_drop_privileges ();
866 /* Close the connections. */
867 void
868 close_sockets (void)
870 close (sock);
874 static void
875 invalidate_cache (char *key, int fd)
877 dbtype number;
878 int32_t resp;
880 for (number = pwddb; number < lastdb; ++number)
881 if (strcmp (key, dbnames[number]) == 0)
883 if (dbs[number].reset_res)
884 res_init ();
886 break;
889 if (number == lastdb)
891 resp = EINVAL;
892 writeall (fd, &resp, sizeof (resp));
893 return;
896 if (dbs[number].enabled)
898 pthread_mutex_lock (&dbs[number].prune_lock);
899 prune_cache (&dbs[number], LONG_MAX, fd);
900 pthread_mutex_unlock (&dbs[number].prune_lock);
902 else
904 resp = 0;
905 writeall (fd, &resp, sizeof (resp));
910 #ifdef SCM_RIGHTS
911 static void
912 send_ro_fd (struct database_dyn *db, char *key, int fd)
914 /* If we do not have an read-only file descriptor do nothing. */
915 if (db->ro_fd == -1)
916 return;
918 /* We need to send some data along with the descriptor. */
919 uint64_t mapsize = (db->head->data_size
920 + roundup (db->head->module * sizeof (ref_t), ALIGN)
921 + sizeof (struct database_pers_head));
922 struct iovec iov[2];
923 iov[0].iov_base = key;
924 iov[0].iov_len = strlen (key) + 1;
925 iov[1].iov_base = &mapsize;
926 iov[1].iov_len = sizeof (mapsize);
928 /* Prepare the control message to transfer the descriptor. */
929 union
931 struct cmsghdr hdr;
932 char bytes[CMSG_SPACE (sizeof (int))];
933 } buf;
934 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
935 .msg_control = buf.bytes,
936 .msg_controllen = sizeof (buf) };
937 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
939 cmsg->cmsg_level = SOL_SOCKET;
940 cmsg->cmsg_type = SCM_RIGHTS;
941 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
943 *(int *) CMSG_DATA (cmsg) = db->ro_fd;
945 msg.msg_controllen = cmsg->cmsg_len;
947 /* Send the control message. We repeat when we are interrupted but
948 everything else is ignored. */
949 #ifndef MSG_NOSIGNAL
950 # define MSG_NOSIGNAL 0
951 #endif
952 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
954 if (__builtin_expect (debug_level > 0, 0))
955 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
957 #endif /* SCM_RIGHTS */
960 /* Handle new request. */
961 static void
962 handle_request (int fd, request_header *req, void *key, uid_t uid)
964 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
966 if (debug_level > 0)
967 dbg_log (_("\
968 cannot handle old request version %d; current version is %d"),
969 req->version, NSCD_VERSION);
970 return;
973 /* Perform the SELinux check before we go on to the standard checks. */
974 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
976 if (debug_level > 0)
977 dbg_log (_("request not handled due to missing permission"));
978 return;
981 struct database_dyn *db = reqinfo[req->type].db;
983 /* See whether we can service the request from the cache. */
984 if (__builtin_expect (reqinfo[req->type].data_request, true))
986 if (__builtin_expect (debug_level, 0) > 0)
988 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
990 char buf[INET6_ADDRSTRLEN];
992 dbg_log ("\t%s (%s)", serv2str[req->type],
993 inet_ntop (req->type == GETHOSTBYADDR
994 ? AF_INET : AF_INET6,
995 key, buf, sizeof (buf)));
997 else
998 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1001 /* Is this service enabled? */
1002 if (__builtin_expect (!db->enabled, 0))
1004 /* No, sent the prepared record. */
1005 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1006 db->disabled_iov->iov_len,
1007 MSG_NOSIGNAL))
1008 != (ssize_t) db->disabled_iov->iov_len
1009 && __builtin_expect (debug_level, 0) > 0)
1011 /* We have problems sending the result. */
1012 char buf[256];
1013 dbg_log (_("cannot write result: %s"),
1014 strerror_r (errno, buf, sizeof (buf)));
1017 return;
1020 /* Be sure we can read the data. */
1021 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
1023 ++db->head->rdlockdelayed;
1024 pthread_rwlock_rdlock (&db->lock);
1027 /* See whether we can handle it from the cache. */
1028 struct datahead *cached;
1029 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1030 db, uid);
1031 if (cached != NULL)
1033 /* Hurray it's in the cache. */
1034 ssize_t nwritten;
1036 #ifdef HAVE_SENDFILE
1037 if (__builtin_expect (db->mmap_used, 1))
1039 assert (db->wr_fd != -1);
1040 assert ((char *) cached->data > (char *) db->data);
1041 assert ((char *) cached->data - (char *) db->head
1042 + cached->recsize
1043 <= (sizeof (struct database_pers_head)
1044 + db->head->module * sizeof (ref_t)
1045 + db->head->data_size));
1046 nwritten = sendfileall (fd, db->wr_fd,
1047 (char *) cached->data
1048 - (char *) db->head, cached->recsize);
1049 # ifndef __ASSUME_SENDFILE
1050 if (nwritten == -1 && errno == ENOSYS)
1051 goto use_write;
1052 # endif
1054 else
1055 # ifndef __ASSUME_SENDFILE
1056 use_write:
1057 # endif
1058 #endif
1059 nwritten = writeall (fd, cached->data, cached->recsize);
1061 if (nwritten != cached->recsize
1062 && __builtin_expect (debug_level, 0) > 0)
1064 /* We have problems sending the result. */
1065 char buf[256];
1066 dbg_log (_("cannot write result: %s"),
1067 strerror_r (errno, buf, sizeof (buf)));
1070 pthread_rwlock_unlock (&db->lock);
1072 return;
1075 pthread_rwlock_unlock (&db->lock);
1077 else if (__builtin_expect (debug_level, 0) > 0)
1079 if (req->type == INVALIDATE)
1080 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1081 else
1082 dbg_log ("\t%s", serv2str[req->type]);
1085 /* Handle the request. */
1086 switch (req->type)
1088 case GETPWBYNAME:
1089 addpwbyname (db, fd, req, key, uid);
1090 break;
1092 case GETPWBYUID:
1093 addpwbyuid (db, fd, req, key, uid);
1094 break;
1096 case GETGRBYNAME:
1097 addgrbyname (db, fd, req, key, uid);
1098 break;
1100 case GETGRBYGID:
1101 addgrbygid (db, fd, req, key, uid);
1102 break;
1104 case GETHOSTBYNAME:
1105 addhstbyname (db, fd, req, key, uid);
1106 break;
1108 case GETHOSTBYNAMEv6:
1109 addhstbynamev6 (db, fd, req, key, uid);
1110 break;
1112 case GETHOSTBYADDR:
1113 addhstbyaddr (db, fd, req, key, uid);
1114 break;
1116 case GETHOSTBYADDRv6:
1117 addhstbyaddrv6 (db, fd, req, key, uid);
1118 break;
1120 case GETAI:
1121 addhstai (db, fd, req, key, uid);
1122 break;
1124 case INITGROUPS:
1125 addinitgroups (db, fd, req, key, uid);
1126 break;
1128 case GETSERVBYNAME:
1129 addservbyname (db, fd, req, key, uid);
1130 break;
1132 case GETSERVBYPORT:
1133 addservbyport (db, fd, req, key, uid);
1134 break;
1136 case GETSTAT:
1137 case SHUTDOWN:
1138 case INVALIDATE:
1140 /* Get the callers credentials. */
1141 #ifdef SO_PEERCRED
1142 struct ucred caller;
1143 socklen_t optlen = sizeof (caller);
1145 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1147 char buf[256];
1149 dbg_log (_("error getting caller's id: %s"),
1150 strerror_r (errno, buf, sizeof (buf)));
1151 break;
1154 uid = caller.uid;
1155 #else
1156 /* Some systems have no SO_PEERCRED implementation. They don't
1157 care about security so we don't as well. */
1158 uid = 0;
1159 #endif
1162 /* Accept shutdown, getstat and invalidate only from root. For
1163 the stat call also allow the user specified in the config file. */
1164 if (req->type == GETSTAT)
1166 if (uid == 0 || uid == stat_uid)
1167 send_stats (fd, dbs);
1169 else if (uid == 0)
1171 if (req->type == INVALIDATE)
1172 invalidate_cache (key, fd);
1173 else
1174 termination_handler (0);
1176 break;
1178 case GETFDPW:
1179 case GETFDGR:
1180 case GETFDHST:
1181 case GETFDSERV:
1182 #ifdef SCM_RIGHTS
1183 send_ro_fd (reqinfo[req->type].db, key, fd);
1184 #endif
1185 break;
1187 default:
1188 /* Ignore the command, it's nothing we know. */
1189 break;
1194 /* Restart the process. */
1195 static void
1196 restart (void)
1198 /* First determine the parameters. We do not use the parameters
1199 passed to main() since in case nscd is started by running the
1200 dynamic linker this will not work. Yes, this is not the usual
1201 case but nscd is part of glibc and we occasionally do this. */
1202 size_t buflen = 1024;
1203 char *buf = alloca (buflen);
1204 size_t readlen = 0;
1205 int fd = open ("/proc/self/cmdline", O_RDONLY);
1206 if (fd == -1)
1208 dbg_log (_("\
1209 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1210 strerror (errno));
1212 paranoia = 0;
1213 return;
1216 while (1)
1218 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1219 buflen - readlen));
1220 if (n == -1)
1222 dbg_log (_("\
1223 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1224 strerror (errno));
1226 close (fd);
1227 paranoia = 0;
1228 return;
1231 readlen += n;
1233 if (readlen < buflen)
1234 break;
1236 /* We might have to extend the buffer. */
1237 size_t old_buflen = buflen;
1238 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1239 buf = memmove (newp, buf, old_buflen);
1242 close (fd);
1244 /* Parse the command line. Worst case scenario: every two
1245 characters form one parameter (one character plus NUL). */
1246 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1247 int argc = 0;
1249 char *cp = buf;
1250 while (cp < buf + readlen)
1252 argv[argc++] = cp;
1253 cp = (char *) rawmemchr (cp, '\0') + 1;
1255 argv[argc] = NULL;
1257 /* Second, change back to the old user if we changed it. */
1258 if (server_user != NULL)
1260 if (setresuid (old_uid, old_uid, old_uid) != 0)
1262 dbg_log (_("\
1263 cannot change to old UID: %s; disabling paranoia mode"),
1264 strerror (errno));
1266 paranoia = 0;
1267 return;
1270 if (setresgid (old_gid, old_gid, old_gid) != 0)
1272 dbg_log (_("\
1273 cannot change to old GID: %s; disabling paranoia mode"),
1274 strerror (errno));
1276 setuid (server_uid);
1277 paranoia = 0;
1278 return;
1282 /* Next change back to the old working directory. */
1283 if (chdir (oldcwd) == -1)
1285 dbg_log (_("\
1286 cannot change to old working directory: %s; disabling paranoia mode"),
1287 strerror (errno));
1289 if (server_user != NULL)
1291 setuid (server_uid);
1292 setgid (server_gid);
1294 paranoia = 0;
1295 return;
1298 /* Synchronize memory. */
1299 for (int cnt = 0; cnt < lastdb; ++cnt)
1300 if (!dbs[cnt].enabled)
1302 /* Make sure nobody keeps using the database. */
1303 dbs[cnt].head->timestamp = 0;
1305 if (dbs[cnt].persistent)
1306 // XXX async OK?
1307 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1310 /* The preparations are done. */
1311 execv ("/proc/self/exe", argv);
1313 /* If we come here, we will never be able to re-exec. */
1314 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1315 strerror (errno));
1317 if (server_user != NULL)
1319 setuid (server_uid);
1320 setgid (server_gid);
1322 if (chdir ("/") != 0)
1323 dbg_log (_("cannot change current working directory to \"/\": %s"),
1324 strerror (errno));
1325 paranoia = 0;
1329 /* List of file descriptors. */
1330 struct fdlist
1332 int fd;
1333 struct fdlist *next;
1335 /* Memory allocated for the list. */
1336 static struct fdlist *fdlist;
1337 /* List of currently ready-to-read file descriptors. */
1338 static struct fdlist *readylist;
1340 /* Conditional variable and mutex to signal availability of entries in
1341 READYLIST. The condvar is initialized dynamically since we might
1342 use a different clock depending on availability. */
1343 static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
1344 static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1346 /* The clock to use with the condvar. */
1347 static clockid_t timeout_clock = CLOCK_REALTIME;
1349 /* Number of threads ready to handle the READYLIST. */
1350 static unsigned long int nready;
1353 /* Function for the clean-up threads. */
1354 static void *
1355 __attribute__ ((__noreturn__))
1356 nscd_run_prune (void *p)
1358 const long int my_number = (long int) p;
1359 assert (dbs[my_number].enabled);
1361 int dont_need_update = setup_thread (&dbs[my_number]);
1363 /* We are running. */
1364 dbs[my_number].head->timestamp = time (NULL);
1366 struct timespec prune_ts;
1367 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1368 /* Should never happen. */
1369 abort ();
1371 /* Compute the initial timeout time. Prevent all the timers to go
1372 off at the same time by adding a db-based value. */
1373 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1375 pthread_mutex_lock (&dbs[my_number].prune_lock);
1376 while (1)
1378 /* Wait, but not forever. */
1379 int e = pthread_cond_timedwait (&dbs[my_number].prune_cond,
1380 &dbs[my_number].prune_lock,
1381 &prune_ts);
1382 assert (e == 0 || e == ETIMEDOUT);
1384 time_t next_wait;
1385 time_t now = time (NULL);
1386 if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time)
1388 next_wait = prune_cache (&dbs[my_number], now, -1);
1389 next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1390 /* If clients cannot determine for sure whether nscd is running
1391 we need to wake up occasionally to update the timestamp.
1392 Wait 90% of the update period. */
1393 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1394 if (__builtin_expect (! dont_need_update, 0))
1395 next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1397 /* Make it known when we will wake up again. */
1398 dbs[my_number].wakeup_time = now + next_wait;
1400 else
1401 /* The cache was just pruned. Do not do it again now. Just
1402 use the new timeout value. */
1403 next_wait = dbs[my_number].wakeup_time - now;
1405 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1406 /* Should never happen. */
1407 abort ();
1409 /* Compute next timeout time. */
1410 prune_ts.tv_sec += next_wait;
1415 /* This is the main loop. It is replicated in different threads but
1416 the the use of the ready list makes sure only one thread handles an
1417 incoming connection. */
1418 static void *
1419 __attribute__ ((__noreturn__))
1420 nscd_run_worker (void *p)
1422 char buf[256];
1424 /* Initial locking. */
1425 pthread_mutex_lock (&readylist_lock);
1427 /* One more thread available. */
1428 ++nready;
1430 while (1)
1432 while (readylist == NULL)
1433 pthread_cond_wait (&readylist_cond, &readylist_lock);
1435 struct fdlist *it = readylist->next;
1436 if (readylist->next == readylist)
1437 /* Just one entry on the list. */
1438 readylist = NULL;
1439 else
1440 readylist->next = it->next;
1442 /* Extract the information and mark the record ready to be used
1443 again. */
1444 int fd = it->fd;
1445 it->next = NULL;
1447 /* One more thread available. */
1448 --nready;
1450 /* We are done with the list. */
1451 pthread_mutex_unlock (&readylist_lock);
1453 /* We do not want to block on a short read or so. */
1454 int fl = fcntl (fd, F_GETFL);
1455 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1456 goto close_and_out;
1458 /* Now read the request. */
1459 request_header req;
1460 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1461 != sizeof (req), 0))
1463 /* We failed to read data. Note that this also might mean we
1464 failed because we would have blocked. */
1465 if (debug_level > 0)
1466 dbg_log (_("short read while reading request: %s"),
1467 strerror_r (errno, buf, sizeof (buf)));
1468 goto close_and_out;
1471 /* Check whether this is a valid request type. */
1472 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1473 goto close_and_out;
1475 /* Some systems have no SO_PEERCRED implementation. They don't
1476 care about security so we don't as well. */
1477 uid_t uid = -1;
1478 #ifdef SO_PEERCRED
1479 pid_t pid = 0;
1481 if (__builtin_expect (debug_level > 0, 0))
1483 struct ucred caller;
1484 socklen_t optlen = sizeof (caller);
1486 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1487 pid = caller.pid;
1489 #endif
1491 /* It should not be possible to crash the nscd with a silly
1492 request (i.e., a terribly large key). We limit the size to 1kb. */
1493 if (__builtin_expect (req.key_len, 1) < 0
1494 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1496 if (debug_level > 0)
1497 dbg_log (_("key length in request too long: %d"), req.key_len);
1499 else
1501 /* Get the key. */
1502 char keybuf[MAXKEYLEN];
1504 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1505 req.key_len))
1506 != req.key_len, 0))
1508 /* Again, this can also mean we would have blocked. */
1509 if (debug_level > 0)
1510 dbg_log (_("short read while reading request key: %s"),
1511 strerror_r (errno, buf, sizeof (buf)));
1512 goto close_and_out;
1515 if (__builtin_expect (debug_level, 0) > 0)
1517 #ifdef SO_PEERCRED
1518 if (pid != 0)
1519 dbg_log (_("\
1520 handle_request: request received (Version = %d) from PID %ld"),
1521 req.version, (long int) pid);
1522 else
1523 #endif
1524 dbg_log (_("\
1525 handle_request: request received (Version = %d)"), req.version);
1528 /* Phew, we got all the data, now process it. */
1529 handle_request (fd, &req, keybuf, uid);
1532 close_and_out:
1533 /* We are done. */
1534 close (fd);
1536 /* Re-locking. */
1537 pthread_mutex_lock (&readylist_lock);
1539 /* One more thread available. */
1540 ++nready;
1545 static unsigned int nconns;
1547 static void
1548 fd_ready (int fd)
1550 pthread_mutex_lock (&readylist_lock);
1552 /* Find an empty entry in FDLIST. */
1553 size_t inner;
1554 for (inner = 0; inner < nconns; ++inner)
1555 if (fdlist[inner].next == NULL)
1556 break;
1557 assert (inner < nconns);
1559 fdlist[inner].fd = fd;
1561 if (readylist == NULL)
1562 readylist = fdlist[inner].next = &fdlist[inner];
1563 else
1565 fdlist[inner].next = readylist->next;
1566 readylist = readylist->next = &fdlist[inner];
1569 bool do_signal = true;
1570 if (__builtin_expect (nready == 0, 0))
1572 ++client_queued;
1573 do_signal = false;
1575 /* Try to start another thread to help out. */
1576 pthread_t th;
1577 if (nthreads < max_nthreads
1578 && pthread_create (&th, &attr, nscd_run_worker,
1579 (void *) (long int) nthreads) == 0)
1581 /* We got another thread. */
1582 ++nthreads;
1583 /* The new thread might need a kick. */
1584 do_signal = true;
1589 pthread_mutex_unlock (&readylist_lock);
1591 /* Tell one of the worker threads there is work to do. */
1592 if (do_signal)
1593 pthread_cond_signal (&readylist_cond);
1597 /* Check whether restarting should happen. */
1598 static inline int
1599 restart_p (time_t now)
1601 return (paranoia && readylist == NULL && nready == nthreads
1602 && now >= restart_time);
1606 /* Array for times a connection was accepted. */
1607 static time_t *starttime;
1610 static void
1611 __attribute__ ((__noreturn__))
1612 main_loop_poll (void)
1614 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1615 * sizeof (conns[0]));
1617 conns[0].fd = sock;
1618 conns[0].events = POLLRDNORM;
1619 size_t nused = 1;
1620 size_t firstfree = 1;
1622 while (1)
1624 /* Wait for any event. We wait at most a couple of seconds so
1625 that we can check whether we should close any of the accepted
1626 connections since we have not received a request. */
1627 #define MAX_ACCEPT_TIMEOUT 30
1628 #define MIN_ACCEPT_TIMEOUT 5
1629 #define MAIN_THREAD_TIMEOUT \
1630 (MAX_ACCEPT_TIMEOUT * 1000 \
1631 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1633 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1635 time_t now = time (NULL);
1637 /* If there is a descriptor ready for reading or there is a new
1638 connection, process this now. */
1639 if (n > 0)
1641 if (conns[0].revents != 0)
1643 /* We have a new incoming connection. Accept the connection. */
1644 int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
1646 /* Use the descriptor if we have not reached the limit. */
1647 if (fd >= 0)
1649 if (firstfree < nconns)
1651 conns[firstfree].fd = fd;
1652 conns[firstfree].events = POLLRDNORM;
1653 starttime[firstfree] = now;
1654 if (firstfree >= nused)
1655 nused = firstfree + 1;
1658 ++firstfree;
1659 while (firstfree < nused && conns[firstfree].fd != -1);
1661 else
1662 /* We cannot use the connection so close it. */
1663 close (fd);
1666 --n;
1669 for (size_t cnt = 1; cnt < nused && n > 0; ++cnt)
1670 if (conns[cnt].revents != 0)
1672 fd_ready (conns[cnt].fd);
1674 /* Clean up the CONNS array. */
1675 conns[cnt].fd = -1;
1676 if (cnt < firstfree)
1677 firstfree = cnt;
1678 if (cnt == nused - 1)
1680 --nused;
1681 while (conns[nused - 1].fd == -1);
1683 --n;
1687 /* Now find entries which have timed out. */
1688 assert (nused > 0);
1690 /* We make the timeout length depend on the number of file
1691 descriptors currently used. */
1692 #define ACCEPT_TIMEOUT \
1693 (MAX_ACCEPT_TIMEOUT \
1694 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
1695 time_t laststart = now - ACCEPT_TIMEOUT;
1697 for (size_t cnt = nused - 1; cnt > 0; --cnt)
1699 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
1701 /* Remove the entry, it timed out. */
1702 (void) close (conns[cnt].fd);
1703 conns[cnt].fd = -1;
1705 if (cnt < firstfree)
1706 firstfree = cnt;
1707 if (cnt == nused - 1)
1709 --nused;
1710 while (conns[nused - 1].fd == -1);
1714 if (restart_p (now))
1715 restart ();
#ifdef HAVE_EPOLL
/* Main loop of the main thread, based on epoll.  EFD is the epoll
   descriptor to use.  New connections are accepted from SOCK and
   registered with EFD, descriptors with pending data are passed on to
   the worker threads, and connections on which nothing arrived in time
   are closed.

   The function only returns if the listening socket cannot be
   registered with EFD; the caller is then expected to use a different
   main loop.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  /* Number of descriptors registered with EFD, including SOCK.  The
     timeout macros depend on this value.  */
  int nused = 1;
  /* Highest descriptor which might have a STARTTIME entry set.  */
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));

            if (fd >= 0)
              {
                /* Try to add the new descriptor.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
        else
          {
            /* Remove the descriptor from the epoll descriptor.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (cnt != sock && starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;

            /* The descriptor is not registered anymore.  Without this
               the count would only ever grow for connections which
               time out and distort the timeouts derived from it.  */
            --nused;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
1814 /* Start all the threads we want. The initial process is thread no. 1. */
1815 void
1816 start_threads (void)
1818 /* Initialize the conditional variable we will use. The only
1819 non-standard attribute we might use is the clock selection. */
1820 pthread_condattr_t condattr;
1821 pthread_condattr_init (&condattr);
1823 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
1824 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
1825 /* Determine whether the monotonous clock is available. */
1826 struct timespec dummy;
1827 # if _POSIX_MONOTONIC_CLOCK == 0
1828 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
1829 # endif
1830 # if _POSIX_CLOCK_SELECTION == 0
1831 if (sysconf (_SC_CLOCK_SELECTION) > 0)
1832 # endif
1833 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
1834 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
1835 timeout_clock = CLOCK_MONOTONIC;
1836 #endif
1838 /* Create the attribute for the threads. They are all created
1839 detached. */
1840 pthread_attr_init (&attr);
1841 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
1842 /* Use 1MB stacks, twice as much for 64-bit architectures. */
1843 pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
1845 /* We allow less than LASTDB threads only for debugging. */
1846 if (debug_level == 0)
1847 nthreads = MAX (nthreads, lastdb);
1849 /* Create the threads which prune the databases. */
1850 // XXX Ideally this work would be done by some of the worker threads.
1851 // XXX But this is problematic since we would need to be able to wake
1852 // XXX them up explicitly as well as part of the group handling the
1853 // XXX ready-list. This requires an operation where we can wait on
1854 // XXX two conditional variables at the same time. This operation
1855 // XXX does not exist (yet).
1856 for (long int i = 0; i < lastdb; ++i)
1858 /* Initialize the conditional variable. */
1859 if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
1861 dbg_log (_("could not initialize conditional variable"));
1862 exit (1);
1865 pthread_t th;
1866 if (dbs[i].enabled
1867 && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
1869 dbg_log (_("could not start clean-up thread; terminating"));
1870 exit (1);
1874 pthread_condattr_destroy (&condattr);
1876 for (long int i = 0; i < nthreads; ++i)
1878 pthread_t th;
1879 if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
1881 if (i == 0)
1883 dbg_log (_("could not start any worker thread; terminating"));
1884 exit (1);
1887 break;
1891 /* Determine how much room for descriptors we should initially
1892 allocate. This might need to change later if we cap the number
1893 with MAXCONN. */
1894 const long int nfds = sysconf (_SC_OPEN_MAX);
1895 #define MINCONN 32
1896 #define MAXCONN 16384
1897 if (nfds == -1 || nfds > MAXCONN)
1898 nconns = MAXCONN;
1899 else if (nfds < MINCONN)
1900 nconns = MINCONN;
1901 else
1902 nconns = nfds;
1904 /* We need memory to pass descriptors on to the worker threads. */
1905 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
1906 /* Array to keep track when connection was accepted. */
1907 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
1909 /* In the main thread we execute the loop which handles incoming
1910 connections. */
1911 #ifdef HAVE_EPOLL
1912 int efd = epoll_create (100);
1913 if (efd != -1)
1915 main_loop_epoll (efd);
1916 close (efd);
1918 #endif
1920 main_loop_poll ();
1924 /* Look up the uid, gid, and supplementary groups to run nscd as. When
1925 this function is called, we are not listening on the nscd socket yet so
1926 we can just use the ordinary lookup functions without causing a lockup */
1927 static void
1928 begin_drop_privileges (void)
1930 struct passwd *pwd = getpwnam (server_user);
1932 if (pwd == NULL)
1934 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1935 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
1936 server_user);
1939 server_uid = pwd->pw_uid;
1940 server_gid = pwd->pw_gid;
1942 /* Save the old UID/GID if we have to change back. */
1943 if (paranoia)
1945 old_uid = getuid ();
1946 old_gid = getgid ();
1949 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
1951 /* This really must never happen. */
1952 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1953 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
1956 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
1958 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
1959 == -1)
1961 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1962 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
1967 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
1968 run nscd as the user specified in the configuration file. */
1969 static void
1970 finish_drop_privileges (void)
1972 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
1973 /* We need to preserve the capabilities to connect to the audit daemon. */
1974 cap_t new_caps = preserve_capabilities ();
1975 #endif
1977 if (setgroups (server_ngroups, server_groups) == -1)
1979 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1980 error (EXIT_FAILURE, errno, _("setgroups failed"));
1983 int res;
1984 if (paranoia)
1985 res = setresgid (server_gid, server_gid, old_gid);
1986 else
1987 res = setgid (server_gid);
1988 if (res == -1)
1990 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1991 perror ("setgid");
1992 exit (4);
1995 if (paranoia)
1996 res = setresuid (server_uid, server_uid, old_uid);
1997 else
1998 res = setuid (server_uid);
1999 if (res == -1)
2001 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2002 perror ("setuid");
2003 exit (4);
2006 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2007 /* Remove the temporary capabilities. */
2008 install_real_capabilities (new_caps);
2009 #endif