[BZ #5112]
[glibc/pb-stable.git] / nscd / connections.c
blob8140e96821a8a81ba57b27f8fb9bf72bd9c391fa
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2003, 2004, 2005, 2006, 2007, 2008
3 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published
9 by the Free Software Foundation; version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software Foundation,
19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
21 #include <alloca.h>
22 #include <assert.h>
23 #include <atomic.h>
24 #include <error.h>
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <grp.h>
28 #include <libintl.h>
29 #include <pthread.h>
30 #include <pwd.h>
31 #include <resolv.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <unistd.h>
35 #include <arpa/inet.h>
36 #ifdef HAVE_EPOLL
37 # include <sys/epoll.h>
38 #endif
39 #include <sys/mman.h>
40 #include <sys/param.h>
41 #include <sys/poll.h>
42 #ifdef HAVE_SENDFILE
43 # include <sys/sendfile.h>
44 #endif
45 #include <sys/socket.h>
46 #include <sys/stat.h>
47 #include <sys/un.h>
49 #include "nscd.h"
50 #include "dbg_log.h"
51 #include "selinux.h"
52 #ifdef HAVE_SENDFILE
53 # include <kernel-features.h>
54 #endif
/* Allocation wrappers defined elsewhere; per the original comment they
   perform error checking for the corresponding standard functions.  */
extern void *xmalloc (size_t n);
extern void *xcalloc (size_t n, size_t s);
extern void *xrealloc (void *o, size_t n);

/* State used to run nscd as an unprivileged user.  */
const char *server_user;
static uid_t server_uid;
static gid_t server_gid;
const char *stat_user;
uid_t stat_uid;
static gid_t *server_groups;
#ifndef NGROUPS
# define NGROUPS 32
#endif
static int server_ngroups;

static pthread_attr_t attr;

/* Two-step privilege drop; nscd_init calls the first before the
   databases and the socket are set up and the second afterwards.  */
static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
79 /* Map request type to a string. */
80 const char *const serv2str[LASTREQ] =
82 [GETPWBYNAME] = "GETPWBYNAME",
83 [GETPWBYUID] = "GETPWBYUID",
84 [GETGRBYNAME] = "GETGRBYNAME",
85 [GETGRBYGID] = "GETGRBYGID",
86 [GETHOSTBYNAME] = "GETHOSTBYNAME",
87 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
88 [GETHOSTBYADDR] = "GETHOSTBYADDR",
89 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
90 [SHUTDOWN] = "SHUTDOWN",
91 [GETSTAT] = "GETSTAT",
92 [INVALIDATE] = "INVALIDATE",
93 [GETFDPW] = "GETFDPW",
94 [GETFDGR] = "GETFDGR",
95 [GETFDHST] = "GETFDHST",
96 [GETAI] = "GETAI",
97 [INITGROUPS] = "INITGROUPS",
98 [GETSERVBYNAME] = "GETSERVBYNAME",
99 [GETSERVBYPORT] = "GETSERVBYPORT",
100 [GETFDSERV] = "GETFDSERV"
103 /* The control data structures for the services. */
104 struct database_dyn dbs[lastdb] =
106 [pwddb] = {
107 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
108 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
109 .enabled = 0,
110 .check_file = 1,
111 .persistent = 0,
112 .propagate = 1,
113 .shared = 0,
114 .max_db_size = DEFAULT_MAX_DB_SIZE,
115 .reset_res = 0,
116 .filename = "/etc/passwd",
117 .db_filename = _PATH_NSCD_PASSWD_DB,
118 .disabled_iov = &pwd_iov_disabled,
119 .postimeout = 3600,
120 .negtimeout = 20,
121 .wr_fd = -1,
122 .ro_fd = -1,
123 .mmap_used = false
125 [grpdb] = {
126 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
127 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
128 .enabled = 0,
129 .check_file = 1,
130 .persistent = 0,
131 .propagate = 1,
132 .shared = 0,
133 .max_db_size = DEFAULT_MAX_DB_SIZE,
134 .reset_res = 0,
135 .filename = "/etc/group",
136 .db_filename = _PATH_NSCD_GROUP_DB,
137 .disabled_iov = &grp_iov_disabled,
138 .postimeout = 3600,
139 .negtimeout = 60,
140 .wr_fd = -1,
141 .ro_fd = -1,
142 .mmap_used = false
144 [hstdb] = {
145 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
146 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
147 .enabled = 0,
148 .check_file = 1,
149 .persistent = 0,
150 .propagate = 0, /* Not used. */
151 .shared = 0,
152 .max_db_size = DEFAULT_MAX_DB_SIZE,
153 .reset_res = 1,
154 .filename = "/etc/hosts",
155 .db_filename = _PATH_NSCD_HOSTS_DB,
156 .disabled_iov = &hst_iov_disabled,
157 .postimeout = 3600,
158 .negtimeout = 20,
159 .wr_fd = -1,
160 .ro_fd = -1,
161 .mmap_used = false
163 [servdb] = {
164 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
165 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
166 .enabled = 0,
167 .check_file = 1,
168 .persistent = 0,
169 .propagate = 0, /* Not used. */
170 .shared = 0,
171 .max_db_size = DEFAULT_MAX_DB_SIZE,
172 .reset_res = 0,
173 .filename = "/etc/services",
174 .db_filename = _PATH_NSCD_SERVICES_DB,
175 .disabled_iov = &serv_iov_disabled,
176 .postimeout = 28800,
177 .negtimeout = 20,
178 .wr_fd = -1,
179 .ro_fd = -1,
180 .mmap_used = false
185 /* Mapping of request type to database. */
186 static struct
188 bool data_request;
189 struct database_dyn *db;
190 } const reqinfo[LASTREQ] =
192 [GETPWBYNAME] = { true, &dbs[pwddb] },
193 [GETPWBYUID] = { true, &dbs[pwddb] },
194 [GETGRBYNAME] = { true, &dbs[grpdb] },
195 [GETGRBYGID] = { true, &dbs[grpdb] },
196 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
197 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
198 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
199 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
200 [SHUTDOWN] = { false, NULL },
201 [GETSTAT] = { false, NULL },
202 [SHUTDOWN] = { false, NULL },
203 [GETFDPW] = { false, &dbs[pwddb] },
204 [GETFDGR] = { false, &dbs[grpdb] },
205 [GETFDHST] = { false, &dbs[hstdb] },
206 [GETAI] = { true, &dbs[hstdb] },
207 [INITGROUPS] = { true, &dbs[grpdb] },
208 [GETSERVBYNAME] = { true, &dbs[servdb] },
209 [GETSERVBYPORT] = { true, &dbs[servdb] },
210 [GETFDSERV] = { false, &dbs[servdb] }
/* Initial number of threads to use.  -1 means "not configured";
   nscd_init then falls back to a default.  */
int nthreads = -1;
/* Maximum number of threads to use.  */
int max_nthreads = 32;

/* Socket for incoming connections.  */
static int sock;

/* Number of times clients had to wait.  */
unsigned long int client_queued;
/* Send all LEN bytes starting at BUF over the socket FD, restarting
   after interruptions and continuing after partial sends.

   Returns the number of bytes actually sent (which is less than LEN
   only if send() reported 0), or the negative send() result if a send
   failed.  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cursor = buf;
  size_t pending = len;
  ssize_t sent;

  for (;;)
    {
      sent = TEMP_FAILURE_RETRY (send (fd, cursor, pending, MSG_NOSIGNAL));
      if (sent <= 0)
        break;

      cursor += sent;
      pending -= sent;
      if (pending == 0)
        break;
    }

  if (sent < 0)
    return sent;

  return len - pending;
}
#ifdef HAVE_SENDFILE
/* Transfer LEN bytes starting at offset OFF of FROMFD to TOFD with
   sendfile(), restarting after interruptions and continuing after
   partial transfers.

   Returns the number of bytes transferred, or the negative sendfile()
   result if a call failed.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t remaining = len;
  ssize_t moved;

  for (;;)
    {
      /* sendfile advances OFF itself, so only the count is tracked.  */
      moved = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, remaining));
      if (moved <= 0)
        break;

      remaining -= moved;
      if (remaining <= 0)
        break;
    }

  if (moved < 0)
    return moved;

  return len - remaining;
}
#endif
/* Marker values stored in the use map built by verify_persistent_db.
   The low bits say what kind of object occupies a byte, the high bits
   flag the first and last byte of an object and whether a "first"
   hash entry references it.  */
enum usekey
  {
    use_not = 0,

    /* The following three are not really used, they are symbolic
       constants that get or'ed into the kinds below.  */
    use_first = 16,
    use_begin = 32,
    use_end = 64,

    /* Hash entry.  */
    use_he = 1,
    use_he_begin = use_he | use_begin,
    use_he_end = use_he | use_end,

#if SEPARATE_KEY
    /* Key stored outside of its data record.  */
    use_key = 2,
    use_key_begin = use_key | use_begin,
    use_key_end = use_key | use_end,
    use_key_first = use_key_begin | use_first,
#endif

    /* Data record.  */
    use_data = 3,
    use_data_begin = use_data | use_begin,
    use_data_end = use_data | use_end,
    use_data_first = use_data_begin | use_first
  };
288 static int
289 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
290 enum usekey use, ref_t start, size_t len)
292 assert (len >= 2);
294 if (start > first_free || start + len > first_free
295 || (start & BLOCK_ALIGN_M1))
296 return 0;
298 if (usemap[start] == use_not)
300 /* Add the start marker. */
301 usemap[start] = use | use_begin;
302 use &= ~use_first;
304 while (--len > 0)
305 if (usemap[++start] != use_not)
306 return 0;
307 else
308 usemap[start] = use;
310 /* Add the end marker. */
311 usemap[start] = use | use_end;
313 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
315 /* Hash entries can't be shared. */
316 if (use == use_he)
317 return 0;
319 usemap[start] |= (use & use_first);
320 use &= ~use_first;
322 while (--len > 1)
323 if (usemap[++start] != use)
324 return 0;
326 if (usemap[++start] != (use | use_end))
327 return 0;
329 else
330 /* Points to a wrong object or somewhere in the middle. */
331 return 0;
333 return 1;
337 /* Verify data in persistent database. */
338 static int
339 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
341 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb);
343 time_t now = time (NULL);
345 struct database_pers_head *head = mem;
346 struct database_pers_head head_copy = *head;
348 /* Check that the header that was read matches the head in the database. */
349 if (readhead != NULL && memcmp (head, readhead, sizeof (*head)) != 0)
350 return 0;
352 /* First some easy tests: make sure the database header is sane. */
353 if (head->version != DB_VERSION
354 || head->header_size != sizeof (*head)
355 /* We allow a timestamp to be one hour ahead of the current time.
356 This should cover daylight saving time changes. */
357 || head->timestamp > now + 60 * 60 + 60
358 || (head->gc_cycle & 1)
359 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
360 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
361 || head->first_free < 0
362 || head->first_free > head->data_size
363 || (head->first_free & BLOCK_ALIGN_M1) != 0
364 || head->maxnentries < 0
365 || head->maxnsearched < 0)
366 return 0;
368 uint8_t *usemap = calloc (head->first_free, 1);
369 if (usemap == NULL)
370 return 0;
372 const char *data = (char *) &head->array[roundup (head->module,
373 ALIGN / sizeof (ref_t))];
375 nscd_ssize_t he_cnt = 0;
376 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
378 ref_t trail = head->array[cnt];
379 ref_t work = trail;
380 int tick = 0;
382 while (work != ENDREF)
384 if (! check_use (data, head->first_free, usemap, use_he, work,
385 sizeof (struct hashentry)))
386 goto fail;
388 /* Now we know we can dereference the record. */
389 struct hashentry *here = (struct hashentry *) (data + work);
391 ++he_cnt;
393 /* Make sure the record is for this type of service. */
394 if (here->type >= LASTREQ
395 || reqinfo[here->type].db != &dbs[dbnr])
396 goto fail;
398 /* Validate boolean field value. */
399 if (here->first != false && here->first != true)
400 goto fail;
402 if (here->len < 0)
403 goto fail;
405 /* Now the data. */
406 if (here->packet < 0
407 || here->packet > head->first_free
408 || here->packet + sizeof (struct datahead) > head->first_free)
409 goto fail;
411 struct datahead *dh = (struct datahead *) (data + here->packet);
413 if (! check_use (data, head->first_free, usemap,
414 use_data | (here->first ? use_first : 0),
415 here->packet, dh->allocsize))
416 goto fail;
418 if (dh->allocsize < sizeof (struct datahead)
419 || dh->recsize > dh->allocsize
420 || (dh->notfound != false && dh->notfound != true)
421 || (dh->usable != false && dh->usable != true))
422 goto fail;
424 if (here->key < here->packet + sizeof (struct datahead)
425 || here->key > here->packet + dh->allocsize
426 || here->key + here->len > here->packet + dh->allocsize)
428 #if SEPARATE_KEY
429 /* If keys can appear outside of data, this should be done
430 instead. But gc doesn't mark the data in that case. */
431 if (! check_use (data, head->first_free, usemap,
432 use_key | (here->first ? use_first : 0),
433 here->key, here->len))
434 #endif
435 goto fail;
438 work = here->next;
440 if (work == trail)
441 /* A circular list, this must not happen. */
442 goto fail;
443 if (tick)
444 trail = ((struct hashentry *) (data + trail))->next;
445 tick = 1 - tick;
449 if (he_cnt != head->nentries)
450 goto fail;
452 /* See if all data and keys had at least one reference from
453 he->first == true hashentry. */
454 for (ref_t idx = 0; idx < head->first_free; ++idx)
456 #if SEPARATE_KEY
457 if (usemap[idx] == use_key_begin)
458 goto fail;
459 #endif
460 if (usemap[idx] == use_data_begin)
461 goto fail;
464 /* Finally, make sure the database hasn't changed since the first test. */
465 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
466 goto fail;
468 free (usemap);
469 return 1;
471 fail:
472 free (usemap);
473 return 0;
/* Extra flag for the open()/mkostemp() calls below: O_CLOEXEC where the
   headers provide it, nothing otherwise.  */
477 #ifdef O_CLOEXEC
478 # define EXTRA_O_FLAGS O_CLOEXEC
479 #else
480 # define EXTRA_O_FLAGS 0
481 #endif
484 /* Initialize database information structures.

   For every enabled database this tries, in order:
   1. if the database is persistent, to open, map and verify the
      existing file db_filename;
   2. failing that, to create a new file (db_filename, or an anonymous
      temporary file for non-persistent databases), initialize it and
      map it;
   3. failing that, to allocate the hash table and data area with
      xmalloc, in which case sharing is switched off.
   Afterwards the listening AF_UNIX socket is created, bound to
   _PATH_NSCDSOCKET, made non-blocking and put into listening state.
   Unrecoverable socket errors terminate the process with exit().  */
485 void
486 nscd_init (void)
488 /* Look up unprivileged uid/gid/groups before we start listening on the
489 socket */
490 if (server_user != NULL)
491 begin_drop_privileges ();
493 if (nthreads == -1)
494 /* No configuration for this value, assume a default. */
495 nthreads = 4;
497 for (size_t cnt = 0; cnt < lastdb; ++cnt)
498 if (dbs[cnt].enabled)
500 pthread_rwlock_init (&dbs[cnt].lock, NULL);
501 pthread_mutex_init (&dbs[cnt].memlock, NULL);
/* Step 1: try to reuse an existing persistent database file.  */
503 if (dbs[cnt].persistent)
505 /* Try to open the appropriate file on disk. */
506 int fd = open (dbs[cnt].db_filename, O_RDWR | EXTRA_O_FLAGS);
507 if (fd != -1)
509 struct stat64 st;
510 void *mem;
511 size_t total;
512 struct database_pers_head head;
513 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
514 sizeof (head)));
515 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
/* NOTE(review): fail_db is also the target of the goto taken when mmap
   fails below.  After a short read errno is not set by read(), so the
   strerror text may be unrelated.  */
517 fail_db:
518 dbg_log (_("invalid persistent database file \"%s\": %s"),
519 dbs[cnt].db_filename, strerror (errno));
520 unlink (dbs[cnt].db_filename);
522 else if (head.module == 0 && head.data_size == 0)
524 /* The file has been created, but the head has not been
525 initialized yet. Remove the old file. */
526 unlink (dbs[cnt].db_filename);
528 else if (head.header_size != (int) sizeof (head))
530 dbg_log (_("invalid persistent database file \"%s\": %s"),
531 dbs[cnt].db_filename,
532 _("header size does not match"));
533 unlink (dbs[cnt].db_filename);
/* The size implied by the header must fit into the file; the second
   comparison rejects a sum that wrapped around.  */
535 else if ((total = (sizeof (head)
536 + roundup (head.module * sizeof (ref_t),
537 ALIGN)
538 + head.data_size))
539 > st.st_size
540 || total < sizeof (head))
542 dbg_log (_("invalid persistent database file \"%s\": %s"),
543 dbs[cnt].db_filename,
544 _("file size does not match"));
545 unlink (dbs[cnt].db_filename);
547 /* Note we map with the maximum size allowed for the
548 database. This is likely much larger than the
549 actual file size. This is OK on most OSes since
550 extensions of the underlying file will
551 automatically translate more pages available for
552 memory access. */
553 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
554 PROT_READ | PROT_WRITE,
555 MAP_SHARED, fd, 0))
556 == MAP_FAILED)
557 goto fail_db;
558 else if (!verify_persistent_db (mem, &head, cnt))
/* NOTE(review): the mapping was created with max_db_size but only
   `total' bytes are unmapped here.  */
560 munmap (mem, total);
561 dbg_log (_("invalid persistent database file \"%s\": %s"),
562 dbs[cnt].db_filename,
563 _("verification failed"));
564 unlink (dbs[cnt].db_filename);
566 else
568 /* Success. We have the database. */
569 dbs[cnt].head = mem;
570 dbs[cnt].memsize = total;
571 dbs[cnt].data = (char *)
572 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
573 ALIGN / sizeof (ref_t))];
574 dbs[cnt].mmap_used = true;
576 if (dbs[cnt].suggested_module > head.module)
577 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
578 dbnames[cnt]);
/* The database keeps the descriptor; setting fd to -1 stops the
   cleanup below from closing it.  */
580 dbs[cnt].wr_fd = fd;
581 fd = -1;
582 /* We also need a read-only descriptor. */
583 if (dbs[cnt].shared)
585 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
586 O_RDONLY | EXTRA_O_FLAGS);
587 if (dbs[cnt].ro_fd == -1)
588 dbg_log (_("\
589 cannot create read-only descriptor for \"%s\"; no mmap"),
590 dbs[cnt].db_filename);
593 // XXX Shall we test whether the descriptors actually
594 // XXX point to the same file?
597 /* Close the file descriptors in case something went
598 wrong in which case the variable have not been
599 assigned -1. */
600 if (fd != -1)
601 close (fd);
/* Step 2: nothing usable was loaded, create a fresh file-backed
   database.  */
605 if (dbs[cnt].head == NULL)
607 /* No database loaded. Allocate the data structure,
608 possibly on disk. */
609 struct database_pers_head head;
610 size_t total = (sizeof (head)
611 + roundup (dbs[cnt].suggested_module
612 * sizeof (ref_t), ALIGN)
613 + (dbs[cnt].suggested_module
614 * DEFAULT_DATASIZE_PER_BUCKET));
616 /* Try to create the database. If we do not need a
617 persistent database create a temporary file. */
618 int fd;
619 int ro_fd = -1;
620 if (dbs[cnt].persistent)
622 fd = open (dbs[cnt].db_filename,
623 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | EXTRA_O_FLAGS,
624 S_IRUSR | S_IWUSR);
625 if (fd != -1 && dbs[cnt].shared)
626 ro_fd = open (dbs[cnt].db_filename,
627 O_RDONLY | EXTRA_O_FLAGS);
629 else
631 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
632 fd = mkostemp (fname, EXTRA_O_FLAGS);
634 /* We do not need the file name anymore after we
635 opened another file descriptor in read-only mode. */
636 if (fd != -1)
638 if (dbs[cnt].shared)
639 ro_fd = open (fname, O_RDONLY | EXTRA_O_FLAGS);
641 unlink (fname);
645 if (fd == -1)
/* EEXIST can only come from the O_EXCL open of the persistent file:
   the file is still there although step 1 did not accept it.  */
647 if (errno == EEXIST)
649 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
650 dbnames[cnt], dbs[cnt].db_filename);
651 // XXX Correct way to terminate?
652 exit (1);
655 if (dbs[cnt].persistent)
656 dbg_log (_("cannot create %s; no persistent database used"),
657 dbs[cnt].db_filename);
658 else
659 dbg_log (_("cannot create %s; no sharing possible"),
660 dbs[cnt].db_filename);
662 dbs[cnt].persistent = 0;
663 // XXX remember: no mmap
665 else
667 /* Tell the user if we could not create the read-only
668 descriptor. */
669 if (ro_fd == -1 && dbs[cnt].shared)
670 dbg_log (_("\
671 cannot create read-only descriptor for \"%s\"; no mmap"),
672 dbs[cnt].db_filename);
674 /* Before we create the header, initialize the hash
675 table. So that if we get interrupted if writing
676 the header we can recognize a partially initialized
677 database. */
678 size_t ps = sysconf (_SC_PAGESIZE);
679 char tmpbuf[ps];
680 assert (~ENDREF == 0);
681 memset (tmpbuf, '\xff', ps);
/* The table is written in three parts: up to the next page boundary,
   whole pages, and the remainder.  */
683 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
684 off_t offset = sizeof (head);
686 size_t towrite;
687 if (offset % ps != 0)
689 towrite = MIN (remaining, ps - (offset % ps));
690 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
691 goto write_fail;
692 offset += towrite;
693 remaining -= towrite;
696 while (remaining > ps)
/* NOTE(review): unlike the two other pwrite calls this one only checks
   for -1, so a short write of a full page would go unnoticed.  */
698 if (pwrite (fd, tmpbuf, ps, offset) == -1)
699 goto write_fail;
700 offset += ps;
701 remaining -= ps;
704 if (remaining > 0
705 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
706 goto write_fail;
708 /* Create the header of the file. */
709 struct database_pers_head head =
711 .version = DB_VERSION,
712 .header_size = sizeof (head),
713 .module = dbs[cnt].suggested_module,
714 .data_size = (dbs[cnt].suggested_module
715 * DEFAULT_DATASIZE_PER_BUCKET),
716 .first_free = 0
718 void *mem;
720 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
721 != sizeof (head))
722 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
723 != 0)
724 || (mem = mmap (NULL, dbs[cnt].max_db_size,
725 PROT_READ | PROT_WRITE,
726 MAP_SHARED, fd, 0)) == MAP_FAILED)
/* NOTE(review): this path is also reached for the temporary-file case,
   yet it unlinks db_filename; and unlink() runs before errno is turned
   into text for the log message.  */
728 write_fail:
729 unlink (dbs[cnt].db_filename);
730 dbg_log (_("cannot write to database file %s: %s"),
731 dbs[cnt].db_filename, strerror (errno));
732 dbs[cnt].persistent = 0;
734 else
736 /* Success. */
737 dbs[cnt].head = mem;
738 dbs[cnt].data = (char *)
739 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
740 ALIGN / sizeof (ref_t))];
741 dbs[cnt].memsize = total;
742 dbs[cnt].mmap_used = true;
744 /* Remember the descriptors. */
745 dbs[cnt].wr_fd = fd;
746 dbs[cnt].ro_fd = ro_fd;
747 fd = -1;
748 ro_fd = -1;
/* Descriptors that were not handed over to the database are closed
   here.  */
751 if (fd != -1)
752 close (fd);
753 if (ro_fd != -1)
754 close (ro_fd);
758 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
759 /* We do not check here whether the O_CLOEXEC provided to the
760 open call was successful or not. The two fcntl calls are
761 only performed once each per process start-up and therefore
762 is not noticeable at all. */
763 if (paranoia
764 && ((dbs[cnt].wr_fd != -1
765 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
766 || (dbs[cnt].ro_fd != -1
767 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
769 dbg_log (_("\
770 cannot set socket to close on exec: %s; disabling paranoia mode"),
771 strerror (errno));
772 paranoia = 0;
774 #endif
/* Step 3: fall back to plain heap memory.  */
776 if (dbs[cnt].head == NULL)
778 /* We do not use the persistent database. Just
779 create an in-memory data structure. */
780 assert (! dbs[cnt].persistent);
782 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
783 + (dbs[cnt].suggested_module
784 * sizeof (ref_t)));
785 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
786 assert (~ENDREF == 0);
787 memset (dbs[cnt].head->array, '\xff',
788 dbs[cnt].suggested_module * sizeof (ref_t));
789 dbs[cnt].head->module = dbs[cnt].suggested_module;
790 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
791 * dbs[cnt].head->module);
792 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
793 dbs[cnt].head->first_free = 0;
795 dbs[cnt].shared = 0;
796 assert (dbs[cnt].ro_fd == -1);
799 if (dbs[cnt].check_file)
801 /* We need the modification date of the file. */
802 struct stat64 st;
804 if (stat64 (dbs[cnt].filename, &st) < 0)
806 /* We cannot stat() the file, disable file checking. */
807 dbg_log (_("cannot stat() file `%s': %s"),
808 dbs[cnt].filename, strerror (errno));
809 dbs[cnt].check_file = 0;
811 else
812 dbs[cnt].file_mtime = st.st_mtime;
816 /* Create the socket. */
817 sock = socket (AF_UNIX, SOCK_STREAM, 0);
818 if (sock < 0)
820 dbg_log (_("cannot open socket: %s"), strerror (errno));
821 exit (errno == EACCES ? 4 : 1);
823 /* Bind a name to the socket. */
824 struct sockaddr_un sock_addr;
825 sock_addr.sun_family = AF_UNIX;
826 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
827 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
829 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
830 exit (errno == EACCES ? 4 : 1);
833 /* We don't want to get stuck on accept. */
834 int fl = fcntl (sock, F_GETFL);
835 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
837 dbg_log (_("cannot change socket to nonblocking mode: %s"),
838 strerror (errno));
839 exit (1);
842 /* The descriptor needs to be closed on exec. */
843 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
845 dbg_log (_("cannot set socket to close on exec: %s"),
846 strerror (errno));
847 exit (1);
850 /* Set permissions for the socket. */
/* NOTE(review): the result of chmod is not checked.  */
851 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
853 /* Set the socket up to accept connections. */
854 if (listen (sock, SOMAXCONN) < 0)
856 dbg_log (_("cannot enable socket to accept connections: %s"),
857 strerror (errno));
858 exit (1);
861 /* Change to unprivileged uid/gid/groups if specified in config file */
862 if (server_user != NULL)
863 finish_drop_privileges ();
867 /* Close the connections. */
868 void
869 close_sockets (void)
871 close (sock);
875 static void
876 invalidate_cache (char *key, int fd)
878 dbtype number;
879 int32_t resp;
881 for (number = pwddb; number < lastdb; ++number)
882 if (strcmp (key, dbnames[number]) == 0)
884 if (dbs[number].reset_res)
885 res_init ();
887 break;
890 if (number == lastdb)
892 resp = EINVAL;
893 writeall (fd, &resp, sizeof (resp));
894 return;
897 if (dbs[number].enabled)
899 pthread_mutex_lock (&dbs[number].prune_lock);
900 prune_cache (&dbs[number], LONG_MAX, fd);
901 pthread_mutex_unlock (&dbs[number].prune_lock);
903 else
905 resp = 0;
906 writeall (fd, &resp, sizeof (resp));
911 #ifdef SCM_RIGHTS
912 static void
913 send_ro_fd (struct database_dyn *db, char *key, int fd)
915 /* If we do not have an read-only file descriptor do nothing. */
916 if (db->ro_fd == -1)
917 return;
919 /* We need to send some data along with the descriptor. */
920 uint64_t mapsize = (db->head->data_size
921 + roundup (db->head->module * sizeof (ref_t), ALIGN)
922 + sizeof (struct database_pers_head));
923 struct iovec iov[2];
924 iov[0].iov_base = key;
925 iov[0].iov_len = strlen (key) + 1;
926 iov[1].iov_base = &mapsize;
927 iov[1].iov_len = sizeof (mapsize);
929 /* Prepare the control message to transfer the descriptor. */
930 union
932 struct cmsghdr hdr;
933 char bytes[CMSG_SPACE (sizeof (int))];
934 } buf;
935 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
936 .msg_control = buf.bytes,
937 .msg_controllen = sizeof (buf) };
938 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
940 cmsg->cmsg_level = SOL_SOCKET;
941 cmsg->cmsg_type = SCM_RIGHTS;
942 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
944 *(int *) CMSG_DATA (cmsg) = db->ro_fd;
946 msg.msg_controllen = cmsg->cmsg_len;
948 /* Send the control message. We repeat when we are interrupted but
949 everything else is ignored. */
950 #ifndef MSG_NOSIGNAL
951 # define MSG_NOSIGNAL 0
952 #endif
953 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
955 if (__builtin_expect (debug_level > 0, 0))
956 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
958 #endif /* SCM_RIGHTS */
961 /* Handle new request. */
962 static void
963 handle_request (int fd, request_header *req, void *key, uid_t uid)
965 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
967 if (debug_level > 0)
968 dbg_log (_("\
969 cannot handle old request version %d; current version is %d"),
970 req->version, NSCD_VERSION);
971 return;
974 /* Perform the SELinux check before we go on to the standard checks. */
975 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
977 if (debug_level > 0)
978 dbg_log (_("request not handled due to missing permission"));
979 return;
982 struct database_dyn *db = reqinfo[req->type].db;
984 /* See whether we can service the request from the cache. */
985 if (__builtin_expect (reqinfo[req->type].data_request, true))
987 if (__builtin_expect (debug_level, 0) > 0)
989 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
991 char buf[INET6_ADDRSTRLEN];
993 dbg_log ("\t%s (%s)", serv2str[req->type],
994 inet_ntop (req->type == GETHOSTBYADDR
995 ? AF_INET : AF_INET6,
996 key, buf, sizeof (buf)));
998 else
999 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1002 /* Is this service enabled? */
1003 if (__builtin_expect (!db->enabled, 0))
1005 /* No, sent the prepared record. */
1006 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1007 db->disabled_iov->iov_len,
1008 MSG_NOSIGNAL))
1009 != (ssize_t) db->disabled_iov->iov_len
1010 && __builtin_expect (debug_level, 0) > 0)
1012 /* We have problems sending the result. */
1013 char buf[256];
1014 dbg_log (_("cannot write result: %s"),
1015 strerror_r (errno, buf, sizeof (buf)));
1018 return;
1021 /* Be sure we can read the data. */
1022 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
1024 ++db->head->rdlockdelayed;
1025 pthread_rwlock_rdlock (&db->lock);
1028 /* See whether we can handle it from the cache. */
1029 struct datahead *cached;
1030 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1031 db, uid);
1032 if (cached != NULL)
1034 /* Hurray it's in the cache. */
1035 ssize_t nwritten;
1037 #ifdef HAVE_SENDFILE
1038 if (__builtin_expect (db->mmap_used, 1))
1040 assert (db->wr_fd != -1);
1041 assert ((char *) cached->data > (char *) db->data);
1042 assert ((char *) cached->data - (char *) db->head
1043 + cached->recsize
1044 <= (sizeof (struct database_pers_head)
1045 + db->head->module * sizeof (ref_t)
1046 + db->head->data_size));
1047 nwritten = sendfileall (fd, db->wr_fd,
1048 (char *) cached->data
1049 - (char *) db->head, cached->recsize);
1050 # ifndef __ASSUME_SENDFILE
1051 if (nwritten == -1 && errno == ENOSYS)
1052 goto use_write;
1053 # endif
1055 else
1056 # ifndef __ASSUME_SENDFILE
1057 use_write:
1058 # endif
1059 #endif
1060 nwritten = writeall (fd, cached->data, cached->recsize);
1062 if (nwritten != cached->recsize
1063 && __builtin_expect (debug_level, 0) > 0)
1065 /* We have problems sending the result. */
1066 char buf[256];
1067 dbg_log (_("cannot write result: %s"),
1068 strerror_r (errno, buf, sizeof (buf)));
1071 pthread_rwlock_unlock (&db->lock);
1073 return;
1076 pthread_rwlock_unlock (&db->lock);
1078 else if (__builtin_expect (debug_level, 0) > 0)
1080 if (req->type == INVALIDATE)
1081 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1082 else
1083 dbg_log ("\t%s", serv2str[req->type]);
1086 /* Handle the request. */
1087 switch (req->type)
1089 case GETPWBYNAME:
1090 addpwbyname (db, fd, req, key, uid);
1091 break;
1093 case GETPWBYUID:
1094 addpwbyuid (db, fd, req, key, uid);
1095 break;
1097 case GETGRBYNAME:
1098 addgrbyname (db, fd, req, key, uid);
1099 break;
1101 case GETGRBYGID:
1102 addgrbygid (db, fd, req, key, uid);
1103 break;
1105 case GETHOSTBYNAME:
1106 addhstbyname (db, fd, req, key, uid);
1107 break;
1109 case GETHOSTBYNAMEv6:
1110 addhstbynamev6 (db, fd, req, key, uid);
1111 break;
1113 case GETHOSTBYADDR:
1114 addhstbyaddr (db, fd, req, key, uid);
1115 break;
1117 case GETHOSTBYADDRv6:
1118 addhstbyaddrv6 (db, fd, req, key, uid);
1119 break;
1121 case GETAI:
1122 addhstai (db, fd, req, key, uid);
1123 break;
1125 case INITGROUPS:
1126 addinitgroups (db, fd, req, key, uid);
1127 break;
1129 case GETSERVBYNAME:
1130 addservbyname (db, fd, req, key, uid);
1131 break;
1133 case GETSERVBYPORT:
1134 addservbyport (db, fd, req, key, uid);
1135 break;
1137 case GETSTAT:
1138 case SHUTDOWN:
1139 case INVALIDATE:
1141 /* Get the callers credentials. */
1142 #ifdef SO_PEERCRED
1143 struct ucred caller;
1144 socklen_t optlen = sizeof (caller);
1146 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1148 char buf[256];
1150 dbg_log (_("error getting caller's id: %s"),
1151 strerror_r (errno, buf, sizeof (buf)));
1152 break;
1155 uid = caller.uid;
1156 #else
1157 /* Some systems have no SO_PEERCRED implementation. They don't
1158 care about security so we don't as well. */
1159 uid = 0;
1160 #endif
1163 /* Accept shutdown, getstat and invalidate only from root. For
1164 the stat call also allow the user specified in the config file. */
1165 if (req->type == GETSTAT)
1167 if (uid == 0 || uid == stat_uid)
1168 send_stats (fd, dbs);
1170 else if (uid == 0)
1172 if (req->type == INVALIDATE)
1173 invalidate_cache (key, fd);
1174 else
1175 termination_handler (0);
1177 break;
1179 case GETFDPW:
1180 case GETFDGR:
1181 case GETFDHST:
1182 case GETFDSERV:
1183 #ifdef SCM_RIGHTS
1184 send_ro_fd (reqinfo[req->type].db, key, fd);
1185 #endif
1186 break;
1188 default:
1189 /* Ignore the command, it's nothing we know. */
1190 break;
1195 /* Restart the process. */
1196 static void
1197 restart (void)
1199 /* First determine the parameters. We do not use the parameters
1200 passed to main() since in case nscd is started by running the
1201 dynamic linker this will not work. Yes, this is not the usual
1202 case but nscd is part of glibc and we occasionally do this. */
1203 size_t buflen = 1024;
1204 char *buf = alloca (buflen);
1205 size_t readlen = 0;
1206 int fd = open ("/proc/self/cmdline", O_RDONLY);
1207 if (fd == -1)
1209 dbg_log (_("\
1210 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1211 strerror (errno));
1213 paranoia = 0;
1214 return;
1217 while (1)
1219 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1220 buflen - readlen));
1221 if (n == -1)
1223 dbg_log (_("\
1224 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1225 strerror (errno));
1227 close (fd);
1228 paranoia = 0;
1229 return;
1232 readlen += n;
1234 if (readlen < buflen)
1235 break;
1237 /* We might have to extend the buffer. */
1238 size_t old_buflen = buflen;
1239 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1240 buf = memmove (newp, buf, old_buflen);
1243 close (fd);
1245 /* Parse the command line. Worst case scenario: every two
1246 characters form one parameter (one character plus NUL). */
1247 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1248 int argc = 0;
1250 char *cp = buf;
1251 while (cp < buf + readlen)
1253 argv[argc++] = cp;
1254 cp = (char *) rawmemchr (cp, '\0') + 1;
1256 argv[argc] = NULL;
1258 /* Second, change back to the old user if we changed it. */
1259 if (server_user != NULL)
1261 if (setresuid (old_uid, old_uid, old_uid) != 0)
1263 dbg_log (_("\
1264 cannot change to old UID: %s; disabling paranoia mode"),
1265 strerror (errno));
1267 paranoia = 0;
1268 return;
1271 if (setresgid (old_gid, old_gid, old_gid) != 0)
1273 dbg_log (_("\
1274 cannot change to old GID: %s; disabling paranoia mode"),
1275 strerror (errno));
1277 setuid (server_uid);
1278 paranoia = 0;
1279 return;
1283 /* Next change back to the old working directory. */
1284 if (chdir (oldcwd) == -1)
1286 dbg_log (_("\
1287 cannot change to old working directory: %s; disabling paranoia mode"),
1288 strerror (errno));
1290 if (server_user != NULL)
1292 setuid (server_uid);
1293 setgid (server_gid);
1295 paranoia = 0;
1296 return;
1299 /* Synchronize memory. */
1300 for (int cnt = 0; cnt < lastdb; ++cnt)
1301 if (dbs[cnt].enabled)
1303 /* Make sure nobody keeps using the database. */
1304 dbs[cnt].head->timestamp = 0;
1306 if (dbs[cnt].persistent)
1307 // XXX async OK?
1308 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1311 /* The preparations are done. */
1312 execv ("/proc/self/exe", argv);
1314 /* If we come here, we will never be able to re-exec. */
1315 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1316 strerror (errno));
1318 if (server_user != NULL)
1320 setuid (server_uid);
1321 setgid (server_gid);
1323 if (chdir ("/") != 0)
1324 dbg_log (_("cannot change current working directory to \"/\": %s"),
1325 strerror (errno));
1326 paranoia = 0;
/* List of file descriptors.  One element describes one accepted
   connection which is waiting for a worker thread.  An element whose
   NEXT pointer is NULL is unused and can be claimed by fd_ready.  */
struct fdlist
{
  int fd;
  struct fdlist *next;
};
/* Memory allocated for the list.  The array is allocated in
   start_threads with NCONNS elements.  */
static struct fdlist *fdlist;
/* List of currently ready-to-read file descriptors.  This is a
   circular list: READYLIST points to the most recently queued element
   and READYLIST->next is the element which is handed out next.  NULL
   means the list is empty.  */
static struct fdlist *readylist;

/* Conditional variable and mutex to signal availability of entries in
   READYLIST.  Both are statically initialized.  */
static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;

/* The clock to use with the per-database prune condvars.  Those are
   initialized dynamically in start_threads since we might use a
   different clock depending on availability.  */
static clockid_t timeout_clock = CLOCK_REALTIME;

/* Number of threads ready to handle the READYLIST.  Modified by the
   worker threads while they hold READYLIST_LOCK.  */
static unsigned long int nready;
1354 /* Function for the clean-up threads. */
1355 static void *
1356 __attribute__ ((__noreturn__))
1357 nscd_run_prune (void *p)
1359 const long int my_number = (long int) p;
1360 assert (dbs[my_number].enabled);
1362 int dont_need_update = setup_thread (&dbs[my_number]);
1364 /* We are running. */
1365 dbs[my_number].head->timestamp = time (NULL);
1367 struct timespec prune_ts;
1368 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1369 /* Should never happen. */
1370 abort ();
1372 /* Compute the initial timeout time. Prevent all the timers to go
1373 off at the same time by adding a db-based value. */
1374 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1376 pthread_mutex_lock (&dbs[my_number].prune_lock);
1377 while (1)
1379 /* Wait, but not forever. */
1380 int e = pthread_cond_timedwait (&dbs[my_number].prune_cond,
1381 &dbs[my_number].prune_lock,
1382 &prune_ts);
1383 assert (e == 0 || e == ETIMEDOUT);
1385 time_t next_wait;
1386 time_t now = time (NULL);
1387 if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time)
1389 next_wait = prune_cache (&dbs[my_number], now, -1);
1390 next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1391 /* If clients cannot determine for sure whether nscd is running
1392 we need to wake up occasionally to update the timestamp.
1393 Wait 90% of the update period. */
1394 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1395 if (__builtin_expect (! dont_need_update, 0))
1396 next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1398 /* Make it known when we will wake up again. */
1399 dbs[my_number].wakeup_time = now + next_wait;
1401 else
1402 /* The cache was just pruned. Do not do it again now. Just
1403 use the new timeout value. */
1404 next_wait = dbs[my_number].wakeup_time - now;
1406 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1407 /* Should never happen. */
1408 abort ();
1410 /* Compute next timeout time. */
1411 prune_ts.tv_sec += next_wait;
1416 /* This is the main loop. It is replicated in different threads but
1417 the the use of the ready list makes sure only one thread handles an
1418 incoming connection. */
1419 static void *
1420 __attribute__ ((__noreturn__))
1421 nscd_run_worker (void *p)
1423 char buf[256];
1425 /* Initial locking. */
1426 pthread_mutex_lock (&readylist_lock);
1428 /* One more thread available. */
1429 ++nready;
1431 while (1)
1433 while (readylist == NULL)
1434 pthread_cond_wait (&readylist_cond, &readylist_lock);
1436 struct fdlist *it = readylist->next;
1437 if (readylist->next == readylist)
1438 /* Just one entry on the list. */
1439 readylist = NULL;
1440 else
1441 readylist->next = it->next;
1443 /* Extract the information and mark the record ready to be used
1444 again. */
1445 int fd = it->fd;
1446 it->next = NULL;
1448 /* One more thread available. */
1449 --nready;
1451 /* We are done with the list. */
1452 pthread_mutex_unlock (&readylist_lock);
1454 /* We do not want to block on a short read or so. */
1455 int fl = fcntl (fd, F_GETFL);
1456 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1457 goto close_and_out;
1459 /* Now read the request. */
1460 request_header req;
1461 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1462 != sizeof (req), 0))
1464 /* We failed to read data. Note that this also might mean we
1465 failed because we would have blocked. */
1466 if (debug_level > 0)
1467 dbg_log (_("short read while reading request: %s"),
1468 strerror_r (errno, buf, sizeof (buf)));
1469 goto close_and_out;
1472 /* Check whether this is a valid request type. */
1473 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1474 goto close_and_out;
1476 /* Some systems have no SO_PEERCRED implementation. They don't
1477 care about security so we don't as well. */
1478 uid_t uid = -1;
1479 #ifdef SO_PEERCRED
1480 pid_t pid = 0;
1482 if (__builtin_expect (debug_level > 0, 0))
1484 struct ucred caller;
1485 socklen_t optlen = sizeof (caller);
1487 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1488 pid = caller.pid;
1490 #endif
1492 /* It should not be possible to crash the nscd with a silly
1493 request (i.e., a terribly large key). We limit the size to 1kb. */
1494 if (__builtin_expect (req.key_len, 1) < 0
1495 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1497 if (debug_level > 0)
1498 dbg_log (_("key length in request too long: %d"), req.key_len);
1500 else
1502 /* Get the key. */
1503 char keybuf[MAXKEYLEN];
1505 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1506 req.key_len))
1507 != req.key_len, 0))
1509 /* Again, this can also mean we would have blocked. */
1510 if (debug_level > 0)
1511 dbg_log (_("short read while reading request key: %s"),
1512 strerror_r (errno, buf, sizeof (buf)));
1513 goto close_and_out;
1516 if (__builtin_expect (debug_level, 0) > 0)
1518 #ifdef SO_PEERCRED
1519 if (pid != 0)
1520 dbg_log (_("\
1521 handle_request: request received (Version = %d) from PID %ld"),
1522 req.version, (long int) pid);
1523 else
1524 #endif
1525 dbg_log (_("\
1526 handle_request: request received (Version = %d)"), req.version);
1529 /* Phew, we got all the data, now process it. */
1530 handle_request (fd, &req, keybuf, uid);
1533 close_and_out:
1534 /* We are done. */
1535 close (fd);
1537 /* Re-locking. */
1538 pthread_mutex_lock (&readylist_lock);
1540 /* One more thread available. */
1541 ++nready;
1546 static unsigned int nconns;
1548 static void
1549 fd_ready (int fd)
1551 pthread_mutex_lock (&readylist_lock);
1553 /* Find an empty entry in FDLIST. */
1554 size_t inner;
1555 for (inner = 0; inner < nconns; ++inner)
1556 if (fdlist[inner].next == NULL)
1557 break;
1558 assert (inner < nconns);
1560 fdlist[inner].fd = fd;
1562 if (readylist == NULL)
1563 readylist = fdlist[inner].next = &fdlist[inner];
1564 else
1566 fdlist[inner].next = readylist->next;
1567 readylist = readylist->next = &fdlist[inner];
1570 bool do_signal = true;
1571 if (__builtin_expect (nready == 0, 0))
1573 ++client_queued;
1574 do_signal = false;
1576 /* Try to start another thread to help out. */
1577 pthread_t th;
1578 if (nthreads < max_nthreads
1579 && pthread_create (&th, &attr, nscd_run_worker,
1580 (void *) (long int) nthreads) == 0)
1582 /* We got another thread. */
1583 ++nthreads;
1584 /* The new thread might need a kick. */
1585 do_signal = true;
1590 pthread_mutex_unlock (&readylist_lock);
1592 /* Tell one of the worker threads there is work to do. */
1593 if (do_signal)
1594 pthread_cond_signal (&readylist_cond);
1598 /* Check whether restarting should happen. */
1599 static inline int
1600 restart_p (time_t now)
1602 return (paranoia && readylist == NULL && nready == nthreads
1603 && now >= restart_time);
/* Array for times a connection was accepted.  Allocated in
   start_threads with NCONNS elements.  The poll-based main loop
   indexes it with the slot number in its pollfd array, the epoll-based
   main loop indexes it with the descriptor itself and uses the value
   zero to mark descriptors it is not waiting for.  */
static time_t *starttime;
1611 static void
1612 __attribute__ ((__noreturn__))
1613 main_loop_poll (void)
1615 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1616 * sizeof (conns[0]));
1618 conns[0].fd = sock;
1619 conns[0].events = POLLRDNORM;
1620 size_t nused = 1;
1621 size_t firstfree = 1;
1623 while (1)
1625 /* Wait for any event. We wait at most a couple of seconds so
1626 that we can check whether we should close any of the accepted
1627 connections since we have not received a request. */
1628 #define MAX_ACCEPT_TIMEOUT 30
1629 #define MIN_ACCEPT_TIMEOUT 5
1630 #define MAIN_THREAD_TIMEOUT \
1631 (MAX_ACCEPT_TIMEOUT * 1000 \
1632 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1634 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1636 time_t now = time (NULL);
1638 /* If there is a descriptor ready for reading or there is a new
1639 connection, process this now. */
1640 if (n > 0)
1642 if (conns[0].revents != 0)
1644 /* We have a new incoming connection. Accept the connection. */
1645 int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
1647 /* Use the descriptor if we have not reached the limit. */
1648 if (fd >= 0)
1650 if (firstfree < nconns)
1652 conns[firstfree].fd = fd;
1653 conns[firstfree].events = POLLRDNORM;
1654 starttime[firstfree] = now;
1655 if (firstfree >= nused)
1656 nused = firstfree + 1;
1659 ++firstfree;
1660 while (firstfree < nused && conns[firstfree].fd != -1);
1662 else
1663 /* We cannot use the connection so close it. */
1664 close (fd);
1667 --n;
1670 for (size_t cnt = 1; cnt < nused && n > 0; ++cnt)
1671 if (conns[cnt].revents != 0)
1673 fd_ready (conns[cnt].fd);
1675 /* Clean up the CONNS array. */
1676 conns[cnt].fd = -1;
1677 if (cnt < firstfree)
1678 firstfree = cnt;
1679 if (cnt == nused - 1)
1681 --nused;
1682 while (conns[nused - 1].fd == -1);
1684 --n;
1688 /* Now find entries which have timed out. */
1689 assert (nused > 0);
1691 /* We make the timeout length depend on the number of file
1692 descriptors currently used. */
1693 #define ACCEPT_TIMEOUT \
1694 (MAX_ACCEPT_TIMEOUT \
1695 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
1696 time_t laststart = now - ACCEPT_TIMEOUT;
1698 for (size_t cnt = nused - 1; cnt > 0; --cnt)
1700 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
1702 /* Remove the entry, it timed out. */
1703 (void) close (conns[cnt].fd);
1704 conns[cnt].fd = -1;
1706 if (cnt < firstfree)
1707 firstfree = cnt;
1708 if (cnt == nused - 1)
1710 --nused;
1711 while (conns[nused - 1].fd == -1);
1715 if (restart_p (now))
1716 restart ();
#ifdef HAVE_EPOLL
/* Main loop of the main thread, based on the epoll descriptor EFD.
   Does the same work as main_loop_poll.  The function only returns if
   the listening socket cannot be added to EFD, in which case the
   caller is expected to fall back to another implementation.

   NUSED counts the descriptors currently registered with EFD; the
   MAIN_THREAD_TIMEOUT and ACCEPT_TIMEOUT macros expand to expressions
   which use it.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  int nused = 1;
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));

            if (fd >= 0)
              {
                /* Try to add the new descriptor.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
        else
          {
            /* Remove the descriptor from the epoll descriptor.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (cnt != sock && starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;

            /* The descriptor is not registered anymore.  Without this
               the count would only ever grow when connections time
               out, which distorts both timeout computations.  */
            --nused;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
1815 /* Start all the threads we want. The initial process is thread no. 1. */
1816 void
1817 start_threads (void)
1819 /* Initialize the conditional variable we will use. The only
1820 non-standard attribute we might use is the clock selection. */
1821 pthread_condattr_t condattr;
1822 pthread_condattr_init (&condattr);
1824 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
1825 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
1826 /* Determine whether the monotonous clock is available. */
1827 struct timespec dummy;
1828 # if _POSIX_MONOTONIC_CLOCK == 0
1829 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
1830 # endif
1831 # if _POSIX_CLOCK_SELECTION == 0
1832 if (sysconf (_SC_CLOCK_SELECTION) > 0)
1833 # endif
1834 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
1835 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
1836 timeout_clock = CLOCK_MONOTONIC;
1837 #endif
1839 /* Create the attribute for the threads. They are all created
1840 detached. */
1841 pthread_attr_init (&attr);
1842 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
1843 /* Use 1MB stacks, twice as much for 64-bit architectures. */
1844 pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
1846 /* We allow less than LASTDB threads only for debugging. */
1847 if (debug_level == 0)
1848 nthreads = MAX (nthreads, lastdb);
1850 /* Create the threads which prune the databases. */
1851 // XXX Ideally this work would be done by some of the worker threads.
1852 // XXX But this is problematic since we would need to be able to wake
1853 // XXX them up explicitly as well as part of the group handling the
1854 // XXX ready-list. This requires an operation where we can wait on
1855 // XXX two conditional variables at the same time. This operation
1856 // XXX does not exist (yet).
1857 for (long int i = 0; i < lastdb; ++i)
1859 /* Initialize the conditional variable. */
1860 if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
1862 dbg_log (_("could not initialize conditional variable"));
1863 exit (1);
1866 pthread_t th;
1867 if (dbs[i].enabled
1868 && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
1870 dbg_log (_("could not start clean-up thread; terminating"));
1871 exit (1);
1875 pthread_condattr_destroy (&condattr);
1877 for (long int i = 0; i < nthreads; ++i)
1879 pthread_t th;
1880 if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
1882 if (i == 0)
1884 dbg_log (_("could not start any worker thread; terminating"));
1885 exit (1);
1888 break;
1892 /* Determine how much room for descriptors we should initially
1893 allocate. This might need to change later if we cap the number
1894 with MAXCONN. */
1895 const long int nfds = sysconf (_SC_OPEN_MAX);
1896 #define MINCONN 32
1897 #define MAXCONN 16384
1898 if (nfds == -1 || nfds > MAXCONN)
1899 nconns = MAXCONN;
1900 else if (nfds < MINCONN)
1901 nconns = MINCONN;
1902 else
1903 nconns = nfds;
1905 /* We need memory to pass descriptors on to the worker threads. */
1906 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
1907 /* Array to keep track when connection was accepted. */
1908 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
1910 /* In the main thread we execute the loop which handles incoming
1911 connections. */
1912 #ifdef HAVE_EPOLL
1913 int efd = epoll_create (100);
1914 if (efd != -1)
1916 main_loop_epoll (efd);
1917 close (efd);
1919 #endif
1921 main_loop_poll ();
1925 /* Look up the uid, gid, and supplementary groups to run nscd as. When
1926 this function is called, we are not listening on the nscd socket yet so
1927 we can just use the ordinary lookup functions without causing a lockup */
1928 static void
1929 begin_drop_privileges (void)
1931 struct passwd *pwd = getpwnam (server_user);
1933 if (pwd == NULL)
1935 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1936 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
1937 server_user);
1940 server_uid = pwd->pw_uid;
1941 server_gid = pwd->pw_gid;
1943 /* Save the old UID/GID if we have to change back. */
1944 if (paranoia)
1946 old_uid = getuid ();
1947 old_gid = getgid ();
1950 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
1952 /* This really must never happen. */
1953 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1954 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
1957 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
1959 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
1960 == -1)
1962 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1963 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
1968 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
1969 run nscd as the user specified in the configuration file. */
1970 static void
1971 finish_drop_privileges (void)
1973 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
1974 /* We need to preserve the capabilities to connect to the audit daemon. */
1975 cap_t new_caps = preserve_capabilities ();
1976 #endif
1978 if (setgroups (server_ngroups, server_groups) == -1)
1980 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1981 error (EXIT_FAILURE, errno, _("setgroups failed"));
1984 int res;
1985 if (paranoia)
1986 res = setresgid (server_gid, server_gid, old_gid);
1987 else
1988 res = setgid (server_gid);
1989 if (res == -1)
1991 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1992 perror ("setgid");
1993 exit (4);
1996 if (paranoia)
1997 res = setresuid (server_uid, server_uid, old_uid);
1998 else
1999 res = setuid (server_uid);
2000 if (res == -1)
2002 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2003 perror ("setuid");
2004 exit (4);
2007 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2008 /* Remove the temporary capabilities. */
2009 install_real_capabilities (new_caps);
2010 #endif