Updated to fedora-glibc-20071010T2047
[glibc.git] / nscd / connections.c
blob89a1ea496732afc86c8a2a55497414cd027cab59
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20 #include <alloca.h>
21 #include <assert.h>
22 #include <atomic.h>
23 #include <error.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <grp.h>
27 #include <libintl.h>
28 #include <pthread.h>
29 #include <pwd.h>
30 #include <resolv.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <arpa/inet.h>
35 #ifdef HAVE_EPOLL
36 # include <sys/epoll.h>
37 #endif
38 #include <sys/mman.h>
39 #include <sys/param.h>
40 #include <sys/poll.h>
41 #ifdef HAVE_SENDFILE
42 # include <sys/sendfile.h>
43 #endif
44 #include <sys/socket.h>
45 #include <sys/stat.h>
46 #include <sys/un.h>
48 #include "nscd.h"
49 #include "dbg_log.h"
50 #include "selinux.h"
51 #ifdef HAVE_SENDFILE
52 # include <kernel-features.h>
53 #endif
/* Allocation wrappers that do their own error checking; they are
   defined outside this file.  */
extern void *xmalloc (size_t n);
extern void *xcalloc (size_t n, size_t s);
extern void *xrealloc (void *o, size_t n);

/* State needed to run nscd as an unprivileged user.  */
const char *server_user;
static uid_t server_uid;
static gid_t server_gid;

/* User (besides root) who may request statistics.  */
const char *stat_user;
uid_t stat_uid;

/* Supplementary groups of the unprivileged server user.  */
static gid_t *server_groups;
#ifndef NGROUPS
# define NGROUPS 32
#endif
static int server_ngroups;

static volatile int sighup_pending;

static pthread_attr_t attr;

/* Two-phase privilege drop, defined later in this file.  */
static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
79 /* Map request type to a string. */
80 const char *const serv2str[LASTREQ] =
82 [GETPWBYNAME] = "GETPWBYNAME",
83 [GETPWBYUID] = "GETPWBYUID",
84 [GETGRBYNAME] = "GETGRBYNAME",
85 [GETGRBYGID] = "GETGRBYGID",
86 [GETHOSTBYNAME] = "GETHOSTBYNAME",
87 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
88 [GETHOSTBYADDR] = "GETHOSTBYADDR",
89 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
90 [SHUTDOWN] = "SHUTDOWN",
91 [GETSTAT] = "GETSTAT",
92 [INVALIDATE] = "INVALIDATE",
93 [GETFDPW] = "GETFDPW",
94 [GETFDGR] = "GETFDGR",
95 [GETFDHST] = "GETFDHST",
96 [GETAI] = "GETAI",
97 [INITGROUPS] = "INITGROUPS",
98 [GETSERVBYNAME] = "GETSERVBYNAME",
99 [GETSERVBYPORT] = "GETSERVBYPORT",
100 [GETFDSERV] = "GETFDSERV"
103 /* The control data structures for the services. */
104 struct database_dyn dbs[lastdb] =
106 [pwddb] = {
107 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
108 .prunelock = PTHREAD_MUTEX_INITIALIZER,
109 .enabled = 0,
110 .check_file = 1,
111 .persistent = 0,
112 .propagate = 1,
113 .shared = 0,
114 .max_db_size = DEFAULT_MAX_DB_SIZE,
115 .reset_res = 0,
116 .filename = "/etc/passwd",
117 .db_filename = _PATH_NSCD_PASSWD_DB,
118 .disabled_iov = &pwd_iov_disabled,
119 .postimeout = 3600,
120 .negtimeout = 20,
121 .wr_fd = -1,
122 .ro_fd = -1,
123 .mmap_used = false
125 [grpdb] = {
126 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
127 .prunelock = PTHREAD_MUTEX_INITIALIZER,
128 .enabled = 0,
129 .check_file = 1,
130 .persistent = 0,
131 .propagate = 1,
132 .shared = 0,
133 .max_db_size = DEFAULT_MAX_DB_SIZE,
134 .reset_res = 0,
135 .filename = "/etc/group",
136 .db_filename = _PATH_NSCD_GROUP_DB,
137 .disabled_iov = &grp_iov_disabled,
138 .postimeout = 3600,
139 .negtimeout = 60,
140 .wr_fd = -1,
141 .ro_fd = -1,
142 .mmap_used = false
144 [hstdb] = {
145 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
146 .prunelock = PTHREAD_MUTEX_INITIALIZER,
147 .enabled = 0,
148 .check_file = 1,
149 .persistent = 0,
150 .propagate = 0, /* Not used. */
151 .shared = 0,
152 .max_db_size = DEFAULT_MAX_DB_SIZE,
153 .reset_res = 1,
154 .filename = "/etc/hosts",
155 .db_filename = _PATH_NSCD_HOSTS_DB,
156 .disabled_iov = &hst_iov_disabled,
157 .postimeout = 3600,
158 .negtimeout = 20,
159 .wr_fd = -1,
160 .ro_fd = -1,
161 .mmap_used = false
163 [servdb] = {
164 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
165 .prunelock = PTHREAD_MUTEX_INITIALIZER,
166 .enabled = 0,
167 .check_file = 1,
168 .persistent = 0,
169 .propagate = 0, /* Not used. */
170 .shared = 0,
171 .max_db_size = DEFAULT_MAX_DB_SIZE,
172 .reset_res = 0,
173 .filename = "/etc/services",
174 .db_filename = _PATH_NSCD_SERVICES_DB,
175 .disabled_iov = &serv_iov_disabled,
176 .postimeout = 28800,
177 .negtimeout = 20,
178 .wr_fd = -1,
179 .ro_fd = -1,
180 .mmap_used = false
185 /* Mapping of request type to database. */
186 static struct
188 bool data_request;
189 struct database_dyn *db;
190 } const reqinfo[LASTREQ] =
192 [GETPWBYNAME] = { true, &dbs[pwddb] },
193 [GETPWBYUID] = { true, &dbs[pwddb] },
194 [GETGRBYNAME] = { true, &dbs[grpdb] },
195 [GETGRBYGID] = { true, &dbs[grpdb] },
196 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
197 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
198 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
199 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
200 [SHUTDOWN] = { false, NULL },
201 [GETSTAT] = { false, NULL },
202 [SHUTDOWN] = { false, NULL },
203 [GETFDPW] = { false, &dbs[pwddb] },
204 [GETFDGR] = { false, &dbs[grpdb] },
205 [GETFDHST] = { false, &dbs[hstdb] },
206 [GETAI] = { true, &dbs[hstdb] },
207 [INITGROUPS] = { true, &dbs[grpdb] },
208 [GETSERVBYNAME] = { true, &dbs[servdb] },
209 [GETSERVBYPORT] = { true, &dbs[servdb] },
210 [GETFDSERV] = { false, &dbs[servdb] }
/* Interval, in seconds, between two cache pruning runs.  */
#define CACHE_PRUNE_INTERVAL 15


/* Initial number of worker threads; -1 means "not configured".  */
int nthreads = -1;
/* Upper limit for the number of worker threads.  */
int max_nthreads = 32;

/* Listening socket for incoming connections.  */
static int sock;

/* How often clients had to wait.  */
unsigned long int client_queued;
/* Send all LEN bytes starting at BUF over the socket FD, resuming after
   short writes and after interruption by a signal.  At least one send
   is attempted even if LEN is zero.  Returns the number of bytes sent
   (less than LEN if send reported zero progress) or the negative
   result of the failing send call.  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cursor = buf;
  size_t left = len;
  ssize_t sent;

  do
    {
      /* Retry as long as the call is merely interrupted.  */
      do
        sent = send (fd, cursor, left, MSG_NOSIGNAL);
      while (sent == -1 && errno == EINTR);

      if (sent <= 0)
        break;

      cursor += sent;
      left -= sent;
    }
  while (left > 0);

  if (sent < 0)
    return sent;
  return len - left;
}
#ifdef HAVE_SENDFILE
/* Transfer LEN bytes from FROMFD, starting at file offset OFF, to TOFD
   with sendfile, resuming after short transfers.  Returns the number
   of bytes transferred or the negative result of the failing call.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t left = len;
  ssize_t moved;

  do
    {
      /* sendfile advances OFF itself, so only the count is tracked.  */
      moved = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, left));
      if (moved <= 0)
        break;

      left -= moved;
    }
  while (left > 0);

  if (moved < 0)
    return moved;
  return len - left;
}
#endif
/* Markers stored in the per-byte usage map while a persistent database
   is verified.  The low bits name the kind of object occupying a byte,
   the high bits flag its position inside that object.  */
enum usekey
{
  use_not = 0,

  /* Kinds of objects.  */
  use_he = 1,
#if SEPARATE_KEY
  use_key = 2,
#endif
  use_data = 3,

  /* The following three are not really used, they are symbolic
     constants.  */
  use_first = 16,
  use_begin = 32,
  use_end = 64,

  /* Hash entries.  */
  use_he_begin = use_begin | use_he,
  use_he_end = use_end | use_he,

#if SEPARATE_KEY
  /* Keys stored outside of their data record.  */
  use_key_begin = use_begin | use_key,
  use_key_end = use_end | use_key,
  use_key_first = use_first | use_key_begin,
#endif

  /* Data records.  */
  use_data_begin = use_begin | use_data,
  use_data_end = use_end | use_data,
  use_data_first = use_first | use_data_begin
};
292 static int
293 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
294 enum usekey use, ref_t start, size_t len)
296 assert (len >= 2);
298 if (start > first_free || start + len > first_free
299 || (start & BLOCK_ALIGN_M1))
300 return 0;
302 if (usemap[start] == use_not)
304 /* Add the start marker. */
305 usemap[start] = use | use_begin;
306 use &= ~use_first;
308 while (--len > 0)
309 if (usemap[++start] != use_not)
310 return 0;
311 else
312 usemap[start] = use;
314 /* Add the end marker. */
315 usemap[start] = use | use_end;
317 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
319 /* Hash entries can't be shared. */
320 if (use == use_he)
321 return 0;
323 usemap[start] |= (use & use_first);
324 use &= ~use_first;
326 while (--len > 1)
327 if (usemap[++start] != use)
328 return 0;
330 if (usemap[++start] != (use | use_end))
331 return 0;
333 else
334 /* Points to a wrong object or somewhere in the middle. */
335 return 0;
337 return 1;
341 /* Verify data in persistent database. */
342 static int
343 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
345 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb);
347 time_t now = time (NULL);
349 struct database_pers_head *head = mem;
350 struct database_pers_head head_copy = *head;
352 /* Check that the header that was read matches the head in the database. */
353 if (readhead != NULL && memcmp (head, readhead, sizeof (*head)) != 0)
354 return 0;
356 /* First some easy tests: make sure the database header is sane. */
357 if (head->version != DB_VERSION
358 || head->header_size != sizeof (*head)
359 /* We allow a timestamp to be one hour ahead of the current time.
360 This should cover daylight saving time changes. */
361 || head->timestamp > now + 60 * 60 + 60
362 || (head->gc_cycle & 1)
363 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
364 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
365 || head->first_free < 0
366 || head->first_free > head->data_size
367 || (head->first_free & BLOCK_ALIGN_M1) != 0
368 || head->maxnentries < 0
369 || head->maxnsearched < 0)
370 return 0;
372 uint8_t *usemap = calloc (head->first_free, 1);
373 if (usemap == NULL)
374 return 0;
376 const char *data = (char *) &head->array[roundup (head->module,
377 ALIGN / sizeof (ref_t))];
379 nscd_ssize_t he_cnt = 0;
380 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
382 ref_t trail = head->array[cnt];
383 ref_t work = trail;
384 int tick = 0;
386 while (work != ENDREF)
388 if (! check_use (data, head->first_free, usemap, use_he, work,
389 sizeof (struct hashentry)))
390 goto fail;
392 /* Now we know we can dereference the record. */
393 struct hashentry *here = (struct hashentry *) (data + work);
395 ++he_cnt;
397 /* Make sure the record is for this type of service. */
398 if (here->type >= LASTREQ
399 || reqinfo[here->type].db != &dbs[dbnr])
400 goto fail;
402 /* Validate boolean field value. */
403 if (here->first != false && here->first != true)
404 goto fail;
406 if (here->len < 0)
407 goto fail;
409 /* Now the data. */
410 if (here->packet < 0
411 || here->packet > head->first_free
412 || here->packet + sizeof (struct datahead) > head->first_free)
413 goto fail;
415 struct datahead *dh = (struct datahead *) (data + here->packet);
417 if (! check_use (data, head->first_free, usemap,
418 use_data | (here->first ? use_first : 0),
419 here->packet, dh->allocsize))
420 goto fail;
422 if (dh->allocsize < sizeof (struct datahead)
423 || dh->recsize > dh->allocsize
424 || (dh->notfound != false && dh->notfound != true)
425 || (dh->usable != false && dh->usable != true))
426 goto fail;
428 if (here->key < here->packet + sizeof (struct datahead)
429 || here->key > here->packet + dh->allocsize
430 || here->key + here->len > here->packet + dh->allocsize)
432 #if SEPARATE_KEY
433 /* If keys can appear outside of data, this should be done
434 instead. But gc doesn't mark the data in that case. */
435 if (! check_use (data, head->first_free, usemap,
436 use_key | (here->first ? use_first : 0),
437 here->key, here->len))
438 #endif
439 goto fail;
442 work = here->next;
444 if (work == trail)
445 /* A circular list, this must not happen. */
446 goto fail;
447 if (tick)
448 trail = ((struct hashentry *) (data + trail))->next;
449 tick = 1 - tick;
453 if (he_cnt != head->nentries)
454 goto fail;
456 /* See if all data and keys had at least one reference from
457 he->first == true hashentry. */
458 for (ref_t idx = 0; idx < head->first_free; ++idx)
460 #if SEPARATE_KEY
461 if (usemap[idx] == use_key_begin)
462 goto fail;
463 #endif
464 if (usemap[idx] == use_data_begin)
465 goto fail;
468 /* Finally, make sure the database hasn't changed since the first test. */
469 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
470 goto fail;
472 free (usemap);
473 return 1;
475 fail:
476 free (usemap);
477 return 0;
/* Extra open flags: request close-on-exec atomically where the
   platform offers O_CLOEXEC, nothing otherwise.  */
#ifndef O_CLOEXEC
# define EXTRA_O_FLAGS 0
#else
# define EXTRA_O_FLAGS O_CLOEXEC
#endif
488 /* Initialize database information structures. */
489 void
490 nscd_init (void)
492 /* Look up unprivileged uid/gid/groups before we start listening on the
493 socket */
494 if (server_user != NULL)
495 begin_drop_privileges ();
497 if (nthreads == -1)
498 /* No configuration for this value, assume a default. */
499 nthreads = 2 * lastdb;
501 for (size_t cnt = 0; cnt < lastdb; ++cnt)
502 if (dbs[cnt].enabled)
504 pthread_rwlock_init (&dbs[cnt].lock, NULL);
505 pthread_mutex_init (&dbs[cnt].memlock, NULL);
507 if (dbs[cnt].persistent)
509 /* Try to open the appropriate file on disk. */
510 int fd = open (dbs[cnt].db_filename, O_RDWR | EXTRA_O_FLAGS);
511 if (fd != -1)
513 struct stat64 st;
514 void *mem;
515 size_t total;
516 struct database_pers_head head;
517 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
518 sizeof (head)));
519 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
521 fail_db:
522 dbg_log (_("invalid persistent database file \"%s\": %s"),
523 dbs[cnt].db_filename, strerror (errno));
524 unlink (dbs[cnt].db_filename);
526 else if (head.module == 0 && head.data_size == 0)
528 /* The file has been created, but the head has not been
529 initialized yet. Remove the old file. */
530 unlink (dbs[cnt].db_filename);
532 else if (head.header_size != (int) sizeof (head))
534 dbg_log (_("invalid persistent database file \"%s\": %s"),
535 dbs[cnt].db_filename,
536 _("header size does not match"));
537 unlink (dbs[cnt].db_filename);
539 else if ((total = (sizeof (head)
540 + roundup (head.module * sizeof (ref_t),
541 ALIGN)
542 + head.data_size))
543 > st.st_size
544 || total < sizeof (head))
546 dbg_log (_("invalid persistent database file \"%s\": %s"),
547 dbs[cnt].db_filename,
548 _("file size does not match"));
549 unlink (dbs[cnt].db_filename);
551 /* Note we map with the maximum size allowed for the
552 database. This is likely much larger than the
553 actual file size. This is OK on most OSes since
554 extensions of the underlying file will
555 automatically translate more pages available for
556 memory access. */
557 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
558 PROT_READ | PROT_WRITE,
559 MAP_SHARED, fd, 0))
560 == MAP_FAILED)
561 goto fail_db;
562 else if (!verify_persistent_db (mem, &head, cnt))
564 munmap (mem, total);
565 dbg_log (_("invalid persistent database file \"%s\": %s"),
566 dbs[cnt].db_filename,
567 _("verification failed"));
568 unlink (dbs[cnt].db_filename);
570 else
572 /* Success. We have the database. */
573 dbs[cnt].head = mem;
574 dbs[cnt].memsize = total;
575 dbs[cnt].data = (char *)
576 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
577 ALIGN / sizeof (ref_t))];
578 dbs[cnt].mmap_used = true;
580 if (dbs[cnt].suggested_module > head.module)
581 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
582 dbnames[cnt]);
584 dbs[cnt].wr_fd = fd;
585 fd = -1;
586 /* We also need a read-only descriptor. */
587 if (dbs[cnt].shared)
589 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
590 O_RDONLY | EXTRA_O_FLAGS);
591 if (dbs[cnt].ro_fd == -1)
592 dbg_log (_("\
593 cannot create read-only descriptor for \"%s\"; no mmap"),
594 dbs[cnt].db_filename);
597 // XXX Shall we test whether the descriptors actually
598 // XXX point to the same file?
601 /* Close the file descriptors in case something went
602 wrong in which case the variable have not been
603 assigned -1. */
604 if (fd != -1)
605 close (fd);
609 if (dbs[cnt].head == NULL)
611 /* No database loaded. Allocate the data structure,
612 possibly on disk. */
613 struct database_pers_head head;
614 size_t total = (sizeof (head)
615 + roundup (dbs[cnt].suggested_module
616 * sizeof (ref_t), ALIGN)
617 + (dbs[cnt].suggested_module
618 * DEFAULT_DATASIZE_PER_BUCKET));
620 /* Try to create the database. If we do not need a
621 persistent database create a temporary file. */
622 int fd;
623 int ro_fd = -1;
624 if (dbs[cnt].persistent)
626 fd = open (dbs[cnt].db_filename,
627 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | EXTRA_O_FLAGS,
628 S_IRUSR | S_IWUSR);
629 if (fd != -1 && dbs[cnt].shared)
630 ro_fd = open (dbs[cnt].db_filename,
631 O_RDONLY | EXTRA_O_FLAGS);
633 else
635 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
636 fd = mkostemp (fname, EXTRA_O_FLAGS);
638 /* We do not need the file name anymore after we
639 opened another file descriptor in read-only mode. */
640 if (fd != -1)
642 if (dbs[cnt].shared)
643 ro_fd = open (fname, O_RDONLY | EXTRA_O_FLAGS);
645 unlink (fname);
649 if (fd == -1)
651 if (errno == EEXIST)
653 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
654 dbnames[cnt], dbs[cnt].db_filename);
655 // XXX Correct way to terminate?
656 exit (1);
659 if (dbs[cnt].persistent)
660 dbg_log (_("cannot create %s; no persistent database used"),
661 dbs[cnt].db_filename);
662 else
663 dbg_log (_("cannot create %s; no sharing possible"),
664 dbs[cnt].db_filename);
666 dbs[cnt].persistent = 0;
667 // XXX remember: no mmap
669 else
671 /* Tell the user if we could not create the read-only
672 descriptor. */
673 if (ro_fd == -1 && dbs[cnt].shared)
674 dbg_log (_("\
675 cannot create read-only descriptor for \"%s\"; no mmap"),
676 dbs[cnt].db_filename);
678 /* Before we create the header, initialiye the hash
679 table. So that if we get interrupted if writing
680 the header we can recognize a partially initialized
681 database. */
682 size_t ps = sysconf (_SC_PAGESIZE);
683 char tmpbuf[ps];
684 assert (~ENDREF == 0);
685 memset (tmpbuf, '\xff', ps);
687 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
688 off_t offset = sizeof (head);
690 size_t towrite;
691 if (offset % ps != 0)
693 towrite = MIN (remaining, ps - (offset % ps));
694 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
695 goto write_fail;
696 offset += towrite;
697 remaining -= towrite;
700 while (remaining > ps)
702 if (pwrite (fd, tmpbuf, ps, offset) == -1)
703 goto write_fail;
704 offset += ps;
705 remaining -= ps;
708 if (remaining > 0
709 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
710 goto write_fail;
712 /* Create the header of the file. */
713 struct database_pers_head head =
715 .version = DB_VERSION,
716 .header_size = sizeof (head),
717 .module = dbs[cnt].suggested_module,
718 .data_size = (dbs[cnt].suggested_module
719 * DEFAULT_DATASIZE_PER_BUCKET),
720 .first_free = 0
722 void *mem;
724 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
725 != sizeof (head))
726 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
727 != 0)
728 || (mem = mmap (NULL, dbs[cnt].max_db_size,
729 PROT_READ | PROT_WRITE,
730 MAP_SHARED, fd, 0)) == MAP_FAILED)
732 write_fail:
733 unlink (dbs[cnt].db_filename);
734 dbg_log (_("cannot write to database file %s: %s"),
735 dbs[cnt].db_filename, strerror (errno));
736 dbs[cnt].persistent = 0;
738 else
740 /* Success. */
741 dbs[cnt].head = mem;
742 dbs[cnt].data = (char *)
743 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
744 ALIGN / sizeof (ref_t))];
745 dbs[cnt].memsize = total;
746 dbs[cnt].mmap_used = true;
748 /* Remember the descriptors. */
749 dbs[cnt].wr_fd = fd;
750 dbs[cnt].ro_fd = ro_fd;
751 fd = -1;
752 ro_fd = -1;
755 if (fd != -1)
756 close (fd);
757 if (ro_fd != -1)
758 close (ro_fd);
762 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
763 /* We do not check here whether the O_CLOEXEC provided to the
764 open call was successful or not. The two fcntl calls are
765 only performed once each per process start-up and therefore
766 is not noticeable at all. */
767 if (paranoia
768 && ((dbs[cnt].wr_fd != -1
769 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
770 || (dbs[cnt].ro_fd != -1
771 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
773 dbg_log (_("\
774 cannot set socket to close on exec: %s; disabling paranoia mode"),
775 strerror (errno));
776 paranoia = 0;
778 #endif
780 if (dbs[cnt].head == NULL)
782 /* We do not use the persistent database. Just
783 create an in-memory data structure. */
784 assert (! dbs[cnt].persistent);
786 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
787 + (dbs[cnt].suggested_module
788 * sizeof (ref_t)));
789 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
790 assert (~ENDREF == 0);
791 memset (dbs[cnt].head->array, '\xff',
792 dbs[cnt].suggested_module * sizeof (ref_t));
793 dbs[cnt].head->module = dbs[cnt].suggested_module;
794 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
795 * dbs[cnt].head->module);
796 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
797 dbs[cnt].head->first_free = 0;
799 dbs[cnt].shared = 0;
800 assert (dbs[cnt].ro_fd == -1);
803 if (dbs[cnt].check_file)
805 /* We need the modification date of the file. */
806 struct stat64 st;
808 if (stat64 (dbs[cnt].filename, &st) < 0)
810 /* We cannot stat() the file, disable file checking. */
811 dbg_log (_("cannot stat() file `%s': %s"),
812 dbs[cnt].filename, strerror (errno));
813 dbs[cnt].check_file = 0;
815 else
816 dbs[cnt].file_mtime = st.st_mtime;
820 /* Create the socket. */
821 sock = socket (AF_UNIX, SOCK_STREAM, 0);
822 if (sock < 0)
824 dbg_log (_("cannot open socket: %s"), strerror (errno));
825 exit (errno == EACCES ? 4 : 1);
827 /* Bind a name to the socket. */
828 struct sockaddr_un sock_addr;
829 sock_addr.sun_family = AF_UNIX;
830 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
831 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
833 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
834 exit (errno == EACCES ? 4 : 1);
837 /* We don't want to get stuck on accept. */
838 int fl = fcntl (sock, F_GETFL);
839 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
841 dbg_log (_("cannot change socket to nonblocking mode: %s"),
842 strerror (errno));
843 exit (1);
846 /* The descriptor needs to be closed on exec. */
847 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
849 dbg_log (_("cannot set socket to close on exec: %s"),
850 strerror (errno));
851 exit (1);
854 /* Set permissions for the socket. */
855 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
857 /* Set the socket up to accept connections. */
858 if (listen (sock, SOMAXCONN) < 0)
860 dbg_log (_("cannot enable socket to accept connections: %s"),
861 strerror (errno));
862 exit (1);
865 /* Change to unprivileged uid/gid/groups if specifed in config file */
866 if (server_user != NULL)
867 finish_drop_privileges ();
871 /* Close the connections. */
872 void
873 close_sockets (void)
875 close (sock);
879 static void
880 invalidate_cache (char *key, int fd)
882 dbtype number;
883 int32_t resp;
885 for (number = pwddb; number < lastdb; ++number)
886 if (strcmp (key, dbnames[number]) == 0)
888 if (dbs[number].reset_res)
889 res_init ();
891 break;
894 if (number == lastdb)
896 resp = EINVAL;
897 writeall (fd, &resp, sizeof (resp));
898 return;
901 if (dbs[number].enabled)
902 prune_cache (&dbs[number], LONG_MAX, fd);
903 else
905 resp = 0;
906 writeall (fd, &resp, sizeof (resp));
911 #ifdef SCM_RIGHTS
912 static void
913 send_ro_fd (struct database_dyn *db, char *key, int fd)
915 /* If we do not have an read-only file descriptor do nothing. */
916 if (db->ro_fd == -1)
917 return;
919 /* We need to send some data along with the descriptor. */
920 uint64_t mapsize = (db->head->data_size
921 + roundup (db->head->module * sizeof (ref_t), ALIGN)
922 + sizeof (struct database_pers_head));
923 struct iovec iov[2];
924 iov[0].iov_base = key;
925 iov[0].iov_len = strlen (key) + 1;
926 iov[1].iov_base = &mapsize;
927 iov[1].iov_len = sizeof (mapsize);
929 /* Prepare the control message to transfer the descriptor. */
930 union
932 struct cmsghdr hdr;
933 char bytes[CMSG_SPACE (sizeof (int))];
934 } buf;
935 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
936 .msg_control = buf.bytes,
937 .msg_controllen = sizeof (buf) };
938 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
940 cmsg->cmsg_level = SOL_SOCKET;
941 cmsg->cmsg_type = SCM_RIGHTS;
942 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
944 *(int *) CMSG_DATA (cmsg) = db->ro_fd;
946 msg.msg_controllen = cmsg->cmsg_len;
948 /* Send the control message. We repeat when we are interrupted but
949 everything else is ignored. */
950 #ifndef MSG_NOSIGNAL
951 # define MSG_NOSIGNAL 0
952 #endif
953 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
955 if (__builtin_expect (debug_level > 0, 0))
956 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
958 #endif /* SCM_RIGHTS */
961 /* Handle new request. */
962 static void
963 handle_request (int fd, request_header *req, void *key, uid_t uid)
965 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
967 if (debug_level > 0)
968 dbg_log (_("\
969 cannot handle old request version %d; current version is %d"),
970 req->version, NSCD_VERSION);
971 return;
974 /* Make the SELinux check before we go on to the standard checks. */
975 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
976 return;
978 struct database_dyn *db = reqinfo[req->type].db;
980 /* See whether we can service the request from the cache. */
981 if (__builtin_expect (reqinfo[req->type].data_request, true))
983 if (__builtin_expect (debug_level, 0) > 0)
985 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
987 char buf[INET6_ADDRSTRLEN];
989 dbg_log ("\t%s (%s)", serv2str[req->type],
990 inet_ntop (req->type == GETHOSTBYADDR
991 ? AF_INET : AF_INET6,
992 key, buf, sizeof (buf)));
994 else
995 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
998 /* Is this service enabled? */
999 if (__builtin_expect (!db->enabled, 0))
1001 /* No, sent the prepared record. */
1002 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1003 db->disabled_iov->iov_len,
1004 MSG_NOSIGNAL))
1005 != (ssize_t) db->disabled_iov->iov_len
1006 && __builtin_expect (debug_level, 0) > 0)
1008 /* We have problems sending the result. */
1009 char buf[256];
1010 dbg_log (_("cannot write result: %s"),
1011 strerror_r (errno, buf, sizeof (buf)));
1014 return;
1017 /* Be sure we can read the data. */
1018 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
1020 ++db->head->rdlockdelayed;
1021 pthread_rwlock_rdlock (&db->lock);
1024 /* See whether we can handle it from the cache. */
1025 struct datahead *cached;
1026 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1027 db, uid);
1028 if (cached != NULL)
1030 /* Hurray it's in the cache. */
1031 ssize_t nwritten;
1033 #ifdef HAVE_SENDFILE
1034 if (__builtin_expect (db->mmap_used, 1))
1036 assert (db->wr_fd != -1);
1037 assert ((char *) cached->data > (char *) db->data);
1038 assert ((char *) cached->data - (char *) db->head
1039 + cached->recsize
1040 <= (sizeof (struct database_pers_head)
1041 + db->head->module * sizeof (ref_t)
1042 + db->head->data_size));
1043 nwritten = sendfileall (fd, db->wr_fd,
1044 (char *) cached->data
1045 - (char *) db->head, cached->recsize);
1046 # ifndef __ASSUME_SENDFILE
1047 if (nwritten == -1 && errno == ENOSYS)
1048 goto use_write;
1049 # endif
1051 else
1052 # ifndef __ASSUME_SENDFILE
1053 use_write:
1054 # endif
1055 #endif
1056 nwritten = writeall (fd, cached->data, cached->recsize);
1058 if (nwritten != cached->recsize
1059 && __builtin_expect (debug_level, 0) > 0)
1061 /* We have problems sending the result. */
1062 char buf[256];
1063 dbg_log (_("cannot write result: %s"),
1064 strerror_r (errno, buf, sizeof (buf)));
1067 pthread_rwlock_unlock (&db->lock);
1069 return;
1072 pthread_rwlock_unlock (&db->lock);
1074 else if (__builtin_expect (debug_level, 0) > 0)
1076 if (req->type == INVALIDATE)
1077 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1078 else
1079 dbg_log ("\t%s", serv2str[req->type]);
1082 /* Handle the request. */
1083 switch (req->type)
1085 case GETPWBYNAME:
1086 addpwbyname (db, fd, req, key, uid);
1087 break;
1089 case GETPWBYUID:
1090 addpwbyuid (db, fd, req, key, uid);
1091 break;
1093 case GETGRBYNAME:
1094 addgrbyname (db, fd, req, key, uid);
1095 break;
1097 case GETGRBYGID:
1098 addgrbygid (db, fd, req, key, uid);
1099 break;
1101 case GETHOSTBYNAME:
1102 addhstbyname (db, fd, req, key, uid);
1103 break;
1105 case GETHOSTBYNAMEv6:
1106 addhstbynamev6 (db, fd, req, key, uid);
1107 break;
1109 case GETHOSTBYADDR:
1110 addhstbyaddr (db, fd, req, key, uid);
1111 break;
1113 case GETHOSTBYADDRv6:
1114 addhstbyaddrv6 (db, fd, req, key, uid);
1115 break;
1117 case GETAI:
1118 addhstai (db, fd, req, key, uid);
1119 break;
1121 case INITGROUPS:
1122 addinitgroups (db, fd, req, key, uid);
1123 break;
1125 case GETSERVBYNAME:
1126 addservbyname (db, fd, req, key, uid);
1127 break;
1129 case GETSERVBYPORT:
1130 addservbyport (db, fd, req, key, uid);
1131 break;
1133 case GETSTAT:
1134 case SHUTDOWN:
1135 case INVALIDATE:
1137 /* Get the callers credentials. */
1138 #ifdef SO_PEERCRED
1139 struct ucred caller;
1140 socklen_t optlen = sizeof (caller);
1142 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1144 char buf[256];
1146 dbg_log (_("error getting caller's id: %s"),
1147 strerror_r (errno, buf, sizeof (buf)));
1148 break;
1151 uid = caller.uid;
1152 #else
1153 /* Some systems have no SO_PEERCRED implementation. They don't
1154 care about security so we don't as well. */
1155 uid = 0;
1156 #endif
1159 /* Accept shutdown, getstat and invalidate only from root. For
1160 the stat call also allow the user specified in the config file. */
1161 if (req->type == GETSTAT)
1163 if (uid == 0 || uid == stat_uid)
1164 send_stats (fd, dbs);
1166 else if (uid == 0)
1168 if (req->type == INVALIDATE)
1169 invalidate_cache (key, fd);
1170 else
1171 termination_handler (0);
1173 break;
1175 case GETFDPW:
1176 case GETFDGR:
1177 case GETFDHST:
1178 case GETFDSERV:
1179 #ifdef SCM_RIGHTS
1180 send_ro_fd (reqinfo[req->type].db, key, fd);
1181 #endif
1182 break;
1184 default:
1185 /* Ignore the command, it's nothing we know. */
1186 break;
1191 /* Restart the process. */
1192 static void
1193 restart (void)
1195 /* First determine the parameters. We do not use the parameters
1196 passed to main() since in case nscd is started by running the
1197 dynamic linker this will not work. Yes, this is not the usual
1198 case but nscd is part of glibc and we occasionally do this. */
1199 size_t buflen = 1024;
1200 char *buf = alloca (buflen);
1201 size_t readlen = 0;
1202 int fd = open ("/proc/self/cmdline", O_RDONLY);
1203 if (fd == -1)
1205 dbg_log (_("\
1206 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1207 strerror (errno));
1209 paranoia = 0;
1210 return;
1213 while (1)
1215 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1216 buflen - readlen));
1217 if (n == -1)
1219 dbg_log (_("\
1220 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1221 strerror (errno));
1223 close (fd);
1224 paranoia = 0;
1225 return;
1228 readlen += n;
1230 if (readlen < buflen)
1231 break;
1233 /* We might have to extend the buffer. */
1234 size_t old_buflen = buflen;
1235 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1236 buf = memmove (newp, buf, old_buflen);
1239 close (fd);
1241 /* Parse the command line. Worst case scenario: every two
1242 characters form one parameter (one character plus NUL). */
1243 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1244 int argc = 0;
1246 char *cp = buf;
1247 while (cp < buf + readlen)
1249 argv[argc++] = cp;
1250 cp = (char *) rawmemchr (cp, '\0') + 1;
1252 argv[argc] = NULL;
1254 /* Second, change back to the old user if we changed it. */
1255 if (server_user != NULL)
1257 if (setresuid (old_uid, old_uid, old_uid) != 0)
1259 dbg_log (_("\
1260 cannot change to old UID: %s; disabling paranoia mode"),
1261 strerror (errno));
1263 paranoia = 0;
1264 return;
1267 if (setresgid (old_gid, old_gid, old_gid) != 0)
1269 dbg_log (_("\
1270 cannot change to old GID: %s; disabling paranoia mode"),
1271 strerror (errno));
1273 setuid (server_uid);
1274 paranoia = 0;
1275 return;
1279 /* Next change back to the old working directory. */
1280 if (chdir (oldcwd) == -1)
1282 dbg_log (_("\
1283 cannot change to old working directory: %s; disabling paranoia mode"),
1284 strerror (errno));
1286 if (server_user != NULL)
1288 setuid (server_uid);
1289 setgid (server_gid);
1291 paranoia = 0;
1292 return;
1295 /* Synchronize memory. */
1296 for (int cnt = 0; cnt < lastdb; ++cnt)
1297 if (!dbs[cnt].enabled)
1299 /* Make sure nobody keeps using the database. */
1300 dbs[cnt].head->timestamp = 0;
1302 if (dbs[cnt].persistent)
1303 // XXX async OK?
1304 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1307 /* The preparations are done. */
1308 execv ("/proc/self/exe", argv);
1310 /* If we come here, we will never be able to re-exec. */
1311 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1312 strerror (errno));
1314 if (server_user != NULL)
1316 setuid (server_uid);
1317 setgid (server_gid);
1319 if (chdir ("/") != 0)
1320 dbg_log (_("cannot change current working directory to \"/\": %s"),
1321 strerror (errno));
1322 paranoia = 0;
/* List of file descriptors.  The entries are linked into a circular
   list through NEXT; an entry whose NEXT is NULL is unused.  */
struct fdlist
{
  int fd;
  struct fdlist *next;
};
/* Memory allocated for the list.  */
static struct fdlist *fdlist;
/* List of currently ready-to-read file descriptors.  */
static struct fdlist *readylist;

/* Conditional variable and mutex to signal availability of entries in
   READYLIST.  The condvar is initialized dynamically since we might
   use a different clock depending on availability.  */
static pthread_cond_t readylist_cond;
static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;

/* The clock to use with the condvar.  */
static clockid_t timeout_clock = CLOCK_REALTIME;

/* Number of threads ready to handle the READYLIST.  */
static unsigned long int nready;
1350 /* This is the main loop. It is replicated in different threads but the
1351 `poll' call makes sure only one thread handles an incoming connection. */
1352 static void *
1353 __attribute__ ((__noreturn__))
1354 nscd_run (void *p)
1356 const long int my_number = (long int) p;
1357 const int run_prune = my_number < lastdb && dbs[my_number].enabled;
1358 struct timespec prune_ts;
1359 int to = 0;
1360 char buf[256];
1362 if (run_prune)
1364 setup_thread (&dbs[my_number]);
1366 /* We are running. */
1367 dbs[my_number].head->timestamp = time (NULL);
1369 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1370 /* Should never happen. */
1371 abort ();
1373 /* Compute timeout time. */
1374 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1377 /* Initial locking. */
1378 pthread_mutex_lock (&readylist_lock);
1380 /* One more thread available. */
1381 ++nready;
1383 while (1)
1385 while (readylist == NULL)
1387 if (run_prune)
1389 /* Wait, but not forever. */
1390 to = pthread_cond_timedwait (&readylist_cond, &readylist_lock,
1391 &prune_ts);
1393 /* If we were woken and there is no work to be done,
1394 just start pruning. */
1395 if (readylist == NULL && to == ETIMEDOUT)
1397 --nready;
1399 if (sighup_pending)
1400 goto sighup_prune;
1402 pthread_mutex_unlock (&readylist_lock);
1403 goto only_prune;
1406 else
1407 /* No need to timeout. */
1408 pthread_cond_wait (&readylist_cond, &readylist_lock);
1411 if (sighup_pending)
1413 --nready;
1414 pthread_cond_signal (&readylist_cond);
1415 sighup_prune:
1416 sighup_pending = 0;
1417 pthread_mutex_unlock (&readylist_lock);
1419 /* Prune the password database. */
1420 if (dbs[pwddb].enabled)
1421 prune_cache (&dbs[pwddb], LONG_MAX, -1);
1423 /* Prune the group database. */
1424 if (dbs[grpdb].enabled)
1425 prune_cache (&dbs[grpdb], LONG_MAX, -1);
1427 /* Prune the host database. */
1428 if (dbs[hstdb].enabled)
1429 prune_cache (&dbs[hstdb], LONG_MAX, -1);
1431 /* Re-locking. */
1432 pthread_mutex_lock (&readylist_lock);
1434 /* One more thread available. */
1435 ++nready;
1436 continue;
1439 struct fdlist *it = readylist->next;
1440 if (readylist->next == readylist)
1441 /* Just one entry on the list. */
1442 readylist = NULL;
1443 else
1444 readylist->next = it->next;
1446 /* Extract the information and mark the record ready to be used
1447 again. */
1448 int fd = it->fd;
1449 it->next = NULL;
1451 /* One more thread available. */
1452 --nready;
1454 /* We are done with the list. */
1455 pthread_mutex_unlock (&readylist_lock);
1457 /* We do not want to block on a short read or so. */
1458 int fl = fcntl (fd, F_GETFL);
1459 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1460 goto close_and_out;
1462 /* Now read the request. */
1463 request_header req;
1464 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1465 != sizeof (req), 0))
1467 /* We failed to read data. Note that this also might mean we
1468 failed because we would have blocked. */
1469 if (debug_level > 0)
1470 dbg_log (_("short read while reading request: %s"),
1471 strerror_r (errno, buf, sizeof (buf)));
1472 goto close_and_out;
1475 /* Check whether this is a valid request type. */
1476 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1477 goto close_and_out;
1479 /* Some systems have no SO_PEERCRED implementation. They don't
1480 care about security so we don't as well. */
1481 uid_t uid = -1;
1482 #ifdef SO_PEERCRED
1483 pid_t pid = 0;
1485 if (__builtin_expect (debug_level > 0, 0))
1487 struct ucred caller;
1488 socklen_t optlen = sizeof (caller);
1490 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1491 pid = caller.pid;
1493 #endif
1495 /* It should not be possible to crash the nscd with a silly
1496 request (i.e., a terribly large key). We limit the size to 1kb. */
1497 if (__builtin_expect (req.key_len, 1) < 0
1498 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1500 if (debug_level > 0)
1501 dbg_log (_("key length in request too long: %d"), req.key_len);
1503 else
1505 /* Get the key. */
1506 char keybuf[MAXKEYLEN];
1508 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1509 req.key_len))
1510 != req.key_len, 0))
1512 /* Again, this can also mean we would have blocked. */
1513 if (debug_level > 0)
1514 dbg_log (_("short read while reading request key: %s"),
1515 strerror_r (errno, buf, sizeof (buf)));
1516 goto close_and_out;
1519 if (__builtin_expect (debug_level, 0) > 0)
1521 #ifdef SO_PEERCRED
1522 if (pid != 0)
1523 dbg_log (_("\
1524 handle_request: request received (Version = %d) from PID %ld"),
1525 req.version, (long int) pid);
1526 else
1527 #endif
1528 dbg_log (_("\
1529 handle_request: request received (Version = %d)"), req.version);
1532 /* Phew, we got all the data, now process it. */
1533 handle_request (fd, &req, keybuf, uid);
1536 close_and_out:
1537 /* We are done. */
1538 close (fd);
1540 /* Check whether we should be pruning the cache. */
1541 assert (run_prune || to == 0);
1542 if (to == ETIMEDOUT)
1544 only_prune:
1545 /* The pthread_cond_timedwait() call timed out. It is time
1546 to clean up the cache. */
1547 assert (my_number < lastdb);
1548 prune_cache (&dbs[my_number], time (NULL), -1);
1550 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1551 /* Should never happen. */
1552 abort ();
1554 /* Compute next timeout time. */
1555 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1557 /* In case the list is emtpy we do not want to run the prune
1558 code right away again. */
1559 to = 0;
1562 /* Re-locking. */
1563 pthread_mutex_lock (&readylist_lock);
1565 /* One more thread available. */
1566 ++nready;
1571 static unsigned int nconns;
1573 static void
1574 fd_ready (int fd)
1576 pthread_mutex_lock (&readylist_lock);
1578 /* Find an empty entry in FDLIST. */
1579 size_t inner;
1580 for (inner = 0; inner < nconns; ++inner)
1581 if (fdlist[inner].next == NULL)
1582 break;
1583 assert (inner < nconns);
1585 fdlist[inner].fd = fd;
1587 if (readylist == NULL)
1588 readylist = fdlist[inner].next = &fdlist[inner];
1589 else
1591 fdlist[inner].next = readylist->next;
1592 readylist = readylist->next = &fdlist[inner];
1595 bool do_signal = true;
1596 if (__builtin_expect (nready == 0, 0))
1598 ++client_queued;
1599 do_signal = false;
1601 /* Try to start another thread to help out. */
1602 pthread_t th;
1603 if (nthreads < max_nthreads
1604 && pthread_create (&th, &attr, nscd_run,
1605 (void *) (long int) nthreads) == 0)
1607 /* We got another thread. */
1608 ++nthreads;
1609 /* The new thread might need a kick. */
1610 do_signal = true;
1615 pthread_mutex_unlock (&readylist_lock);
1617 /* Tell one of the worker threads there is work to do. */
1618 if (do_signal)
1619 pthread_cond_signal (&readylist_cond);
1623 /* Check whether restarting should happen. */
1624 static inline int
1625 restart_p (time_t now)
1627 return (paranoia && readylist == NULL && nready == nthreads
1628 && now >= restart_time);
1632 /* Array for times a connection was accepted. */
1633 static time_t *starttime;
1636 static void
1637 __attribute__ ((__noreturn__))
1638 main_loop_poll (void)
1640 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1641 * sizeof (conns[0]));
1643 conns[0].fd = sock;
1644 conns[0].events = POLLRDNORM;
1645 size_t nused = 1;
1646 size_t firstfree = 1;
1648 while (1)
1650 /* Wait for any event. We wait at most a couple of seconds so
1651 that we can check whether we should close any of the accepted
1652 connections since we have not received a request. */
1653 #define MAX_ACCEPT_TIMEOUT 30
1654 #define MIN_ACCEPT_TIMEOUT 5
1655 #define MAIN_THREAD_TIMEOUT \
1656 (MAX_ACCEPT_TIMEOUT * 1000 \
1657 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1659 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1661 time_t now = time (NULL);
1663 /* If there is a descriptor ready for reading or there is a new
1664 connection, process this now. */
1665 if (n > 0)
1667 if (conns[0].revents != 0)
1669 /* We have a new incoming connection. Accept the connection. */
1670 int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
1672 /* Use the descriptor if we have not reached the limit. */
1673 if (fd >= 0)
1675 if (firstfree < nconns)
1677 conns[firstfree].fd = fd;
1678 conns[firstfree].events = POLLRDNORM;
1679 starttime[firstfree] = now;
1680 if (firstfree >= nused)
1681 nused = firstfree + 1;
1684 ++firstfree;
1685 while (firstfree < nused && conns[firstfree].fd != -1);
1687 else
1688 /* We cannot use the connection so close it. */
1689 close (fd);
1692 --n;
1695 for (size_t cnt = 1; cnt < nused && n > 0; ++cnt)
1696 if (conns[cnt].revents != 0)
1698 fd_ready (conns[cnt].fd);
1700 /* Clean up the CONNS array. */
1701 conns[cnt].fd = -1;
1702 if (cnt < firstfree)
1703 firstfree = cnt;
1704 if (cnt == nused - 1)
1706 --nused;
1707 while (conns[nused - 1].fd == -1);
1709 --n;
1713 /* Now find entries which have timed out. */
1714 assert (nused > 0);
1716 /* We make the timeout length depend on the number of file
1717 descriptors currently used. */
1718 #define ACCEPT_TIMEOUT \
1719 (MAX_ACCEPT_TIMEOUT \
1720 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
1721 time_t laststart = now - ACCEPT_TIMEOUT;
1723 for (size_t cnt = nused - 1; cnt > 0; --cnt)
1725 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
1727 /* Remove the entry, it timed out. */
1728 (void) close (conns[cnt].fd);
1729 conns[cnt].fd = -1;
1731 if (cnt < firstfree)
1732 firstfree = cnt;
1733 if (cnt == nused - 1)
1735 --nused;
1736 while (conns[nused - 1].fd == -1);
1740 if (restart_p (now))
1741 restart ();
#ifdef HAVE_EPOLL
/* Main loop of the initial thread, based on epoll.

   EFD is an epoll descriptor.  The function returns only if the
   listening socket SOCK cannot be registered with it; otherwise it
   loops forever.  Accepted connections are registered with EFD and
   their accept time is stored in STARTTIME, indexed by descriptor
   number (0 means "not waiting").  A readable connection is removed
   from EFD and passed to fd_ready.  Connections which stay silent for
   longer than ACCEPT_TIMEOUT seconds are closed.  NUSED counts the
   descriptors registered with EFD and HIGHEST tracks the largest
   descriptor that may still be waiting.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  int nused = 1;
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));

            if (fd >= 0)
              {
                /* Try to add the new descriptor.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
        else
          {
            /* Remove the descriptor from the epoll descriptor.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (cnt != sock && starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;

            /* The connection is no longer registered.  Without this
               NUSED only ever grows on timeouts, which shortens
               ACCEPT_TIMEOUT and MAIN_THREAD_TIMEOUT more and more.  */
            --nused;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
1840 /* Start all the threads we want. The initial process is thread no. 1. */
1841 void
1842 start_threads (void)
1844 /* Initialize the conditional variable we will use. The only
1845 non-standard attribute we might use is the clock selection. */
1846 pthread_condattr_t condattr;
1847 pthread_condattr_init (&condattr);
1849 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
1850 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
1851 /* Determine whether the monotonous clock is available. */
1852 struct timespec dummy;
1853 # if _POSIX_MONOTONIC_CLOCK == 0
1854 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
1855 # endif
1856 # if _POSIX_CLOCK_SELECTION == 0
1857 if (sysconf (_SC_CLOCK_SELECTION) > 0)
1858 # endif
1859 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
1860 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
1861 timeout_clock = CLOCK_MONOTONIC;
1862 #endif
1864 pthread_cond_init (&readylist_cond, &condattr);
1865 pthread_condattr_destroy (&condattr);
1868 /* Create the attribute for the threads. They are all created
1869 detached. */
1870 pthread_attr_init (&attr);
1871 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
1872 /* Use 1MB stacks, twice as much for 64-bit architectures. */
1873 pthread_attr_setstacksize (&attr, 1024 * 1024 * (sizeof (void *) / 4));
1875 /* We allow less than LASTDB threads only for debugging. */
1876 if (debug_level == 0)
1877 nthreads = MAX (nthreads, lastdb);
1879 int nfailed = 0;
1880 for (long int i = 0; i < nthreads; ++i)
1882 pthread_t th;
1883 if (pthread_create (&th, &attr, nscd_run, (void *) (i - nfailed)) != 0)
1884 ++nfailed;
1886 if (nthreads - nfailed < lastdb)
1888 /* We could not start enough threads. */
1889 dbg_log (_("could only start %d threads; terminating"),
1890 nthreads - nfailed);
1891 exit (1);
1894 /* Determine how much room for descriptors we should initially
1895 allocate. This might need to change later if we cap the number
1896 with MAXCONN. */
1897 const long int nfds = sysconf (_SC_OPEN_MAX);
1898 #define MINCONN 32
1899 #define MAXCONN 16384
1900 if (nfds == -1 || nfds > MAXCONN)
1901 nconns = MAXCONN;
1902 else if (nfds < MINCONN)
1903 nconns = MINCONN;
1904 else
1905 nconns = nfds;
1907 /* We need memory to pass descriptors on to the worker threads. */
1908 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
1909 /* Array to keep track when connection was accepted. */
1910 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
1912 /* In the main thread we execute the loop which handles incoming
1913 connections. */
1914 #ifdef HAVE_EPOLL
1915 int efd = epoll_create (100);
1916 if (efd != -1)
1918 main_loop_epoll (efd);
1919 close (efd);
1921 #endif
1923 main_loop_poll ();
1927 /* Look up the uid, gid, and supplementary groups to run nscd as. When
1928 this function is called, we are not listening on the nscd socket yet so
1929 we can just use the ordinary lookup functions without causing a lockup */
1930 static void
1931 begin_drop_privileges (void)
1933 struct passwd *pwd = getpwnam (server_user);
1935 if (pwd == NULL)
1937 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1938 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
1939 server_user);
1942 server_uid = pwd->pw_uid;
1943 server_gid = pwd->pw_gid;
1945 /* Save the old UID/GID if we have to change back. */
1946 if (paranoia)
1948 old_uid = getuid ();
1949 old_gid = getgid ();
1952 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
1954 /* This really must never happen. */
1955 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1956 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
1959 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
1961 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
1962 == -1)
1964 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1965 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
1970 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
1971 run nscd as the user specified in the configuration file. */
1972 static void
1973 finish_drop_privileges (void)
1975 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
1976 /* We need to preserve the capabilities to connect to the audit daemon. */
1977 cap_t new_caps = preserve_capabilities ();
1978 #endif
1980 if (setgroups (server_ngroups, server_groups) == -1)
1982 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1983 error (EXIT_FAILURE, errno, _("setgroups failed"));
1986 int res;
1987 if (paranoia)
1988 res = setresgid (server_gid, server_gid, old_gid);
1989 else
1990 res = setgid (server_gid);
1991 if (res == -1)
1993 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1994 perror ("setgid");
1995 exit (4);
1998 if (paranoia)
1999 res = setresuid (server_uid, server_uid, old_uid);
2000 else
2001 res = setuid (server_uid);
2002 if (res == -1)
2004 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2005 perror ("setuid");
2006 exit (4);
2009 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2010 /* Remove the temporary capabilities. */
2011 install_real_capabilities (new_caps);
2012 #endif
2015 /* Handle the HUP signal which will force a dump of the cache */
2016 void
2017 sighup_handler (int signum)
2019 sighup_pending = 1;