[BZ #5979]
[glibc.git] / nscd / connections.c
blob5da5e5f08a6ccff87f60cab0067c7ab8b0f85fe8
/* Inner loops of cache daemon.
   Copyright (C) 1998-2007, 2008 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
20 #include <alloca.h>
21 #include <assert.h>
22 #include <atomic.h>
23 #include <error.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <grp.h>
27 #include <libintl.h>
28 #include <pthread.h>
29 #include <pwd.h>
30 #include <resolv.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <arpa/inet.h>
35 #ifdef HAVE_EPOLL
36 # include <sys/epoll.h>
37 #endif
38 #include <sys/mman.h>
39 #include <sys/param.h>
40 #include <sys/poll.h>
41 #ifdef HAVE_SENDFILE
42 # include <sys/sendfile.h>
43 #endif
44 #include <sys/socket.h>
45 #include <sys/stat.h>
46 #include <sys/un.h>
48 #include "nscd.h"
49 #include "dbg_log.h"
50 #include "selinux.h"
51 #ifdef HAVE_SENDFILE
52 # include <kernel-features.h>
53 #endif
/* Wrapper functions with error checking for standard functions.  */
extern void *xmalloc (size_t n);
extern void *xcalloc (size_t n, size_t s);
extern void *xrealloc (void *o, size_t n);

/* Support to run nscd as an unprivileged user.  When server_user is
   non-NULL, nscd_init calls begin_drop_privileges before it creates the
   socket and finish_drop_privileges once the socket is listening.  */
const char *server_user;
static uid_t server_uid;
static gid_t server_gid;
/* stat_uid is the additional (non-root) user that handle_request allows
   to issue GETSTAT requests.  */
const char *stat_user;
uid_t stat_uid;
static gid_t *server_groups;
#ifndef NGROUPS
# define NGROUPS 32
#endif
static int server_ngroups;

static pthread_attr_t attr;

static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
78 /* Map request type to a string. */
79 const char *const serv2str[LASTREQ] =
81 [GETPWBYNAME] = "GETPWBYNAME",
82 [GETPWBYUID] = "GETPWBYUID",
83 [GETGRBYNAME] = "GETGRBYNAME",
84 [GETGRBYGID] = "GETGRBYGID",
85 [GETHOSTBYNAME] = "GETHOSTBYNAME",
86 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
87 [GETHOSTBYADDR] = "GETHOSTBYADDR",
88 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
89 [SHUTDOWN] = "SHUTDOWN",
90 [GETSTAT] = "GETSTAT",
91 [INVALIDATE] = "INVALIDATE",
92 [GETFDPW] = "GETFDPW",
93 [GETFDGR] = "GETFDGR",
94 [GETFDHST] = "GETFDHST",
95 [GETAI] = "GETAI",
96 [INITGROUPS] = "INITGROUPS",
97 [GETSERVBYNAME] = "GETSERVBYNAME",
98 [GETSERVBYPORT] = "GETSERVBYPORT",
99 [GETFDSERV] = "GETFDSERV"
102 /* The control data structures for the services. */
103 struct database_dyn dbs[lastdb] =
105 [pwddb] = {
106 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
107 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
108 .enabled = 0,
109 .check_file = 1,
110 .persistent = 0,
111 .propagate = 1,
112 .shared = 0,
113 .max_db_size = DEFAULT_MAX_DB_SIZE,
114 .suggested_module = DEFAULT_SUGGESTED_MODULE,
115 .reset_res = 0,
116 .filename = "/etc/passwd",
117 .db_filename = _PATH_NSCD_PASSWD_DB,
118 .disabled_iov = &pwd_iov_disabled,
119 .postimeout = 3600,
120 .negtimeout = 20,
121 .wr_fd = -1,
122 .ro_fd = -1,
123 .mmap_used = false
125 [grpdb] = {
126 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
127 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
128 .enabled = 0,
129 .check_file = 1,
130 .persistent = 0,
131 .propagate = 1,
132 .shared = 0,
133 .max_db_size = DEFAULT_MAX_DB_SIZE,
134 .suggested_module = DEFAULT_SUGGESTED_MODULE,
135 .reset_res = 0,
136 .filename = "/etc/group",
137 .db_filename = _PATH_NSCD_GROUP_DB,
138 .disabled_iov = &grp_iov_disabled,
139 .postimeout = 3600,
140 .negtimeout = 60,
141 .wr_fd = -1,
142 .ro_fd = -1,
143 .mmap_used = false
145 [hstdb] = {
146 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
147 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
148 .enabled = 0,
149 .check_file = 1,
150 .persistent = 0,
151 .propagate = 0, /* Not used. */
152 .shared = 0,
153 .max_db_size = DEFAULT_MAX_DB_SIZE,
154 .suggested_module = DEFAULT_SUGGESTED_MODULE,
155 .reset_res = 1,
156 .filename = "/etc/hosts",
157 .db_filename = _PATH_NSCD_HOSTS_DB,
158 .disabled_iov = &hst_iov_disabled,
159 .postimeout = 3600,
160 .negtimeout = 20,
161 .wr_fd = -1,
162 .ro_fd = -1,
163 .mmap_used = false
165 [servdb] = {
166 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
167 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
168 .enabled = 0,
169 .check_file = 1,
170 .persistent = 0,
171 .propagate = 0, /* Not used. */
172 .shared = 0,
173 .max_db_size = DEFAULT_MAX_DB_SIZE,
174 .suggested_module = DEFAULT_SUGGESTED_MODULE,
175 .reset_res = 0,
176 .filename = "/etc/services",
177 .db_filename = _PATH_NSCD_SERVICES_DB,
178 .disabled_iov = &serv_iov_disabled,
179 .postimeout = 28800,
180 .negtimeout = 20,
181 .wr_fd = -1,
182 .ro_fd = -1,
183 .mmap_used = false
188 /* Mapping of request type to database. */
189 static struct
191 bool data_request;
192 struct database_dyn *db;
193 } const reqinfo[LASTREQ] =
195 [GETPWBYNAME] = { true, &dbs[pwddb] },
196 [GETPWBYUID] = { true, &dbs[pwddb] },
197 [GETGRBYNAME] = { true, &dbs[grpdb] },
198 [GETGRBYGID] = { true, &dbs[grpdb] },
199 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
200 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
201 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
202 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
203 [SHUTDOWN] = { false, NULL },
204 [GETSTAT] = { false, NULL },
205 [SHUTDOWN] = { false, NULL },
206 [GETFDPW] = { false, &dbs[pwddb] },
207 [GETFDGR] = { false, &dbs[grpdb] },
208 [GETFDHST] = { false, &dbs[hstdb] },
209 [GETAI] = { true, &dbs[hstdb] },
210 [INITGROUPS] = { true, &dbs[grpdb] },
211 [GETSERVBYNAME] = { true, &dbs[servdb] },
212 [GETSERVBYPORT] = { true, &dbs[servdb] },
213 [GETFDSERV] = { false, &dbs[servdb] }
/* Initial number of threads to use.  The value -1 means that nothing was
   configured; nscd_init then picks a default.  */
int nthreads = -1;
/* Maximum number of threads to use.  */
int max_nthreads = 32;

/* Socket for incoming connections.  Created by nscd_init and closed by
   close_sockets.  */
static int sock;

/* Number of times clients had to wait.  */
unsigned long int client_queued;
/* Send all LEN bytes starting at BUF over the socket FD.

   send is repeated (and restarted when interrupted by a signal) until
   everything has been transmitted, send reports that nothing was written,
   or an error occurs.  MSG_NOSIGNAL is used so that a vanished peer does
   not raise SIGPIPE.

   Returns the negative send result if the last call failed (errno is set
   by send), otherwise the number of bytes actually sent, which is smaller
   than LEN only when send returned zero.  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cursor = buf;
  size_t left = len;
  ssize_t ret;

  do
    {
      ret = TEMP_FAILURE_RETRY (send (fd, cursor, left, MSG_NOSIGNAL));
      if (ret <= 0)
	break;
      cursor += ret;
      left -= ret;
    }
  while (left > 0);

  return ret < 0 ? ret : (ssize_t) (len - left);
}
#ifdef HAVE_SENDFILE
/* Transfer LEN bytes, starting at offset OFF of the file FROMFD, to the
   descriptor TOFD using sendfile.

   sendfile advances OFF itself; the call is repeated (and restarted when
   interrupted by a signal) until everything has been transferred,
   sendfile reports that nothing was transferred, or an error occurs.

   Returns the negative sendfile result if the last call failed,
   otherwise the number of bytes actually transferred.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t left = len;
  ssize_t ret;

  do
    {
      ret = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, left));
      if (ret <= 0)
	break;
      left -= ret;
    }
  while (left > 0);

  return ret < 0 ? ret : (ssize_t) (len - left);
}
#endif
/* Markers stored in the per-byte usage map which verify_persistent_db
   builds through check_use.  The low bits say what kind of object
   occupies a byte (hash entry, key, data); use_begin and use_end mark the
   first and last byte of an object, and use_first is set on the start
   marker when a hash entry with the `first' flag references the
   object.  */
enum usekey
  {
    use_not = 0,
    /* The following three are not really used, they are symbolic constants.  */
    use_first = 16,
    use_begin = 32,
    use_end = 64,

    use_he = 1,
    use_he_begin = use_he | use_begin,
    use_he_end = use_he | use_end,
#if SEPARATE_KEY
    use_key = 2,
    use_key_begin = use_key | use_begin,
    use_key_end = use_key | use_end,
    use_key_first = use_key_begin | use_first,
#endif
    use_data = 3,
    use_data_begin = use_data | use_begin,
    use_data_end = use_data | use_end,
    use_data_first = use_data_begin | use_first
  };
291 static int
292 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
293 enum usekey use, ref_t start, size_t len)
295 assert (len >= 2);
297 if (start > first_free || start + len > first_free
298 || (start & BLOCK_ALIGN_M1))
299 return 0;
301 if (usemap[start] == use_not)
303 /* Add the start marker. */
304 usemap[start] = use | use_begin;
305 use &= ~use_first;
307 while (--len > 0)
308 if (usemap[++start] != use_not)
309 return 0;
310 else
311 usemap[start] = use;
313 /* Add the end marker. */
314 usemap[start] = use | use_end;
316 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
318 /* Hash entries can't be shared. */
319 if (use == use_he)
320 return 0;
322 usemap[start] |= (use & use_first);
323 use &= ~use_first;
325 while (--len > 1)
326 if (usemap[++start] != use)
327 return 0;
329 if (usemap[++start] != (use | use_end))
330 return 0;
332 else
333 /* Points to a wrong object or somewhere in the middle. */
334 return 0;
336 return 1;
340 /* Verify data in persistent database. */
341 static int
342 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
344 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb);
346 time_t now = time (NULL);
348 struct database_pers_head *head = mem;
349 struct database_pers_head head_copy = *head;
351 /* Check that the header that was read matches the head in the database. */
352 if (memcmp (head, readhead, sizeof (*head)) != 0)
353 return 0;
355 /* First some easy tests: make sure the database header is sane. */
356 if (head->version != DB_VERSION
357 || head->header_size != sizeof (*head)
358 /* We allow a timestamp to be one hour ahead of the current time.
359 This should cover daylight saving time changes. */
360 || head->timestamp > now + 60 * 60 + 60
361 || (head->gc_cycle & 1)
362 || head->module == 0
363 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
364 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
365 || head->first_free < 0
366 || head->first_free > head->data_size
367 || (head->first_free & BLOCK_ALIGN_M1) != 0
368 || head->maxnentries < 0
369 || head->maxnsearched < 0)
370 return 0;
372 uint8_t *usemap = calloc (head->first_free, 1);
373 if (usemap == NULL)
374 return 0;
376 const char *data = (char *) &head->array[roundup (head->module,
377 ALIGN / sizeof (ref_t))];
379 nscd_ssize_t he_cnt = 0;
380 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
382 ref_t trail = head->array[cnt];
383 ref_t work = trail;
384 int tick = 0;
386 while (work != ENDREF)
388 if (! check_use (data, head->first_free, usemap, use_he, work,
389 sizeof (struct hashentry)))
390 goto fail;
392 /* Now we know we can dereference the record. */
393 struct hashentry *here = (struct hashentry *) (data + work);
395 ++he_cnt;
397 /* Make sure the record is for this type of service. */
398 if (here->type >= LASTREQ
399 || reqinfo[here->type].db != &dbs[dbnr])
400 goto fail;
402 /* Validate boolean field value. */
403 if (here->first != false && here->first != true)
404 goto fail;
406 if (here->len < 0)
407 goto fail;
409 /* Now the data. */
410 if (here->packet < 0
411 || here->packet > head->first_free
412 || here->packet + sizeof (struct datahead) > head->first_free)
413 goto fail;
415 struct datahead *dh = (struct datahead *) (data + here->packet);
417 if (! check_use (data, head->first_free, usemap,
418 use_data | (here->first ? use_first : 0),
419 here->packet, dh->allocsize))
420 goto fail;
422 if (dh->allocsize < sizeof (struct datahead)
423 || dh->recsize > dh->allocsize
424 || (dh->notfound != false && dh->notfound != true)
425 || (dh->usable != false && dh->usable != true))
426 goto fail;
428 if (here->key < here->packet + sizeof (struct datahead)
429 || here->key > here->packet + dh->allocsize
430 || here->key + here->len > here->packet + dh->allocsize)
432 #if SEPARATE_KEY
433 /* If keys can appear outside of data, this should be done
434 instead. But gc doesn't mark the data in that case. */
435 if (! check_use (data, head->first_free, usemap,
436 use_key | (here->first ? use_first : 0),
437 here->key, here->len))
438 #endif
439 goto fail;
442 work = here->next;
444 if (work == trail)
445 /* A circular list, this must not happen. */
446 goto fail;
447 if (tick)
448 trail = ((struct hashentry *) (data + trail))->next;
449 tick = 1 - tick;
453 if (he_cnt != head->nentries)
454 goto fail;
456 /* See if all data and keys had at least one reference from
457 he->first == true hashentry. */
458 for (ref_t idx = 0; idx < head->first_free; ++idx)
460 #if SEPARATE_KEY
461 if (usemap[idx] == use_key_begin)
462 goto fail;
463 #endif
464 if (usemap[idx] == use_data_begin)
465 goto fail;
468 /* Finally, make sure the database hasn't changed since the first test. */
469 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
470 goto fail;
472 free (usemap);
473 return 1;
475 fail:
476 free (usemap);
477 return 0;
/* Extra flags for the open calls in nscd_init: request close-on-exec
   atomically where O_CLOEXEC is available, nothing otherwise.  */
#ifdef O_CLOEXEC
# define EXTRA_O_FLAGS O_CLOEXEC
#else
# define EXTRA_O_FLAGS 0
#endif
488 /* Initialize database information structures. */
489 void
490 nscd_init (void)
492 /* Look up unprivileged uid/gid/groups before we start listening on the
493 socket */
494 if (server_user != NULL)
495 begin_drop_privileges ();
497 if (nthreads == -1)
498 /* No configuration for this value, assume a default. */
499 nthreads = 4;
501 for (size_t cnt = 0; cnt < lastdb; ++cnt)
502 if (dbs[cnt].enabled)
504 pthread_rwlock_init (&dbs[cnt].lock, NULL);
505 pthread_mutex_init (&dbs[cnt].memlock, NULL);
507 if (dbs[cnt].persistent)
509 /* Try to open the appropriate file on disk. */
510 int fd = open (dbs[cnt].db_filename, O_RDWR | EXTRA_O_FLAGS);
511 if (fd != -1)
513 char *msg = NULL;
514 struct stat64 st;
515 void *mem;
516 size_t total;
517 struct database_pers_head head;
518 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
519 sizeof (head)));
520 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
522 fail_db_errno:
523 /* The code is single-threaded at this point so
524 using strerror is just fine. */
525 msg = strerror (errno);
526 fail_db:
527 dbg_log (_("invalid persistent database file \"%s\": %s"),
528 dbs[cnt].db_filename, msg);
529 unlink (dbs[cnt].db_filename);
531 else if (head.module == 0 && head.data_size == 0)
533 /* The file has been created, but the head has not
534 been initialized yet. */
535 msg = _("uninitialized header");
536 goto fail_db;
538 else if (head.header_size != (int) sizeof (head))
540 msg = _("header size does not match");
541 goto fail_db;
543 else if ((total = (sizeof (head)
544 + roundup (head.module * sizeof (ref_t),
545 ALIGN)
546 + head.data_size))
547 > st.st_size
548 || total < sizeof (head))
550 msg = _("file size does not match");
551 goto fail_db;
553 /* Note we map with the maximum size allowed for the
554 database. This is likely much larger than the
555 actual file size. This is OK on most OSes since
556 extensions of the underlying file will
557 automatically translate more pages available for
558 memory access. */
559 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
560 PROT_READ | PROT_WRITE,
561 MAP_SHARED, fd, 0))
562 == MAP_FAILED)
563 goto fail_db_errno;
564 else if (!verify_persistent_db (mem, &head, cnt))
566 munmap (mem, total);
567 msg = _("verification failed");
568 goto fail_db;
570 else
572 /* Success. We have the database. */
573 dbs[cnt].head = mem;
574 dbs[cnt].memsize = total;
575 dbs[cnt].data = (char *)
576 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
577 ALIGN / sizeof (ref_t))];
578 dbs[cnt].mmap_used = true;
580 if (dbs[cnt].suggested_module > head.module)
581 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
582 dbnames[cnt]);
584 dbs[cnt].wr_fd = fd;
585 fd = -1;
586 /* We also need a read-only descriptor. */
587 if (dbs[cnt].shared)
589 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
590 O_RDONLY | EXTRA_O_FLAGS);
591 if (dbs[cnt].ro_fd == -1)
592 dbg_log (_("\
593 cannot create read-only descriptor for \"%s\"; no mmap"),
594 dbs[cnt].db_filename);
597 // XXX Shall we test whether the descriptors actually
598 // XXX point to the same file?
601 /* Close the file descriptors in case something went
602 wrong in which case the variable have not been
603 assigned -1. */
604 if (fd != -1)
605 close (fd);
609 if (dbs[cnt].head == NULL)
611 /* No database loaded. Allocate the data structure,
612 possibly on disk. */
613 struct database_pers_head head;
614 size_t total = (sizeof (head)
615 + roundup (dbs[cnt].suggested_module
616 * sizeof (ref_t), ALIGN)
617 + (dbs[cnt].suggested_module
618 * DEFAULT_DATASIZE_PER_BUCKET));
620 /* Try to create the database. If we do not need a
621 persistent database create a temporary file. */
622 int fd;
623 int ro_fd = -1;
624 if (dbs[cnt].persistent)
626 fd = open (dbs[cnt].db_filename,
627 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | EXTRA_O_FLAGS,
628 S_IRUSR | S_IWUSR);
629 if (fd != -1 && dbs[cnt].shared)
630 ro_fd = open (dbs[cnt].db_filename,
631 O_RDONLY | EXTRA_O_FLAGS);
633 else
635 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
636 fd = mkostemp (fname, EXTRA_O_FLAGS);
638 /* We do not need the file name anymore after we
639 opened another file descriptor in read-only mode. */
640 if (fd != -1)
642 if (dbs[cnt].shared)
643 ro_fd = open (fname, O_RDONLY | EXTRA_O_FLAGS);
645 unlink (fname);
649 if (fd == -1)
651 if (errno == EEXIST)
653 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
654 dbnames[cnt], dbs[cnt].db_filename);
655 // XXX Correct way to terminate?
656 exit (1);
659 if (dbs[cnt].persistent)
660 dbg_log (_("cannot create %s; no persistent database used"),
661 dbs[cnt].db_filename);
662 else
663 dbg_log (_("cannot create %s; no sharing possible"),
664 dbs[cnt].db_filename);
666 dbs[cnt].persistent = 0;
667 // XXX remember: no mmap
669 else
671 /* Tell the user if we could not create the read-only
672 descriptor. */
673 if (ro_fd == -1 && dbs[cnt].shared)
674 dbg_log (_("\
675 cannot create read-only descriptor for \"%s\"; no mmap"),
676 dbs[cnt].db_filename);
678 /* Before we create the header, initialiye the hash
679 table. So that if we get interrupted if writing
680 the header we can recognize a partially initialized
681 database. */
682 size_t ps = sysconf (_SC_PAGESIZE);
683 char tmpbuf[ps];
684 assert (~ENDREF == 0);
685 memset (tmpbuf, '\xff', ps);
687 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
688 off_t offset = sizeof (head);
690 size_t towrite;
691 if (offset % ps != 0)
693 towrite = MIN (remaining, ps - (offset % ps));
694 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
695 goto write_fail;
696 offset += towrite;
697 remaining -= towrite;
700 while (remaining > ps)
702 if (pwrite (fd, tmpbuf, ps, offset) == -1)
703 goto write_fail;
704 offset += ps;
705 remaining -= ps;
708 if (remaining > 0
709 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
710 goto write_fail;
712 /* Create the header of the file. */
713 struct database_pers_head head =
715 .version = DB_VERSION,
716 .header_size = sizeof (head),
717 .module = dbs[cnt].suggested_module,
718 .data_size = (dbs[cnt].suggested_module
719 * DEFAULT_DATASIZE_PER_BUCKET),
720 .first_free = 0
722 void *mem;
724 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
725 != sizeof (head))
726 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
727 != 0)
728 || (mem = mmap (NULL, dbs[cnt].max_db_size,
729 PROT_READ | PROT_WRITE,
730 MAP_SHARED, fd, 0)) == MAP_FAILED)
732 write_fail:
733 unlink (dbs[cnt].db_filename);
734 dbg_log (_("cannot write to database file %s: %s"),
735 dbs[cnt].db_filename, strerror (errno));
736 dbs[cnt].persistent = 0;
738 else
740 /* Success. */
741 dbs[cnt].head = mem;
742 dbs[cnt].data = (char *)
743 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
744 ALIGN / sizeof (ref_t))];
745 dbs[cnt].memsize = total;
746 dbs[cnt].mmap_used = true;
748 /* Remember the descriptors. */
749 dbs[cnt].wr_fd = fd;
750 dbs[cnt].ro_fd = ro_fd;
751 fd = -1;
752 ro_fd = -1;
755 if (fd != -1)
756 close (fd);
757 if (ro_fd != -1)
758 close (ro_fd);
762 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
763 /* We do not check here whether the O_CLOEXEC provided to the
764 open call was successful or not. The two fcntl calls are
765 only performed once each per process start-up and therefore
766 is not noticeable at all. */
767 if (paranoia
768 && ((dbs[cnt].wr_fd != -1
769 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
770 || (dbs[cnt].ro_fd != -1
771 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
773 dbg_log (_("\
774 cannot set socket to close on exec: %s; disabling paranoia mode"),
775 strerror (errno));
776 paranoia = 0;
778 #endif
780 if (dbs[cnt].head == NULL)
782 /* We do not use the persistent database. Just
783 create an in-memory data structure. */
784 assert (! dbs[cnt].persistent);
786 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
787 + (dbs[cnt].suggested_module
788 * sizeof (ref_t)));
789 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
790 assert (~ENDREF == 0);
791 memset (dbs[cnt].head->array, '\xff',
792 dbs[cnt].suggested_module * sizeof (ref_t));
793 dbs[cnt].head->module = dbs[cnt].suggested_module;
794 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
795 * dbs[cnt].head->module);
796 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
797 dbs[cnt].head->first_free = 0;
799 dbs[cnt].shared = 0;
800 assert (dbs[cnt].ro_fd == -1);
803 if (dbs[cnt].check_file)
805 /* We need the modification date of the file. */
806 struct stat64 st;
808 if (stat64 (dbs[cnt].filename, &st) < 0)
810 /* We cannot stat() the file, disable file checking. */
811 dbg_log (_("cannot stat() file `%s': %s"),
812 dbs[cnt].filename, strerror (errno));
813 dbs[cnt].check_file = 0;
815 else
816 dbs[cnt].file_mtime = st.st_mtime;
820 /* Create the socket. */
821 sock = socket (AF_UNIX, SOCK_STREAM, 0);
822 if (sock < 0)
824 dbg_log (_("cannot open socket: %s"), strerror (errno));
825 exit (errno == EACCES ? 4 : 1);
827 /* Bind a name to the socket. */
828 struct sockaddr_un sock_addr;
829 sock_addr.sun_family = AF_UNIX;
830 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
831 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
833 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
834 exit (errno == EACCES ? 4 : 1);
837 /* We don't want to get stuck on accept. */
838 int fl = fcntl (sock, F_GETFL);
839 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
841 dbg_log (_("cannot change socket to nonblocking mode: %s"),
842 strerror (errno));
843 exit (1);
846 /* The descriptor needs to be closed on exec. */
847 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
849 dbg_log (_("cannot set socket to close on exec: %s"),
850 strerror (errno));
851 exit (1);
854 /* Set permissions for the socket. */
855 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
857 /* Set the socket up to accept connections. */
858 if (listen (sock, SOMAXCONN) < 0)
860 dbg_log (_("cannot enable socket to accept connections: %s"),
861 strerror (errno));
862 exit (1);
865 /* Change to unprivileged uid/gid/groups if specifed in config file */
866 if (server_user != NULL)
867 finish_drop_privileges ();
871 /* Close the connections. */
872 void
873 close_sockets (void)
875 close (sock);
879 static void
880 invalidate_cache (char *key, int fd)
882 dbtype number;
883 int32_t resp;
885 for (number = pwddb; number < lastdb; ++number)
886 if (strcmp (key, dbnames[number]) == 0)
888 if (dbs[number].reset_res)
889 res_init ();
891 break;
894 if (number == lastdb)
896 resp = EINVAL;
897 writeall (fd, &resp, sizeof (resp));
898 return;
901 if (dbs[number].enabled)
903 pthread_mutex_lock (&dbs[number].prune_lock);
904 prune_cache (&dbs[number], LONG_MAX, fd);
905 pthread_mutex_unlock (&dbs[number].prune_lock);
907 else
909 resp = 0;
910 writeall (fd, &resp, sizeof (resp));
915 #ifdef SCM_RIGHTS
916 static void
917 send_ro_fd (struct database_dyn *db, char *key, int fd)
919 /* If we do not have an read-only file descriptor do nothing. */
920 if (db->ro_fd == -1)
921 return;
923 /* We need to send some data along with the descriptor. */
924 uint64_t mapsize = (db->head->data_size
925 + roundup (db->head->module * sizeof (ref_t), ALIGN)
926 + sizeof (struct database_pers_head));
927 struct iovec iov[2];
928 iov[0].iov_base = key;
929 iov[0].iov_len = strlen (key) + 1;
930 iov[1].iov_base = &mapsize;
931 iov[1].iov_len = sizeof (mapsize);
933 /* Prepare the control message to transfer the descriptor. */
934 union
936 struct cmsghdr hdr;
937 char bytes[CMSG_SPACE (sizeof (int))];
938 } buf;
939 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
940 .msg_control = buf.bytes,
941 .msg_controllen = sizeof (buf) };
942 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
944 cmsg->cmsg_level = SOL_SOCKET;
945 cmsg->cmsg_type = SCM_RIGHTS;
946 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
948 *(int *) CMSG_DATA (cmsg) = db->ro_fd;
950 msg.msg_controllen = cmsg->cmsg_len;
952 /* Send the control message. We repeat when we are interrupted but
953 everything else is ignored. */
954 #ifndef MSG_NOSIGNAL
955 # define MSG_NOSIGNAL 0
956 #endif
957 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
959 if (__builtin_expect (debug_level > 0, 0))
960 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
962 #endif /* SCM_RIGHTS */
965 /* Handle new request. */
966 static void
967 handle_request (int fd, request_header *req, void *key, uid_t uid)
969 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
971 if (debug_level > 0)
972 dbg_log (_("\
973 cannot handle old request version %d; current version is %d"),
974 req->version, NSCD_VERSION);
975 return;
978 /* Perform the SELinux check before we go on to the standard checks. */
979 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
981 if (debug_level > 0)
982 dbg_log (_("request not handled due to missing permission"));
983 return;
986 struct database_dyn *db = reqinfo[req->type].db;
988 /* See whether we can service the request from the cache. */
989 if (__builtin_expect (reqinfo[req->type].data_request, true))
991 if (__builtin_expect (debug_level, 0) > 0)
993 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
995 char buf[INET6_ADDRSTRLEN];
997 dbg_log ("\t%s (%s)", serv2str[req->type],
998 inet_ntop (req->type == GETHOSTBYADDR
999 ? AF_INET : AF_INET6,
1000 key, buf, sizeof (buf)));
1002 else
1003 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1006 /* Is this service enabled? */
1007 if (__builtin_expect (!db->enabled, 0))
1009 /* No, sent the prepared record. */
1010 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1011 db->disabled_iov->iov_len,
1012 MSG_NOSIGNAL))
1013 != (ssize_t) db->disabled_iov->iov_len
1014 && __builtin_expect (debug_level, 0) > 0)
1016 /* We have problems sending the result. */
1017 char buf[256];
1018 dbg_log (_("cannot write result: %s"),
1019 strerror_r (errno, buf, sizeof (buf)));
1022 return;
1025 /* Be sure we can read the data. */
1026 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
1028 ++db->head->rdlockdelayed;
1029 pthread_rwlock_rdlock (&db->lock);
1032 /* See whether we can handle it from the cache. */
1033 struct datahead *cached;
1034 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1035 db, uid);
1036 if (cached != NULL)
1038 /* Hurray it's in the cache. */
1039 ssize_t nwritten;
1041 #ifdef HAVE_SENDFILE
1042 if (__builtin_expect (db->mmap_used, 1))
1044 assert (db->wr_fd != -1);
1045 assert ((char *) cached->data > (char *) db->data);
1046 assert ((char *) cached->data - (char *) db->head
1047 + cached->recsize
1048 <= (sizeof (struct database_pers_head)
1049 + db->head->module * sizeof (ref_t)
1050 + db->head->data_size));
1051 nwritten = sendfileall (fd, db->wr_fd,
1052 (char *) cached->data
1053 - (char *) db->head, cached->recsize);
1054 # ifndef __ASSUME_SENDFILE
1055 if (nwritten == -1 && errno == ENOSYS)
1056 goto use_write;
1057 # endif
1059 else
1060 # ifndef __ASSUME_SENDFILE
1061 use_write:
1062 # endif
1063 #endif
1064 nwritten = writeall (fd, cached->data, cached->recsize);
1066 if (nwritten != cached->recsize
1067 && __builtin_expect (debug_level, 0) > 0)
1069 /* We have problems sending the result. */
1070 char buf[256];
1071 dbg_log (_("cannot write result: %s"),
1072 strerror_r (errno, buf, sizeof (buf)));
1075 pthread_rwlock_unlock (&db->lock);
1077 return;
1080 pthread_rwlock_unlock (&db->lock);
1082 else if (__builtin_expect (debug_level, 0) > 0)
1084 if (req->type == INVALIDATE)
1085 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1086 else
1087 dbg_log ("\t%s", serv2str[req->type]);
1090 /* Handle the request. */
1091 switch (req->type)
1093 case GETPWBYNAME:
1094 addpwbyname (db, fd, req, key, uid);
1095 break;
1097 case GETPWBYUID:
1098 addpwbyuid (db, fd, req, key, uid);
1099 break;
1101 case GETGRBYNAME:
1102 addgrbyname (db, fd, req, key, uid);
1103 break;
1105 case GETGRBYGID:
1106 addgrbygid (db, fd, req, key, uid);
1107 break;
1109 case GETHOSTBYNAME:
1110 addhstbyname (db, fd, req, key, uid);
1111 break;
1113 case GETHOSTBYNAMEv6:
1114 addhstbynamev6 (db, fd, req, key, uid);
1115 break;
1117 case GETHOSTBYADDR:
1118 addhstbyaddr (db, fd, req, key, uid);
1119 break;
1121 case GETHOSTBYADDRv6:
1122 addhstbyaddrv6 (db, fd, req, key, uid);
1123 break;
1125 case GETAI:
1126 addhstai (db, fd, req, key, uid);
1127 break;
1129 case INITGROUPS:
1130 addinitgroups (db, fd, req, key, uid);
1131 break;
1133 case GETSERVBYNAME:
1134 addservbyname (db, fd, req, key, uid);
1135 break;
1137 case GETSERVBYPORT:
1138 addservbyport (db, fd, req, key, uid);
1139 break;
1141 case GETSTAT:
1142 case SHUTDOWN:
1143 case INVALIDATE:
1145 /* Get the callers credentials. */
1146 #ifdef SO_PEERCRED
1147 struct ucred caller;
1148 socklen_t optlen = sizeof (caller);
1150 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1152 char buf[256];
1154 dbg_log (_("error getting caller's id: %s"),
1155 strerror_r (errno, buf, sizeof (buf)));
1156 break;
1159 uid = caller.uid;
1160 #else
1161 /* Some systems have no SO_PEERCRED implementation. They don't
1162 care about security so we don't as well. */
1163 uid = 0;
1164 #endif
1167 /* Accept shutdown, getstat and invalidate only from root. For
1168 the stat call also allow the user specified in the config file. */
1169 if (req->type == GETSTAT)
1171 if (uid == 0 || uid == stat_uid)
1172 send_stats (fd, dbs);
1174 else if (uid == 0)
1176 if (req->type == INVALIDATE)
1177 invalidate_cache (key, fd);
1178 else
1179 termination_handler (0);
1181 break;
1183 case GETFDPW:
1184 case GETFDGR:
1185 case GETFDHST:
1186 case GETFDSERV:
1187 #ifdef SCM_RIGHTS
1188 send_ro_fd (reqinfo[req->type].db, key, fd);
1189 #endif
1190 break;
1192 default:
1193 /* Ignore the command, it's nothing we know. */
1194 break;
1199 /* Restart the process. */
1200 static void
1201 restart (void)
1203 /* First determine the parameters. We do not use the parameters
1204 passed to main() since in case nscd is started by running the
1205 dynamic linker this will not work. Yes, this is not the usual
1206 case but nscd is part of glibc and we occasionally do this. */
1207 size_t buflen = 1024;
1208 char *buf = alloca (buflen);
1209 size_t readlen = 0;
1210 int fd = open ("/proc/self/cmdline", O_RDONLY);
1211 if (fd == -1)
1213 dbg_log (_("\
1214 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1215 strerror (errno));
1217 paranoia = 0;
1218 return;
1221 while (1)
1223 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1224 buflen - readlen));
1225 if (n == -1)
1227 dbg_log (_("\
1228 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1229 strerror (errno));
1231 close (fd);
1232 paranoia = 0;
1233 return;
1236 readlen += n;
1238 if (readlen < buflen)
1239 break;
1241 /* We might have to extend the buffer. */
1242 size_t old_buflen = buflen;
1243 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1244 buf = memmove (newp, buf, old_buflen);
1247 close (fd);
1249 /* Parse the command line. Worst case scenario: every two
1250 characters form one parameter (one character plus NUL). */
1251 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1252 int argc = 0;
1254 char *cp = buf;
1255 while (cp < buf + readlen)
1257 argv[argc++] = cp;
1258 cp = (char *) rawmemchr (cp, '\0') + 1;
1260 argv[argc] = NULL;
1262 /* Second, change back to the old user if we changed it. */
1263 if (server_user != NULL)
1265 if (setresuid (old_uid, old_uid, old_uid) != 0)
1267 dbg_log (_("\
1268 cannot change to old UID: %s; disabling paranoia mode"),
1269 strerror (errno));
1271 paranoia = 0;
1272 return;
1275 if (setresgid (old_gid, old_gid, old_gid) != 0)
1277 dbg_log (_("\
1278 cannot change to old GID: %s; disabling paranoia mode"),
1279 strerror (errno));
1281 setuid (server_uid);
1282 paranoia = 0;
1283 return;
1287 /* Next change back to the old working directory. */
1288 if (chdir (oldcwd) == -1)
1290 dbg_log (_("\
1291 cannot change to old working directory: %s; disabling paranoia mode"),
1292 strerror (errno));
1294 if (server_user != NULL)
1296 setuid (server_uid);
1297 setgid (server_gid);
1299 paranoia = 0;
1300 return;
1303 /* Synchronize memory. */
1304 for (int cnt = 0; cnt < lastdb; ++cnt)
1305 if (dbs[cnt].enabled)
1307 /* Make sure nobody keeps using the database. */
1308 dbs[cnt].head->timestamp = 0;
1310 if (dbs[cnt].persistent)
1311 // XXX async OK?
1312 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1315 /* The preparations are done. */
1316 execv ("/proc/self/exe", argv);
1318 /* If we come here, we will never be able to re-exec. */
1319 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1320 strerror (errno));
1322 if (server_user != NULL)
1324 setuid (server_uid);
1325 setgid (server_gid);
1327 if (chdir ("/") != 0)
1328 dbg_log (_("cannot change current working directory to \"/\": %s"),
1329 strerror (errno));
1330 paranoia = 0;
1334 /* List of file descriptors. */
1335 struct fdlist
1337 int fd;
1338 struct fdlist *next;
1340 /* Memory allocated for the list. */
1341 static struct fdlist *fdlist;
1342 /* List of currently ready-to-read file descriptors. */
1343 static struct fdlist *readylist;
1345 /* Conditional variable and mutex to signal availability of entries in
1346 READYLIST. The condvar is initialized dynamically since we might
1347 use a different clock depending on availability. */
1348 static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
1349 static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1351 /* The clock to use with the condvar. */
1352 static clockid_t timeout_clock = CLOCK_REALTIME;
1354 /* Number of threads ready to handle the READYLIST. */
1355 static unsigned long int nready;
1358 /* Function for the clean-up threads. */
1359 static void *
1360 __attribute__ ((__noreturn__))
1361 nscd_run_prune (void *p)
1363 const long int my_number = (long int) p;
1364 assert (dbs[my_number].enabled);
1366 int dont_need_update = setup_thread (&dbs[my_number]);
1368 /* We are running. */
1369 dbs[my_number].head->timestamp = time (NULL);
1371 struct timespec prune_ts;
1372 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1373 /* Should never happen. */
1374 abort ();
1376 /* Compute the initial timeout time. Prevent all the timers to go
1377 off at the same time by adding a db-based value. */
1378 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1380 pthread_mutex_lock (&dbs[my_number].prune_lock);
1381 while (1)
1383 /* Wait, but not forever. */
1384 int e = pthread_cond_timedwait (&dbs[my_number].prune_cond,
1385 &dbs[my_number].prune_lock,
1386 &prune_ts);
1387 assert (e == 0 || e == ETIMEDOUT);
1389 time_t next_wait;
1390 time_t now = time (NULL);
1391 if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time)
1393 next_wait = prune_cache (&dbs[my_number], now, -1);
1394 next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1395 /* If clients cannot determine for sure whether nscd is running
1396 we need to wake up occasionally to update the timestamp.
1397 Wait 90% of the update period. */
1398 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1399 if (__builtin_expect (! dont_need_update, 0))
1400 next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1402 /* Make it known when we will wake up again. */
1403 dbs[my_number].wakeup_time = now + next_wait;
1405 else
1406 /* The cache was just pruned. Do not do it again now. Just
1407 use the new timeout value. */
1408 next_wait = dbs[my_number].wakeup_time - now;
1410 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1411 /* Should never happen. */
1412 abort ();
1414 /* Compute next timeout time. */
1415 prune_ts.tv_sec += next_wait;
1420 /* This is the main loop. It is replicated in different threads but
1421 the the use of the ready list makes sure only one thread handles an
1422 incoming connection. */
1423 static void *
1424 __attribute__ ((__noreturn__))
1425 nscd_run_worker (void *p)
1427 char buf[256];
1429 /* Initial locking. */
1430 pthread_mutex_lock (&readylist_lock);
1432 /* One more thread available. */
1433 ++nready;
1435 while (1)
1437 while (readylist == NULL)
1438 pthread_cond_wait (&readylist_cond, &readylist_lock);
1440 struct fdlist *it = readylist->next;
1441 if (readylist->next == readylist)
1442 /* Just one entry on the list. */
1443 readylist = NULL;
1444 else
1445 readylist->next = it->next;
1447 /* Extract the information and mark the record ready to be used
1448 again. */
1449 int fd = it->fd;
1450 it->next = NULL;
1452 /* One more thread available. */
1453 --nready;
1455 /* We are done with the list. */
1456 pthread_mutex_unlock (&readylist_lock);
1458 /* We do not want to block on a short read or so. */
1459 int fl = fcntl (fd, F_GETFL);
1460 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1461 goto close_and_out;
1463 /* Now read the request. */
1464 request_header req;
1465 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1466 != sizeof (req), 0))
1468 /* We failed to read data. Note that this also might mean we
1469 failed because we would have blocked. */
1470 if (debug_level > 0)
1471 dbg_log (_("short read while reading request: %s"),
1472 strerror_r (errno, buf, sizeof (buf)));
1473 goto close_and_out;
1476 /* Check whether this is a valid request type. */
1477 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1478 goto close_and_out;
1480 /* Some systems have no SO_PEERCRED implementation. They don't
1481 care about security so we don't as well. */
1482 uid_t uid = -1;
1483 #ifdef SO_PEERCRED
1484 pid_t pid = 0;
1486 if (__builtin_expect (debug_level > 0, 0))
1488 struct ucred caller;
1489 socklen_t optlen = sizeof (caller);
1491 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1492 pid = caller.pid;
1494 #endif
1496 /* It should not be possible to crash the nscd with a silly
1497 request (i.e., a terribly large key). We limit the size to 1kb. */
1498 if (__builtin_expect (req.key_len, 1) < 0
1499 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1501 if (debug_level > 0)
1502 dbg_log (_("key length in request too long: %d"), req.key_len);
1504 else
1506 /* Get the key. */
1507 char keybuf[MAXKEYLEN];
1509 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1510 req.key_len))
1511 != req.key_len, 0))
1513 /* Again, this can also mean we would have blocked. */
1514 if (debug_level > 0)
1515 dbg_log (_("short read while reading request key: %s"),
1516 strerror_r (errno, buf, sizeof (buf)));
1517 goto close_and_out;
1520 if (__builtin_expect (debug_level, 0) > 0)
1522 #ifdef SO_PEERCRED
1523 if (pid != 0)
1524 dbg_log (_("\
1525 handle_request: request received (Version = %d) from PID %ld"),
1526 req.version, (long int) pid);
1527 else
1528 #endif
1529 dbg_log (_("\
1530 handle_request: request received (Version = %d)"), req.version);
1533 /* Phew, we got all the data, now process it. */
1534 handle_request (fd, &req, keybuf, uid);
1537 close_and_out:
1538 /* We are done. */
1539 close (fd);
1541 /* Re-locking. */
1542 pthread_mutex_lock (&readylist_lock);
1544 /* One more thread available. */
1545 ++nready;
1550 static unsigned int nconns;
1552 static void
1553 fd_ready (int fd)
1555 pthread_mutex_lock (&readylist_lock);
1557 /* Find an empty entry in FDLIST. */
1558 size_t inner;
1559 for (inner = 0; inner < nconns; ++inner)
1560 if (fdlist[inner].next == NULL)
1561 break;
1562 assert (inner < nconns);
1564 fdlist[inner].fd = fd;
1566 if (readylist == NULL)
1567 readylist = fdlist[inner].next = &fdlist[inner];
1568 else
1570 fdlist[inner].next = readylist->next;
1571 readylist = readylist->next = &fdlist[inner];
1574 bool do_signal = true;
1575 if (__builtin_expect (nready == 0, 0))
1577 ++client_queued;
1578 do_signal = false;
1580 /* Try to start another thread to help out. */
1581 pthread_t th;
1582 if (nthreads < max_nthreads
1583 && pthread_create (&th, &attr, nscd_run_worker,
1584 (void *) (long int) nthreads) == 0)
1586 /* We got another thread. */
1587 ++nthreads;
1588 /* The new thread might need a kick. */
1589 do_signal = true;
1594 pthread_mutex_unlock (&readylist_lock);
1596 /* Tell one of the worker threads there is work to do. */
1597 if (do_signal)
1598 pthread_cond_signal (&readylist_cond);
1602 /* Check whether restarting should happen. */
1603 static inline int
1604 restart_p (time_t now)
1606 return (paranoia && readylist == NULL && nready == nthreads
1607 && now >= restart_time);
1611 /* Array for times a connection was accepted. */
1612 static time_t *starttime;
1615 static void
1616 __attribute__ ((__noreturn__))
1617 main_loop_poll (void)
1619 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1620 * sizeof (conns[0]));
1622 conns[0].fd = sock;
1623 conns[0].events = POLLRDNORM;
1624 size_t nused = 1;
1625 size_t firstfree = 1;
1627 while (1)
1629 /* Wait for any event. We wait at most a couple of seconds so
1630 that we can check whether we should close any of the accepted
1631 connections since we have not received a request. */
1632 #define MAX_ACCEPT_TIMEOUT 30
1633 #define MIN_ACCEPT_TIMEOUT 5
1634 #define MAIN_THREAD_TIMEOUT \
1635 (MAX_ACCEPT_TIMEOUT * 1000 \
1636 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1638 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1640 time_t now = time (NULL);
1642 /* If there is a descriptor ready for reading or there is a new
1643 connection, process this now. */
1644 if (n > 0)
1646 if (conns[0].revents != 0)
1648 /* We have a new incoming connection. Accept the connection. */
1649 int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
1651 /* Use the descriptor if we have not reached the limit. */
1652 if (fd >= 0)
1654 if (firstfree < nconns)
1656 conns[firstfree].fd = fd;
1657 conns[firstfree].events = POLLRDNORM;
1658 starttime[firstfree] = now;
1659 if (firstfree >= nused)
1660 nused = firstfree + 1;
1663 ++firstfree;
1664 while (firstfree < nused && conns[firstfree].fd != -1);
1666 else
1667 /* We cannot use the connection so close it. */
1668 close (fd);
1671 --n;
1674 for (size_t cnt = 1; cnt < nused && n > 0; ++cnt)
1675 if (conns[cnt].revents != 0)
1677 fd_ready (conns[cnt].fd);
1679 /* Clean up the CONNS array. */
1680 conns[cnt].fd = -1;
1681 if (cnt < firstfree)
1682 firstfree = cnt;
1683 if (cnt == nused - 1)
1685 --nused;
1686 while (conns[nused - 1].fd == -1);
1688 --n;
1692 /* Now find entries which have timed out. */
1693 assert (nused > 0);
1695 /* We make the timeout length depend on the number of file
1696 descriptors currently used. */
1697 #define ACCEPT_TIMEOUT \
1698 (MAX_ACCEPT_TIMEOUT \
1699 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
1700 time_t laststart = now - ACCEPT_TIMEOUT;
1702 for (size_t cnt = nused - 1; cnt > 0; --cnt)
1704 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
1706 /* Remove the entry, it timed out. */
1707 (void) close (conns[cnt].fd);
1708 conns[cnt].fd = -1;
1710 if (cnt < firstfree)
1711 firstfree = cnt;
1712 if (cnt == nused - 1)
1714 --nused;
1715 while (conns[nused - 1].fd == -1);
1719 if (restart_p (now))
1720 restart ();
#ifdef HAVE_EPOLL
/* Main loop of the initial thread, based on epoll.  EFD is the epoll
   descriptor to use.  The function only returns if the listening
   socket cannot be registered with EFD; the caller then has to use
   another mechanism.

   STARTTIME is indexed by descriptor number here; a value of zero
   means the descriptor is not an accepted connection waiting for its
   request.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  /* Number of descriptors being watched, including the listening
     socket.  MAIN_THREAD_TIMEOUT and ACCEPT_TIMEOUT refer to this
     variable by name.  */
  int nused = 1;
  /* Upper bound for the descriptor numbers with a nonzero STARTTIME.  */
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));

            if (fd >= 0)
              {
                /* Try to add the new descriptor.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
        else
          {
            /* Remove the descriptor from the epoll descriptor.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (cnt != sock && starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;

            /* The connection is gone, so it must not be counted any
               longer.  Otherwise NUSED only ever grows when clients
               time out and the timeouts derived from it get skewed.  */
            --nused;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
1819 /* Start all the threads we want. The initial process is thread no. 1. */
1820 void
1821 start_threads (void)
1823 /* Initialize the conditional variable we will use. The only
1824 non-standard attribute we might use is the clock selection. */
1825 pthread_condattr_t condattr;
1826 pthread_condattr_init (&condattr);
1828 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
1829 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
1830 /* Determine whether the monotonous clock is available. */
1831 struct timespec dummy;
1832 # if _POSIX_MONOTONIC_CLOCK == 0
1833 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
1834 # endif
1835 # if _POSIX_CLOCK_SELECTION == 0
1836 if (sysconf (_SC_CLOCK_SELECTION) > 0)
1837 # endif
1838 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
1839 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
1840 timeout_clock = CLOCK_MONOTONIC;
1841 #endif
1843 /* Create the attribute for the threads. They are all created
1844 detached. */
1845 pthread_attr_init (&attr);
1846 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
1847 /* Use 1MB stacks, twice as much for 64-bit architectures. */
1848 pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
1850 /* We allow less than LASTDB threads only for debugging. */
1851 if (debug_level == 0)
1852 nthreads = MAX (nthreads, lastdb);
1854 /* Create the threads which prune the databases. */
1855 // XXX Ideally this work would be done by some of the worker threads.
1856 // XXX But this is problematic since we would need to be able to wake
1857 // XXX them up explicitly as well as part of the group handling the
1858 // XXX ready-list. This requires an operation where we can wait on
1859 // XXX two conditional variables at the same time. This operation
1860 // XXX does not exist (yet).
1861 for (long int i = 0; i < lastdb; ++i)
1863 /* Initialize the conditional variable. */
1864 if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
1866 dbg_log (_("could not initialize conditional variable"));
1867 exit (1);
1870 pthread_t th;
1871 if (dbs[i].enabled
1872 && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
1874 dbg_log (_("could not start clean-up thread; terminating"));
1875 exit (1);
1879 pthread_condattr_destroy (&condattr);
1881 for (long int i = 0; i < nthreads; ++i)
1883 pthread_t th;
1884 if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
1886 if (i == 0)
1888 dbg_log (_("could not start any worker thread; terminating"));
1889 exit (1);
1892 break;
1896 /* Determine how much room for descriptors we should initially
1897 allocate. This might need to change later if we cap the number
1898 with MAXCONN. */
1899 const long int nfds = sysconf (_SC_OPEN_MAX);
1900 #define MINCONN 32
1901 #define MAXCONN 16384
1902 if (nfds == -1 || nfds > MAXCONN)
1903 nconns = MAXCONN;
1904 else if (nfds < MINCONN)
1905 nconns = MINCONN;
1906 else
1907 nconns = nfds;
1909 /* We need memory to pass descriptors on to the worker threads. */
1910 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
1911 /* Array to keep track when connection was accepted. */
1912 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
1914 /* In the main thread we execute the loop which handles incoming
1915 connections. */
1916 #ifdef HAVE_EPOLL
1917 int efd = epoll_create (100);
1918 if (efd != -1)
1920 main_loop_epoll (efd);
1921 close (efd);
1923 #endif
1925 main_loop_poll ();
1929 /* Look up the uid, gid, and supplementary groups to run nscd as. When
1930 this function is called, we are not listening on the nscd socket yet so
1931 we can just use the ordinary lookup functions without causing a lockup */
1932 static void
1933 begin_drop_privileges (void)
1935 struct passwd *pwd = getpwnam (server_user);
1937 if (pwd == NULL)
1939 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1940 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
1941 server_user);
1944 server_uid = pwd->pw_uid;
1945 server_gid = pwd->pw_gid;
1947 /* Save the old UID/GID if we have to change back. */
1948 if (paranoia)
1950 old_uid = getuid ();
1951 old_gid = getgid ();
1954 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
1956 /* This really must never happen. */
1957 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1958 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
1961 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
1963 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
1964 == -1)
1966 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1967 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
1972 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
1973 run nscd as the user specified in the configuration file. */
1974 static void
1975 finish_drop_privileges (void)
1977 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
1978 /* We need to preserve the capabilities to connect to the audit daemon. */
1979 cap_t new_caps = preserve_capabilities ();
1980 #endif
1982 if (setgroups (server_ngroups, server_groups) == -1)
1984 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1985 error (EXIT_FAILURE, errno, _("setgroups failed"));
1988 int res;
1989 if (paranoia)
1990 res = setresgid (server_gid, server_gid, old_gid);
1991 else
1992 res = setgid (server_gid);
1993 if (res == -1)
1995 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1996 perror ("setgid");
1997 exit (4);
2000 if (paranoia)
2001 res = setresuid (server_uid, server_uid, old_uid);
2002 else
2003 res = setuid (server_uid);
2004 if (res == -1)
2006 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2007 perror ("setuid");
2008 exit (4);
2011 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2012 /* Remove the temporary capabilities. */
2013 install_real_capabilities (new_caps);
2014 #endif