Fix comment.
[glibc.git] / nscd / connections.c
blob1328dc055e8befe853904364a88a93eb2f02a020
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License version 2 as
8 published by the Free Software Foundation.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
19 #include <alloca.h>
20 #include <assert.h>
21 #include <atomic.h>
22 #include <error.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <grp.h>
26 #include <libintl.h>
27 #include <pthread.h>
28 #include <pwd.h>
29 #include <resolv.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <unistd.h>
33 #include <arpa/inet.h>
34 #ifdef HAVE_EPOLL
35 # include <sys/epoll.h>
36 #endif
37 #include <sys/mman.h>
38 #include <sys/param.h>
39 #include <sys/poll.h>
40 #ifdef HAVE_SENDFILE
41 # include <sys/sendfile.h>
42 #endif
43 #include <sys/socket.h>
44 #include <sys/stat.h>
45 #include <sys/un.h>
47 #include "nscd.h"
48 #include "dbg_log.h"
49 #include "selinux.h"
50 #ifdef HAVE_SENDFILE
51 # include <kernel-features.h>
52 #endif
/* File-scope declarations: allocation wrappers, the identities used when
   running unprivileged, and forward declarations for the privilege
   helpers called from nscd_init.  */
55 /* Wrapper functions with error checking for standard functions. */
56 extern void *xmalloc (size_t n);
57 extern void *xcalloc (size_t n, size_t s);
58 extern void *xrealloc (void *o, size_t n);
60 /* Support to run nscd as an unprivileged user */
/* server_user != NULL is the switch tested by nscd_init and restart;
   server_uid/server_gid are what restart switches back to when it has
   to abandon a re-exec.  */
61 const char *server_user;
62 static uid_t server_uid;
63 static gid_t server_gid;
/* stat_uid is the non-root uid that handle_request also accepts for
   GETSTAT requests.  */
64 const char *stat_user;
65 uid_t stat_uid;
66 static gid_t *server_groups;
67 #ifndef NGROUPS
68 # define NGROUPS 32
69 #endif
70 static int server_ngroups;
72 static pthread_attr_t attr;
74 static void begin_drop_privileges (void);
75 static void finish_drop_privileges (void);
/* Table indexed by request type giving the printable name of each
   request; handle_request uses it for its debug log lines.  */
77 /* Map request type to a string. */
78 const char *const serv2str[LASTREQ] =
80 [GETPWBYNAME] = "GETPWBYNAME",
81 [GETPWBYUID] = "GETPWBYUID",
82 [GETGRBYNAME] = "GETGRBYNAME",
83 [GETGRBYGID] = "GETGRBYGID",
84 [GETHOSTBYNAME] = "GETHOSTBYNAME",
85 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
86 [GETHOSTBYADDR] = "GETHOSTBYADDR",
87 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
88 [SHUTDOWN] = "SHUTDOWN",
89 [GETSTAT] = "GETSTAT",
90 [INVALIDATE] = "INVALIDATE",
91 [GETFDPW] = "GETFDPW",
92 [GETFDGR] = "GETFDGR",
93 [GETFDHST] = "GETFDHST",
94 [GETAI] = "GETAI",
95 [INITGROUPS] = "INITGROUPS",
96 [GETSERVBYNAME] = "GETSERVBYNAME",
97 [GETSERVBYPORT] = "GETSERVBYPORT",
98 [GETFDSERV] = "GETFDSERV"
/* Built-in defaults for the four caches (passwd, group, hosts, services).
   Every database starts disabled, non-persistent and non-shared, with no
   descriptors (wr_fd/ro_fd == -1) and no mapping; nscd_init fills in the
   rest for the entries that are enabled.  .head is not initialized here
   and is therefore NULL until nscd_init sets it.  */
101 /* The control data structures for the services. */
102 struct database_dyn dbs[lastdb] =
104 [pwddb] = {
105 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
106 .prunelock = PTHREAD_MUTEX_INITIALIZER,
107 .enabled = 0,
108 .check_file = 1,
109 .persistent = 0,
110 .propagate = 1,
111 .shared = 0,
112 .max_db_size = DEFAULT_MAX_DB_SIZE,
113 .reset_res = 0,
114 .filename = "/etc/passwd",
115 .db_filename = _PATH_NSCD_PASSWD_DB,
116 .disabled_iov = &pwd_iov_disabled,
117 .postimeout = 3600,
118 .negtimeout = 20,
119 .wr_fd = -1,
120 .ro_fd = -1,
121 .mmap_used = false
123 [grpdb] = {
124 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
125 .prunelock = PTHREAD_MUTEX_INITIALIZER,
126 .enabled = 0,
127 .check_file = 1,
128 .persistent = 0,
129 .propagate = 1,
130 .shared = 0,
131 .max_db_size = DEFAULT_MAX_DB_SIZE,
132 .reset_res = 0,
133 .filename = "/etc/group",
134 .db_filename = _PATH_NSCD_GROUP_DB,
135 .disabled_iov = &grp_iov_disabled,
136 .postimeout = 3600,
137 .negtimeout = 60,
138 .wr_fd = -1,
139 .ro_fd = -1,
140 .mmap_used = false
/* hstdb is the only entry with reset_res set; invalidate_cache calls
   res_init() for such a database before pruning it.  */
142 [hstdb] = {
143 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
144 .prunelock = PTHREAD_MUTEX_INITIALIZER,
145 .enabled = 0,
146 .check_file = 1,
147 .persistent = 0,
148 .propagate = 0, /* Not used. */
149 .shared = 0,
150 .max_db_size = DEFAULT_MAX_DB_SIZE,
151 .reset_res = 1,
152 .filename = "/etc/hosts",
153 .db_filename = _PATH_NSCD_HOSTS_DB,
154 .disabled_iov = &hst_iov_disabled,
155 .postimeout = 3600,
156 .negtimeout = 20,
157 .wr_fd = -1,
158 .ro_fd = -1,
159 .mmap_used = false
161 [servdb] = {
162 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
163 .prunelock = PTHREAD_MUTEX_INITIALIZER,
164 .enabled = 0,
165 .check_file = 1,
166 .persistent = 0,
167 .propagate = 0, /* Not used. */
168 .shared = 0,
169 .max_db_size = DEFAULT_MAX_DB_SIZE,
170 .reset_res = 0,
171 .filename = "/etc/services",
172 .db_filename = _PATH_NSCD_SERVICES_DB,
173 .disabled_iov = &serv_iov_disabled,
174 .postimeout = 28800,
175 .negtimeout = 20,
176 .wr_fd = -1,
177 .ro_fd = -1,
178 .mmap_used = false
183 /* Mapping of request type to database. */
184 static struct
186 bool data_request;
187 struct database_dyn *db;
188 } const reqinfo[LASTREQ] =
190 [GETPWBYNAME] = { true, &dbs[pwddb] },
191 [GETPWBYUID] = { true, &dbs[pwddb] },
192 [GETGRBYNAME] = { true, &dbs[grpdb] },
193 [GETGRBYGID] = { true, &dbs[grpdb] },
194 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
195 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
196 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
197 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
198 [SHUTDOWN] = { false, NULL },
199 [GETSTAT] = { false, NULL },
200 [SHUTDOWN] = { false, NULL },
201 [GETFDPW] = { false, &dbs[pwddb] },
202 [GETFDGR] = { false, &dbs[grpdb] },
203 [GETFDHST] = { false, &dbs[hstdb] },
204 [GETAI] = { true, &dbs[hstdb] },
205 [INITGROUPS] = { true, &dbs[grpdb] },
206 [GETSERVBYNAME] = { true, &dbs[servdb] },
207 [GETSERVBYPORT] = { true, &dbs[servdb] },
208 [GETFDSERV] = { false, &dbs[servdb] }
/* Tunables and global state of the connection handling code.  */
212 /* Number of seconds between two cache pruning runs. */
213 #define CACHE_PRUNE_INTERVAL 15
216 /* Initial number of threads to use. */
/* -1 means "not configured"; nscd_init then substitutes 2 * lastdb.  */
217 int nthreads = -1;
218 /* Maximum number of threads to use. */
219 int max_nthreads = 32;
221 /* Socket for incoming connections. */
/* Created, bound and put into listening state by nscd_init; closed by
   close_sockets.  */
222 static int sock;
224 /* Number of times clients had to wait. */
225 unsigned long int client_queued;
/* Send the LEN bytes at BUF over socket FD, continuing after partial
   sends and retrying interrupted calls.  send() is always attempted at
   least once, even for LEN == 0.

   Returns the (negative) result of a failing send(); otherwise the
   number of bytes that were transferred, which is less than LEN only if
   send() returned 0 before everything was written.  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cursor = buf;
  size_t pending = len;
  ssize_t ret;

  for (;;)
    {
      ret = TEMP_FAILURE_RETRY (send (fd, cursor, pending, MSG_NOSIGNAL));
      if (ret <= 0)
        break;

      cursor += ret;
      pending -= ret;
      if (pending == 0)
        break;
    }

  if (ret < 0)
    return ret;

  return len - pending;
}
#ifdef HAVE_SENDFILE
/* Copy LEN bytes of FROMFD, starting at offset OFF, to TOFD with
   sendfile(), continuing after partial transfers and retrying
   interrupted calls.  sendfile() is always attempted at least once.

   Returns the (negative) result of a failing sendfile(); otherwise the
   number of bytes transferred.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t pending = len;
  ssize_t ret;

  for (;;)
    {
      /* sendfile() advances OFF itself, so only the count is adjusted.  */
      ret = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, pending));
      if (ret <= 0)
        break;

      pending -= ret;
      if (pending <= 0)
        break;
    }

  if (ret < 0)
    return ret;

  return len - pending;
}
#endif
/* Markers stored in the per-byte usage map that verify_persistent_db
   builds while checking a persistent database.  The low values say what
   kind of object occupies a byte (hash entry, key, data); check_use ORs
   use_begin into the first byte of an object and use_end into its last
   byte.  use_first is additionally set on the first byte when the
   referencing hash entry has its `first' field set.  */
266 enum usekey
268 use_not = 0,
269 /* The following three are not really used, they are symbolic constants. */
270 use_first = 16,
271 use_begin = 32,
272 use_end = 64,
274 use_he = 1,
275 use_he_begin = use_he | use_begin,
276 use_he_end = use_he | use_end,
277 #if SEPARATE_KEY
278 use_key = 2,
279 use_key_begin = use_key | use_begin,
280 use_key_end = use_key | use_end,
281 use_key_first = use_key_begin | use_first,
282 #endif
283 use_data = 3,
284 use_data_begin = use_data | use_begin,
285 use_data_end = use_data | use_end,
286 use_data_first = use_data_begin | use_first
290 static int
291 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
292 enum usekey use, ref_t start, size_t len)
294 assert (len >= 2);
296 if (start > first_free || start + len > first_free
297 || (start & BLOCK_ALIGN_M1))
298 return 0;
300 if (usemap[start] == use_not)
302 /* Add the start marker. */
303 usemap[start] = use | use_begin;
304 use &= ~use_first;
306 while (--len > 0)
307 if (usemap[++start] != use_not)
308 return 0;
309 else
310 usemap[start] = use;
312 /* Add the end marker. */
313 usemap[start] = use | use_end;
315 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
317 /* Hash entries can't be shared. */
318 if (use == use_he)
319 return 0;
321 usemap[start] |= (use & use_first);
322 use &= ~use_first;
324 while (--len > 1)
325 if (usemap[++start] != use)
326 return 0;
328 if (usemap[++start] != (use | use_end))
329 return 0;
331 else
332 /* Points to a wrong object or somewhere in the middle. */
333 return 0;
335 return 1;
/* Check the consistency of a persistent database mapped at MEM.

   READHEAD, if not NULL, is the header as previously read from the file;
   it must be identical to the header found at MEM.  DBNR selects which
   database the records are expected to belong to.

   The header fields are range-checked first.  Then every hash chain is
   walked: each hash entry and the data record it references must lie
   inside the used part of the data area, must not overlap other objects
   in an inconsistent way (tracked with a byte map via check_use), and the
   key must lie inside the data record.  The number of hash entries must
   match head->nentries.

   Returns 1 if the database passed all checks, 0 otherwise (including
   when the usage map cannot be allocated).  */
339 /* Verify data in persistent database. */
340 static int
341 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
343 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb);
345 time_t now = time (NULL);
347 struct database_pers_head *head = mem;
/* Snapshot of the header, compared again at the very end to detect
   modifications made while the verification was running.  */
348 struct database_pers_head head_copy = *head;
350 /* Check that the header that was read matches the head in the database. */
351 if (readhead != NULL && memcmp (head, readhead, sizeof (*head)) != 0)
352 return 0;
354 /* First some easy tests: make sure the database header is sane. */
355 if (head->version != DB_VERSION
356 || head->header_size != sizeof (*head)
357 /* We allow a timestamp to be one hour ahead of the current time.
358 This should cover daylight saving time changes. */
359 || head->timestamp > now + 60 * 60 + 60
360 || (head->gc_cycle & 1)
361 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
362 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
363 || head->first_free < 0
364 || head->first_free > head->data_size
365 || (head->first_free & BLOCK_ALIGN_M1) != 0
366 || head->maxnentries < 0
367 || head->maxnsearched < 0)
368 return 0;
/* One marker byte per byte of the used data area; see enum usekey.  */
370 uint8_t *usemap = calloc (head->first_free, 1);
371 if (usemap == NULL)
372 return 0;
374 const char *data = (char *) &head->array[roundup (head->module,
375 ALIGN / sizeof (ref_t))];
377 nscd_ssize_t he_cnt = 0;
378 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
380 ref_t work = head->array[cnt];
382 while (work != ENDREF)
384 if (! check_use (data, head->first_free, usemap, use_he, work,
385 sizeof (struct hashentry)))
386 goto fail;
388 /* Now we know we can dereference the record. */
389 struct hashentry *here = (struct hashentry *) (data + work);
391 ++he_cnt;
393 /* Make sure the record is for this type of service. */
394 if (here->type >= LASTREQ
395 || reqinfo[here->type].db != &dbs[dbnr])
396 goto fail;
398 /* Validate boolean field value. */
399 if (here->first != false && here->first != true)
400 goto fail;
402 if (here->len < 0)
403 goto fail;
405 /* Now the data. */
406 if (here->packet < 0
407 || here->packet > head->first_free
408 || here->packet + sizeof (struct datahead) > head->first_free)
409 goto fail;
411 struct datahead *dh = (struct datahead *) (data + here->packet);
/* NOTE(review): dh->allocsize is handed to check_use before the range
   test on it a few lines below.  */
413 if (! check_use (data, head->first_free, usemap,
414 use_data | (here->first ? use_first : 0),
415 here->packet, dh->allocsize))
416 goto fail;
418 if (dh->allocsize < sizeof (struct datahead)
419 || dh->recsize > dh->allocsize
420 || (dh->notfound != false && dh->notfound != true)
421 || (dh->usable != false && dh->usable != true))
422 goto fail;
/* The key must lie inside the data record.  Without SEPARATE_KEY the
   `goto fail' below is the body of this `if'; with SEPARATE_KEY it is
   the body of the nested check_use test instead.  */
424 if (here->key < here->packet + sizeof (struct datahead)
425 || here->key > here->packet + dh->allocsize
426 || here->key + here->len > here->packet + dh->allocsize)
428 #if SEPARATE_KEY
429 /* If keys can appear outside of data, this should be done
430 instead. But gc doesn't mark the data in that case. */
431 if (! check_use (data, head->first_free, usemap,
432 use_key | (here->first ? use_first : 0),
433 here->key, here->len))
434 #endif
435 goto fail;
438 work = here->next;
442 if (he_cnt != head->nentries)
443 goto fail;
445 /* See if all data and keys had at least one reference from
446 he->first == true hashentry. */
447 for (ref_t idx = 0; idx < head->first_free; ++idx)
449 #if SEPARATE_KEY
450 if (usemap[idx] == use_key_begin)
451 goto fail;
452 #endif
453 if (usemap[idx] == use_data_begin)
454 goto fail;
457 /* Finally, make sure the database hasn't changed since the first test. */
458 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
459 goto fail;
461 free (usemap);
462 return 1;
464 fail:
465 free (usemap);
466 return 0;
470 /* Initialize database information structures. */
471 void
472 nscd_init (void)
474 /* Look up unprivileged uid/gid/groups before we start listening on the
475 socket */
476 if (server_user != NULL)
477 begin_drop_privileges ();
479 if (nthreads == -1)
480 /* No configuration for this value, assume a default. */
481 nthreads = 2 * lastdb;
483 for (size_t cnt = 0; cnt < lastdb; ++cnt)
484 if (dbs[cnt].enabled)
486 pthread_rwlock_init (&dbs[cnt].lock, NULL);
487 pthread_mutex_init (&dbs[cnt].memlock, NULL);
489 if (dbs[cnt].persistent)
491 /* Try to open the appropriate file on disk. */
492 int fd = open (dbs[cnt].db_filename, O_RDWR);
493 if (fd != -1)
495 struct stat64 st;
496 void *mem;
497 size_t total;
498 struct database_pers_head head;
499 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
500 sizeof (head)));
501 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
503 fail_db:
504 dbg_log (_("invalid persistent database file \"%s\": %s"),
505 dbs[cnt].db_filename, strerror (errno));
506 unlink (dbs[cnt].db_filename);
508 else if (head.module == 0 && head.data_size == 0)
510 /* The file has been created, but the head has not been
511 initialized yet. Remove the old file. */
512 unlink (dbs[cnt].db_filename);
514 else if (head.header_size != (int) sizeof (head))
516 dbg_log (_("invalid persistent database file \"%s\": %s"),
517 dbs[cnt].db_filename,
518 _("header size does not match"));
519 unlink (dbs[cnt].db_filename);
521 else if ((total = (sizeof (head)
522 + roundup (head.module * sizeof (ref_t),
523 ALIGN)
524 + head.data_size))
525 > st.st_size
526 || total < sizeof (head))
528 dbg_log (_("invalid persistent database file \"%s\": %s"),
529 dbs[cnt].db_filename,
530 _("file size does not match"));
531 unlink (dbs[cnt].db_filename);
533 /* Note we map with the maximum size allowed for the
534 database. This is likely much larger than the
535 actual file size. This is OK on most OSes since
536 extensions of the underlying file will
537 automatically translate more pages available for
538 memory access. */
539 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
540 PROT_READ | PROT_WRITE,
541 MAP_SHARED, fd, 0))
542 == MAP_FAILED)
543 goto fail_db;
544 else if (!verify_persistent_db (mem, &head, cnt))
546 munmap (mem, total);
547 dbg_log (_("invalid persistent database file \"%s\": %s"),
548 dbs[cnt].db_filename,
549 _("verification failed"));
550 unlink (dbs[cnt].db_filename);
552 else
554 /* Success. We have the database. */
555 dbs[cnt].head = mem;
556 dbs[cnt].memsize = total;
557 dbs[cnt].data = (char *)
558 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
559 ALIGN / sizeof (ref_t))];
560 dbs[cnt].mmap_used = true;
562 if (dbs[cnt].suggested_module > head.module)
563 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
564 dbnames[cnt]);
566 dbs[cnt].wr_fd = fd;
567 fd = -1;
568 /* We also need a read-only descriptor. */
569 if (dbs[cnt].shared)
571 dbs[cnt].ro_fd = open (dbs[cnt].db_filename, O_RDONLY);
572 if (dbs[cnt].ro_fd == -1)
573 dbg_log (_("\
574 cannot create read-only descriptor for \"%s\"; no mmap"),
575 dbs[cnt].db_filename);
578 // XXX Shall we test whether the descriptors actually
579 // XXX point to the same file?
582 /* Close the file descriptors in case something went
583 wrong in which case the variable have not been
584 assigned -1. */
585 if (fd != -1)
586 close (fd);
590 if (dbs[cnt].head == NULL)
592 /* No database loaded. Allocate the data structure,
593 possibly on disk. */
594 struct database_pers_head head;
595 size_t total = (sizeof (head)
596 + roundup (dbs[cnt].suggested_module
597 * sizeof (ref_t), ALIGN)
598 + (dbs[cnt].suggested_module
599 * DEFAULT_DATASIZE_PER_BUCKET));
601 /* Try to create the database. If we do not need a
602 persistent database create a temporary file. */
603 int fd;
604 int ro_fd = -1;
605 if (dbs[cnt].persistent)
607 fd = open (dbs[cnt].db_filename,
608 O_RDWR | O_CREAT | O_EXCL | O_TRUNC,
609 S_IRUSR | S_IWUSR);
610 if (fd != -1 && dbs[cnt].shared)
611 ro_fd = open (dbs[cnt].db_filename, O_RDONLY);
613 else
615 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
616 fd = mkstemp (fname);
618 /* We do not need the file name anymore after we
619 opened another file descriptor in read-only mode. */
620 if (fd != -1)
622 if (dbs[cnt].shared)
623 ro_fd = open (fname, O_RDONLY);
625 unlink (fname);
629 if (fd == -1)
631 if (errno == EEXIST)
633 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
634 dbnames[cnt], dbs[cnt].db_filename);
635 // XXX Correct way to terminate?
636 exit (1);
639 if (dbs[cnt].persistent)
640 dbg_log (_("cannot create %s; no persistent database used"),
641 dbs[cnt].db_filename);
642 else
643 dbg_log (_("cannot create %s; no sharing possible"),
644 dbs[cnt].db_filename);
646 dbs[cnt].persistent = 0;
647 // XXX remember: no mmap
649 else
651 /* Tell the user if we could not create the read-only
652 descriptor. */
653 if (ro_fd == -1 && dbs[cnt].shared)
654 dbg_log (_("\
655 cannot create read-only descriptor for \"%s\"; no mmap"),
656 dbs[cnt].db_filename);
658 /* Before we create the header, initialiye the hash
659 table. So that if we get interrupted if writing
660 the header we can recognize a partially initialized
661 database. */
662 size_t ps = sysconf (_SC_PAGESIZE);
663 char tmpbuf[ps];
664 assert (~ENDREF == 0);
665 memset (tmpbuf, '\xff', ps);
667 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
668 off_t offset = sizeof (head);
670 size_t towrite;
671 if (offset % ps != 0)
673 towrite = MIN (remaining, ps - (offset % ps));
674 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
675 goto write_fail;
676 offset += towrite;
677 remaining -= towrite;
680 while (remaining > ps)
682 if (pwrite (fd, tmpbuf, ps, offset) == -1)
683 goto write_fail;
684 offset += ps;
685 remaining -= ps;
688 if (remaining > 0
689 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
690 goto write_fail;
692 /* Create the header of the file. */
693 struct database_pers_head head =
695 .version = DB_VERSION,
696 .header_size = sizeof (head),
697 .module = dbs[cnt].suggested_module,
698 .data_size = (dbs[cnt].suggested_module
699 * DEFAULT_DATASIZE_PER_BUCKET),
700 .first_free = 0
702 void *mem;
704 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
705 != sizeof (head))
706 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
707 != 0)
708 || (mem = mmap (NULL, dbs[cnt].max_db_size,
709 PROT_READ | PROT_WRITE,
710 MAP_SHARED, fd, 0)) == MAP_FAILED)
712 write_fail:
713 unlink (dbs[cnt].db_filename);
714 dbg_log (_("cannot write to database file %s: %s"),
715 dbs[cnt].db_filename, strerror (errno));
716 dbs[cnt].persistent = 0;
718 else
720 /* Success. */
721 dbs[cnt].head = mem;
722 dbs[cnt].data = (char *)
723 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
724 ALIGN / sizeof (ref_t))];
725 dbs[cnt].memsize = total;
726 dbs[cnt].mmap_used = true;
728 /* Remember the descriptors. */
729 dbs[cnt].wr_fd = fd;
730 dbs[cnt].ro_fd = ro_fd;
731 fd = -1;
732 ro_fd = -1;
735 if (fd != -1)
736 close (fd);
737 if (ro_fd != -1)
738 close (ro_fd);
742 if (paranoia
743 && ((dbs[cnt].wr_fd != -1
744 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
745 || (dbs[cnt].ro_fd != -1
746 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
748 dbg_log (_("\
749 cannot set socket to close on exec: %s; disabling paranoia mode"),
750 strerror (errno));
751 paranoia = 0;
754 if (dbs[cnt].head == NULL)
756 /* We do not use the persistent database. Just
757 create an in-memory data structure. */
758 assert (! dbs[cnt].persistent);
760 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
761 + (dbs[cnt].suggested_module
762 * sizeof (ref_t)));
763 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
764 assert (~ENDREF == 0);
765 memset (dbs[cnt].head->array, '\xff',
766 dbs[cnt].suggested_module * sizeof (ref_t));
767 dbs[cnt].head->module = dbs[cnt].suggested_module;
768 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
769 * dbs[cnt].head->module);
770 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
771 dbs[cnt].head->first_free = 0;
773 dbs[cnt].shared = 0;
774 assert (dbs[cnt].ro_fd == -1);
777 if (dbs[cnt].check_file)
779 /* We need the modification date of the file. */
780 struct stat64 st;
782 if (stat64 (dbs[cnt].filename, &st) < 0)
784 /* We cannot stat() the file, disable file checking. */
785 dbg_log (_("cannot stat() file `%s': %s"),
786 dbs[cnt].filename, strerror (errno));
787 dbs[cnt].check_file = 0;
789 else
790 dbs[cnt].file_mtime = st.st_mtime;
794 /* Create the socket. */
795 sock = socket (AF_UNIX, SOCK_STREAM, 0);
796 if (sock < 0)
798 dbg_log (_("cannot open socket: %s"), strerror (errno));
799 exit (errno == EACCES ? 4 : 1);
801 /* Bind a name to the socket. */
802 struct sockaddr_un sock_addr;
803 sock_addr.sun_family = AF_UNIX;
804 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
805 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
807 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
808 exit (errno == EACCES ? 4 : 1);
811 /* We don't want to get stuck on accept. */
812 int fl = fcntl (sock, F_GETFL);
813 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
815 dbg_log (_("cannot change socket to nonblocking mode: %s"),
816 strerror (errno));
817 exit (1);
820 /* The descriptor needs to be closed on exec. */
821 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
823 dbg_log (_("cannot set socket to close on exec: %s"),
824 strerror (errno));
825 exit (1);
828 /* Set permissions for the socket. */
829 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
831 /* Set the socket up to accept connections. */
832 if (listen (sock, SOMAXCONN) < 0)
834 dbg_log (_("cannot enable socket to accept connections: %s"),
835 strerror (errno));
836 exit (1);
839 /* Change to unprivileged uid/gid/groups if specifed in config file */
840 if (server_user != NULL)
841 finish_drop_privileges ();
845 /* Close the connections. */
846 void
847 close_sockets (void)
849 close (sock);
853 static void
854 invalidate_cache (char *key, int fd)
856 dbtype number;
857 int32_t resp;
859 for (number = pwddb; number < lastdb; ++number)
860 if (strcmp (key, dbnames[number]) == 0)
862 if (dbs[number].reset_res)
863 res_init ();
865 break;
868 if (number == lastdb)
870 resp = EINVAL;
871 writeall (fd, &resp, sizeof (resp));
872 return;
875 if (dbs[number].enabled)
876 prune_cache (&dbs[number], LONG_MAX, fd);
877 else
879 resp = 0;
880 writeall (fd, &resp, sizeof (resp));
885 #ifdef SCM_RIGHTS
886 static void
887 send_ro_fd (struct database_dyn *db, char *key, int fd)
889 /* If we do not have an read-only file descriptor do nothing. */
890 if (db->ro_fd == -1)
891 return;
893 /* We need to send some data along with the descriptor. */
894 struct iovec iov[1];
895 iov[0].iov_base = key;
896 iov[0].iov_len = strlen (key) + 1;
898 /* Prepare the control message to transfer the descriptor. */
899 union
901 struct cmsghdr hdr;
902 char bytes[CMSG_SPACE (sizeof (int))];
903 } buf;
904 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 1,
905 .msg_control = buf.bytes,
906 .msg_controllen = sizeof (buf) };
907 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
909 cmsg->cmsg_level = SOL_SOCKET;
910 cmsg->cmsg_type = SCM_RIGHTS;
911 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
913 *(int *) CMSG_DATA (cmsg) = db->ro_fd;
915 msg.msg_controllen = cmsg->cmsg_len;
917 /* Send the control message. We repeat when we are interrupted but
918 everything else is ignored. */
919 #ifndef MSG_NOSIGNAL
920 # define MSG_NOSIGNAL 0
921 #endif
922 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
924 if (__builtin_expect (debug_level > 0, 0))
925 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
927 #endif /* SCM_RIGHTS */
930 /* Handle new request. */
931 static void
932 handle_request (int fd, request_header *req, void *key, uid_t uid)
934 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
936 if (debug_level > 0)
937 dbg_log (_("\
938 cannot handle old request version %d; current version is %d"),
939 req->version, NSCD_VERSION);
940 return;
943 /* Make the SELinux check before we go on to the standard checks. */
944 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
945 return;
947 struct database_dyn *db = reqinfo[req->type].db;
949 /* See whether we can service the request from the cache. */
950 if (__builtin_expect (reqinfo[req->type].data_request, true))
952 if (__builtin_expect (debug_level, 0) > 0)
954 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
956 char buf[INET6_ADDRSTRLEN];
958 dbg_log ("\t%s (%s)", serv2str[req->type],
959 inet_ntop (req->type == GETHOSTBYADDR
960 ? AF_INET : AF_INET6,
961 key, buf, sizeof (buf)));
963 else
964 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
967 /* Is this service enabled? */
968 if (__builtin_expect (!db->enabled, 0))
970 /* No, sent the prepared record. */
971 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
972 db->disabled_iov->iov_len,
973 MSG_NOSIGNAL))
974 != (ssize_t) db->disabled_iov->iov_len
975 && __builtin_expect (debug_level, 0) > 0)
977 /* We have problems sending the result. */
978 char buf[256];
979 dbg_log (_("cannot write result: %s"),
980 strerror_r (errno, buf, sizeof (buf)));
983 return;
986 /* Be sure we can read the data. */
987 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
989 ++db->head->rdlockdelayed;
990 pthread_rwlock_rdlock (&db->lock);
993 /* See whether we can handle it from the cache. */
994 struct datahead *cached;
995 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
996 db, uid);
997 if (cached != NULL)
999 /* Hurray it's in the cache. */
1000 ssize_t nwritten;
1002 #ifdef HAVE_SENDFILE
1003 if (db->mmap_used || !cached->notfound)
1005 assert (db->wr_fd != -1);
1006 assert ((char *) cached->data > (char *) db->data);
1007 assert ((char *) cached->data - (char *) db->head
1008 + cached->recsize
1009 <= (sizeof (struct database_pers_head)
1010 + db->head->module * sizeof (ref_t)
1011 + db->head->data_size));
1012 nwritten = sendfileall (fd, db->wr_fd,
1013 (char *) cached->data
1014 - (char *) db->head, cached->recsize);
1015 # ifndef __ASSUME_SENDFILE
1016 if (nwritten == -1 && errno == ENOSYS)
1017 goto use_write;
1018 # endif
1020 else
1021 # ifndef __ASSUME_SENDFILE
1022 use_write:
1023 # endif
1024 #endif
1025 nwritten = writeall (fd, cached->data, cached->recsize);
1027 if (nwritten != cached->recsize
1028 && __builtin_expect (debug_level, 0) > 0)
1030 /* We have problems sending the result. */
1031 char buf[256];
1032 dbg_log (_("cannot write result: %s"),
1033 strerror_r (errno, buf, sizeof (buf)));
1036 pthread_rwlock_unlock (&db->lock);
1038 return;
1041 pthread_rwlock_unlock (&db->lock);
1043 else if (__builtin_expect (debug_level, 0) > 0)
1045 if (req->type == INVALIDATE)
1046 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1047 else
1048 dbg_log ("\t%s", serv2str[req->type]);
1051 /* Handle the request. */
1052 switch (req->type)
1054 case GETPWBYNAME:
1055 addpwbyname (db, fd, req, key, uid);
1056 break;
1058 case GETPWBYUID:
1059 addpwbyuid (db, fd, req, key, uid);
1060 break;
1062 case GETGRBYNAME:
1063 addgrbyname (db, fd, req, key, uid);
1064 break;
1066 case GETGRBYGID:
1067 addgrbygid (db, fd, req, key, uid);
1068 break;
1070 case GETHOSTBYNAME:
1071 addhstbyname (db, fd, req, key, uid);
1072 break;
1074 case GETHOSTBYNAMEv6:
1075 addhstbynamev6 (db, fd, req, key, uid);
1076 break;
1078 case GETHOSTBYADDR:
1079 addhstbyaddr (db, fd, req, key, uid);
1080 break;
1082 case GETHOSTBYADDRv6:
1083 addhstbyaddrv6 (db, fd, req, key, uid);
1084 break;
1086 case GETAI:
1087 addhstai (db, fd, req, key, uid);
1088 break;
1090 case INITGROUPS:
1091 addinitgroups (db, fd, req, key, uid);
1092 break;
1094 case GETSERVBYNAME:
1095 addservbyname (db, fd, req, key, uid);
1096 break;
1098 case GETSERVBYPORT:
1099 addservbyport (db, fd, req, key, uid);
1100 break;
1102 case GETSTAT:
1103 case SHUTDOWN:
1104 case INVALIDATE:
1106 /* Get the callers credentials. */
1107 #ifdef SO_PEERCRED
1108 struct ucred caller;
1109 socklen_t optlen = sizeof (caller);
1111 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1113 char buf[256];
1115 dbg_log (_("error getting caller's id: %s"),
1116 strerror_r (errno, buf, sizeof (buf)));
1117 break;
1120 uid = caller.uid;
1121 #else
1122 /* Some systems have no SO_PEERCRED implementation. They don't
1123 care about security so we don't as well. */
1124 uid = 0;
1125 #endif
1128 /* Accept shutdown, getstat and invalidate only from root. For
1129 the stat call also allow the user specified in the config file. */
1130 if (req->type == GETSTAT)
1132 if (uid == 0 || uid == stat_uid)
1133 send_stats (fd, dbs);
1135 else if (uid == 0)
1137 if (req->type == INVALIDATE)
1138 invalidate_cache (key, fd);
1139 else
1140 termination_handler (0);
1142 break;
1144 case GETFDPW:
1145 case GETFDGR:
1146 case GETFDHST:
1147 case GETFDSERV:
1148 #ifdef SCM_RIGHTS
1149 send_ro_fd (reqinfo[req->type].db, key, fd);
1150 #endif
1151 break;
1153 default:
1154 /* Ignore the command, it's nothing we know. */
1155 break;
1160 /* Restart the process. */
1161 static void
1162 restart (void)
1164 /* First determine the parameters. We do not use the parameters
1165 passed to main() since in case nscd is started by running the
1166 dynamic linker this will not work. Yes, this is not the usual
1167 case but nscd is part of glibc and we occasionally do this. */
1168 size_t buflen = 1024;
1169 char *buf = alloca (buflen);
1170 size_t readlen = 0;
1171 int fd = open ("/proc/self/cmdline", O_RDONLY);
1172 if (fd == -1)
1174 dbg_log (_("\
1175 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1176 strerror (errno));
1178 paranoia = 0;
1179 return;
1182 while (1)
1184 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1185 buflen - readlen));
1186 if (n == -1)
1188 dbg_log (_("\
1189 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1190 strerror (errno));
1192 close (fd);
1193 paranoia = 0;
1194 return;
1197 readlen += n;
1199 if (readlen < buflen)
1200 break;
1202 /* We might have to extend the buffer. */
1203 size_t old_buflen = buflen;
1204 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1205 buf = memmove (newp, buf, old_buflen);
1208 close (fd);
1210 /* Parse the command line. Worst case scenario: every two
1211 characters form one parameter (one character plus NUL). */
1212 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1213 int argc = 0;
1215 char *cp = buf;
1216 while (cp < buf + readlen)
1218 argv[argc++] = cp;
1219 cp = (char *) rawmemchr (cp, '\0') + 1;
1221 argv[argc] = NULL;
1223 /* Second, change back to the old user if we changed it. */
1224 if (server_user != NULL)
1226 if (setresuid (old_uid, old_uid, old_uid) != 0)
1228 dbg_log (_("\
1229 cannot change to old UID: %s; disabling paranoia mode"),
1230 strerror (errno));
1232 paranoia = 0;
1233 return;
1236 if (setresgid (old_gid, old_gid, old_gid) != 0)
1238 dbg_log (_("\
1239 cannot change to old GID: %s; disabling paranoia mode"),
1240 strerror (errno));
1242 setuid (server_uid);
1243 paranoia = 0;
1244 return;
1248 /* Next change back to the old working directory. */
1249 if (chdir (oldcwd) == -1)
1251 dbg_log (_("\
1252 cannot change to old working directory: %s; disabling paranoia mode"),
1253 strerror (errno));
1255 if (server_user != NULL)
1257 setuid (server_uid);
1258 setgid (server_gid);
1260 paranoia = 0;
1261 return;
1264 /* Synchronize memory. */
1265 for (int cnt = 0; cnt < lastdb; ++cnt)
1267 /* Make sure nobody keeps using the database. */
1268 dbs[cnt].head->timestamp = 0;
1270 if (dbs[cnt].persistent)
1271 // XXX async OK?
1272 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1275 /* The preparations are done. */
1276 execv ("/proc/self/exe", argv);
1278 /* If we come here, we will never be able to re-exec. */
1279 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1280 strerror (errno));
1282 if (server_user != NULL)
1284 setuid (server_uid);
1285 setgid (server_gid);
1287 if (chdir ("/") != 0)
1288 dbg_log (_("cannot change current working directory to \"/\": %s"),
1289 strerror (errno));
1290 paranoia = 0;
/* Node of the list used to pass file descriptors from the main loop to
   the worker threads.  */
struct fdlist
{
  /* The descriptor carried by this node.  */
  int fd;
  /* Following node.  A NULL link marks a node which is not in use.  */
  struct fdlist *next;
};
1300 /* Memory allocated for the list. */
1301 static struct fdlist *fdlist;
1302 /* List of currently ready-to-read file descriptors. */
1303 static struct fdlist *readylist;
1305 /* Conditional variable and mutex to signal availability of entries in
1306 READYLIST. The condvar is initialized dynamically since we might
1307 use a different clock depending on availability. */
1308 static pthread_cond_t readylist_cond;
1309 static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1311 /* The clock to use with the condvar. */
1312 static clockid_t timeout_clock = CLOCK_REALTIME;
1314 /* Number of threads ready to handle the READYLIST. */
1315 static unsigned long int nready;
1318 /* This is the main loop. It is replicated in different threads but the
1319 `poll' call makes sure only one thread handles an incoming connection. */
1320 static void *
1321 __attribute__ ((__noreturn__))
1322 nscd_run (void *p)
1324 const long int my_number = (long int) p;
1325 const int run_prune = my_number < lastdb && dbs[my_number].enabled;
1326 struct timespec prune_ts;
1327 int to = 0;
1328 char buf[256];
1330 if (run_prune)
1332 setup_thread (&dbs[my_number]);
1334 /* We are running. */
1335 dbs[my_number].head->timestamp = time (NULL);
1337 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1338 /* Should never happen. */
1339 abort ();
1341 /* Compute timeout time. */
1342 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1345 /* Initial locking. */
1346 pthread_mutex_lock (&readylist_lock);
1348 /* One more thread available. */
1349 ++nready;
1351 while (1)
1353 while (readylist == NULL)
1355 if (run_prune)
1357 /* Wait, but not forever. */
1358 to = pthread_cond_timedwait (&readylist_cond, &readylist_lock,
1359 &prune_ts);
1361 /* If we were woken and there is no work to be done,
1362 just start pruning. */
1363 if (readylist == NULL && to == ETIMEDOUT)
1365 --nready;
1366 pthread_mutex_unlock (&readylist_lock);
1367 goto only_prune;
1370 else
1371 /* No need to timeout. */
1372 pthread_cond_wait (&readylist_cond, &readylist_lock);
1375 struct fdlist *it = readylist->next;
1376 if (readylist->next == readylist)
1377 /* Just one entry on the list. */
1378 readylist = NULL;
1379 else
1380 readylist->next = it->next;
1382 /* Extract the information and mark the record ready to be used
1383 again. */
1384 int fd = it->fd;
1385 it->next = NULL;
1387 /* One more thread available. */
1388 --nready;
1390 /* We are done with the list. */
1391 pthread_mutex_unlock (&readylist_lock);
1393 /* We do not want to block on a short read or so. */
1394 int fl = fcntl (fd, F_GETFL);
1395 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1396 goto close_and_out;
1398 /* Now read the request. */
1399 request_header req;
1400 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1401 != sizeof (req), 0))
1403 /* We failed to read data. Note that this also might mean we
1404 failed because we would have blocked. */
1405 if (debug_level > 0)
1406 dbg_log (_("short read while reading request: %s"),
1407 strerror_r (errno, buf, sizeof (buf)));
1408 goto close_and_out;
1411 /* Check whether this is a valid request type. */
1412 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1413 goto close_and_out;
1415 /* Some systems have no SO_PEERCRED implementation. They don't
1416 care about security so we don't as well. */
1417 uid_t uid = -1;
1418 #ifdef SO_PEERCRED
1419 pid_t pid = 0;
1421 if (__builtin_expect (debug_level > 0, 0))
1423 struct ucred caller;
1424 socklen_t optlen = sizeof (caller);
1426 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1427 pid = caller.pid;
1429 #endif
1431 /* It should not be possible to crash the nscd with a silly
1432 request (i.e., a terribly large key). We limit the size to 1kb. */
1433 if (__builtin_expect (req.key_len, 1) < 0
1434 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1436 if (debug_level > 0)
1437 dbg_log (_("key length in request too long: %d"), req.key_len);
1439 else
1441 /* Get the key. */
1442 char keybuf[MAXKEYLEN];
1444 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1445 req.key_len))
1446 != req.key_len, 0))
1448 /* Again, this can also mean we would have blocked. */
1449 if (debug_level > 0)
1450 dbg_log (_("short read while reading request key: %s"),
1451 strerror_r (errno, buf, sizeof (buf)));
1452 goto close_and_out;
1455 if (__builtin_expect (debug_level, 0) > 0)
1457 #ifdef SO_PEERCRED
1458 if (pid != 0)
1459 dbg_log (_("\
1460 handle_request: request received (Version = %d) from PID %ld"),
1461 req.version, (long int) pid);
1462 else
1463 #endif
1464 dbg_log (_("\
1465 handle_request: request received (Version = %d)"), req.version);
1468 /* Phew, we got all the data, now process it. */
1469 handle_request (fd, &req, keybuf, uid);
1472 close_and_out:
1473 /* We are done. */
1474 close (fd);
1476 /* Check whether we should be pruning the cache. */
1477 assert (run_prune || to == 0);
1478 if (to == ETIMEDOUT)
1480 only_prune:
1481 /* The pthread_cond_timedwait() call timed out. It is time
1482 to clean up the cache. */
1483 assert (my_number < lastdb);
1484 prune_cache (&dbs[my_number], time (NULL), -1);
1486 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1487 /* Should never happen. */
1488 abort ();
1490 /* Compute next timeout time. */
1491 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1493 /* In case the list is emtpy we do not want to run the prune
1494 code right away again. */
1495 to = 0;
1498 /* Re-locking. */
1499 pthread_mutex_lock (&readylist_lock);
1501 /* One more thread available. */
1502 ++nready;
1507 static unsigned int nconns;
1509 static void
1510 fd_ready (int fd)
1512 pthread_mutex_lock (&readylist_lock);
1514 /* Find an empty entry in FDLIST. */
1515 size_t inner;
1516 for (inner = 0; inner < nconns; ++inner)
1517 if (fdlist[inner].next == NULL)
1518 break;
1519 assert (inner < nconns);
1521 fdlist[inner].fd = fd;
1523 if (readylist == NULL)
1524 readylist = fdlist[inner].next = &fdlist[inner];
1525 else
1527 fdlist[inner].next = readylist->next;
1528 readylist = readylist->next = &fdlist[inner];
1531 bool do_signal = true;
1532 if (__builtin_expect (nready == 0, 0))
1534 ++client_queued;
1535 do_signal = false;
1537 /* Try to start another thread to help out. */
1538 pthread_t th;
1539 if (nthreads < max_nthreads
1540 && pthread_create (&th, &attr, nscd_run,
1541 (void *) (long int) nthreads) == 0)
1543 /* We got another thread. */
1544 ++nthreads;
1545 /* The new thread might need a kick. */
1546 do_signal = true;
1551 pthread_mutex_unlock (&readylist_lock);
1553 /* Tell one of the worker threads there is work to do. */
1554 if (do_signal)
1555 pthread_cond_signal (&readylist_cond);
1559 /* Check whether restarting should happen. */
1560 static inline int
1561 restart_p (time_t now)
1563 return (paranoia && readylist == NULL && nready == nthreads
1564 && now >= restart_time);
1568 /* Array for times a connection was accepted. */
1569 static time_t *starttime;
1572 static void
1573 __attribute__ ((__noreturn__))
1574 main_loop_poll (void)
1576 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1577 * sizeof (conns[0]));
1579 conns[0].fd = sock;
1580 conns[0].events = POLLRDNORM;
1581 size_t nused = 1;
1582 size_t firstfree = 1;
1584 while (1)
1586 /* Wait for any event. We wait at most a couple of seconds so
1587 that we can check whether we should close any of the accepted
1588 connections since we have not received a request. */
1589 #define MAX_ACCEPT_TIMEOUT 30
1590 #define MIN_ACCEPT_TIMEOUT 5
1591 #define MAIN_THREAD_TIMEOUT \
1592 (MAX_ACCEPT_TIMEOUT * 1000 \
1593 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1595 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1597 time_t now = time (NULL);
1599 /* If there is a descriptor ready for reading or there is a new
1600 connection, process this now. */
1601 if (n > 0)
1603 if (conns[0].revents != 0)
1605 /* We have a new incoming connection. Accept the connection. */
1606 int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
1608 /* Use the descriptor if we have not reached the limit. */
1609 if (fd >= 0)
1611 if (firstfree < nconns)
1613 conns[firstfree].fd = fd;
1614 conns[firstfree].events = POLLRDNORM;
1615 starttime[firstfree] = now;
1616 if (firstfree >= nused)
1617 nused = firstfree + 1;
1620 ++firstfree;
1621 while (firstfree < nused && conns[firstfree].fd != -1);
1623 else
1624 /* We cannot use the connection so close it. */
1625 close (fd);
1628 --n;
1631 for (size_t cnt = 1; cnt < nused && n > 0; ++cnt)
1632 if (conns[cnt].revents != 0)
1634 fd_ready (conns[cnt].fd);
1636 /* Clean up the CONNS array. */
1637 conns[cnt].fd = -1;
1638 if (cnt < firstfree)
1639 firstfree = cnt;
1640 if (cnt == nused - 1)
1642 --nused;
1643 while (conns[nused - 1].fd == -1);
1645 --n;
1649 /* Now find entries which have timed out. */
1650 assert (nused > 0);
1652 /* We make the timeout length depend on the number of file
1653 descriptors currently used. */
1654 #define ACCEPT_TIMEOUT \
1655 (MAX_ACCEPT_TIMEOUT \
1656 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
1657 time_t laststart = now - ACCEPT_TIMEOUT;
1659 for (size_t cnt = nused - 1; cnt > 0; --cnt)
1661 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
1663 /* Remove the entry, it timed out. */
1664 (void) close (conns[cnt].fd);
1665 conns[cnt].fd = -1;
1667 if (cnt < firstfree)
1668 firstfree = cnt;
1669 if (cnt == nused - 1)
1671 --nused;
1672 while (conns[nused - 1].fd == -1);
1676 if (restart_p (now))
1677 restart ();
#ifdef HAVE_EPOLL
/* Main loop of the initial thread, based on epoll.  EFD is the epoll
   descriptor to use.  The loop accepts new connections on SOCK, hands
   descriptors with pending data to the worker threads through fd_ready,
   closes connections on which no request arrived in time, and restarts
   the daemon when restart_p says so.  The function only returns if SOCK
   cannot be registered with EFD, i.e., if epoll cannot be used.

   MAIN_THREAD_TIMEOUT and ACCEPT_TIMEOUT are the macros defined in
   main_loop_poll; they evaluate the local variable NUSED, which
   therefore has to count the descriptors currently registered.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  int nused = 1;
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
	if (revs[cnt].data.fd == sock)
	  {
	    /* A new connection.  */
	    int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));

	    if (fd >= 0)
	      {
		/* Try to add the new descriptor.  */
		ev.data.fd = fd;
		if (fd >= nconns
		    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
		  /* The descriptor is too large or something went
		     wrong.  Close the descriptor.  */
		  close (fd);
		else
		  {
		    /* Remember when we accepted the connection.  */
		    starttime[fd] = now;

		    if (fd > highest)
		      highest = fd;

		    ++nused;
		  }
	      }
	  }
	else
	  {
	    /* Remove the descriptor from the epoll descriptor.  */
	    (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

	    /* Get a worker to handle the request.  */
	    fd_ready (revs[cnt].data.fd);

	    /* Reset the time.  */
	    starttime[revs[cnt].data.fd] = 0;
	    if (revs[cnt].data.fd == highest)
	      do
		--highest;
	      while (highest > 0 && starttime[highest] == 0);

	    --nused;
	  }

      /* Now look for descriptors for accepted connections which have
	 no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
	if (cnt != sock && starttime[cnt] != 0 && starttime[cnt] < laststart)
	  {
	    /* We are waiting for this one for too long.  Close it.  */
	    (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

	    (void) close (cnt);

	    starttime[cnt] = 0;
	    if (cnt == highest)
	      --highest;

	    /* The connection was counted when it was accepted.  Without
	       this NUSED would grow with every timed-out connection and
	       distort the timeouts computed from it.  */
	    --nused;
	  }
	else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
	  --highest;

      if (restart_p (now))
	restart ();
    }
}
#endif
1776 /* Start all the threads we want. The initial process is thread no. 1. */
1777 void
1778 start_threads (void)
1780 /* Initialize the conditional variable we will use. The only
1781 non-standard attribute we might use is the clock selection. */
1782 pthread_condattr_t condattr;
1783 pthread_condattr_init (&condattr);
1785 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
1786 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
1787 /* Determine whether the monotonous clock is available. */
1788 struct timespec dummy;
1789 # if _POSIX_MONOTONIC_CLOCK == 0
1790 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
1791 # endif
1792 # if _POSIX_CLOCK_SELECTION == 0
1793 if (sysconf (_SC_CLOCK_SELECTION) > 0)
1794 # endif
1795 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
1796 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
1797 timeout_clock = CLOCK_MONOTONIC;
1798 #endif
1800 pthread_cond_init (&readylist_cond, &condattr);
1801 pthread_condattr_destroy (&condattr);
1804 /* Create the attribute for the threads. They are all created
1805 detached. */
1806 pthread_attr_init (&attr);
1807 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
1808 /* Use 1MB stacks, twice as much for 64-bit architectures. */
1809 pthread_attr_setstacksize (&attr, 1024 * 1024 * (sizeof (void *) / 4));
1811 /* We allow less than LASTDB threads only for debugging. */
1812 if (debug_level == 0)
1813 nthreads = MAX (nthreads, lastdb);
1815 int nfailed = 0;
1816 for (long int i = 0; i < nthreads; ++i)
1818 pthread_t th;
1819 if (pthread_create (&th, &attr, nscd_run, (void *) (i - nfailed)) != 0)
1820 ++nfailed;
1822 if (nthreads - nfailed < lastdb)
1824 /* We could not start enough threads. */
1825 dbg_log (_("could only start %d threads; terminating"),
1826 nthreads - nfailed);
1827 exit (1);
1830 /* Determine how much room for descriptors we should initially
1831 allocate. This might need to change later if we cap the number
1832 with MAXCONN. */
1833 const long int nfds = sysconf (_SC_OPEN_MAX);
1834 #define MINCONN 32
1835 #define MAXCONN 16384
1836 if (nfds == -1 || nfds > MAXCONN)
1837 nconns = MAXCONN;
1838 else if (nfds < MINCONN)
1839 nconns = MINCONN;
1840 else
1841 nconns = nfds;
1843 /* We need memory to pass descriptors on to the worker threads. */
1844 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
1845 /* Array to keep track when connection was accepted. */
1846 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
1848 /* In the main thread we execute the loop which handles incoming
1849 connections. */
1850 #ifdef HAVE_EPOLL
1851 int efd = epoll_create (100);
1852 if (efd != -1)
1854 main_loop_epoll (efd);
1855 close (efd);
1857 #endif
1859 main_loop_poll ();
1863 /* Look up the uid, gid, and supplementary groups to run nscd as. When
1864 this function is called, we are not listening on the nscd socket yet so
1865 we can just use the ordinary lookup functions without causing a lockup */
1866 static void
1867 begin_drop_privileges (void)
1869 struct passwd *pwd = getpwnam (server_user);
1871 if (pwd == NULL)
1873 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1874 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
1875 server_user);
1878 server_uid = pwd->pw_uid;
1879 server_gid = pwd->pw_gid;
1881 /* Save the old UID/GID if we have to change back. */
1882 if (paranoia)
1884 old_uid = getuid ();
1885 old_gid = getgid ();
1888 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
1890 /* This really must never happen. */
1891 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1892 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
1895 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
1897 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
1898 == -1)
1900 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1901 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
1906 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
1907 run nscd as the user specified in the configuration file. */
1908 static void
1909 finish_drop_privileges (void)
1911 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
1912 /* We need to preserve the capabilities to connect to the audit daemon. */
1913 cap_t new_caps = preserve_capabilities ();
1914 #endif
1916 if (setgroups (server_ngroups, server_groups) == -1)
1918 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1919 error (EXIT_FAILURE, errno, _("setgroups failed"));
1922 int res;
1923 if (paranoia)
1924 res = setresgid (server_gid, server_gid, old_gid);
1925 else
1926 res = setgid (server_gid);
1927 if (res == -1)
1929 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1930 perror ("setgid");
1931 exit (4);
1934 if (paranoia)
1935 res = setresuid (server_uid, server_uid, old_uid);
1936 else
1937 res = setuid (server_uid);
1938 if (res == -1)
1940 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1941 perror ("setuid");
1942 exit (4);
1945 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
1946 /* Remove the temporary capabilities. */
1947 install_real_capabilities (new_caps);
1948 #endif