2007-08-06 Roland McGrath <roland@redhat.com>
[glibc.git] / nscd / connections.c
blob48e91e80fec10ef17155ea6665812683ba69da60
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20 #include <alloca.h>
21 #include <assert.h>
22 #include <atomic.h>
23 #include <error.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <grp.h>
27 #include <libintl.h>
28 #include <pthread.h>
29 #include <pwd.h>
30 #include <resolv.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <arpa/inet.h>
35 #ifdef HAVE_EPOLL
36 # include <sys/epoll.h>
37 #endif
38 #include <sys/mman.h>
39 #include <sys/param.h>
40 #include <sys/poll.h>
41 #ifdef HAVE_SENDFILE
42 # include <sys/sendfile.h>
43 #endif
44 #include <sys/socket.h>
45 #include <sys/stat.h>
46 #include <sys/un.h>
48 #include "nscd.h"
49 #include "dbg_log.h"
50 #include "selinux.h"
51 #ifdef HAVE_SENDFILE
52 # include <kernel-features.h>
53 #endif
/* Allocation wrappers with built-in error checking.  They are defined
   outside this file.  */
extern void *xmalloc (size_t n);
extern void *xcalloc (size_t n, size_t s);
extern void *xrealloc (void *o, size_t n);

/* State needed to run nscd as an unprivileged user.  SERVER_USER is
   tested against NULL in nscd_init to decide whether privileges are
   dropped at all.  */
const char *server_user;
static uid_t server_uid;
static gid_t server_gid;

/* Besides root, the user with STAT_UID may issue GETSTAT requests
   (see handle_request).  */
const char *stat_user;
uid_t stat_uid;

/* Supplementary groups of the server user.  */
static gid_t *server_groups;
#ifndef NGROUPS
# define NGROUPS 32
#endif
static int server_ngroups;

/* Thread attributes shared within this file.  */
static pthread_attr_t attr;

/* The two halves of the privilege drop; nscd_init calls the first before
   the socket is created and the second once it is listening.  */
static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
78 /* Map request type to a string. */
79 const char *const serv2str[LASTREQ] =
81 [GETPWBYNAME] = "GETPWBYNAME",
82 [GETPWBYUID] = "GETPWBYUID",
83 [GETGRBYNAME] = "GETGRBYNAME",
84 [GETGRBYGID] = "GETGRBYGID",
85 [GETHOSTBYNAME] = "GETHOSTBYNAME",
86 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
87 [GETHOSTBYADDR] = "GETHOSTBYADDR",
88 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
89 [SHUTDOWN] = "SHUTDOWN",
90 [GETSTAT] = "GETSTAT",
91 [INVALIDATE] = "INVALIDATE",
92 [GETFDPW] = "GETFDPW",
93 [GETFDGR] = "GETFDGR",
94 [GETFDHST] = "GETFDHST",
95 [GETAI] = "GETAI",
96 [INITGROUPS] = "INITGROUPS",
97 [GETSERVBYNAME] = "GETSERVBYNAME",
98 [GETSERVBYPORT] = "GETSERVBYPORT",
99 [GETFDSERV] = "GETFDSERV"
102 /* The control data structures for the services. */
103 struct database_dyn dbs[lastdb] =
105 [pwddb] = {
106 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
107 .prunelock = PTHREAD_MUTEX_INITIALIZER,
108 .enabled = 0,
109 .check_file = 1,
110 .persistent = 0,
111 .propagate = 1,
112 .shared = 0,
113 .max_db_size = DEFAULT_MAX_DB_SIZE,
114 .reset_res = 0,
115 .filename = "/etc/passwd",
116 .db_filename = _PATH_NSCD_PASSWD_DB,
117 .disabled_iov = &pwd_iov_disabled,
118 .postimeout = 3600,
119 .negtimeout = 20,
120 .wr_fd = -1,
121 .ro_fd = -1,
122 .mmap_used = false
124 [grpdb] = {
125 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
126 .prunelock = PTHREAD_MUTEX_INITIALIZER,
127 .enabled = 0,
128 .check_file = 1,
129 .persistent = 0,
130 .propagate = 1,
131 .shared = 0,
132 .max_db_size = DEFAULT_MAX_DB_SIZE,
133 .reset_res = 0,
134 .filename = "/etc/group",
135 .db_filename = _PATH_NSCD_GROUP_DB,
136 .disabled_iov = &grp_iov_disabled,
137 .postimeout = 3600,
138 .negtimeout = 60,
139 .wr_fd = -1,
140 .ro_fd = -1,
141 .mmap_used = false
143 [hstdb] = {
144 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
145 .prunelock = PTHREAD_MUTEX_INITIALIZER,
146 .enabled = 0,
147 .check_file = 1,
148 .persistent = 0,
149 .propagate = 0, /* Not used. */
150 .shared = 0,
151 .max_db_size = DEFAULT_MAX_DB_SIZE,
152 .reset_res = 1,
153 .filename = "/etc/hosts",
154 .db_filename = _PATH_NSCD_HOSTS_DB,
155 .disabled_iov = &hst_iov_disabled,
156 .postimeout = 3600,
157 .negtimeout = 20,
158 .wr_fd = -1,
159 .ro_fd = -1,
160 .mmap_used = false
162 [servdb] = {
163 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
164 .prunelock = PTHREAD_MUTEX_INITIALIZER,
165 .enabled = 0,
166 .check_file = 1,
167 .persistent = 0,
168 .propagate = 0, /* Not used. */
169 .shared = 0,
170 .max_db_size = DEFAULT_MAX_DB_SIZE,
171 .reset_res = 0,
172 .filename = "/etc/services",
173 .db_filename = _PATH_NSCD_SERVICES_DB,
174 .disabled_iov = &serv_iov_disabled,
175 .postimeout = 28800,
176 .negtimeout = 20,
177 .wr_fd = -1,
178 .ro_fd = -1,
179 .mmap_used = false
184 /* Mapping of request type to database. */
185 static struct
187 bool data_request;
188 struct database_dyn *db;
189 } const reqinfo[LASTREQ] =
191 [GETPWBYNAME] = { true, &dbs[pwddb] },
192 [GETPWBYUID] = { true, &dbs[pwddb] },
193 [GETGRBYNAME] = { true, &dbs[grpdb] },
194 [GETGRBYGID] = { true, &dbs[grpdb] },
195 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
196 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
197 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
198 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
199 [SHUTDOWN] = { false, NULL },
200 [GETSTAT] = { false, NULL },
201 [SHUTDOWN] = { false, NULL },
202 [GETFDPW] = { false, &dbs[pwddb] },
203 [GETFDGR] = { false, &dbs[grpdb] },
204 [GETFDHST] = { false, &dbs[hstdb] },
205 [GETAI] = { true, &dbs[hstdb] },
206 [INITGROUPS] = { true, &dbs[grpdb] },
207 [GETSERVBYNAME] = { true, &dbs[servdb] },
208 [GETSERVBYPORT] = { true, &dbs[servdb] },
209 [GETFDSERV] = { false, &dbs[servdb] }
/* Number of seconds between two cache pruning runs.  */
#define CACHE_PRUNE_INTERVAL	15


/* Initial number of threads to use.  -1 means "not configured";
   nscd_init then picks a default.  */
int nthreads = -1;
/* Maximum number of threads to use.  */
int max_nthreads = 32;

/* Socket for incoming connections.  */
static int sock;

/* Number of times clients had to wait.  */
unsigned long int client_queued;
/* Send all LEN bytes at BUF to the socket FD, continuing after short
   writes and restarting calls interrupted by a signal.  SIGPIPE is
   suppressed with MSG_NOSIGNAL.

   Returns the negative result of send() if a call fails, otherwise the
   number of bytes actually sent (less than LEN if send() returned 0).  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cursor = buf;
  size_t remaining = len;
  ssize_t rc;

  do
    {
      /* Retry for as long as the call is merely interrupted.  */
      do
        rc = send (fd, cursor, remaining, MSG_NOSIGNAL);
      while (rc == -1 && errno == EINTR);

      if (rc <= 0)
        break;

      cursor += rc;
      remaining -= rc;
    }
  while (remaining > 0);

  if (rc < 0)
    return rc;
  return len - remaining;
}
#ifdef HAVE_SENDFILE
/* Copy LEN bytes starting at offset OFF of FROMFD to TOFD with
   sendfile(), continuing after partial transfers and restarting
   interrupted calls.

   Returns the negative result of sendfile() if a call fails, otherwise
   the number of bytes transferred.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t remaining = len;
  ssize_t rc;

  for (;;)
    {
      /* sendfile advances OFF itself, so only the count is tracked.  */
      rc = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, remaining));
      if (rc <= 0)
        break;

      remaining -= rc;
      if (remaining <= 0)
        break;
    }

  if (rc < 0)
    return rc;
  return len - remaining;
}
#endif
/* Markers stored in the per-byte use map while a persistent database is
   verified.  The low bits say what kind of object occupies a byte; the
   flag bits mark the first and last byte of an object.  */
enum usekey
  {
    use_not = 0,

    /* Flag bits.  They are only ever or-ed into the kinds below.  */
    use_first = 0x10,
    use_begin = 0x20,
    use_end = 0x40,

    /* Hash entry.  */
    use_he = 1,
    use_he_begin = use_he | use_begin,
    use_he_end = use_he | use_end,
#if SEPARATE_KEY
    /* Key stored separately from its data record.  */
    use_key = 2,
    use_key_begin = use_key | use_begin,
    use_key_end = use_key | use_end,
    use_key_first = use_key_begin | use_first,
#endif
    /* Data record.  */
    use_data = 3,
    use_data_begin = use_data | use_begin,
    use_data_end = use_data | use_end,
    use_data_first = use_data_begin | use_first
  };
291 static int
292 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
293 enum usekey use, ref_t start, size_t len)
295 assert (len >= 2);
297 if (start > first_free || start + len > first_free
298 || (start & BLOCK_ALIGN_M1))
299 return 0;
301 if (usemap[start] == use_not)
303 /* Add the start marker. */
304 usemap[start] = use | use_begin;
305 use &= ~use_first;
307 while (--len > 0)
308 if (usemap[++start] != use_not)
309 return 0;
310 else
311 usemap[start] = use;
313 /* Add the end marker. */
314 usemap[start] = use | use_end;
316 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
318 /* Hash entries can't be shared. */
319 if (use == use_he)
320 return 0;
322 usemap[start] |= (use & use_first);
323 use &= ~use_first;
325 while (--len > 1)
326 if (usemap[++start] != use)
327 return 0;
329 if (usemap[++start] != (use | use_end))
330 return 0;
332 else
333 /* Points to a wrong object or somewhere in the middle. */
334 return 0;
336 return 1;
340 /* Verify data in persistent database. */
341 static int
342 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
344 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb);
346 time_t now = time (NULL);
348 struct database_pers_head *head = mem;
349 struct database_pers_head head_copy = *head;
351 /* Check that the header that was read matches the head in the database. */
352 if (readhead != NULL && memcmp (head, readhead, sizeof (*head)) != 0)
353 return 0;
355 /* First some easy tests: make sure the database header is sane. */
356 if (head->version != DB_VERSION
357 || head->header_size != sizeof (*head)
358 /* We allow a timestamp to be one hour ahead of the current time.
359 This should cover daylight saving time changes. */
360 || head->timestamp > now + 60 * 60 + 60
361 || (head->gc_cycle & 1)
362 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
363 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
364 || head->first_free < 0
365 || head->first_free > head->data_size
366 || (head->first_free & BLOCK_ALIGN_M1) != 0
367 || head->maxnentries < 0
368 || head->maxnsearched < 0)
369 return 0;
371 uint8_t *usemap = calloc (head->first_free, 1);
372 if (usemap == NULL)
373 return 0;
375 const char *data = (char *) &head->array[roundup (head->module,
376 ALIGN / sizeof (ref_t))];
378 nscd_ssize_t he_cnt = 0;
379 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
381 ref_t work = head->array[cnt];
383 while (work != ENDREF)
385 if (! check_use (data, head->first_free, usemap, use_he, work,
386 sizeof (struct hashentry)))
387 goto fail;
389 /* Now we know we can dereference the record. */
390 struct hashentry *here = (struct hashentry *) (data + work);
392 ++he_cnt;
394 /* Make sure the record is for this type of service. */
395 if (here->type >= LASTREQ
396 || reqinfo[here->type].db != &dbs[dbnr])
397 goto fail;
399 /* Validate boolean field value. */
400 if (here->first != false && here->first != true)
401 goto fail;
403 if (here->len < 0)
404 goto fail;
406 /* Now the data. */
407 if (here->packet < 0
408 || here->packet > head->first_free
409 || here->packet + sizeof (struct datahead) > head->first_free)
410 goto fail;
412 struct datahead *dh = (struct datahead *) (data + here->packet);
414 if (! check_use (data, head->first_free, usemap,
415 use_data | (here->first ? use_first : 0),
416 here->packet, dh->allocsize))
417 goto fail;
419 if (dh->allocsize < sizeof (struct datahead)
420 || dh->recsize > dh->allocsize
421 || (dh->notfound != false && dh->notfound != true)
422 || (dh->usable != false && dh->usable != true))
423 goto fail;
425 if (here->key < here->packet + sizeof (struct datahead)
426 || here->key > here->packet + dh->allocsize
427 || here->key + here->len > here->packet + dh->allocsize)
429 #if SEPARATE_KEY
430 /* If keys can appear outside of data, this should be done
431 instead. But gc doesn't mark the data in that case. */
432 if (! check_use (data, head->first_free, usemap,
433 use_key | (here->first ? use_first : 0),
434 here->key, here->len))
435 #endif
436 goto fail;
439 work = here->next;
443 if (he_cnt != head->nentries)
444 goto fail;
446 /* See if all data and keys had at least one reference from
447 he->first == true hashentry. */
448 for (ref_t idx = 0; idx < head->first_free; ++idx)
450 #if SEPARATE_KEY
451 if (usemap[idx] == use_key_begin)
452 goto fail;
453 #endif
454 if (usemap[idx] == use_data_begin)
455 goto fail;
458 /* Finally, make sure the database hasn't changed since the first test. */
459 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
460 goto fail;
462 free (usemap);
463 return 1;
465 fail:
466 free (usemap);
467 return 0;
471 /* Initialize database information structures. */
472 void
473 nscd_init (void)
475 /* Look up unprivileged uid/gid/groups before we start listening on the
476 socket */
477 if (server_user != NULL)
478 begin_drop_privileges ();
480 if (nthreads == -1)
481 /* No configuration for this value, assume a default. */
482 nthreads = 2 * lastdb;
484 for (size_t cnt = 0; cnt < lastdb; ++cnt)
485 if (dbs[cnt].enabled)
487 pthread_rwlock_init (&dbs[cnt].lock, NULL);
488 pthread_mutex_init (&dbs[cnt].memlock, NULL);
490 if (dbs[cnt].persistent)
492 /* Try to open the appropriate file on disk. */
493 int fd = open (dbs[cnt].db_filename, O_RDWR);
494 if (fd != -1)
496 struct stat64 st;
497 void *mem;
498 size_t total;
499 struct database_pers_head head;
500 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
501 sizeof (head)));
502 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
504 fail_db:
505 dbg_log (_("invalid persistent database file \"%s\": %s"),
506 dbs[cnt].db_filename, strerror (errno));
507 unlink (dbs[cnt].db_filename);
509 else if (head.module == 0 && head.data_size == 0)
511 /* The file has been created, but the head has not been
512 initialized yet. Remove the old file. */
513 unlink (dbs[cnt].db_filename);
515 else if (head.header_size != (int) sizeof (head))
517 dbg_log (_("invalid persistent database file \"%s\": %s"),
518 dbs[cnt].db_filename,
519 _("header size does not match"));
520 unlink (dbs[cnt].db_filename);
522 else if ((total = (sizeof (head)
523 + roundup (head.module * sizeof (ref_t),
524 ALIGN)
525 + head.data_size))
526 > st.st_size
527 || total < sizeof (head))
529 dbg_log (_("invalid persistent database file \"%s\": %s"),
530 dbs[cnt].db_filename,
531 _("file size does not match"));
532 unlink (dbs[cnt].db_filename);
534 /* Note we map with the maximum size allowed for the
535 database. This is likely much larger than the
536 actual file size. This is OK on most OSes since
537 extensions of the underlying file will
538 automatically translate more pages available for
539 memory access. */
540 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
541 PROT_READ | PROT_WRITE,
542 MAP_SHARED, fd, 0))
543 == MAP_FAILED)
544 goto fail_db;
545 else if (!verify_persistent_db (mem, &head, cnt))
547 munmap (mem, total);
548 dbg_log (_("invalid persistent database file \"%s\": %s"),
549 dbs[cnt].db_filename,
550 _("verification failed"));
551 unlink (dbs[cnt].db_filename);
553 else
555 /* Success. We have the database. */
556 dbs[cnt].head = mem;
557 dbs[cnt].memsize = total;
558 dbs[cnt].data = (char *)
559 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
560 ALIGN / sizeof (ref_t))];
561 dbs[cnt].mmap_used = true;
563 if (dbs[cnt].suggested_module > head.module)
564 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
565 dbnames[cnt]);
567 dbs[cnt].wr_fd = fd;
568 fd = -1;
569 /* We also need a read-only descriptor. */
570 if (dbs[cnt].shared)
572 dbs[cnt].ro_fd = open (dbs[cnt].db_filename, O_RDONLY);
573 if (dbs[cnt].ro_fd == -1)
574 dbg_log (_("\
575 cannot create read-only descriptor for \"%s\"; no mmap"),
576 dbs[cnt].db_filename);
579 // XXX Shall we test whether the descriptors actually
580 // XXX point to the same file?
583 /* Close the file descriptors in case something went
584 wrong in which case the variable have not been
585 assigned -1. */
586 if (fd != -1)
587 close (fd);
591 if (dbs[cnt].head == NULL)
593 /* No database loaded. Allocate the data structure,
594 possibly on disk. */
595 struct database_pers_head head;
596 size_t total = (sizeof (head)
597 + roundup (dbs[cnt].suggested_module
598 * sizeof (ref_t), ALIGN)
599 + (dbs[cnt].suggested_module
600 * DEFAULT_DATASIZE_PER_BUCKET));
602 /* Try to create the database. If we do not need a
603 persistent database create a temporary file. */
604 int fd;
605 int ro_fd = -1;
606 if (dbs[cnt].persistent)
608 fd = open (dbs[cnt].db_filename,
609 O_RDWR | O_CREAT | O_EXCL | O_TRUNC,
610 S_IRUSR | S_IWUSR);
611 if (fd != -1 && dbs[cnt].shared)
612 ro_fd = open (dbs[cnt].db_filename, O_RDONLY);
614 else
616 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
617 fd = mkstemp (fname);
619 /* We do not need the file name anymore after we
620 opened another file descriptor in read-only mode. */
621 if (fd != -1)
623 if (dbs[cnt].shared)
624 ro_fd = open (fname, O_RDONLY);
626 unlink (fname);
630 if (fd == -1)
632 if (errno == EEXIST)
634 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
635 dbnames[cnt], dbs[cnt].db_filename);
636 // XXX Correct way to terminate?
637 exit (1);
640 if (dbs[cnt].persistent)
641 dbg_log (_("cannot create %s; no persistent database used"),
642 dbs[cnt].db_filename);
643 else
644 dbg_log (_("cannot create %s; no sharing possible"),
645 dbs[cnt].db_filename);
647 dbs[cnt].persistent = 0;
648 // XXX remember: no mmap
650 else
652 /* Tell the user if we could not create the read-only
653 descriptor. */
654 if (ro_fd == -1 && dbs[cnt].shared)
655 dbg_log (_("\
656 cannot create read-only descriptor for \"%s\"; no mmap"),
657 dbs[cnt].db_filename);
659 /* Before we create the header, initialiye the hash
660 table. So that if we get interrupted if writing
661 the header we can recognize a partially initialized
662 database. */
663 size_t ps = sysconf (_SC_PAGESIZE);
664 char tmpbuf[ps];
665 assert (~ENDREF == 0);
666 memset (tmpbuf, '\xff', ps);
668 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
669 off_t offset = sizeof (head);
671 size_t towrite;
672 if (offset % ps != 0)
674 towrite = MIN (remaining, ps - (offset % ps));
675 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
676 goto write_fail;
677 offset += towrite;
678 remaining -= towrite;
681 while (remaining > ps)
683 if (pwrite (fd, tmpbuf, ps, offset) == -1)
684 goto write_fail;
685 offset += ps;
686 remaining -= ps;
689 if (remaining > 0
690 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
691 goto write_fail;
693 /* Create the header of the file. */
694 struct database_pers_head head =
696 .version = DB_VERSION,
697 .header_size = sizeof (head),
698 .module = dbs[cnt].suggested_module,
699 .data_size = (dbs[cnt].suggested_module
700 * DEFAULT_DATASIZE_PER_BUCKET),
701 .first_free = 0
703 void *mem;
705 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
706 != sizeof (head))
707 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
708 != 0)
709 || (mem = mmap (NULL, dbs[cnt].max_db_size,
710 PROT_READ | PROT_WRITE,
711 MAP_SHARED, fd, 0)) == MAP_FAILED)
713 write_fail:
714 unlink (dbs[cnt].db_filename);
715 dbg_log (_("cannot write to database file %s: %s"),
716 dbs[cnt].db_filename, strerror (errno));
717 dbs[cnt].persistent = 0;
719 else
721 /* Success. */
722 dbs[cnt].head = mem;
723 dbs[cnt].data = (char *)
724 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
725 ALIGN / sizeof (ref_t))];
726 dbs[cnt].memsize = total;
727 dbs[cnt].mmap_used = true;
729 /* Remember the descriptors. */
730 dbs[cnt].wr_fd = fd;
731 dbs[cnt].ro_fd = ro_fd;
732 fd = -1;
733 ro_fd = -1;
736 if (fd != -1)
737 close (fd);
738 if (ro_fd != -1)
739 close (ro_fd);
743 if (paranoia
744 && ((dbs[cnt].wr_fd != -1
745 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
746 || (dbs[cnt].ro_fd != -1
747 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
749 dbg_log (_("\
750 cannot set socket to close on exec: %s; disabling paranoia mode"),
751 strerror (errno));
752 paranoia = 0;
755 if (dbs[cnt].head == NULL)
757 /* We do not use the persistent database. Just
758 create an in-memory data structure. */
759 assert (! dbs[cnt].persistent);
761 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
762 + (dbs[cnt].suggested_module
763 * sizeof (ref_t)));
764 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
765 assert (~ENDREF == 0);
766 memset (dbs[cnt].head->array, '\xff',
767 dbs[cnt].suggested_module * sizeof (ref_t));
768 dbs[cnt].head->module = dbs[cnt].suggested_module;
769 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
770 * dbs[cnt].head->module);
771 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
772 dbs[cnt].head->first_free = 0;
774 dbs[cnt].shared = 0;
775 assert (dbs[cnt].ro_fd == -1);
778 if (dbs[cnt].check_file)
780 /* We need the modification date of the file. */
781 struct stat64 st;
783 if (stat64 (dbs[cnt].filename, &st) < 0)
785 /* We cannot stat() the file, disable file checking. */
786 dbg_log (_("cannot stat() file `%s': %s"),
787 dbs[cnt].filename, strerror (errno));
788 dbs[cnt].check_file = 0;
790 else
791 dbs[cnt].file_mtime = st.st_mtime;
795 /* Create the socket. */
796 sock = socket (AF_UNIX, SOCK_STREAM, 0);
797 if (sock < 0)
799 dbg_log (_("cannot open socket: %s"), strerror (errno));
800 exit (errno == EACCES ? 4 : 1);
802 /* Bind a name to the socket. */
803 struct sockaddr_un sock_addr;
804 sock_addr.sun_family = AF_UNIX;
805 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
806 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
808 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
809 exit (errno == EACCES ? 4 : 1);
812 /* We don't want to get stuck on accept. */
813 int fl = fcntl (sock, F_GETFL);
814 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
816 dbg_log (_("cannot change socket to nonblocking mode: %s"),
817 strerror (errno));
818 exit (1);
821 /* The descriptor needs to be closed on exec. */
822 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
824 dbg_log (_("cannot set socket to close on exec: %s"),
825 strerror (errno));
826 exit (1);
829 /* Set permissions for the socket. */
830 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
832 /* Set the socket up to accept connections. */
833 if (listen (sock, SOMAXCONN) < 0)
835 dbg_log (_("cannot enable socket to accept connections: %s"),
836 strerror (errno));
837 exit (1);
840 /* Change to unprivileged uid/gid/groups if specifed in config file */
841 if (server_user != NULL)
842 finish_drop_privileges ();
846 /* Close the connections. */
847 void
848 close_sockets (void)
850 close (sock);
854 static void
855 invalidate_cache (char *key, int fd)
857 dbtype number;
858 int32_t resp;
860 for (number = pwddb; number < lastdb; ++number)
861 if (strcmp (key, dbnames[number]) == 0)
863 if (dbs[number].reset_res)
864 res_init ();
866 break;
869 if (number == lastdb)
871 resp = EINVAL;
872 writeall (fd, &resp, sizeof (resp));
873 return;
876 if (dbs[number].enabled)
877 prune_cache (&dbs[number], LONG_MAX, fd);
878 else
880 resp = 0;
881 writeall (fd, &resp, sizeof (resp));
886 #ifdef SCM_RIGHTS
887 static void
888 send_ro_fd (struct database_dyn *db, char *key, int fd)
890 /* If we do not have an read-only file descriptor do nothing. */
891 if (db->ro_fd == -1)
892 return;
894 /* We need to send some data along with the descriptor. */
895 struct iovec iov[1];
896 iov[0].iov_base = key;
897 iov[0].iov_len = strlen (key) + 1;
899 /* Prepare the control message to transfer the descriptor. */
900 union
902 struct cmsghdr hdr;
903 char bytes[CMSG_SPACE (sizeof (int))];
904 } buf;
905 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 1,
906 .msg_control = buf.bytes,
907 .msg_controllen = sizeof (buf) };
908 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
910 cmsg->cmsg_level = SOL_SOCKET;
911 cmsg->cmsg_type = SCM_RIGHTS;
912 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
914 *(int *) CMSG_DATA (cmsg) = db->ro_fd;
916 msg.msg_controllen = cmsg->cmsg_len;
918 /* Send the control message. We repeat when we are interrupted but
919 everything else is ignored. */
920 #ifndef MSG_NOSIGNAL
921 # define MSG_NOSIGNAL 0
922 #endif
923 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
925 if (__builtin_expect (debug_level > 0, 0))
926 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
928 #endif /* SCM_RIGHTS */
931 /* Handle new request. */
932 static void
933 handle_request (int fd, request_header *req, void *key, uid_t uid)
935 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
937 if (debug_level > 0)
938 dbg_log (_("\
939 cannot handle old request version %d; current version is %d"),
940 req->version, NSCD_VERSION);
941 return;
944 /* Make the SELinux check before we go on to the standard checks. */
945 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
946 return;
948 struct database_dyn *db = reqinfo[req->type].db;
950 /* See whether we can service the request from the cache. */
951 if (__builtin_expect (reqinfo[req->type].data_request, true))
953 if (__builtin_expect (debug_level, 0) > 0)
955 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
957 char buf[INET6_ADDRSTRLEN];
959 dbg_log ("\t%s (%s)", serv2str[req->type],
960 inet_ntop (req->type == GETHOSTBYADDR
961 ? AF_INET : AF_INET6,
962 key, buf, sizeof (buf)));
964 else
965 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
968 /* Is this service enabled? */
969 if (__builtin_expect (!db->enabled, 0))
971 /* No, sent the prepared record. */
972 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
973 db->disabled_iov->iov_len,
974 MSG_NOSIGNAL))
975 != (ssize_t) db->disabled_iov->iov_len
976 && __builtin_expect (debug_level, 0) > 0)
978 /* We have problems sending the result. */
979 char buf[256];
980 dbg_log (_("cannot write result: %s"),
981 strerror_r (errno, buf, sizeof (buf)));
984 return;
987 /* Be sure we can read the data. */
988 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
990 ++db->head->rdlockdelayed;
991 pthread_rwlock_rdlock (&db->lock);
994 /* See whether we can handle it from the cache. */
995 struct datahead *cached;
996 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
997 db, uid);
998 if (cached != NULL)
1000 /* Hurray it's in the cache. */
1001 ssize_t nwritten;
1003 #ifdef HAVE_SENDFILE
1004 if (db->mmap_used || !cached->notfound)
1006 assert (db->wr_fd != -1);
1007 assert ((char *) cached->data > (char *) db->data);
1008 assert ((char *) cached->data - (char *) db->head
1009 + cached->recsize
1010 <= (sizeof (struct database_pers_head)
1011 + db->head->module * sizeof (ref_t)
1012 + db->head->data_size));
1013 nwritten = sendfileall (fd, db->wr_fd,
1014 (char *) cached->data
1015 - (char *) db->head, cached->recsize);
1016 # ifndef __ASSUME_SENDFILE
1017 if (nwritten == -1 && errno == ENOSYS)
1018 goto use_write;
1019 # endif
1021 else
1022 # ifndef __ASSUME_SENDFILE
1023 use_write:
1024 # endif
1025 #endif
1026 nwritten = writeall (fd, cached->data, cached->recsize);
1028 if (nwritten != cached->recsize
1029 && __builtin_expect (debug_level, 0) > 0)
1031 /* We have problems sending the result. */
1032 char buf[256];
1033 dbg_log (_("cannot write result: %s"),
1034 strerror_r (errno, buf, sizeof (buf)));
1037 pthread_rwlock_unlock (&db->lock);
1039 return;
1042 pthread_rwlock_unlock (&db->lock);
1044 else if (__builtin_expect (debug_level, 0) > 0)
1046 if (req->type == INVALIDATE)
1047 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1048 else
1049 dbg_log ("\t%s", serv2str[req->type]);
1052 /* Handle the request. */
1053 switch (req->type)
1055 case GETPWBYNAME:
1056 addpwbyname (db, fd, req, key, uid);
1057 break;
1059 case GETPWBYUID:
1060 addpwbyuid (db, fd, req, key, uid);
1061 break;
1063 case GETGRBYNAME:
1064 addgrbyname (db, fd, req, key, uid);
1065 break;
1067 case GETGRBYGID:
1068 addgrbygid (db, fd, req, key, uid);
1069 break;
1071 case GETHOSTBYNAME:
1072 addhstbyname (db, fd, req, key, uid);
1073 break;
1075 case GETHOSTBYNAMEv6:
1076 addhstbynamev6 (db, fd, req, key, uid);
1077 break;
1079 case GETHOSTBYADDR:
1080 addhstbyaddr (db, fd, req, key, uid);
1081 break;
1083 case GETHOSTBYADDRv6:
1084 addhstbyaddrv6 (db, fd, req, key, uid);
1085 break;
1087 case GETAI:
1088 addhstai (db, fd, req, key, uid);
1089 break;
1091 case INITGROUPS:
1092 addinitgroups (db, fd, req, key, uid);
1093 break;
1095 case GETSERVBYNAME:
1096 addservbyname (db, fd, req, key, uid);
1097 break;
1099 case GETSERVBYPORT:
1100 addservbyport (db, fd, req, key, uid);
1101 break;
1103 case GETSTAT:
1104 case SHUTDOWN:
1105 case INVALIDATE:
1107 /* Get the callers credentials. */
1108 #ifdef SO_PEERCRED
1109 struct ucred caller;
1110 socklen_t optlen = sizeof (caller);
1112 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1114 char buf[256];
1116 dbg_log (_("error getting caller's id: %s"),
1117 strerror_r (errno, buf, sizeof (buf)));
1118 break;
1121 uid = caller.uid;
1122 #else
1123 /* Some systems have no SO_PEERCRED implementation. They don't
1124 care about security so we don't as well. */
1125 uid = 0;
1126 #endif
1129 /* Accept shutdown, getstat and invalidate only from root. For
1130 the stat call also allow the user specified in the config file. */
1131 if (req->type == GETSTAT)
1133 if (uid == 0 || uid == stat_uid)
1134 send_stats (fd, dbs);
1136 else if (uid == 0)
1138 if (req->type == INVALIDATE)
1139 invalidate_cache (key, fd);
1140 else
1141 termination_handler (0);
1143 break;
1145 case GETFDPW:
1146 case GETFDGR:
1147 case GETFDHST:
1148 case GETFDSERV:
1149 #ifdef SCM_RIGHTS
1150 send_ro_fd (reqinfo[req->type].db, key, fd);
1151 #endif
1152 break;
1154 default:
1155 /* Ignore the command, it's nothing we know. */
1156 break;
1161 /* Restart the process. */
1162 static void
1163 restart (void)
1165 /* First determine the parameters. We do not use the parameters
1166 passed to main() since in case nscd is started by running the
1167 dynamic linker this will not work. Yes, this is not the usual
1168 case but nscd is part of glibc and we occasionally do this. */
1169 size_t buflen = 1024;
1170 char *buf = alloca (buflen);
1171 size_t readlen = 0;
1172 int fd = open ("/proc/self/cmdline", O_RDONLY);
1173 if (fd == -1)
1175 dbg_log (_("\
1176 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1177 strerror (errno));
1179 paranoia = 0;
1180 return;
1183 while (1)
1185 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1186 buflen - readlen));
1187 if (n == -1)
1189 dbg_log (_("\
1190 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1191 strerror (errno));
1193 close (fd);
1194 paranoia = 0;
1195 return;
1198 readlen += n;
1200 if (readlen < buflen)
1201 break;
1203 /* We might have to extend the buffer. */
1204 size_t old_buflen = buflen;
1205 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1206 buf = memmove (newp, buf, old_buflen);
1209 close (fd);
1211 /* Parse the command line. Worst case scenario: every two
1212 characters form one parameter (one character plus NUL). */
1213 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1214 int argc = 0;
1216 char *cp = buf;
1217 while (cp < buf + readlen)
1219 argv[argc++] = cp;
1220 cp = (char *) rawmemchr (cp, '\0') + 1;
1222 argv[argc] = NULL;
1224 /* Second, change back to the old user if we changed it. */
1225 if (server_user != NULL)
1227 if (setresuid (old_uid, old_uid, old_uid) != 0)
1229 dbg_log (_("\
1230 cannot change to old UID: %s; disabling paranoia mode"),
1231 strerror (errno));
1233 paranoia = 0;
1234 return;
1237 if (setresgid (old_gid, old_gid, old_gid) != 0)
1239 dbg_log (_("\
1240 cannot change to old GID: %s; disabling paranoia mode"),
1241 strerror (errno));
1243 setuid (server_uid);
1244 paranoia = 0;
1245 return;
1249 /* Next change back to the old working directory. */
1250 if (chdir (oldcwd) == -1)
1252 dbg_log (_("\
1253 cannot change to old working directory: %s; disabling paranoia mode"),
1254 strerror (errno));
1256 if (server_user != NULL)
1258 setuid (server_uid);
1259 setgid (server_gid);
1261 paranoia = 0;
1262 return;
1265 /* Synchronize memory. */
1266 for (int cnt = 0; cnt < lastdb; ++cnt)
1268 /* Make sure nobody keeps using the database. */
1269 dbs[cnt].head->timestamp = 0;
1271 if (dbs[cnt].persistent)
1272 // XXX async OK?
1273 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1276 /* The preparations are done. */
1277 execv ("/proc/self/exe", argv);
1279 /* If we come here, we will never be able to re-exec. */
1280 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1281 strerror (errno));
1283 if (server_user != NULL)
1285 setuid (server_uid);
1286 setgid (server_gid);
1288 if (chdir ("/") != 0)
1289 dbg_log (_("cannot change current working directory to \"/\": %s"),
1290 strerror (errno));
1291 paranoia = 0;
/* One slot of the descriptor hand-off list.  Slots are chained through
   NEXT; the code in this file treats a slot whose NEXT is NULL as
   unused.  */
struct fdlist
{
  /* Descriptor carried by this slot.  */
  int fd;
  /* Following slot on the list.  */
  struct fdlist *next;
};
1301 /* Memory allocated for the list. */
1302 static struct fdlist *fdlist;
1303 /* List of currently ready-to-read file descriptors. */
1304 static struct fdlist *readylist;
1306 /* Conditional variable and mutex to signal availability of entries in
1307 READYLIST. The condvar is initialized dynamically since we might
1308 use a different clock depending on availability. */
1309 static pthread_cond_t readylist_cond;
1310 static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1312 /* The clock to use with the condvar. */
1313 static clockid_t timeout_clock = CLOCK_REALTIME;
1315 /* Number of threads ready to handle the READYLIST. */
1316 static unsigned long int nready;
1319 /* This is the main loop. It is replicated in different threads but the
1320 `poll' call makes sure only one thread handles an incoming connection. */
1321 static void *
1322 __attribute__ ((__noreturn__))
1323 nscd_run (void *p)
1325 const long int my_number = (long int) p;
1326 const int run_prune = my_number < lastdb && dbs[my_number].enabled;
1327 struct timespec prune_ts;
1328 int to = 0;
1329 char buf[256];
1331 if (run_prune)
1333 setup_thread (&dbs[my_number]);
1335 /* We are running. */
1336 dbs[my_number].head->timestamp = time (NULL);
1338 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1339 /* Should never happen. */
1340 abort ();
1342 /* Compute timeout time. */
1343 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1346 /* Initial locking. */
1347 pthread_mutex_lock (&readylist_lock);
1349 /* One more thread available. */
1350 ++nready;
1352 while (1)
1354 while (readylist == NULL)
1356 if (run_prune)
1358 /* Wait, but not forever. */
1359 to = pthread_cond_timedwait (&readylist_cond, &readylist_lock,
1360 &prune_ts);
1362 /* If we were woken and there is no work to be done,
1363 just start pruning. */
1364 if (readylist == NULL && to == ETIMEDOUT)
1366 --nready;
1367 pthread_mutex_unlock (&readylist_lock);
1368 goto only_prune;
1371 else
1372 /* No need to timeout. */
1373 pthread_cond_wait (&readylist_cond, &readylist_lock);
1376 struct fdlist *it = readylist->next;
1377 if (readylist->next == readylist)
1378 /* Just one entry on the list. */
1379 readylist = NULL;
1380 else
1381 readylist->next = it->next;
1383 /* Extract the information and mark the record ready to be used
1384 again. */
1385 int fd = it->fd;
1386 it->next = NULL;
1388 /* One more thread available. */
1389 --nready;
1391 /* We are done with the list. */
1392 pthread_mutex_unlock (&readylist_lock);
1394 /* We do not want to block on a short read or so. */
1395 int fl = fcntl (fd, F_GETFL);
1396 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1397 goto close_and_out;
1399 /* Now read the request. */
1400 request_header req;
1401 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1402 != sizeof (req), 0))
1404 /* We failed to read data. Note that this also might mean we
1405 failed because we would have blocked. */
1406 if (debug_level > 0)
1407 dbg_log (_("short read while reading request: %s"),
1408 strerror_r (errno, buf, sizeof (buf)));
1409 goto close_and_out;
1412 /* Check whether this is a valid request type. */
1413 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1414 goto close_and_out;
1416 /* Some systems have no SO_PEERCRED implementation. They don't
1417 care about security so we don't as well. */
1418 uid_t uid = -1;
1419 #ifdef SO_PEERCRED
1420 pid_t pid = 0;
1422 if (__builtin_expect (debug_level > 0, 0))
1424 struct ucred caller;
1425 socklen_t optlen = sizeof (caller);
1427 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1428 pid = caller.pid;
1430 #endif
1432 /* It should not be possible to crash the nscd with a silly
1433 request (i.e., a terribly large key). We limit the size to 1kb. */
1434 if (__builtin_expect (req.key_len, 1) < 0
1435 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1437 if (debug_level > 0)
1438 dbg_log (_("key length in request too long: %d"), req.key_len);
1440 else
1442 /* Get the key. */
1443 char keybuf[MAXKEYLEN];
1445 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1446 req.key_len))
1447 != req.key_len, 0))
1449 /* Again, this can also mean we would have blocked. */
1450 if (debug_level > 0)
1451 dbg_log (_("short read while reading request key: %s"),
1452 strerror_r (errno, buf, sizeof (buf)));
1453 goto close_and_out;
1456 if (__builtin_expect (debug_level, 0) > 0)
1458 #ifdef SO_PEERCRED
1459 if (pid != 0)
1460 dbg_log (_("\
1461 handle_request: request received (Version = %d) from PID %ld"),
1462 req.version, (long int) pid);
1463 else
1464 #endif
1465 dbg_log (_("\
1466 handle_request: request received (Version = %d)"), req.version);
1469 /* Phew, we got all the data, now process it. */
1470 handle_request (fd, &req, keybuf, uid);
1473 close_and_out:
1474 /* We are done. */
1475 close (fd);
1477 /* Check whether we should be pruning the cache. */
1478 assert (run_prune || to == 0);
1479 if (to == ETIMEDOUT)
1481 only_prune:
1482 /* The pthread_cond_timedwait() call timed out. It is time
1483 to clean up the cache. */
1484 assert (my_number < lastdb);
1485 prune_cache (&dbs[my_number], time (NULL), -1);
1487 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1488 /* Should never happen. */
1489 abort ();
1491 /* Compute next timeout time. */
1492 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1494 /* In case the list is emtpy we do not want to run the prune
1495 code right away again. */
1496 to = 0;
1499 /* Re-locking. */
1500 pthread_mutex_lock (&readylist_lock);
1502 /* One more thread available. */
1503 ++nready;
1508 static unsigned int nconns;
1510 static void
1511 fd_ready (int fd)
1513 pthread_mutex_lock (&readylist_lock);
1515 /* Find an empty entry in FDLIST. */
1516 size_t inner;
1517 for (inner = 0; inner < nconns; ++inner)
1518 if (fdlist[inner].next == NULL)
1519 break;
1520 assert (inner < nconns);
1522 fdlist[inner].fd = fd;
1524 if (readylist == NULL)
1525 readylist = fdlist[inner].next = &fdlist[inner];
1526 else
1528 fdlist[inner].next = readylist->next;
1529 readylist = readylist->next = &fdlist[inner];
1532 bool do_signal = true;
1533 if (__builtin_expect (nready == 0, 0))
1535 ++client_queued;
1536 do_signal = false;
1538 /* Try to start another thread to help out. */
1539 pthread_t th;
1540 if (nthreads < max_nthreads
1541 && pthread_create (&th, &attr, nscd_run,
1542 (void *) (long int) nthreads) == 0)
1544 /* We got another thread. */
1545 ++nthreads;
1546 /* The new thread might need a kick. */
1547 do_signal = true;
1552 pthread_mutex_unlock (&readylist_lock);
1554 /* Tell one of the worker threads there is work to do. */
1555 if (do_signal)
1556 pthread_cond_signal (&readylist_cond);
1560 /* Check whether restarting should happen. */
1561 static inline int
1562 restart_p (time_t now)
1564 return (paranoia && readylist == NULL && nready == nthreads
1565 && now >= restart_time);
1569 /* Array for times a connection was accepted. */
1570 static time_t *starttime;
1573 static void
1574 __attribute__ ((__noreturn__))
1575 main_loop_poll (void)
1577 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1578 * sizeof (conns[0]));
1580 conns[0].fd = sock;
1581 conns[0].events = POLLRDNORM;
1582 size_t nused = 1;
1583 size_t firstfree = 1;
1585 while (1)
1587 /* Wait for any event. We wait at most a couple of seconds so
1588 that we can check whether we should close any of the accepted
1589 connections since we have not received a request. */
1590 #define MAX_ACCEPT_TIMEOUT 30
1591 #define MIN_ACCEPT_TIMEOUT 5
1592 #define MAIN_THREAD_TIMEOUT \
1593 (MAX_ACCEPT_TIMEOUT * 1000 \
1594 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1596 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1598 time_t now = time (NULL);
1600 /* If there is a descriptor ready for reading or there is a new
1601 connection, process this now. */
1602 if (n > 0)
1604 if (conns[0].revents != 0)
1606 /* We have a new incoming connection. Accept the connection. */
1607 int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
1609 /* Use the descriptor if we have not reached the limit. */
1610 if (fd >= 0)
1612 if (firstfree < nconns)
1614 conns[firstfree].fd = fd;
1615 conns[firstfree].events = POLLRDNORM;
1616 starttime[firstfree] = now;
1617 if (firstfree >= nused)
1618 nused = firstfree + 1;
1621 ++firstfree;
1622 while (firstfree < nused && conns[firstfree].fd != -1);
1624 else
1625 /* We cannot use the connection so close it. */
1626 close (fd);
1629 --n;
1632 for (size_t cnt = 1; cnt < nused && n > 0; ++cnt)
1633 if (conns[cnt].revents != 0)
1635 fd_ready (conns[cnt].fd);
1637 /* Clean up the CONNS array. */
1638 conns[cnt].fd = -1;
1639 if (cnt < firstfree)
1640 firstfree = cnt;
1641 if (cnt == nused - 1)
1643 --nused;
1644 while (conns[nused - 1].fd == -1);
1646 --n;
1650 /* Now find entries which have timed out. */
1651 assert (nused > 0);
1653 /* We make the timeout length depend on the number of file
1654 descriptors currently used. */
1655 #define ACCEPT_TIMEOUT \
1656 (MAX_ACCEPT_TIMEOUT \
1657 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
1658 time_t laststart = now - ACCEPT_TIMEOUT;
1660 for (size_t cnt = nused - 1; cnt > 0; --cnt)
1662 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
1664 /* Remove the entry, it timed out. */
1665 (void) close (conns[cnt].fd);
1666 conns[cnt].fd = -1;
1668 if (cnt < firstfree)
1669 firstfree = cnt;
1670 if (cnt == nused - 1)
1672 --nused;
1673 while (conns[nused - 1].fd == -1);
1677 if (restart_p (now))
1678 restart ();
#ifdef HAVE_EPOLL
/* Accept loop of the main thread, implemented with epoll on the epoll
   descriptor EFD.  The function returns only if the listening socket
   cannot be registered with EFD; otherwise it loops forever.  Accepted
   connections are tracked through STARTTIME, indexed by descriptor
   number; an element of zero means the descriptor is not tracked.  The
   macros MAIN_THREAD_TIMEOUT and ACCEPT_TIMEOUT are defined in
   main_loop_poll and expand using the local variable NUSED.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  /* Number of descriptors being watched, including SOCK.  */
  int nused = 1;
  /* Upper bound for the descriptor numbers with a STARTTIME entry.  */
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));

            if (fd >= 0)
              {
                /* Try to add the new descriptor.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
        else
          {
            /* Remove the descriptor from the epoll descriptor.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors of accepted connections which have
         not sent a request for too long.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (cnt != sock && starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;

            /* The connection was counted when it was accepted.  Without
               this decrement NUSED only ever grows through timeouts and
               distorts the timeout computations, which depend on it.  */
            --nused;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
1777 /* Start all the threads we want. The initial process is thread no. 1. */
1778 void
1779 start_threads (void)
1781 /* Initialize the conditional variable we will use. The only
1782 non-standard attribute we might use is the clock selection. */
1783 pthread_condattr_t condattr;
1784 pthread_condattr_init (&condattr);
1786 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
1787 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
1788 /* Determine whether the monotonous clock is available. */
1789 struct timespec dummy;
1790 # if _POSIX_MONOTONIC_CLOCK == 0
1791 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
1792 # endif
1793 # if _POSIX_CLOCK_SELECTION == 0
1794 if (sysconf (_SC_CLOCK_SELECTION) > 0)
1795 # endif
1796 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
1797 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
1798 timeout_clock = CLOCK_MONOTONIC;
1799 #endif
1801 pthread_cond_init (&readylist_cond, &condattr);
1802 pthread_condattr_destroy (&condattr);
1805 /* Create the attribute for the threads. They are all created
1806 detached. */
1807 pthread_attr_init (&attr);
1808 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
1809 /* Use 1MB stacks, twice as much for 64-bit architectures. */
1810 pthread_attr_setstacksize (&attr, 1024 * 1024 * (sizeof (void *) / 4));
1812 /* We allow less than LASTDB threads only for debugging. */
1813 if (debug_level == 0)
1814 nthreads = MAX (nthreads, lastdb);
1816 int nfailed = 0;
1817 for (long int i = 0; i < nthreads; ++i)
1819 pthread_t th;
1820 if (pthread_create (&th, &attr, nscd_run, (void *) (i - nfailed)) != 0)
1821 ++nfailed;
1823 if (nthreads - nfailed < lastdb)
1825 /* We could not start enough threads. */
1826 dbg_log (_("could only start %d threads; terminating"),
1827 nthreads - nfailed);
1828 exit (1);
1831 /* Determine how much room for descriptors we should initially
1832 allocate. This might need to change later if we cap the number
1833 with MAXCONN. */
1834 const long int nfds = sysconf (_SC_OPEN_MAX);
1835 #define MINCONN 32
1836 #define MAXCONN 16384
1837 if (nfds == -1 || nfds > MAXCONN)
1838 nconns = MAXCONN;
1839 else if (nfds < MINCONN)
1840 nconns = MINCONN;
1841 else
1842 nconns = nfds;
1844 /* We need memory to pass descriptors on to the worker threads. */
1845 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
1846 /* Array to keep track when connection was accepted. */
1847 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
1849 /* In the main thread we execute the loop which handles incoming
1850 connections. */
1851 #ifdef HAVE_EPOLL
1852 int efd = epoll_create (100);
1853 if (efd != -1)
1855 main_loop_epoll (efd);
1856 close (efd);
1858 #endif
1860 main_loop_poll ();
1864 /* Look up the uid, gid, and supplementary groups to run nscd as. When
1865 this function is called, we are not listening on the nscd socket yet so
1866 we can just use the ordinary lookup functions without causing a lockup */
1867 static void
1868 begin_drop_privileges (void)
1870 struct passwd *pwd = getpwnam (server_user);
1872 if (pwd == NULL)
1874 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1875 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
1876 server_user);
1879 server_uid = pwd->pw_uid;
1880 server_gid = pwd->pw_gid;
1882 /* Save the old UID/GID if we have to change back. */
1883 if (paranoia)
1885 old_uid = getuid ();
1886 old_gid = getgid ();
1889 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
1891 /* This really must never happen. */
1892 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1893 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
1896 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
1898 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
1899 == -1)
1901 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1902 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
1907 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
1908 run nscd as the user specified in the configuration file. */
1909 static void
1910 finish_drop_privileges (void)
1912 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
1913 /* We need to preserve the capabilities to connect to the audit daemon. */
1914 cap_t new_caps = preserve_capabilities ();
1915 #endif
1917 if (setgroups (server_ngroups, server_groups) == -1)
1919 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1920 error (EXIT_FAILURE, errno, _("setgroups failed"));
1923 int res;
1924 if (paranoia)
1925 res = setresgid (server_gid, server_gid, old_gid);
1926 else
1927 res = setgid (server_gid);
1928 if (res == -1)
1930 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1931 perror ("setgid");
1932 exit (4);
1935 if (paranoia)
1936 res = setresuid (server_uid, server_uid, old_uid);
1937 else
1938 res = setuid (server_uid);
1939 if (res == -1)
1941 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1942 perror ("setuid");
1943 exit (4);
1946 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
1947 /* Remove the temporary capabilities. */
1948 install_real_capabilities (new_caps);
1949 #endif