Don't confuse unwinder by loading mangled values in %rbp and %rsp registers and by...
[glibc.git] / nscd / connections.c
blob384464dafcfef502f66cd0f627f43a4487852012
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2003, 2004, 2005 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License version 2 as
8 published by the Free Software Foundation.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
19 #include <alloca.h>
20 #include <assert.h>
21 #include <atomic.h>
22 #include <error.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <grp.h>
26 #include <libintl.h>
27 #include <pthread.h>
28 #include <pwd.h>
29 #include <resolv.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <unistd.h>
33 #include <arpa/inet.h>
34 #ifdef HAVE_EPOLL
35 # include <sys/epoll.h>
36 #endif
37 #include <sys/mman.h>
38 #include <sys/param.h>
39 #include <sys/poll.h>
40 #ifdef HAVE_SENDFILE
41 # include <sys/sendfile.h>
42 #endif
43 #include <sys/socket.h>
44 #include <sys/stat.h>
45 #include <sys/un.h>
47 #include "nscd.h"
48 #include "dbg_log.h"
49 #include "selinux.h"
50 #ifdef HAVE_SENDFILE
51 # include <kernel-features.h>
52 #endif
/* Allocation wrappers with built-in error checking; they are defined
   outside this file.  */
extern void *xmalloc (size_t n);
extern void *xcalloc (size_t n, size_t s);
extern void *xrealloc (void *o, size_t n);

/* State needed to run nscd as an unprivileged user.  */

/* When non-NULL, nscd_init calls begin_drop_privileges before the
   socket is created and finish_drop_privileges once it is listening.  */
const char *server_user;
/* IDs restart switches back to when re-executing fails.  */
static uid_t server_uid;
static gid_t server_gid;
/* NOTE(review): presumably the account name matching stat_uid -- confirm
   against the configuration parser.  */
const char *stat_user;
/* Besides root, this UID may issue GETSTAT requests.  */
uid_t stat_uid;
/* NOTE(review): presumably the supplementary groups of server_user; they
   are not referenced in the visible part of the file.  */
static gid_t *server_groups;
#ifndef NGROUPS
# define NGROUPS 32
#endif
static int server_ngroups;

/* NOTE(review): presumably the attributes used when creating the worker
   threads -- confirm at the pthread_create call sites.  */
static pthread_attr_t attr;

static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
77 /* Map request type to a string. */
78 const char *serv2str[LASTREQ] =
80 [GETPWBYNAME] = "GETPWBYNAME",
81 [GETPWBYUID] = "GETPWBYUID",
82 [GETGRBYNAME] = "GETGRBYNAME",
83 [GETGRBYGID] = "GETGRBYGID",
84 [GETHOSTBYNAME] = "GETHOSTBYNAME",
85 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
86 [GETHOSTBYADDR] = "GETHOSTBYADDR",
87 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
88 [SHUTDOWN] = "SHUTDOWN",
89 [GETSTAT] = "GETSTAT",
90 [INVALIDATE] = "INVALIDATE",
91 [GETFDPW] = "GETFDPW",
92 [GETFDGR] = "GETFDGR",
93 [GETFDHST] = "GETFDHST",
94 [GETAI] = "GETAI",
95 [INITGROUPS] = "INITGROUPS"
98 /* The control data structures for the services. */
99 struct database_dyn dbs[lastdb] =
101 [pwddb] = {
102 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
103 .enabled = 0,
104 .check_file = 1,
105 .persistent = 0,
106 .shared = 0,
107 .max_db_size = DEFAULT_MAX_DB_SIZE,
108 .filename = "/etc/passwd",
109 .db_filename = _PATH_NSCD_PASSWD_DB,
110 .disabled_iov = &pwd_iov_disabled,
111 .postimeout = 3600,
112 .negtimeout = 20,
113 .wr_fd = -1,
114 .ro_fd = -1,
115 .mmap_used = false
117 [grpdb] = {
118 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
119 .enabled = 0,
120 .check_file = 1,
121 .persistent = 0,
122 .shared = 0,
123 .max_db_size = DEFAULT_MAX_DB_SIZE,
124 .filename = "/etc/group",
125 .db_filename = _PATH_NSCD_GROUP_DB,
126 .disabled_iov = &grp_iov_disabled,
127 .postimeout = 3600,
128 .negtimeout = 60,
129 .wr_fd = -1,
130 .ro_fd = -1,
131 .mmap_used = false
133 [hstdb] = {
134 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
135 .enabled = 0,
136 .check_file = 1,
137 .persistent = 0,
138 .shared = 0,
139 .max_db_size = DEFAULT_MAX_DB_SIZE,
140 .filename = "/etc/hosts",
141 .db_filename = _PATH_NSCD_HOSTS_DB,
142 .disabled_iov = &hst_iov_disabled,
143 .postimeout = 3600,
144 .negtimeout = 20,
145 .wr_fd = -1,
146 .ro_fd = -1,
147 .mmap_used = false
152 /* Mapping of request type to database. */
153 static struct database_dyn *const serv2db[LASTREQ] =
155 [GETPWBYNAME] = &dbs[pwddb],
156 [GETPWBYUID] = &dbs[pwddb],
157 [GETGRBYNAME] = &dbs[grpdb],
158 [GETGRBYGID] = &dbs[grpdb],
159 [GETHOSTBYNAME] = &dbs[hstdb],
160 [GETHOSTBYNAMEv6] = &dbs[hstdb],
161 [GETHOSTBYADDR] = &dbs[hstdb],
162 [GETHOSTBYADDRv6] = &dbs[hstdb],
163 [GETFDPW] = &dbs[pwddb],
164 [GETFDGR] = &dbs[grpdb],
165 [GETFDHST] = &dbs[hstdb],
166 [GETAI] = &dbs[hstdb],
167 [INITGROUPS] = &dbs[grpdb]
/* Seconds added to the current time to obtain the deadline of the next
   cache pruning run.  */
#define CACHE_PRUNE_INTERVAL 15


/* Number of threads to start with.  -1 means "not configured"; nscd_init
   then substitutes a default.  */
int nthreads = -1;
/* Upper limit for the number of threads.  */
int max_nthreads = 32;

/* Listening socket on which client connections arrive.  */
static int sock;

/* Counts how often clients had to wait.  */
unsigned long int client_queued;
/* Send all LEN bytes starting at BUF over the socket FD, repeating the
   send call after partial transfers and after interruption by a signal.
   Returns a negative value if a send call failed, otherwise the number
   of bytes actually sent (less than LEN if send reported zero bytes).  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cursor = buf;
  size_t left = len;
  ssize_t sent;

  for (;;)
    {
      sent = TEMP_FAILURE_RETRY (send (fd, cursor, left, MSG_NOSIGNAL));
      if (sent <= 0)
        break;

      cursor += sent;
      left -= sent;
      if (left == 0)
        break;
    }

  return sent < 0 ? sent : len - left;
}
#ifdef HAVE_SENDFILE
/* Transfer LEN bytes of FROMFD, beginning at offset OFF, to TOFD using
   sendfile, repeating the call after partial transfers and after
   interruption by a signal.  Returns a negative value if sendfile
   failed, otherwise the number of bytes transferred.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t left = len;
  ssize_t moved;

  for (;;)
    {
      /* sendfile advances OFF itself, so only the count is tracked.  */
      moved = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, left));
      if (moved <= 0)
        break;

      left -= moved;
      if (left <= 0)
        break;
    }

  return moved < 0 ? moved : len - left;
}
#endif
/* Marks stored in the per-byte usage map that verify_persistent_db
   builds.  The low bits name the kind of object occupying a byte, the
   high bits are flags combined with the kind.  */
enum usekey
  {
    /* Byte not claimed by any object yet.  */
    use_not = 0,

    /* Flag bits.  They are never stored on their own, only or-ed into
       one of the kinds below.  */
    use_first = 1 << 4,
    use_begin = 1 << 5,
    use_end = 1 << 6,

    /* Hash table entry.  */
    use_he = 1,
    use_he_begin = use_begin | use_he,
    use_he_end = use_end | use_he,
#if SEPARATE_KEY
    /* Key stored separately from its data record.  */
    use_key = 2,
    use_key_begin = use_begin | use_key,
    use_key_end = use_end | use_key,
    use_key_first = use_first | use_key_begin,
#endif
    /* Data record.  */
    use_data = 3,
    use_data_begin = use_begin | use_data,
    use_data_end = use_end | use_data,
    use_data_first = use_first | use_data_begin
  };
249 static int
250 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
251 enum usekey use, ref_t start, size_t len)
253 assert (len >= 2);
255 if (start > first_free || start + len > first_free
256 || (start & BLOCK_ALIGN_M1))
257 return 0;
259 if (usemap[start] == use_not)
261 /* Add the start marker. */
262 usemap[start] = use | use_begin;
263 use &= ~use_first;
265 while (--len > 0)
266 if (usemap[++start] != use_not)
267 return 0;
268 else
269 usemap[start] = use;
271 /* Add the end marker. */
272 usemap[start] = use | use_end;
274 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
276 /* Hash entries can't be shared. */
277 if (use == use_he)
278 return 0;
280 usemap[start] |= (use & use_first);
281 use &= ~use_first;
283 while (--len > 1)
284 if (usemap[++start] != use)
285 return 0;
287 if (usemap[++start] != (use | use_end))
288 return 0;
290 else
291 /* Points to a wrong object or somewhere in the middle. */
292 return 0;
294 return 1;
298 /* Verify data in persistent database. */
299 static int
300 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
302 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb);
304 time_t now = time (NULL);
306 struct database_pers_head *head = mem;
307 struct database_pers_head head_copy = *head;
309 /* Check that the header that was read matches the head in the database. */
310 if (readhead != NULL && memcmp (head, readhead, sizeof (*head)) != 0)
311 return 0;
313 /* First some easy tests: make sure the database header is sane. */
314 if (head->version != DB_VERSION
315 || head->header_size != sizeof (*head)
316 /* We allow a timestamp to be one hour ahead of the current time.
317 This should cover daylight saving time changes. */
318 || head->timestamp > now + 60 * 60 + 60
319 || (head->gc_cycle & 1)
320 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
321 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
322 || head->first_free < 0
323 || head->first_free > head->data_size
324 || (head->first_free & BLOCK_ALIGN_M1) != 0
325 || head->maxnentries < 0
326 || head->maxnsearched < 0)
327 return 0;
329 uint8_t *usemap = calloc (head->first_free, 1);
330 if (usemap == NULL)
331 return 0;
333 const char *data = (char *) &head->array[roundup (head->module,
334 ALIGN / sizeof (ref_t))];
336 nscd_ssize_t he_cnt = 0;
337 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
339 ref_t work = head->array[cnt];
341 while (work != ENDREF)
343 if (! check_use (data, head->first_free, usemap, use_he, work,
344 sizeof (struct hashentry)))
345 goto fail;
347 /* Now we know we can dereference the record. */
348 struct hashentry *here = (struct hashentry *) (data + work);
350 ++he_cnt;
352 /* Make sure the record is for this type of service. */
353 if (here->type >= LASTREQ
354 || serv2db[here->type] != &dbs[dbnr])
355 goto fail;
357 /* Validate boolean field value. */
358 if (here->first != false && here->first != true)
359 goto fail;
361 if (here->len < 0)
362 goto fail;
364 /* Now the data. */
365 if (here->packet < 0
366 || here->packet > head->first_free
367 || here->packet + sizeof (struct datahead) > head->first_free)
368 goto fail;
370 struct datahead *dh = (struct datahead *) (data + here->packet);
372 if (! check_use (data, head->first_free, usemap,
373 use_data | (here->first ? use_first : 0),
374 here->packet, dh->allocsize))
375 goto fail;
377 if (dh->allocsize < sizeof (struct datahead)
378 || dh->recsize > dh->allocsize
379 || (dh->notfound != false && dh->notfound != true)
380 || (dh->usable != false && dh->usable != true))
381 goto fail;
383 if (here->key < here->packet + sizeof (struct datahead)
384 || here->key > here->packet + dh->allocsize
385 || here->key + here->len > here->packet + dh->allocsize)
387 #if SEPARATE_KEY
388 /* If keys can appear outside of data, this should be done
389 instead. But gc doesn't mark the data in that case. */
390 if (! check_use (data, head->first_free, usemap,
391 use_key | (here->first ? use_first : 0),
392 here->key, here->len))
393 #endif
394 goto fail;
397 work = here->next;
401 if (he_cnt != head->nentries)
402 goto fail;
404 /* See if all data and keys had at least one reference from
405 he->first == true hashentry. */
406 for (ref_t idx = 0; idx < head->first_free; ++idx)
408 #if SEPARATE_KEY
409 if (usemap[idx] == use_key_begin)
410 goto fail;
411 #endif
412 if (usemap[idx] == use_data_begin)
413 goto fail;
416 /* Finally, make sure the database hasn't changed since the first test. */
417 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
418 goto fail;
420 free (usemap);
421 return 1;
423 fail:
424 free (usemap);
425 return 0;
429 /* Initialize database information structures. */
430 void
431 nscd_init (void)
433 /* Secure mode and unprivileged mode are incompatible */
434 if (server_user != NULL && secure_in_use)
436 dbg_log (_("Cannot run nscd in secure mode as unprivileged user"));
437 exit (4);
440 /* Look up unprivileged uid/gid/groups before we start listening on the
441 socket */
442 if (server_user != NULL)
443 begin_drop_privileges ();
445 if (nthreads == -1)
446 /* No configuration for this value, assume a default. */
447 nthreads = 2 * lastdb;
449 for (size_t cnt = 0; cnt < lastdb; ++cnt)
450 if (dbs[cnt].enabled)
452 pthread_rwlock_init (&dbs[cnt].lock, NULL);
453 pthread_mutex_init (&dbs[cnt].memlock, NULL);
455 if (dbs[cnt].persistent)
457 /* Try to open the appropriate file on disk. */
458 int fd = open (dbs[cnt].db_filename, O_RDWR);
459 if (fd != -1)
461 struct stat64 st;
462 void *mem;
463 size_t total;
464 struct database_pers_head head;
465 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
466 sizeof (head)));
467 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
469 fail_db:
470 dbg_log (_("invalid persistent database file \"%s\": %s"),
471 dbs[cnt].db_filename, strerror (errno));
472 unlink (dbs[cnt].db_filename);
474 else if (head.module == 0 && head.data_size == 0)
476 /* The file has been created, but the head has not been
477 initialized yet. Remove the old file. */
478 unlink (dbs[cnt].db_filename);
480 else if (head.header_size != (int) sizeof (head))
482 dbg_log (_("invalid persistent database file \"%s\": %s"),
483 dbs[cnt].db_filename,
484 _("header size does not match"));
485 unlink (dbs[cnt].db_filename);
487 else if ((total = (sizeof (head)
488 + roundup (head.module * sizeof (ref_t),
489 ALIGN)
490 + head.data_size))
491 > st.st_size
492 || total < sizeof (head))
494 dbg_log (_("invalid persistent database file \"%s\": %s"),
495 dbs[cnt].db_filename,
496 _("file size does not match"));
497 unlink (dbs[cnt].db_filename);
499 /* Note we map with the maximum size allowed for the
500 database. This is likely much larger than the
501 actual file size. This is OK on most OSes since
502 extensions of the underlying file will
503 automatically translate more pages available for
504 memory access. */
505 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
506 PROT_READ | PROT_WRITE,
507 MAP_SHARED, fd, 0))
508 == MAP_FAILED)
509 goto fail_db;
510 else if (!verify_persistent_db (mem, &head, cnt))
512 munmap (mem, total);
513 dbg_log (_("invalid persistent database file \"%s\": %s"),
514 dbs[cnt].db_filename,
515 _("verification failed"));
516 unlink (dbs[cnt].db_filename);
518 else
520 /* Success. We have the database. */
521 dbs[cnt].head = mem;
522 dbs[cnt].memsize = total;
523 dbs[cnt].data = (char *)
524 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
525 ALIGN / sizeof (ref_t))];
526 dbs[cnt].mmap_used = true;
528 if (dbs[cnt].suggested_module > head.module)
529 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
530 dbnames[cnt]);
532 dbs[cnt].wr_fd = fd;
533 fd = -1;
534 /* We also need a read-only descriptor. */
535 if (dbs[cnt].shared)
537 dbs[cnt].ro_fd = open (dbs[cnt].db_filename, O_RDONLY);
538 if (dbs[cnt].ro_fd == -1)
539 dbg_log (_("\
540 cannot create read-only descriptor for \"%s\"; no mmap"),
541 dbs[cnt].db_filename);
544 // XXX Shall we test whether the descriptors actually
545 // XXX point to the same file?
548 /* Close the file descriptors in case something went
549 wrong in which case the variable have not been
550 assigned -1. */
551 if (fd != -1)
552 close (fd);
556 if (dbs[cnt].head == NULL)
558 /* No database loaded. Allocate the data structure,
559 possibly on disk. */
560 struct database_pers_head head;
561 size_t total = (sizeof (head)
562 + roundup (dbs[cnt].suggested_module
563 * sizeof (ref_t), ALIGN)
564 + (dbs[cnt].suggested_module
565 * DEFAULT_DATASIZE_PER_BUCKET));
567 /* Try to create the database. If we do not need a
568 persistent database create a temporary file. */
569 int fd;
570 int ro_fd = -1;
571 if (dbs[cnt].persistent)
573 fd = open (dbs[cnt].db_filename,
574 O_RDWR | O_CREAT | O_EXCL | O_TRUNC,
575 S_IRUSR | S_IWUSR);
576 if (fd != -1 && dbs[cnt].shared)
577 ro_fd = open (dbs[cnt].db_filename, O_RDONLY);
579 else
581 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
582 fd = mkstemp (fname);
584 /* We do not need the file name anymore after we
585 opened another file descriptor in read-only mode. */
586 if (fd != -1)
588 if (dbs[cnt].shared)
589 ro_fd = open (fname, O_RDONLY);
591 unlink (fname);
595 if (fd == -1)
597 if (errno == EEXIST)
599 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
600 dbnames[cnt], dbs[cnt].db_filename);
601 // XXX Correct way to terminate?
602 exit (1);
605 if (dbs[cnt].persistent)
606 dbg_log (_("cannot create %s; no persistent database used"),
607 dbs[cnt].db_filename);
608 else
609 dbg_log (_("cannot create %s; no sharing possible"),
610 dbs[cnt].db_filename);
612 dbs[cnt].persistent = 0;
613 // XXX remember: no mmap
615 else
617 /* Tell the user if we could not create the read-only
618 descriptor. */
619 if (ro_fd == -1 && dbs[cnt].shared)
620 dbg_log (_("\
621 cannot create read-only descriptor for \"%s\"; no mmap"),
622 dbs[cnt].db_filename);
624 /* Before we create the header, initialiye the hash
625 table. So that if we get interrupted if writing
626 the header we can recognize a partially initialized
627 database. */
628 size_t ps = sysconf (_SC_PAGESIZE);
629 char tmpbuf[ps];
630 assert (~ENDREF == 0);
631 memset (tmpbuf, '\xff', ps);
633 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
634 off_t offset = sizeof (head);
636 size_t towrite;
637 if (offset % ps != 0)
639 towrite = MIN (remaining, ps - (offset % ps));
640 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
641 goto write_fail;
642 offset += towrite;
643 remaining -= towrite;
646 while (remaining > ps)
648 if (pwrite (fd, tmpbuf, ps, offset) == -1)
649 goto write_fail;
650 offset += ps;
651 remaining -= ps;
654 if (remaining > 0
655 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
656 goto write_fail;
658 /* Create the header of the file. */
659 struct database_pers_head head =
661 .version = DB_VERSION,
662 .header_size = sizeof (head),
663 .module = dbs[cnt].suggested_module,
664 .data_size = (dbs[cnt].suggested_module
665 * DEFAULT_DATASIZE_PER_BUCKET),
666 .first_free = 0
668 void *mem;
670 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
671 != sizeof (head))
672 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
673 != 0)
674 || (mem = mmap (NULL, dbs[cnt].max_db_size,
675 PROT_READ | PROT_WRITE,
676 MAP_SHARED, fd, 0)) == MAP_FAILED)
678 write_fail:
679 unlink (dbs[cnt].db_filename);
680 dbg_log (_("cannot write to database file %s: %s"),
681 dbs[cnt].db_filename, strerror (errno));
682 dbs[cnt].persistent = 0;
684 else
686 /* Success. */
687 dbs[cnt].head = mem;
688 dbs[cnt].data = (char *)
689 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
690 ALIGN / sizeof (ref_t))];
691 dbs[cnt].memsize = total;
692 dbs[cnt].mmap_used = true;
694 /* Remember the descriptors. */
695 dbs[cnt].wr_fd = fd;
696 dbs[cnt].ro_fd = ro_fd;
697 fd = -1;
698 ro_fd = -1;
701 if (fd != -1)
702 close (fd);
703 if (ro_fd != -1)
704 close (ro_fd);
708 if (paranoia
709 && ((dbs[cnt].wr_fd != -1
710 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
711 || (dbs[cnt].ro_fd != -1
712 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
714 dbg_log (_("\
715 cannot set socket to close on exec: %s; disabling paranoia mode"),
716 strerror (errno));
717 paranoia = 0;
720 if (dbs[cnt].head == NULL)
722 /* We do not use the persistent database. Just
723 create an in-memory data structure. */
724 assert (! dbs[cnt].persistent);
726 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
727 + (dbs[cnt].suggested_module
728 * sizeof (ref_t)));
729 memset (dbs[cnt].head, '\0', sizeof (dbs[cnt].head));
730 assert (~ENDREF == 0);
731 memset (dbs[cnt].head->array, '\xff',
732 dbs[cnt].suggested_module * sizeof (ref_t));
733 dbs[cnt].head->module = dbs[cnt].suggested_module;
734 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
735 * dbs[cnt].head->module);
736 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
737 dbs[cnt].head->first_free = 0;
739 dbs[cnt].shared = 0;
740 assert (dbs[cnt].ro_fd == -1);
743 if (dbs[cnt].check_file)
745 /* We need the modification date of the file. */
746 struct stat64 st;
748 if (stat64 (dbs[cnt].filename, &st) < 0)
750 /* We cannot stat() the file, disable file checking. */
751 dbg_log (_("cannot stat() file `%s': %s"),
752 dbs[cnt].filename, strerror (errno));
753 dbs[cnt].check_file = 0;
755 else
756 dbs[cnt].file_mtime = st.st_mtime;
760 /* Create the socket. */
761 sock = socket (AF_UNIX, SOCK_STREAM, 0);
762 if (sock < 0)
764 dbg_log (_("cannot open socket: %s"), strerror (errno));
765 exit (errno == EACCES ? 4 : 1);
767 /* Bind a name to the socket. */
768 struct sockaddr_un sock_addr;
769 sock_addr.sun_family = AF_UNIX;
770 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
771 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
773 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
774 exit (errno == EACCES ? 4 : 1);
777 /* We don't want to get stuck on accept. */
778 int fl = fcntl (sock, F_GETFL);
779 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
781 dbg_log (_("cannot change socket to nonblocking mode: %s"),
782 strerror (errno));
783 exit (1);
786 /* The descriptor needs to be closed on exec. */
787 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
789 dbg_log (_("cannot set socket to close on exec: %s"),
790 strerror (errno));
791 exit (1);
794 /* Set permissions for the socket. */
795 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
797 /* Set the socket up to accept connections. */
798 if (listen (sock, SOMAXCONN) < 0)
800 dbg_log (_("cannot enable socket to accept connections: %s"),
801 strerror (errno));
802 exit (1);
805 /* Change to unprivileged uid/gid/groups if specifed in config file */
806 if (server_user != NULL)
807 finish_drop_privileges ();
811 /* Close the connections. */
812 void
813 close_sockets (void)
815 close (sock);
819 static void
820 invalidate_cache (char *key)
822 dbtype number;
824 if (strcmp (key, "passwd") == 0)
825 number = pwddb;
826 else if (strcmp (key, "group") == 0)
827 number = grpdb;
828 else if (__builtin_expect (strcmp (key, "hosts"), 0) == 0)
830 number = hstdb;
832 /* Re-initialize the resolver. resolv.conf might have changed. */
833 res_init ();
835 else
836 return;
838 if (dbs[number].enabled)
839 prune_cache (&dbs[number], LONG_MAX);
843 #ifdef SCM_RIGHTS
844 static void
845 send_ro_fd (struct database_dyn *db, char *key, int fd)
847 /* If we do not have an read-only file descriptor do nothing. */
848 if (db->ro_fd == -1)
849 return;
851 /* We need to send some data along with the descriptor. */
852 struct iovec iov[1];
853 iov[0].iov_base = key;
854 iov[0].iov_len = strlen (key) + 1;
856 /* Prepare the control message to transfer the descriptor. */
857 union
859 struct cmsghdr hdr;
860 char bytes[CMSG_SPACE (sizeof (int))];
861 } buf;
862 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 1,
863 .msg_control = buf.bytes,
864 .msg_controllen = sizeof (buf) };
865 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
867 cmsg->cmsg_level = SOL_SOCKET;
868 cmsg->cmsg_type = SCM_RIGHTS;
869 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
871 *(int *) CMSG_DATA (cmsg) = db->ro_fd;
873 msg.msg_controllen = cmsg->cmsg_len;
875 /* Send the control message. We repeat when we are interrupted but
876 everything else is ignored. */
877 #ifndef MSG_NOSIGNAL
878 # define MSG_NOSIGNAL 0
879 #endif
880 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
882 if (__builtin_expect (debug_level > 0, 0))
883 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
885 #endif /* SCM_RIGHTS */
888 /* Handle new request. */
889 static void
890 handle_request (int fd, request_header *req, void *key, uid_t uid)
892 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
894 if (debug_level > 0)
895 dbg_log (_("\
896 cannot handle old request version %d; current version is %d"),
897 req->version, NSCD_VERSION);
898 return;
901 /* Make the SELinux check before we go on to the standard checks. We
902 need to verify that the request type is valid, since it has not
903 yet been checked at this point. */
904 if (selinux_enabled
905 && __builtin_expect (req->type, GETPWBYNAME) >= GETPWBYNAME
906 && __builtin_expect (req->type, LASTREQ) < LASTREQ
907 && nscd_request_avc_has_perm (fd, req->type) != 0)
908 return;
910 struct database_dyn *db = serv2db[req->type];
912 // XXX Clean up so that each new command need not introduce a
913 // XXX new conditional.
914 if ((__builtin_expect (req->type, GETPWBYNAME) >= GETPWBYNAME
915 && __builtin_expect (req->type, LASTDBREQ) <= LASTDBREQ)
916 || req->type == GETAI || req->type == INITGROUPS)
918 if (__builtin_expect (debug_level, 0) > 0)
920 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
922 char buf[INET6_ADDRSTRLEN];
924 dbg_log ("\t%s (%s)", serv2str[req->type],
925 inet_ntop (req->type == GETHOSTBYADDR
926 ? AF_INET : AF_INET6,
927 key, buf, sizeof (buf)));
929 else
930 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
933 /* Is this service enabled? */
934 if (!db->enabled)
936 /* No, sent the prepared record. */
937 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
938 db->disabled_iov->iov_len,
939 MSG_NOSIGNAL))
940 != (ssize_t) db->disabled_iov->iov_len
941 && __builtin_expect (debug_level, 0) > 0)
943 /* We have problems sending the result. */
944 char buf[256];
945 dbg_log (_("cannot write result: %s"),
946 strerror_r (errno, buf, sizeof (buf)));
949 return;
952 /* Be sure we can read the data. */
953 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
955 ++db->head->rdlockdelayed;
956 pthread_rwlock_rdlock (&db->lock);
959 /* See whether we can handle it from the cache. */
960 struct datahead *cached;
961 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
962 db, uid);
963 if (cached != NULL)
965 /* Hurray it's in the cache. */
966 ssize_t nwritten;
968 #ifdef HAVE_SENDFILE
969 if (db->mmap_used || !cached->notfound)
971 assert (db->wr_fd != -1);
972 assert ((char *) cached->data > (char *) db->data);
973 assert ((char *) cached->data - (char *) db->head
974 + cached->recsize
975 <= (sizeof (struct database_pers_head)
976 + db->head->module * sizeof (ref_t)
977 + db->head->data_size));
978 nwritten = sendfileall (fd, db->wr_fd,
979 (char *) cached->data
980 - (char *) db->head, cached->recsize);
981 # ifndef __ASSUME_SENDFILE
982 if (nwritten == -1 && errno == ENOSYS)
983 goto use_write;
984 # endif
986 else
987 # ifndef __ASSUME_SENDFILE
988 use_write:
989 # endif
990 #endif
991 nwritten = writeall (fd, cached->data, cached->recsize);
993 if (nwritten != cached->recsize
994 && __builtin_expect (debug_level, 0) > 0)
996 /* We have problems sending the result. */
997 char buf[256];
998 dbg_log (_("cannot write result: %s"),
999 strerror_r (errno, buf, sizeof (buf)));
1002 pthread_rwlock_unlock (&db->lock);
1004 return;
1007 pthread_rwlock_unlock (&db->lock);
1009 else if (__builtin_expect (debug_level, 0) > 0)
1011 if (req->type == INVALIDATE)
1012 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1013 else
1014 dbg_log ("\t%s", serv2str[req->type]);
1017 /* Handle the request. */
1018 switch (req->type)
1020 case GETPWBYNAME:
1021 addpwbyname (db, fd, req, key, uid);
1022 break;
1024 case GETPWBYUID:
1025 addpwbyuid (db, fd, req, key, uid);
1026 break;
1028 case GETGRBYNAME:
1029 addgrbyname (db, fd, req, key, uid);
1030 break;
1032 case GETGRBYGID:
1033 addgrbygid (db, fd, req, key, uid);
1034 break;
1036 case GETHOSTBYNAME:
1037 addhstbyname (db, fd, req, key, uid);
1038 break;
1040 case GETHOSTBYNAMEv6:
1041 addhstbynamev6 (db, fd, req, key, uid);
1042 break;
1044 case GETHOSTBYADDR:
1045 addhstbyaddr (db, fd, req, key, uid);
1046 break;
1048 case GETHOSTBYADDRv6:
1049 addhstbyaddrv6 (db, fd, req, key, uid);
1050 break;
1052 case GETAI:
1053 addhstai (db, fd, req, key, uid);
1054 break;
1056 case INITGROUPS:
1057 addinitgroups (db, fd, req, key, uid);
1058 break;
1060 case GETSTAT:
1061 case SHUTDOWN:
1062 case INVALIDATE:
1063 if (! secure_in_use)
1065 /* Get the callers credentials. */
1066 #ifdef SO_PEERCRED
1067 struct ucred caller;
1068 socklen_t optlen = sizeof (caller);
1070 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1072 char buf[256];
1074 dbg_log (_("error getting callers id: %s"),
1075 strerror_r (errno, buf, sizeof (buf)));
1076 break;
1079 uid = caller.uid;
1080 #else
1081 /* Some systems have no SO_PEERCRED implementation. They don't
1082 care about security so we don't as well. */
1083 uid = 0;
1084 #endif
1087 /* Accept shutdown, getstat and invalidate only from root. For
1088 the stat call also allow the user specified in the config file. */
1089 if (req->type == GETSTAT)
1091 if (uid == 0 || uid == stat_uid)
1092 send_stats (fd, dbs);
1094 else if (uid == 0)
1096 if (req->type == INVALIDATE)
1097 invalidate_cache (key);
1098 else
1099 termination_handler (0);
1101 break;
1103 case GETFDPW:
1104 case GETFDGR:
1105 case GETFDHST:
1106 #ifdef SCM_RIGHTS
1107 send_ro_fd (serv2db[req->type], key, fd);
1108 #endif
1109 break;
1111 default:
1112 /* Ignore the command, it's nothing we know. */
1113 break;
1118 /* Restart the process. */
1119 static void
1120 restart (void)
1122 /* First determine the parameters. We do not use the parameters
1123 passed to main() since in case nscd is started by running the
1124 dynamic linker this will not work. Yes, this is not the usual
1125 case but nscd is part of glibc and we occasionally do this. */
1126 size_t buflen = 1024;
1127 char *buf = alloca (buflen);
1128 size_t readlen = 0;
1129 int fd = open ("/proc/self/cmdline", O_RDONLY);
1130 if (fd == -1)
1132 dbg_log (_("\
1133 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1134 strerror (errno));
1136 paranoia = 0;
1137 return;
1140 while (1)
1142 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1143 buflen - readlen));
1144 if (n == -1)
1146 dbg_log (_("\
1147 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1148 strerror (errno));
1150 close (fd);
1151 paranoia = 0;
1152 return;
1155 readlen += n;
1157 if (readlen < buflen)
1158 break;
1160 /* We might have to extend the buffer. */
1161 size_t old_buflen = buflen;
1162 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1163 buf = memmove (newp, buf, old_buflen);
1166 close (fd);
1168 /* Parse the command line. Worst case scenario: every two
1169 characters form one parameter (one character plus NUL). */
1170 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1171 int argc = 0;
1173 char *cp = buf;
1174 while (cp < buf + readlen)
1176 argv[argc++] = cp;
1177 cp = (char *) rawmemchr (cp, '\0') + 1;
1179 argv[argc] = NULL;
1181 /* Second, change back to the old user if we changed it. */
1182 if (server_user != NULL)
1184 if (setuid (old_uid) != 0)
1186 dbg_log (_("\
1187 cannot change to old UID: %s; disabling paranoia mode"),
1188 strerror (errno));
1190 paranoia = 0;
1191 return;
1194 if (setgid (old_gid) != 0)
1196 dbg_log (_("\
1197 cannot change to old GID: %s; disabling paranoia mode"),
1198 strerror (errno));
1200 setuid (server_uid);
1201 paranoia = 0;
1202 return;
1206 /* Next change back to the old working directory. */
1207 if (chdir (oldcwd) == -1)
1209 dbg_log (_("\
1210 cannot change to old working directory: %s; disabling paranoia mode"),
1211 strerror (errno));
1213 if (server_user != NULL)
1215 setuid (server_uid);
1216 setgid (server_gid);
1218 paranoia = 0;
1219 return;
1222 /* Synchronize memory. */
1223 for (int cnt = 0; cnt < lastdb; ++cnt)
1225 /* Make sure nobody keeps using the database. */
1226 dbs[cnt].head->timestamp = 0;
1228 if (dbs[cnt].persistent)
1229 // XXX async OK?
1230 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1233 /* The preparations are done. */
1234 execv ("/proc/self/exe", argv);
1236 /* If we come here, we will never be able to re-exec. */
1237 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1238 strerror (errno));
1240 if (server_user != NULL)
1242 setuid (server_uid);
1243 setgid (server_gid);
1245 if (chdir ("/") != 0)
1246 dbg_log (_("cannot change current working directory to \"/\": %s"),
1247 strerror (errno));
1248 paranoia = 0;
1252 /* List of file descriptors. */
1253 struct fdlist
1255 int fd;
1256 struct fdlist *next;
/* Memory allocated for the list.  */
static struct fdlist *fdlist;
/* List of currently ready-to-read file descriptors.  */
static struct fdlist *readylist;

/* Condition variable and mutex to signal availability of entries in
   READYLIST.  The condvar is initialized dynamically since we might
   use a different clock depending on availability.  */
static pthread_cond_t readylist_cond;
static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;

/* The clock to use with the condvar.  */
static clockid_t timeout_clock = CLOCK_REALTIME;

/* Number of threads ready to handle the READYLIST.  */
static unsigned long int nready;
1276 /* This is the main loop. It is replicated in different threads but the
1277 `poll' call makes sure only one thread handles an incoming connection. */
1278 static void *
1279 __attribute__ ((__noreturn__))
1280 nscd_run (void *p)
1282 const long int my_number = (long int) p;
1283 const int run_prune = my_number < lastdb && dbs[my_number].enabled;
1284 struct timespec prune_ts;
1285 int to = 0;
1286 char buf[256];
1288 if (run_prune)
1290 setup_thread (&dbs[my_number]);
1292 /* We are running. */
1293 dbs[my_number].head->timestamp = time (NULL);
1295 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1296 /* Should never happen. */
1297 abort ();
1299 /* Compute timeout time. */
1300 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1303 /* Initial locking. */
1304 pthread_mutex_lock (&readylist_lock);
1306 /* One more thread available. */
1307 ++nready;
1309 while (1)
1311 while (readylist == NULL)
1313 if (run_prune)
1315 /* Wait, but not forever. */
1316 to = pthread_cond_timedwait (&readylist_cond, &readylist_lock,
1317 &prune_ts);
1319 /* If we were woken and there is no work to be done,
1320 just start pruning. */
1321 if (readylist == NULL && to == ETIMEDOUT)
1323 --nready;
1324 pthread_mutex_unlock (&readylist_lock);
1325 goto only_prune;
1328 else
1329 /* No need to timeout. */
1330 pthread_cond_wait (&readylist_cond, &readylist_lock);
1333 struct fdlist *it = readylist->next;
1334 if (readylist->next == readylist)
1335 /* Just one entry on the list. */
1336 readylist = NULL;
1337 else
1338 readylist->next = it->next;
1340 /* Extract the information and mark the record ready to be used
1341 again. */
1342 int fd = it->fd;
1343 it->next = NULL;
1345 /* One more thread available. */
1346 --nready;
1348 /* We are done with the list. */
1349 pthread_mutex_unlock (&readylist_lock);
1351 /* We do not want to block on a short read or so. */
1352 int fl = fcntl (fd, F_GETFL);
1353 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1354 goto close_and_out;
1356 /* Now read the request. */
1357 request_header req;
1358 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1359 != sizeof (req), 0))
1361 /* We failed to read data. Note that this also might mean we
1362 failed because we would have blocked. */
1363 if (debug_level > 0)
1364 dbg_log (_("short read while reading request: %s"),
1365 strerror_r (errno, buf, sizeof (buf)));
1366 goto close_and_out;
1369 /* Check whether this is a valid request type. */
1370 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1371 goto close_and_out;
1373 /* Some systems have no SO_PEERCRED implementation. They don't
1374 care about security so we don't as well. */
1375 uid_t uid = -1;
1376 #ifdef SO_PEERCRED
1377 pid_t pid = 0;
1379 if (secure_in_use)
1381 struct ucred caller;
1382 socklen_t optlen = sizeof (caller);
1384 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1386 dbg_log (_("error getting callers id: %s"),
1387 strerror_r (errno, buf, sizeof (buf)));
1388 goto close_and_out;
1391 if (req.type < GETPWBYNAME || req.type > LASTDBREQ
1392 || serv2db[req.type]->secure)
1393 uid = caller.uid;
1395 pid = caller.pid;
1397 else if (__builtin_expect (debug_level > 0, 0))
1399 struct ucred caller;
1400 socklen_t optlen = sizeof (caller);
1402 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1403 pid = caller.pid;
1405 #endif
1407 /* It should not be possible to crash the nscd with a silly
1408 request (i.e., a terribly large key). We limit the size to 1kb. */
1409 #define MAXKEYLEN 1024
1410 if (__builtin_expect (req.key_len, 1) < 0
1411 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1413 if (debug_level > 0)
1414 dbg_log (_("key length in request too long: %d"), req.key_len);
1416 else
1418 /* Get the key. */
1419 char keybuf[MAXKEYLEN];
1421 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1422 req.key_len))
1423 != req.key_len, 0))
1425 /* Again, this can also mean we would have blocked. */
1426 if (debug_level > 0)
1427 dbg_log (_("short read while reading request key: %s"),
1428 strerror_r (errno, buf, sizeof (buf)));
1429 goto close_and_out;
1432 if (__builtin_expect (debug_level, 0) > 0)
1434 #ifdef SO_PEERCRED
1435 if (pid != 0)
1436 dbg_log (_("\
1437 handle_request: request received (Version = %d) from PID %ld"),
1438 req.version, (long int) pid);
1439 else
1440 #endif
1441 dbg_log (_("\
1442 handle_request: request received (Version = %d)"), req.version);
1445 /* Phew, we got all the data, now process it. */
1446 handle_request (fd, &req, keybuf, uid);
1449 close_and_out:
1450 /* We are done. */
1451 close (fd);
1453 /* Check whether we should be pruning the cache. */
1454 assert (run_prune || to == 0);
1455 if (to == ETIMEDOUT)
1457 only_prune:
1458 /* The pthread_cond_timedwait() call timed out. It is time
1459 to clean up the cache. */
1460 assert (my_number < lastdb);
1461 prune_cache (&dbs[my_number], time (NULL));
1463 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1464 /* Should never happen. */
1465 abort ();
1467 /* Compute next timeout time. */
1468 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1470 /* In case the list is emtpy we do not want to run the prune
1471 code right away again. */
1472 to = 0;
1475 /* Re-locking. */
1476 pthread_mutex_lock (&readylist_lock);
1478 /* One more thread available. */
1479 ++nready;
/* Number of entries allocated for the connection bookkeeping arrays.  */
static unsigned int nconns;
1486 static void
1487 fd_ready (int fd)
1489 pthread_mutex_lock (&readylist_lock);
1491 /* Find an empty entry in FDLIST. */
1492 size_t inner;
1493 for (inner = 0; inner < nconns; ++inner)
1494 if (fdlist[inner].next == NULL)
1495 break;
1496 assert (inner < nconns);
1498 fdlist[inner].fd = fd;
1500 if (readylist == NULL)
1501 readylist = fdlist[inner].next = &fdlist[inner];
1502 else
1504 fdlist[inner].next = readylist->next;
1505 readylist = readylist->next = &fdlist[inner];
1508 bool do_signal = true;
1509 if (__builtin_expect (nready == 0, 0))
1511 ++client_queued;
1512 do_signal = false;
1514 /* Try to start another thread to help out. */
1515 pthread_t th;
1516 if (nthreads < max_nthreads
1517 && pthread_create (&th, &attr, nscd_run,
1518 (void *) (long int) nthreads) == 0)
1520 /* We got another thread. */
1521 ++nthreads;
1522 /* The new thread might need a kick. */
1523 do_signal = true;
1528 pthread_mutex_unlock (&readylist_lock);
1530 /* Tell one of the worker threads there is work to do. */
1531 if (do_signal)
1532 pthread_cond_signal (&readylist_cond);
1536 /* Check whether restarting should happen. */
1537 static inline int
1538 restart_p (time_t now)
1540 return (paranoia && readylist == NULL && nready == nthreads
1541 && now >= restart_time);
/* Array for times a connection was accepted.  The poll loop indexes it
   by slot in its descriptor array, the epoll loop by descriptor number
   (where zero means "not waiting").  */
static time_t *starttime;
1549 static void
1550 __attribute__ ((__noreturn__))
1551 main_loop_poll (void)
1553 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1554 * sizeof (conns[0]));
1556 conns[0].fd = sock;
1557 conns[0].events = POLLRDNORM;
1558 size_t nused = 1;
1559 size_t firstfree = 1;
1561 while (1)
1563 /* Wait for any event. We wait at most a couple of seconds so
1564 that we can check whether we should close any of the accepted
1565 connections since we have not received a request. */
1566 #define MAX_ACCEPT_TIMEOUT 30
1567 #define MIN_ACCEPT_TIMEOUT 5
1568 #define MAIN_THREAD_TIMEOUT \
1569 (MAX_ACCEPT_TIMEOUT * 1000 \
1570 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1572 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1574 time_t now = time (NULL);
1576 /* If there is a descriptor ready for reading or there is a new
1577 connection, process this now. */
1578 if (n > 0)
1580 if (conns[0].revents != 0)
1582 /* We have a new incoming connection. Accept the connection. */
1583 int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
1585 /* use the descriptor if we have not reached the limit. */
1586 if (fd >= 0 && firstfree < nconns)
1588 conns[firstfree].fd = fd;
1589 conns[firstfree].events = POLLRDNORM;
1590 starttime[firstfree] = now;
1591 if (firstfree >= nused)
1592 nused = firstfree + 1;
1595 ++firstfree;
1596 while (firstfree < nused && conns[firstfree].fd != -1);
1599 --n;
1602 for (size_t cnt = 1; cnt < nused && n > 0; ++cnt)
1603 if (conns[cnt].revents != 0)
1605 fd_ready (conns[cnt].fd);
1607 /* Clean up the CONNS array. */
1608 conns[cnt].fd = -1;
1609 if (cnt < firstfree)
1610 firstfree = cnt;
1611 if (cnt == nused - 1)
1613 --nused;
1614 while (conns[nused - 1].fd == -1);
1616 --n;
1620 /* Now find entries which have timed out. */
1621 assert (nused > 0);
1623 /* We make the timeout length depend on the number of file
1624 descriptors currently used. */
1625 #define ACCEPT_TIMEOUT \
1626 (MAX_ACCEPT_TIMEOUT \
1627 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
1628 time_t laststart = now - ACCEPT_TIMEOUT;
1630 for (size_t cnt = nused - 1; cnt > 0; --cnt)
1632 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
1634 /* Remove the entry, it timed out. */
1635 (void) close (conns[cnt].fd);
1636 conns[cnt].fd = -1;
1638 if (cnt < firstfree)
1639 firstfree = cnt;
1640 if (cnt == nused - 1)
1642 --nused;
1643 while (conns[nused - 1].fd == -1);
1647 if (restart_p (now))
1648 restart ();
#ifdef HAVE_EPOLL
/* Main loop of the initial thread, implemented with the epoll
   descriptor EFD.  The function only returns if the listening socket
   cannot be registered with EFD; the caller then has to use a different
   mechanism.  STARTTIME is indexed by descriptor number here and a zero
   entry means the descriptor is not waiting for a request.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  /* Number of registered descriptors, including the socket.  */
  int nused = 1;
  /* Upper bound for the descriptors with a nonzero STARTTIME entry.  */
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));

            if (fd >= 0)
              {
                /* Try to add the new descriptor.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
        else
          {
            /* Remove the descriptor from the epoll descriptor.  The
               event argument is ignored for this operation but old
               kernels insist on a non-null pointer.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, &ev);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (cnt != sock && starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, &ev);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;

            /* The descriptor is gone, do not count it any longer.
               Otherwise NUSED only ever grows for such connections and
               the timeouts computed from it become too short.  */
            --nused;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
1747 /* Start all the threads we want. The initial process is thread no. 1. */
1748 void
1749 start_threads (void)
1751 /* Initialize the conditional variable we will use. The only
1752 non-standard attribute we might use is the clock selection. */
1753 pthread_condattr_t condattr;
1754 pthread_condattr_init (&condattr);
1756 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
1757 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
1758 /* Determine whether the monotonous clock is available. */
1759 struct timespec dummy;
1760 # if _POSIX_MONOTONIC_CLOCK == 0
1761 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
1762 # endif
1763 # if _POSIX_CLOCK_SELECTION == 0
1764 if (sysconf (_SC_CLOCK_SELECTION) > 0)
1765 # endif
1766 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
1767 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
1768 timeout_clock = CLOCK_MONOTONIC;
1769 #endif
1771 pthread_cond_init (&readylist_cond, &condattr);
1772 pthread_condattr_destroy (&condattr);
1775 /* Create the attribute for the threads. They are all created
1776 detached. */
1777 pthread_attr_init (&attr);
1778 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
1779 /* Use 1MB stacks, twice as much for 64-bit architectures. */
1780 pthread_attr_setstacksize (&attr, 1024 * 1024 * (sizeof (void *) / 4));
1782 /* We allow less than LASTDB threads only for debugging. */
1783 if (debug_level == 0)
1784 nthreads = MAX (nthreads, lastdb);
1786 int nfailed = 0;
1787 for (long int i = 0; i < nthreads; ++i)
1789 pthread_t th;
1790 if (pthread_create (&th, &attr, nscd_run, (void *) (i - nfailed)) != 0)
1791 ++nfailed;
1793 if (nthreads - nfailed < lastdb)
1795 /* We could not start enough threads. */
1796 dbg_log (_("could only start %d threads; terminating"),
1797 nthreads - nfailed);
1798 exit (1);
1801 /* Determine how much room for descriptors we should initially
1802 allocate. This might need to change later if we cap the number
1803 with MAXCONN. */
1804 const long int nfds = sysconf (_SC_OPEN_MAX);
1805 #define MINCONN 32
1806 #define MAXCONN 16384
1807 if (nfds == -1 || nfds > MAXCONN)
1808 nconns = MAXCONN;
1809 else if (nfds < MINCONN)
1810 nconns = MINCONN;
1811 else
1812 nconns = nfds;
1814 /* We need memory to pass descriptors on to the worker threads. */
1815 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
1816 /* Array to keep track when connection was accepted. */
1817 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
1819 /* In the main thread we execute the loop which handles incoming
1820 connections. */
1821 #ifdef HAVE_EPOLL
1822 int efd = epoll_create (100);
1823 if (efd != -1)
1825 main_loop_epoll (efd);
1826 close (efd);
1828 #endif
1830 main_loop_poll ();
1834 /* Look up the uid, gid, and supplementary groups to run nscd as. When
1835 this function is called, we are not listening on the nscd socket yet so
1836 we can just use the ordinary lookup functions without causing a lockup */
1837 static void
1838 begin_drop_privileges (void)
1840 struct passwd *pwd = getpwnam (server_user);
1842 if (pwd == NULL)
1844 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1845 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
1846 server_user);
1849 server_uid = pwd->pw_uid;
1850 server_gid = pwd->pw_gid;
1852 /* Save the old UID/GID if we have to change back. */
1853 if (paranoia)
1855 old_uid = getuid ();
1856 old_gid = getgid ();
1859 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
1861 /* This really must never happen. */
1862 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1863 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
1866 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
1868 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
1869 == -1)
1871 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1872 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
1877 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
1878 run nscd as the user specified in the configuration file. */
1879 static void
1880 finish_drop_privileges (void)
1882 if (setgroups (server_ngroups, server_groups) == -1)
1884 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1885 error (EXIT_FAILURE, errno, _("setgroups failed"));
1888 if (setgid (server_gid) == -1)
1890 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1891 perror ("setgid");
1892 exit (4);
1895 if (setuid (server_uid) == -1)
1897 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1898 perror ("setuid");
1899 exit (4);