Updated to fedora-glibc-20070731T1624
[glibc.git] / nscd / connections.c
blob32a107781970def97862df52502bf7b08cb9963a
/* Inner loops of cache daemon.
   Copyright (C) 1998-2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */

20 #include <alloca.h>
21 #include <assert.h>
22 #include <atomic.h>
23 #include <error.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <grp.h>
27 #include <libintl.h>
28 #include <pthread.h>
29 #include <pwd.h>
30 #include <resolv.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <arpa/inet.h>
35 #ifdef HAVE_EPOLL
36 # include <sys/epoll.h>
37 #endif
38 #include <sys/mman.h>
39 #include <sys/param.h>
40 #include <sys/poll.h>
41 #ifdef HAVE_SENDFILE
42 # include <sys/sendfile.h>
43 #endif
44 #include <sys/socket.h>
45 #include <sys/stat.h>
46 #include <sys/un.h>
48 #include "nscd.h"
49 #include "dbg_log.h"
50 #include "selinux.h"
51 #ifdef HAVE_SENDFILE
52 # include <kernel-features.h>
53 #endif
/* Wrapper functions with error checking for standard functions.  */
extern void *xmalloc (size_t n);
extern void *xcalloc (size_t n, size_t s);
extern void *xrealloc (void *o, size_t n);

/* Support to run nscd as an unprivileged user.  SERVER_USER is the
   switch: privileges are only dropped when it is not NULL.  */
const char *server_user;
static uid_t server_uid;
static gid_t server_gid;
const char *stat_user;
/* UID which, besides root, may issue GETSTAT requests.  */
uid_t stat_uid;
static gid_t *server_groups;
#ifndef NGROUPS
# define NGROUPS 32
#endif
static int server_ngroups;
static volatile int sighup_pending;

static pthread_attr_t attr;

static void begin_drop_privileges (void);
static void finish_drop_privileges (void);

79 /* Map request type to a string. */
80 const char *const serv2str[LASTREQ] =
82 [GETPWBYNAME] = "GETPWBYNAME",
83 [GETPWBYUID] = "GETPWBYUID",
84 [GETGRBYNAME] = "GETGRBYNAME",
85 [GETGRBYGID] = "GETGRBYGID",
86 [GETHOSTBYNAME] = "GETHOSTBYNAME",
87 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
88 [GETHOSTBYADDR] = "GETHOSTBYADDR",
89 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
90 [SHUTDOWN] = "SHUTDOWN",
91 [GETSTAT] = "GETSTAT",
92 [INVALIDATE] = "INVALIDATE",
93 [GETFDPW] = "GETFDPW",
94 [GETFDGR] = "GETFDGR",
95 [GETFDHST] = "GETFDHST",
96 [GETAI] = "GETAI",
97 [INITGROUPS] = "INITGROUPS",
98 [GETSERVBYNAME] = "GETSERVBYNAME",
99 [GETSERVBYPORT] = "GETSERVBYPORT",
100 [GETFDSERV] = "GETFDSERV"
103 /* The control data structures for the services. */
104 struct database_dyn dbs[lastdb] =
106 [pwddb] = {
107 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
108 .prunelock = PTHREAD_MUTEX_INITIALIZER,
109 .enabled = 0,
110 .check_file = 1,
111 .persistent = 0,
112 .propagate = 1,
113 .shared = 0,
114 .max_db_size = DEFAULT_MAX_DB_SIZE,
115 .reset_res = 0,
116 .filename = "/etc/passwd",
117 .db_filename = _PATH_NSCD_PASSWD_DB,
118 .disabled_iov = &pwd_iov_disabled,
119 .postimeout = 3600,
120 .negtimeout = 20,
121 .wr_fd = -1,
122 .ro_fd = -1,
123 .mmap_used = false
125 [grpdb] = {
126 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
127 .prunelock = PTHREAD_MUTEX_INITIALIZER,
128 .enabled = 0,
129 .check_file = 1,
130 .persistent = 0,
131 .propagate = 1,
132 .shared = 0,
133 .max_db_size = DEFAULT_MAX_DB_SIZE,
134 .reset_res = 0,
135 .filename = "/etc/group",
136 .db_filename = _PATH_NSCD_GROUP_DB,
137 .disabled_iov = &grp_iov_disabled,
138 .postimeout = 3600,
139 .negtimeout = 60,
140 .wr_fd = -1,
141 .ro_fd = -1,
142 .mmap_used = false
144 [hstdb] = {
145 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
146 .prunelock = PTHREAD_MUTEX_INITIALIZER,
147 .enabled = 0,
148 .check_file = 1,
149 .persistent = 0,
150 .propagate = 0, /* Not used. */
151 .shared = 0,
152 .max_db_size = DEFAULT_MAX_DB_SIZE,
153 .reset_res = 1,
154 .filename = "/etc/hosts",
155 .db_filename = _PATH_NSCD_HOSTS_DB,
156 .disabled_iov = &hst_iov_disabled,
157 .postimeout = 3600,
158 .negtimeout = 20,
159 .wr_fd = -1,
160 .ro_fd = -1,
161 .mmap_used = false
163 [servdb] = {
164 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
165 .prunelock = PTHREAD_MUTEX_INITIALIZER,
166 .enabled = 0,
167 .check_file = 1,
168 .persistent = 0,
169 .propagate = 0, /* Not used. */
170 .shared = 0,
171 .max_db_size = DEFAULT_MAX_DB_SIZE,
172 .reset_res = 0,
173 .filename = "/etc/services",
174 .db_filename = _PATH_NSCD_SERVICES_DB,
175 .disabled_iov = &serv_iov_disabled,
176 .postimeout = 28800,
177 .negtimeout = 20,
178 .wr_fd = -1,
179 .ro_fd = -1,
180 .mmap_used = false
185 /* Mapping of request type to database. */
186 static struct
188 bool data_request;
189 struct database_dyn *db;
190 } const reqinfo[LASTREQ] =
192 [GETPWBYNAME] = { true, &dbs[pwddb] },
193 [GETPWBYUID] = { true, &dbs[pwddb] },
194 [GETGRBYNAME] = { true, &dbs[grpdb] },
195 [GETGRBYGID] = { true, &dbs[grpdb] },
196 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
197 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
198 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
199 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
200 [SHUTDOWN] = { false, NULL },
201 [GETSTAT] = { false, NULL },
202 [SHUTDOWN] = { false, NULL },
203 [GETFDPW] = { false, &dbs[pwddb] },
204 [GETFDGR] = { false, &dbs[grpdb] },
205 [GETFDHST] = { false, &dbs[hstdb] },
206 [GETAI] = { true, &dbs[hstdb] },
207 [INITGROUPS] = { true, &dbs[grpdb] },
208 [GETSERVBYNAME] = { true, &dbs[servdb] },
209 [GETSERVBYPORT] = { true, &dbs[servdb] },
210 [GETFDSERV] = { false, &dbs[servdb] }
/* Number of seconds between two cache pruning runs.  */
#define CACHE_PRUNE_INTERVAL	15


/* Initial number of threads to use.  The value -1 means "not
   configured"; nscd_init then picks a default.  */
int nthreads = -1;
/* Maximum number of threads to use.  */
int max_nthreads = 32;

/* Socket for incoming connections.  */
static int sock;

/* Number of times clients had to wait.  */
unsigned long int client_queued;


/* Send the LEN bytes starting at BUF over the socket FD, continuing
   after short writes and after interruption by a signal.  MSG_NOSIGNAL
   is used, so a vanished peer shows up as an error return and not as
   SIGPIPE.

   Returns the negative result of send if a call failed, otherwise the
   number of bytes actually sent.  The latter is less than LEN only
   when send reported zero bytes written.  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  size_t n = len;
  ssize_t ret;

  do
    {
      ret = TEMP_FAILURE_RETRY (send (fd, buf, n, MSG_NOSIGNAL));
      if (ret <= 0)
        break;
      /* Skip over the part which has been sent already.  */
      buf = (const char *) buf + ret;
      n -= ret;
    }
  while (n > 0);

  return ret < 0 ? ret : len - n;
}


#ifdef HAVE_SENDFILE
/* Copy LEN bytes, starting at offset OFF of the file FROMFD, to the
   descriptor TOFD using sendfile.  Short transfers and interruptions
   by signals are retried.  The address of the local copy of OFF is
   handed to sendfile, so every retry continues where the previous
   call stopped.

   Returns the negative result of sendfile if a call failed, otherwise
   the number of bytes transferred.  The latter is less than LEN only
   when sendfile reported zero bytes transferred.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t n = len;
  ssize_t ret;

  do
    {
      ret = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, n));
      if (ret <= 0)
        break;
      n -= ret;
    }
  while (n > 0);

  return ret < 0 ? ret : len - n;
}
#endif


/* Values stored in the usage map built while verifying a persistent
   database.  The low bits give the kind of object occupying a byte
   (hash entry, key, data); the flag bits mark the first and the last
   byte of an object and whether a "first" hash entry references it.  */
enum usekey
  {
    use_not = 0,
    /* The following three are not really used, they are symbolic constants.  */
    use_first = 16,
    use_begin = 32,
    use_end = 64,

    use_he = 1,
    use_he_begin = use_he | use_begin,
    use_he_end = use_he | use_end,
#if SEPARATE_KEY
    use_key = 2,
    use_key_begin = use_key | use_begin,
    use_key_end = use_key | use_end,
    use_key_first = use_key_begin | use_first,
#endif
    use_data = 3,
    use_data_begin = use_data | use_begin,
    use_data_end = use_data | use_end,
    use_data_first = use_data_begin | use_first
  };


292 static int
293 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
294 enum usekey use, ref_t start, size_t len)
296 assert (len >= 2);
298 if (start > first_free || start + len > first_free
299 || (start & BLOCK_ALIGN_M1))
300 return 0;
302 if (usemap[start] == use_not)
304 /* Add the start marker. */
305 usemap[start] = use | use_begin;
306 use &= ~use_first;
308 while (--len > 0)
309 if (usemap[++start] != use_not)
310 return 0;
311 else
312 usemap[start] = use;
314 /* Add the end marker. */
315 usemap[start] = use | use_end;
317 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
319 /* Hash entries can't be shared. */
320 if (use == use_he)
321 return 0;
323 usemap[start] |= (use & use_first);
324 use &= ~use_first;
326 while (--len > 1)
327 if (usemap[++start] != use)
328 return 0;
330 if (usemap[++start] != (use | use_end))
331 return 0;
333 else
334 /* Points to a wrong object or somewhere in the middle. */
335 return 0;
337 return 1;
341 /* Verify data in persistent database. */
342 static int
343 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
345 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb);
347 time_t now = time (NULL);
349 struct database_pers_head *head = mem;
350 struct database_pers_head head_copy = *head;
352 /* Check that the header that was read matches the head in the database. */
353 if (readhead != NULL && memcmp (head, readhead, sizeof (*head)) != 0)
354 return 0;
356 /* First some easy tests: make sure the database header is sane. */
357 if (head->version != DB_VERSION
358 || head->header_size != sizeof (*head)
359 /* We allow a timestamp to be one hour ahead of the current time.
360 This should cover daylight saving time changes. */
361 || head->timestamp > now + 60 * 60 + 60
362 || (head->gc_cycle & 1)
363 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
364 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
365 || head->first_free < 0
366 || head->first_free > head->data_size
367 || (head->first_free & BLOCK_ALIGN_M1) != 0
368 || head->maxnentries < 0
369 || head->maxnsearched < 0)
370 return 0;
372 uint8_t *usemap = calloc (head->first_free, 1);
373 if (usemap == NULL)
374 return 0;
376 const char *data = (char *) &head->array[roundup (head->module,
377 ALIGN / sizeof (ref_t))];
379 nscd_ssize_t he_cnt = 0;
380 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
382 ref_t work = head->array[cnt];
384 while (work != ENDREF)
386 if (! check_use (data, head->first_free, usemap, use_he, work,
387 sizeof (struct hashentry)))
388 goto fail;
390 /* Now we know we can dereference the record. */
391 struct hashentry *here = (struct hashentry *) (data + work);
393 ++he_cnt;
395 /* Make sure the record is for this type of service. */
396 if (here->type >= LASTREQ
397 || reqinfo[here->type].db != &dbs[dbnr])
398 goto fail;
400 /* Validate boolean field value. */
401 if (here->first != false && here->first != true)
402 goto fail;
404 if (here->len < 0)
405 goto fail;
407 /* Now the data. */
408 if (here->packet < 0
409 || here->packet > head->first_free
410 || here->packet + sizeof (struct datahead) > head->first_free)
411 goto fail;
413 struct datahead *dh = (struct datahead *) (data + here->packet);
415 if (! check_use (data, head->first_free, usemap,
416 use_data | (here->first ? use_first : 0),
417 here->packet, dh->allocsize))
418 goto fail;
420 if (dh->allocsize < sizeof (struct datahead)
421 || dh->recsize > dh->allocsize
422 || (dh->notfound != false && dh->notfound != true)
423 || (dh->usable != false && dh->usable != true))
424 goto fail;
426 if (here->key < here->packet + sizeof (struct datahead)
427 || here->key > here->packet + dh->allocsize
428 || here->key + here->len > here->packet + dh->allocsize)
430 #if SEPARATE_KEY
431 /* If keys can appear outside of data, this should be done
432 instead. But gc doesn't mark the data in that case. */
433 if (! check_use (data, head->first_free, usemap,
434 use_key | (here->first ? use_first : 0),
435 here->key, here->len))
436 #endif
437 goto fail;
440 work = here->next;
444 if (he_cnt != head->nentries)
445 goto fail;
447 /* See if all data and keys had at least one reference from
448 he->first == true hashentry. */
449 for (ref_t idx = 0; idx < head->first_free; ++idx)
451 #if SEPARATE_KEY
452 if (usemap[idx] == use_key_begin)
453 goto fail;
454 #endif
455 if (usemap[idx] == use_data_begin)
456 goto fail;
459 /* Finally, make sure the database hasn't changed since the first test. */
460 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
461 goto fail;
463 free (usemap);
464 return 1;
466 fail:
467 free (usemap);
468 return 0;
472 /* Initialize database information structures. */
473 void
474 nscd_init (void)
476 /* Look up unprivileged uid/gid/groups before we start listening on the
477 socket */
478 if (server_user != NULL)
479 begin_drop_privileges ();
481 if (nthreads == -1)
482 /* No configuration for this value, assume a default. */
483 nthreads = 2 * lastdb;
485 for (size_t cnt = 0; cnt < lastdb; ++cnt)
486 if (dbs[cnt].enabled)
488 pthread_rwlock_init (&dbs[cnt].lock, NULL);
489 pthread_mutex_init (&dbs[cnt].memlock, NULL);
491 if (dbs[cnt].persistent)
493 /* Try to open the appropriate file on disk. */
494 int fd = open (dbs[cnt].db_filename, O_RDWR);
495 if (fd != -1)
497 struct stat64 st;
498 void *mem;
499 size_t total;
500 struct database_pers_head head;
501 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
502 sizeof (head)));
503 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
505 fail_db:
506 dbg_log (_("invalid persistent database file \"%s\": %s"),
507 dbs[cnt].db_filename, strerror (errno));
508 unlink (dbs[cnt].db_filename);
510 else if (head.module == 0 && head.data_size == 0)
512 /* The file has been created, but the head has not been
513 initialized yet. Remove the old file. */
514 unlink (dbs[cnt].db_filename);
516 else if (head.header_size != (int) sizeof (head))
518 dbg_log (_("invalid persistent database file \"%s\": %s"),
519 dbs[cnt].db_filename,
520 _("header size does not match"));
521 unlink (dbs[cnt].db_filename);
523 else if ((total = (sizeof (head)
524 + roundup (head.module * sizeof (ref_t),
525 ALIGN)
526 + head.data_size))
527 > st.st_size
528 || total < sizeof (head))
530 dbg_log (_("invalid persistent database file \"%s\": %s"),
531 dbs[cnt].db_filename,
532 _("file size does not match"));
533 unlink (dbs[cnt].db_filename);
535 /* Note we map with the maximum size allowed for the
536 database. This is likely much larger than the
537 actual file size. This is OK on most OSes since
538 extensions of the underlying file will
539 automatically translate more pages available for
540 memory access. */
541 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
542 PROT_READ | PROT_WRITE,
543 MAP_SHARED, fd, 0))
544 == MAP_FAILED)
545 goto fail_db;
546 else if (!verify_persistent_db (mem, &head, cnt))
548 munmap (mem, total);
549 dbg_log (_("invalid persistent database file \"%s\": %s"),
550 dbs[cnt].db_filename,
551 _("verification failed"));
552 unlink (dbs[cnt].db_filename);
554 else
556 /* Success. We have the database. */
557 dbs[cnt].head = mem;
558 dbs[cnt].memsize = total;
559 dbs[cnt].data = (char *)
560 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
561 ALIGN / sizeof (ref_t))];
562 dbs[cnt].mmap_used = true;
564 if (dbs[cnt].suggested_module > head.module)
565 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
566 dbnames[cnt]);
568 dbs[cnt].wr_fd = fd;
569 fd = -1;
570 /* We also need a read-only descriptor. */
571 if (dbs[cnt].shared)
573 dbs[cnt].ro_fd = open (dbs[cnt].db_filename, O_RDONLY);
574 if (dbs[cnt].ro_fd == -1)
575 dbg_log (_("\
576 cannot create read-only descriptor for \"%s\"; no mmap"),
577 dbs[cnt].db_filename);
580 // XXX Shall we test whether the descriptors actually
581 // XXX point to the same file?
584 /* Close the file descriptors in case something went
585 wrong in which case the variable have not been
586 assigned -1. */
587 if (fd != -1)
588 close (fd);
592 if (dbs[cnt].head == NULL)
594 /* No database loaded. Allocate the data structure,
595 possibly on disk. */
596 struct database_pers_head head;
597 size_t total = (sizeof (head)
598 + roundup (dbs[cnt].suggested_module
599 * sizeof (ref_t), ALIGN)
600 + (dbs[cnt].suggested_module
601 * DEFAULT_DATASIZE_PER_BUCKET));
603 /* Try to create the database. If we do not need a
604 persistent database create a temporary file. */
605 int fd;
606 int ro_fd = -1;
607 if (dbs[cnt].persistent)
609 fd = open (dbs[cnt].db_filename,
610 O_RDWR | O_CREAT | O_EXCL | O_TRUNC,
611 S_IRUSR | S_IWUSR);
612 if (fd != -1 && dbs[cnt].shared)
613 ro_fd = open (dbs[cnt].db_filename, O_RDONLY);
615 else
617 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
618 fd = mkstemp (fname);
620 /* We do not need the file name anymore after we
621 opened another file descriptor in read-only mode. */
622 if (fd != -1)
624 if (dbs[cnt].shared)
625 ro_fd = open (fname, O_RDONLY);
627 unlink (fname);
631 if (fd == -1)
633 if (errno == EEXIST)
635 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
636 dbnames[cnt], dbs[cnt].db_filename);
637 // XXX Correct way to terminate?
638 exit (1);
641 if (dbs[cnt].persistent)
642 dbg_log (_("cannot create %s; no persistent database used"),
643 dbs[cnt].db_filename);
644 else
645 dbg_log (_("cannot create %s; no sharing possible"),
646 dbs[cnt].db_filename);
648 dbs[cnt].persistent = 0;
649 // XXX remember: no mmap
651 else
653 /* Tell the user if we could not create the read-only
654 descriptor. */
655 if (ro_fd == -1 && dbs[cnt].shared)
656 dbg_log (_("\
657 cannot create read-only descriptor for \"%s\"; no mmap"),
658 dbs[cnt].db_filename);
660 /* Before we create the header, initialiye the hash
661 table. So that if we get interrupted if writing
662 the header we can recognize a partially initialized
663 database. */
664 size_t ps = sysconf (_SC_PAGESIZE);
665 char tmpbuf[ps];
666 assert (~ENDREF == 0);
667 memset (tmpbuf, '\xff', ps);
669 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
670 off_t offset = sizeof (head);
672 size_t towrite;
673 if (offset % ps != 0)
675 towrite = MIN (remaining, ps - (offset % ps));
676 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
677 goto write_fail;
678 offset += towrite;
679 remaining -= towrite;
682 while (remaining > ps)
684 if (pwrite (fd, tmpbuf, ps, offset) == -1)
685 goto write_fail;
686 offset += ps;
687 remaining -= ps;
690 if (remaining > 0
691 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
692 goto write_fail;
694 /* Create the header of the file. */
695 struct database_pers_head head =
697 .version = DB_VERSION,
698 .header_size = sizeof (head),
699 .module = dbs[cnt].suggested_module,
700 .data_size = (dbs[cnt].suggested_module
701 * DEFAULT_DATASIZE_PER_BUCKET),
702 .first_free = 0
704 void *mem;
706 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
707 != sizeof (head))
708 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
709 != 0)
710 || (mem = mmap (NULL, dbs[cnt].max_db_size,
711 PROT_READ | PROT_WRITE,
712 MAP_SHARED, fd, 0)) == MAP_FAILED)
714 write_fail:
715 unlink (dbs[cnt].db_filename);
716 dbg_log (_("cannot write to database file %s: %s"),
717 dbs[cnt].db_filename, strerror (errno));
718 dbs[cnt].persistent = 0;
720 else
722 /* Success. */
723 dbs[cnt].head = mem;
724 dbs[cnt].data = (char *)
725 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
726 ALIGN / sizeof (ref_t))];
727 dbs[cnt].memsize = total;
728 dbs[cnt].mmap_used = true;
730 /* Remember the descriptors. */
731 dbs[cnt].wr_fd = fd;
732 dbs[cnt].ro_fd = ro_fd;
733 fd = -1;
734 ro_fd = -1;
737 if (fd != -1)
738 close (fd);
739 if (ro_fd != -1)
740 close (ro_fd);
744 if (paranoia
745 && ((dbs[cnt].wr_fd != -1
746 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
747 || (dbs[cnt].ro_fd != -1
748 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
750 dbg_log (_("\
751 cannot set socket to close on exec: %s; disabling paranoia mode"),
752 strerror (errno));
753 paranoia = 0;
756 if (dbs[cnt].head == NULL)
758 /* We do not use the persistent database. Just
759 create an in-memory data structure. */
760 assert (! dbs[cnt].persistent);
762 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
763 + (dbs[cnt].suggested_module
764 * sizeof (ref_t)));
765 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
766 assert (~ENDREF == 0);
767 memset (dbs[cnt].head->array, '\xff',
768 dbs[cnt].suggested_module * sizeof (ref_t));
769 dbs[cnt].head->module = dbs[cnt].suggested_module;
770 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
771 * dbs[cnt].head->module);
772 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
773 dbs[cnt].head->first_free = 0;
775 dbs[cnt].shared = 0;
776 assert (dbs[cnt].ro_fd == -1);
779 if (dbs[cnt].check_file)
781 /* We need the modification date of the file. */
782 struct stat64 st;
784 if (stat64 (dbs[cnt].filename, &st) < 0)
786 /* We cannot stat() the file, disable file checking. */
787 dbg_log (_("cannot stat() file `%s': %s"),
788 dbs[cnt].filename, strerror (errno));
789 dbs[cnt].check_file = 0;
791 else
792 dbs[cnt].file_mtime = st.st_mtime;
796 /* Create the socket. */
797 sock = socket (AF_UNIX, SOCK_STREAM, 0);
798 if (sock < 0)
800 dbg_log (_("cannot open socket: %s"), strerror (errno));
801 exit (errno == EACCES ? 4 : 1);
803 /* Bind a name to the socket. */
804 struct sockaddr_un sock_addr;
805 sock_addr.sun_family = AF_UNIX;
806 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
807 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
809 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
810 exit (errno == EACCES ? 4 : 1);
813 /* We don't want to get stuck on accept. */
814 int fl = fcntl (sock, F_GETFL);
815 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
817 dbg_log (_("cannot change socket to nonblocking mode: %s"),
818 strerror (errno));
819 exit (1);
822 /* The descriptor needs to be closed on exec. */
823 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
825 dbg_log (_("cannot set socket to close on exec: %s"),
826 strerror (errno));
827 exit (1);
830 /* Set permissions for the socket. */
831 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
833 /* Set the socket up to accept connections. */
834 if (listen (sock, SOMAXCONN) < 0)
836 dbg_log (_("cannot enable socket to accept connections: %s"),
837 strerror (errno));
838 exit (1);
841 /* Change to unprivileged uid/gid/groups if specifed in config file */
842 if (server_user != NULL)
843 finish_drop_privileges ();
847 /* Close the connections. */
848 void
849 close_sockets (void)
851 close (sock);
855 static void
856 invalidate_cache (char *key, int fd)
858 dbtype number;
859 int32_t resp;
861 for (number = pwddb; number < lastdb; ++number)
862 if (strcmp (key, dbnames[number]) == 0)
864 if (dbs[number].reset_res)
865 res_init ();
867 break;
870 if (number == lastdb)
872 resp = EINVAL;
873 writeall (fd, &resp, sizeof (resp));
874 return;
877 if (dbs[number].enabled)
878 prune_cache (&dbs[number], LONG_MAX, fd);
879 else
881 resp = 0;
882 writeall (fd, &resp, sizeof (resp));
887 #ifdef SCM_RIGHTS
888 static void
889 send_ro_fd (struct database_dyn *db, char *key, int fd)
891 /* If we do not have an read-only file descriptor do nothing. */
892 if (db->ro_fd == -1)
893 return;
895 /* We need to send some data along with the descriptor. */
896 struct iovec iov[1];
897 iov[0].iov_base = key;
898 iov[0].iov_len = strlen (key) + 1;
900 /* Prepare the control message to transfer the descriptor. */
901 union
903 struct cmsghdr hdr;
904 char bytes[CMSG_SPACE (sizeof (int))];
905 } buf;
906 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 1,
907 .msg_control = buf.bytes,
908 .msg_controllen = sizeof (buf) };
909 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
911 cmsg->cmsg_level = SOL_SOCKET;
912 cmsg->cmsg_type = SCM_RIGHTS;
913 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
915 *(int *) CMSG_DATA (cmsg) = db->ro_fd;
917 msg.msg_controllen = cmsg->cmsg_len;
919 /* Send the control message. We repeat when we are interrupted but
920 everything else is ignored. */
921 #ifndef MSG_NOSIGNAL
922 # define MSG_NOSIGNAL 0
923 #endif
924 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
926 if (__builtin_expect (debug_level > 0, 0))
927 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
929 #endif /* SCM_RIGHTS */
932 /* Handle new request. */
933 static void
934 handle_request (int fd, request_header *req, void *key, uid_t uid)
936 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
938 if (debug_level > 0)
939 dbg_log (_("\
940 cannot handle old request version %d; current version is %d"),
941 req->version, NSCD_VERSION);
942 return;
945 /* Make the SELinux check before we go on to the standard checks. */
946 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
947 return;
949 struct database_dyn *db = reqinfo[req->type].db;
951 /* See whether we can service the request from the cache. */
952 if (__builtin_expect (reqinfo[req->type].data_request, true))
954 if (__builtin_expect (debug_level, 0) > 0)
956 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
958 char buf[INET6_ADDRSTRLEN];
960 dbg_log ("\t%s (%s)", serv2str[req->type],
961 inet_ntop (req->type == GETHOSTBYADDR
962 ? AF_INET : AF_INET6,
963 key, buf, sizeof (buf)));
965 else
966 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
969 /* Is this service enabled? */
970 if (__builtin_expect (!db->enabled, 0))
972 /* No, sent the prepared record. */
973 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
974 db->disabled_iov->iov_len,
975 MSG_NOSIGNAL))
976 != (ssize_t) db->disabled_iov->iov_len
977 && __builtin_expect (debug_level, 0) > 0)
979 /* We have problems sending the result. */
980 char buf[256];
981 dbg_log (_("cannot write result: %s"),
982 strerror_r (errno, buf, sizeof (buf)));
985 return;
988 /* Be sure we can read the data. */
989 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
991 ++db->head->rdlockdelayed;
992 pthread_rwlock_rdlock (&db->lock);
995 /* See whether we can handle it from the cache. */
996 struct datahead *cached;
997 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
998 db, uid);
999 if (cached != NULL)
1001 /* Hurray it's in the cache. */
1002 ssize_t nwritten;
1004 #ifdef HAVE_SENDFILE
1005 if (db->mmap_used || !cached->notfound)
1007 assert (db->wr_fd != -1);
1008 assert ((char *) cached->data > (char *) db->data);
1009 assert ((char *) cached->data - (char *) db->head
1010 + cached->recsize
1011 <= (sizeof (struct database_pers_head)
1012 + db->head->module * sizeof (ref_t)
1013 + db->head->data_size));
1014 nwritten = sendfileall (fd, db->wr_fd,
1015 (char *) cached->data
1016 - (char *) db->head, cached->recsize);
1017 # ifndef __ASSUME_SENDFILE
1018 if (nwritten == -1 && errno == ENOSYS)
1019 goto use_write;
1020 # endif
1022 else
1023 # ifndef __ASSUME_SENDFILE
1024 use_write:
1025 # endif
1026 #endif
1027 nwritten = writeall (fd, cached->data, cached->recsize);
1029 if (nwritten != cached->recsize
1030 && __builtin_expect (debug_level, 0) > 0)
1032 /* We have problems sending the result. */
1033 char buf[256];
1034 dbg_log (_("cannot write result: %s"),
1035 strerror_r (errno, buf, sizeof (buf)));
1038 pthread_rwlock_unlock (&db->lock);
1040 return;
1043 pthread_rwlock_unlock (&db->lock);
1045 else if (__builtin_expect (debug_level, 0) > 0)
1047 if (req->type == INVALIDATE)
1048 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1049 else
1050 dbg_log ("\t%s", serv2str[req->type]);
1053 /* Handle the request. */
1054 switch (req->type)
1056 case GETPWBYNAME:
1057 addpwbyname (db, fd, req, key, uid);
1058 break;
1060 case GETPWBYUID:
1061 addpwbyuid (db, fd, req, key, uid);
1062 break;
1064 case GETGRBYNAME:
1065 addgrbyname (db, fd, req, key, uid);
1066 break;
1068 case GETGRBYGID:
1069 addgrbygid (db, fd, req, key, uid);
1070 break;
1072 case GETHOSTBYNAME:
1073 addhstbyname (db, fd, req, key, uid);
1074 break;
1076 case GETHOSTBYNAMEv6:
1077 addhstbynamev6 (db, fd, req, key, uid);
1078 break;
1080 case GETHOSTBYADDR:
1081 addhstbyaddr (db, fd, req, key, uid);
1082 break;
1084 case GETHOSTBYADDRv6:
1085 addhstbyaddrv6 (db, fd, req, key, uid);
1086 break;
1088 case GETAI:
1089 addhstai (db, fd, req, key, uid);
1090 break;
1092 case INITGROUPS:
1093 addinitgroups (db, fd, req, key, uid);
1094 break;
1096 case GETSERVBYNAME:
1097 addservbyname (db, fd, req, key, uid);
1098 break;
1100 case GETSERVBYPORT:
1101 addservbyport (db, fd, req, key, uid);
1102 break;
1104 case GETSTAT:
1105 case SHUTDOWN:
1106 case INVALIDATE:
1108 /* Get the callers credentials. */
1109 #ifdef SO_PEERCRED
1110 struct ucred caller;
1111 socklen_t optlen = sizeof (caller);
1113 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1115 char buf[256];
1117 dbg_log (_("error getting caller's id: %s"),
1118 strerror_r (errno, buf, sizeof (buf)));
1119 break;
1122 uid = caller.uid;
1123 #else
1124 /* Some systems have no SO_PEERCRED implementation. They don't
1125 care about security so we don't as well. */
1126 uid = 0;
1127 #endif
1130 /* Accept shutdown, getstat and invalidate only from root. For
1131 the stat call also allow the user specified in the config file. */
1132 if (req->type == GETSTAT)
1134 if (uid == 0 || uid == stat_uid)
1135 send_stats (fd, dbs);
1137 else if (uid == 0)
1139 if (req->type == INVALIDATE)
1140 invalidate_cache (key, fd);
1141 else
1142 termination_handler (0);
1144 break;
1146 case GETFDPW:
1147 case GETFDGR:
1148 case GETFDHST:
1149 case GETFDSERV:
1150 #ifdef SCM_RIGHTS
1151 send_ro_fd (reqinfo[req->type].db, key, fd);
1152 #endif
1153 break;
1155 default:
1156 /* Ignore the command, it's nothing we know. */
1157 break;
1162 /* Restart the process. */
1163 static void
1164 restart (void)
1166 /* First determine the parameters. We do not use the parameters
1167 passed to main() since in case nscd is started by running the
1168 dynamic linker this will not work. Yes, this is not the usual
1169 case but nscd is part of glibc and we occasionally do this. */
1170 size_t buflen = 1024;
1171 char *buf = alloca (buflen);
1172 size_t readlen = 0;
1173 int fd = open ("/proc/self/cmdline", O_RDONLY);
1174 if (fd == -1)
1176 dbg_log (_("\
1177 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1178 strerror (errno));
1180 paranoia = 0;
1181 return;
1184 while (1)
1186 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1187 buflen - readlen));
1188 if (n == -1)
1190 dbg_log (_("\
1191 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1192 strerror (errno));
1194 close (fd);
1195 paranoia = 0;
1196 return;
1199 readlen += n;
1201 if (readlen < buflen)
1202 break;
1204 /* We might have to extend the buffer. */
1205 size_t old_buflen = buflen;
1206 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1207 buf = memmove (newp, buf, old_buflen);
1210 close (fd);
1212 /* Parse the command line. Worst case scenario: every two
1213 characters form one parameter (one character plus NUL). */
1214 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1215 int argc = 0;
1217 char *cp = buf;
1218 while (cp < buf + readlen)
1220 argv[argc++] = cp;
1221 cp = (char *) rawmemchr (cp, '\0') + 1;
1223 argv[argc] = NULL;
1225 /* Second, change back to the old user if we changed it. */
1226 if (server_user != NULL)
1228 if (setresuid (old_uid, old_uid, old_uid) != 0)
1230 dbg_log (_("\
1231 cannot change to old UID: %s; disabling paranoia mode"),
1232 strerror (errno));
1234 paranoia = 0;
1235 return;
1238 if (setresgid (old_gid, old_gid, old_gid) != 0)
1240 dbg_log (_("\
1241 cannot change to old GID: %s; disabling paranoia mode"),
1242 strerror (errno));
1244 setuid (server_uid);
1245 paranoia = 0;
1246 return;
1250 /* Next change back to the old working directory. */
1251 if (chdir (oldcwd) == -1)
1253 dbg_log (_("\
1254 cannot change to old working directory: %s; disabling paranoia mode"),
1255 strerror (errno));
1257 if (server_user != NULL)
1259 setuid (server_uid);
1260 setgid (server_gid);
1262 paranoia = 0;
1263 return;
1266 /* Synchronize memory. */
1267 for (int cnt = 0; cnt < lastdb; ++cnt)
1269 /* Make sure nobody keeps using the database. */
1270 dbs[cnt].head->timestamp = 0;
1272 if (dbs[cnt].persistent)
1273 // XXX async OK?
1274 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1277 /* The preparations are done. */
1278 execv ("/proc/self/exe", argv);
1280 /* If we come here, we will never be able to re-exec. */
1281 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1282 strerror (errno));
1284 if (server_user != NULL)
1286 setuid (server_uid);
1287 setgid (server_gid);
1289 if (chdir ("/") != 0)
1290 dbg_log (_("cannot change current working directory to \"/\": %s"),
1291 strerror (errno));
1292 paranoia = 0;
/* Node of the list of file descriptors handed from the main loop to
   the worker threads.  The ready list is kept circular; a node whose
   NEXT member is NULL is not on the list and may be reused.  */
struct fdlist
{
  int fd;
  struct fdlist *next;
};
/* Memory allocated for the list.  */
static struct fdlist *fdlist;
/* List of currently ready-to-read file descriptors.  */
static struct fdlist *readylist;

/* Condition variable and mutex to signal availability of entries in
   READYLIST.  The condvar is initialized dynamically since we might
   use a different clock depending on availability.  */
static pthread_cond_t readylist_cond;
static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;

/* The clock to use with the condvar.  */
static clockid_t timeout_clock = CLOCK_REALTIME;

/* Number of threads ready to handle the READYLIST.  */
static unsigned long int nready;
1320 /* This is the main loop. It is replicated in different threads but the
1321 `poll' call makes sure only one thread handles an incoming connection. */
1322 static void *
1323 __attribute__ ((__noreturn__))
1324 nscd_run (void *p)
1326 const long int my_number = (long int) p;
1327 const int run_prune = my_number < lastdb && dbs[my_number].enabled;
1328 struct timespec prune_ts;
1329 int to = 0;
1330 char buf[256];
1332 if (run_prune)
1334 setup_thread (&dbs[my_number]);
1336 /* We are running. */
1337 dbs[my_number].head->timestamp = time (NULL);
1339 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1340 /* Should never happen. */
1341 abort ();
1343 /* Compute timeout time. */
1344 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1347 /* Initial locking. */
1348 pthread_mutex_lock (&readylist_lock);
1350 /* One more thread available. */
1351 ++nready;
1353 while (1)
1355 while (readylist == NULL)
1357 if (run_prune)
1359 /* Wait, but not forever. */
1360 to = pthread_cond_timedwait (&readylist_cond, &readylist_lock,
1361 &prune_ts);
1363 /* If we were woken and there is no work to be done,
1364 just start pruning. */
1365 if (readylist == NULL && to == ETIMEDOUT)
1367 --nready;
1369 if (sighup_pending)
1370 goto sighup_prune;
1372 pthread_mutex_unlock (&readylist_lock);
1373 goto only_prune;
1376 else
1377 /* No need to timeout. */
1378 pthread_cond_wait (&readylist_cond, &readylist_lock);
1381 if (sighup_pending)
1383 --nready;
1384 pthread_cond_signal (&readylist_cond);
1385 sighup_prune:
1386 sighup_pending = 0;
1387 pthread_mutex_unlock (&readylist_lock);
1389 /* Prune the password database. */
1390 if (dbs[pwddb].enabled)
1391 prune_cache (&dbs[pwddb], LONG_MAX, -1);
1393 /* Prune the group database. */
1394 if (dbs[grpdb].enabled)
1395 prune_cache (&dbs[grpdb], LONG_MAX, -1);
1397 /* Prune the host database. */
1398 if (dbs[hstdb].enabled)
1399 prune_cache (&dbs[hstdb], LONG_MAX, -1);
1401 /* Re-locking. */
1402 pthread_mutex_lock (&readylist_lock);
1404 /* One more thread available. */
1405 ++nready;
1406 continue;
1409 struct fdlist *it = readylist->next;
1410 if (readylist->next == readylist)
1411 /* Just one entry on the list. */
1412 readylist = NULL;
1413 else
1414 readylist->next = it->next;
1416 /* Extract the information and mark the record ready to be used
1417 again. */
1418 int fd = it->fd;
1419 it->next = NULL;
1421 /* One more thread available. */
1422 --nready;
1424 /* We are done with the list. */
1425 pthread_mutex_unlock (&readylist_lock);
1427 /* We do not want to block on a short read or so. */
1428 int fl = fcntl (fd, F_GETFL);
1429 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1430 goto close_and_out;
1432 /* Now read the request. */
1433 request_header req;
1434 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1435 != sizeof (req), 0))
1437 /* We failed to read data. Note that this also might mean we
1438 failed because we would have blocked. */
1439 if (debug_level > 0)
1440 dbg_log (_("short read while reading request: %s"),
1441 strerror_r (errno, buf, sizeof (buf)));
1442 goto close_and_out;
1445 /* Check whether this is a valid request type. */
1446 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1447 goto close_and_out;
1449 /* Some systems have no SO_PEERCRED implementation. They don't
1450 care about security so we don't as well. */
1451 uid_t uid = -1;
1452 #ifdef SO_PEERCRED
1453 pid_t pid = 0;
1455 if (__builtin_expect (debug_level > 0, 0))
1457 struct ucred caller;
1458 socklen_t optlen = sizeof (caller);
1460 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1461 pid = caller.pid;
1463 #endif
1465 /* It should not be possible to crash the nscd with a silly
1466 request (i.e., a terribly large key). We limit the size to 1kb. */
1467 if (__builtin_expect (req.key_len, 1) < 0
1468 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1470 if (debug_level > 0)
1471 dbg_log (_("key length in request too long: %d"), req.key_len);
1473 else
1475 /* Get the key. */
1476 char keybuf[MAXKEYLEN];
1478 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1479 req.key_len))
1480 != req.key_len, 0))
1482 /* Again, this can also mean we would have blocked. */
1483 if (debug_level > 0)
1484 dbg_log (_("short read while reading request key: %s"),
1485 strerror_r (errno, buf, sizeof (buf)));
1486 goto close_and_out;
1489 if (__builtin_expect (debug_level, 0) > 0)
1491 #ifdef SO_PEERCRED
1492 if (pid != 0)
1493 dbg_log (_("\
1494 handle_request: request received (Version = %d) from PID %ld"),
1495 req.version, (long int) pid);
1496 else
1497 #endif
1498 dbg_log (_("\
1499 handle_request: request received (Version = %d)"), req.version);
1502 /* Phew, we got all the data, now process it. */
1503 handle_request (fd, &req, keybuf, uid);
1506 close_and_out:
1507 /* We are done. */
1508 close (fd);
1510 /* Check whether we should be pruning the cache. */
1511 assert (run_prune || to == 0);
1512 if (to == ETIMEDOUT)
1514 only_prune:
1515 /* The pthread_cond_timedwait() call timed out. It is time
1516 to clean up the cache. */
1517 assert (my_number < lastdb);
1518 prune_cache (&dbs[my_number], time (NULL), -1);
1520 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1521 /* Should never happen. */
1522 abort ();
1524 /* Compute next timeout time. */
1525 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1527 /* In case the list is emtpy we do not want to run the prune
1528 code right away again. */
1529 to = 0;
1532 /* Re-locking. */
1533 pthread_mutex_lock (&readylist_lock);
1535 /* One more thread available. */
1536 ++nready;
/* Number of elements allocated for the FDLIST and STARTTIME arrays
   and therefore the limit for the connections handled at once.  */
static unsigned int nconns;
1543 static void
1544 fd_ready (int fd)
1546 pthread_mutex_lock (&readylist_lock);
1548 /* Find an empty entry in FDLIST. */
1549 size_t inner;
1550 for (inner = 0; inner < nconns; ++inner)
1551 if (fdlist[inner].next == NULL)
1552 break;
1553 assert (inner < nconns);
1555 fdlist[inner].fd = fd;
1557 if (readylist == NULL)
1558 readylist = fdlist[inner].next = &fdlist[inner];
1559 else
1561 fdlist[inner].next = readylist->next;
1562 readylist = readylist->next = &fdlist[inner];
1565 bool do_signal = true;
1566 if (__builtin_expect (nready == 0, 0))
1568 ++client_queued;
1569 do_signal = false;
1571 /* Try to start another thread to help out. */
1572 pthread_t th;
1573 if (nthreads < max_nthreads
1574 && pthread_create (&th, &attr, nscd_run,
1575 (void *) (long int) nthreads) == 0)
1577 /* We got another thread. */
1578 ++nthreads;
1579 /* The new thread might need a kick. */
1580 do_signal = true;
1585 pthread_mutex_unlock (&readylist_lock);
1587 /* Tell one of the worker threads there is work to do. */
1588 if (do_signal)
1589 pthread_cond_signal (&readylist_cond);
1593 /* Check whether restarting should happen. */
1594 static inline int
1595 restart_p (time_t now)
1597 return (paranoia && readylist == NULL && nready == nthreads
1598 && now >= restart_time);
/* Array for times a connection was accepted.  It has NCONNS elements.
   The poll loop indexes it by slot in its descriptor array, the epoll
   loop by descriptor number.  */
static time_t *starttime;
1606 static void
1607 __attribute__ ((__noreturn__))
1608 main_loop_poll (void)
1610 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1611 * sizeof (conns[0]));
1613 conns[0].fd = sock;
1614 conns[0].events = POLLRDNORM;
1615 size_t nused = 1;
1616 size_t firstfree = 1;
1618 while (1)
1620 /* Wait for any event. We wait at most a couple of seconds so
1621 that we can check whether we should close any of the accepted
1622 connections since we have not received a request. */
1623 #define MAX_ACCEPT_TIMEOUT 30
1624 #define MIN_ACCEPT_TIMEOUT 5
1625 #define MAIN_THREAD_TIMEOUT \
1626 (MAX_ACCEPT_TIMEOUT * 1000 \
1627 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1629 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1631 time_t now = time (NULL);
1633 /* If there is a descriptor ready for reading or there is a new
1634 connection, process this now. */
1635 if (n > 0)
1637 if (conns[0].revents != 0)
1639 /* We have a new incoming connection. Accept the connection. */
1640 int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
1642 /* Use the descriptor if we have not reached the limit. */
1643 if (fd >= 0)
1645 if (firstfree < nconns)
1647 conns[firstfree].fd = fd;
1648 conns[firstfree].events = POLLRDNORM;
1649 starttime[firstfree] = now;
1650 if (firstfree >= nused)
1651 nused = firstfree + 1;
1654 ++firstfree;
1655 while (firstfree < nused && conns[firstfree].fd != -1);
1657 else
1658 /* We cannot use the connection so close it. */
1659 close (fd);
1662 --n;
1665 for (size_t cnt = 1; cnt < nused && n > 0; ++cnt)
1666 if (conns[cnt].revents != 0)
1668 fd_ready (conns[cnt].fd);
1670 /* Clean up the CONNS array. */
1671 conns[cnt].fd = -1;
1672 if (cnt < firstfree)
1673 firstfree = cnt;
1674 if (cnt == nused - 1)
1676 --nused;
1677 while (conns[nused - 1].fd == -1);
1679 --n;
1683 /* Now find entries which have timed out. */
1684 assert (nused > 0);
1686 /* We make the timeout length depend on the number of file
1687 descriptors currently used. */
1688 #define ACCEPT_TIMEOUT \
1689 (MAX_ACCEPT_TIMEOUT \
1690 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
1691 time_t laststart = now - ACCEPT_TIMEOUT;
1693 for (size_t cnt = nused - 1; cnt > 0; --cnt)
1695 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
1697 /* Remove the entry, it timed out. */
1698 (void) close (conns[cnt].fd);
1699 conns[cnt].fd = -1;
1701 if (cnt < firstfree)
1702 firstfree = cnt;
1703 if (cnt == nused - 1)
1705 --nused;
1706 while (conns[nused - 1].fd == -1);
1710 if (restart_p (now))
1711 restart ();
#ifdef HAVE_EPOLL
/* Main loop of the initial thread, based on epoll.  EFD is the epoll
   descriptor to use.  The function only returns if the listening
   socket cannot be added to EFD; the caller then falls back to
   another implementation.

   STARTTIME is indexed by descriptor number here; a zero entry means
   the descriptor is not being waited for.  The MAIN_THREAD_TIMEOUT and
   ACCEPT_TIMEOUT macros expand to expressions using the local variable
   NUSED.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  int nused = 1;
  /* Highest descriptor number which might have a STARTTIME entry.  */
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));

            if (fd >= 0)
              {
                /* Try to add the new descriptor.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
        else
          {
            /* Remove the descriptor from the epoll descriptor.  The
               event argument is ignored for this operation but
               kernels before 2.6.9 reject a null pointer.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, &ev);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         not sent a request in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (cnt != sock && starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, &ev);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
1810 /* Start all the threads we want. The initial process is thread no. 1. */
1811 void
1812 start_threads (void)
1814 /* Initialize the conditional variable we will use. The only
1815 non-standard attribute we might use is the clock selection. */
1816 pthread_condattr_t condattr;
1817 pthread_condattr_init (&condattr);
1819 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
1820 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
1821 /* Determine whether the monotonous clock is available. */
1822 struct timespec dummy;
1823 # if _POSIX_MONOTONIC_CLOCK == 0
1824 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
1825 # endif
1826 # if _POSIX_CLOCK_SELECTION == 0
1827 if (sysconf (_SC_CLOCK_SELECTION) > 0)
1828 # endif
1829 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
1830 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
1831 timeout_clock = CLOCK_MONOTONIC;
1832 #endif
1834 pthread_cond_init (&readylist_cond, &condattr);
1835 pthread_condattr_destroy (&condattr);
1838 /* Create the attribute for the threads. They are all created
1839 detached. */
1840 pthread_attr_init (&attr);
1841 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
1842 /* Use 1MB stacks, twice as much for 64-bit architectures. */
1843 pthread_attr_setstacksize (&attr, 1024 * 1024 * (sizeof (void *) / 4));
1845 /* We allow less than LASTDB threads only for debugging. */
1846 if (debug_level == 0)
1847 nthreads = MAX (nthreads, lastdb);
1849 int nfailed = 0;
1850 for (long int i = 0; i < nthreads; ++i)
1852 pthread_t th;
1853 if (pthread_create (&th, &attr, nscd_run, (void *) (i - nfailed)) != 0)
1854 ++nfailed;
1856 if (nthreads - nfailed < lastdb)
1858 /* We could not start enough threads. */
1859 dbg_log (_("could only start %d threads; terminating"),
1860 nthreads - nfailed);
1861 exit (1);
1864 /* Determine how much room for descriptors we should initially
1865 allocate. This might need to change later if we cap the number
1866 with MAXCONN. */
1867 const long int nfds = sysconf (_SC_OPEN_MAX);
1868 #define MINCONN 32
1869 #define MAXCONN 16384
1870 if (nfds == -1 || nfds > MAXCONN)
1871 nconns = MAXCONN;
1872 else if (nfds < MINCONN)
1873 nconns = MINCONN;
1874 else
1875 nconns = nfds;
1877 /* We need memory to pass descriptors on to the worker threads. */
1878 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
1879 /* Array to keep track when connection was accepted. */
1880 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
1882 /* In the main thread we execute the loop which handles incoming
1883 connections. */
1884 #ifdef HAVE_EPOLL
1885 int efd = epoll_create (100);
1886 if (efd != -1)
1888 main_loop_epoll (efd);
1889 close (efd);
1891 #endif
1893 main_loop_poll ();
1897 /* Look up the uid, gid, and supplementary groups to run nscd as. When
1898 this function is called, we are not listening on the nscd socket yet so
1899 we can just use the ordinary lookup functions without causing a lockup */
1900 static void
1901 begin_drop_privileges (void)
1903 struct passwd *pwd = getpwnam (server_user);
1905 if (pwd == NULL)
1907 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1908 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
1909 server_user);
1912 server_uid = pwd->pw_uid;
1913 server_gid = pwd->pw_gid;
1915 /* Save the old UID/GID if we have to change back. */
1916 if (paranoia)
1918 old_uid = getuid ();
1919 old_gid = getgid ();
1922 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
1924 /* This really must never happen. */
1925 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1926 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
1929 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
1931 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
1932 == -1)
1934 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1935 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
1940 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
1941 run nscd as the user specified in the configuration file. */
1942 static void
1943 finish_drop_privileges (void)
1945 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
1946 /* We need to preserve the capabilities to connect to the audit daemon. */
1947 cap_t new_caps = preserve_capabilities ();
1948 #endif
1950 if (setgroups (server_ngroups, server_groups) == -1)
1952 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1953 error (EXIT_FAILURE, errno, _("setgroups failed"));
1956 int res;
1957 if (paranoia)
1958 res = setresgid (server_gid, server_gid, old_gid);
1959 else
1960 res = setgid (server_gid);
1961 if (res == -1)
1963 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1964 perror ("setgid");
1965 exit (4);
1968 if (paranoia)
1969 res = setresuid (server_uid, server_uid, old_uid);
1970 else
1971 res = setuid (server_uid);
1972 if (res == -1)
1974 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1975 perror ("setuid");
1976 exit (4);
1979 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
1980 /* Remove the temporary capabilities. */
1981 install_real_capabilities (new_caps);
1982 #endif
1985 /* Handle the HUP signal which will force a dump of the cache */
1986 void
1987 sighup_handler (int signum)
1989 sighup_pending = 1;