2.5-18.1
[glibc.git] / nscd / connections.c
blob8f1142143194cbeba9d1bcda55dc382bf8a26bb6
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2003, 2004, 2005, 2006 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License version 2 as
8 published by the Free Software Foundation.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
19 #include <alloca.h>
20 #include <assert.h>
21 #include <atomic.h>
22 #include <error.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <grp.h>
26 #include <libintl.h>
27 #include <pthread.h>
28 #include <pwd.h>
29 #include <resolv.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <unistd.h>
33 #include <arpa/inet.h>
34 #ifdef HAVE_EPOLL
35 # include <sys/epoll.h>
36 #endif
37 #include <sys/mman.h>
38 #include <sys/param.h>
39 #include <sys/poll.h>
40 #ifdef HAVE_SENDFILE
41 # include <sys/sendfile.h>
42 #endif
43 #include <sys/socket.h>
44 #include <sys/stat.h>
45 #include <sys/un.h>
47 #include "nscd.h"
48 #include "dbg_log.h"
49 #include "selinux.h"
50 #ifdef HAVE_SENDFILE
51 # include <kernel-features.h>
52 #endif
/* Allocation wrappers defined elsewhere in nscd.  Per the original
   comment they are error-checking variants of the standard
   allocation functions.  */
extern void *xmalloc (size_t n);
extern void *xcalloc (size_t n, size_t s);
extern void *xrealloc (void *o, size_t n);
/* State needed to run nscd as an unprivileged user.  SERVER_USER is
   tested in nscd_init and restart to decide whether privileges are
   dropped; SERVER_UID and SERVER_GID are the ids restart falls back
   to.  */
const char *server_user;
static uid_t server_uid;
static gid_t server_gid;

/* STAT_UID is the additional user (besides root) allowed to issue
   GETSTAT requests, see handle_request.  */
const char *stat_user;
uid_t stat_uid;

/* Supplementary groups of the server user.  */
static gid_t *server_groups;
#ifndef NGROUPS
# define NGROUPS 32
#endif
static int server_ngroups;

/* Flag tested by the worker threads in nscd_run.  */
static volatile int sighup_pending;

static pthread_attr_t attr;

static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
78 /* Map request type to a string. */
79 const char *serv2str[LASTREQ] =
81 [GETPWBYNAME] = "GETPWBYNAME",
82 [GETPWBYUID] = "GETPWBYUID",
83 [GETGRBYNAME] = "GETGRBYNAME",
84 [GETGRBYGID] = "GETGRBYGID",
85 [GETHOSTBYNAME] = "GETHOSTBYNAME",
86 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
87 [GETHOSTBYADDR] = "GETHOSTBYADDR",
88 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
89 [SHUTDOWN] = "SHUTDOWN",
90 [GETSTAT] = "GETSTAT",
91 [INVALIDATE] = "INVALIDATE",
92 [GETFDPW] = "GETFDPW",
93 [GETFDGR] = "GETFDGR",
94 [GETFDHST] = "GETFDHST",
95 [GETAI] = "GETAI",
96 [INITGROUPS] = "INITGROUPS"
99 /* The control data structures for the services. */
100 struct database_dyn dbs[lastdb] =
102 [pwddb] = {
103 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
104 .prunelock = PTHREAD_MUTEX_INITIALIZER,
105 .enabled = 0,
106 .check_file = 1,
107 .persistent = 0,
108 .propagate = 1,
109 .shared = 0,
110 .max_db_size = DEFAULT_MAX_DB_SIZE,
111 .filename = "/etc/passwd",
112 .db_filename = _PATH_NSCD_PASSWD_DB,
113 .disabled_iov = &pwd_iov_disabled,
114 .postimeout = 3600,
115 .negtimeout = 20,
116 .wr_fd = -1,
117 .ro_fd = -1,
118 .mmap_used = false
120 [grpdb] = {
121 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
122 .prunelock = PTHREAD_MUTEX_INITIALIZER,
123 .enabled = 0,
124 .check_file = 1,
125 .persistent = 0,
126 .propagate = 1,
127 .shared = 0,
128 .max_db_size = DEFAULT_MAX_DB_SIZE,
129 .filename = "/etc/group",
130 .db_filename = _PATH_NSCD_GROUP_DB,
131 .disabled_iov = &grp_iov_disabled,
132 .postimeout = 3600,
133 .negtimeout = 60,
134 .wr_fd = -1,
135 .ro_fd = -1,
136 .mmap_used = false
138 [hstdb] = {
139 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
140 .prunelock = PTHREAD_MUTEX_INITIALIZER,
141 .enabled = 0,
142 .check_file = 1,
143 .persistent = 0,
144 .propagate = 0, /* Not used. */
145 .shared = 0,
146 .max_db_size = DEFAULT_MAX_DB_SIZE,
147 .filename = "/etc/hosts",
148 .db_filename = _PATH_NSCD_HOSTS_DB,
149 .disabled_iov = &hst_iov_disabled,
150 .postimeout = 3600,
151 .negtimeout = 20,
152 .wr_fd = -1,
153 .ro_fd = -1,
154 .mmap_used = false
159 /* Mapping of request type to database. */
160 static struct database_dyn *const serv2db[LASTREQ] =
162 [GETPWBYNAME] = &dbs[pwddb],
163 [GETPWBYUID] = &dbs[pwddb],
164 [GETGRBYNAME] = &dbs[grpdb],
165 [GETGRBYGID] = &dbs[grpdb],
166 [GETHOSTBYNAME] = &dbs[hstdb],
167 [GETHOSTBYNAMEv6] = &dbs[hstdb],
168 [GETHOSTBYADDR] = &dbs[hstdb],
169 [GETHOSTBYADDRv6] = &dbs[hstdb],
170 [GETFDPW] = &dbs[pwddb],
171 [GETFDGR] = &dbs[grpdb],
172 [GETFDHST] = &dbs[hstdb],
173 [GETAI] = &dbs[hstdb],
174 [INITGROUPS] = &dbs[grpdb]
/* Seconds a pruning thread waits between two cache pruning runs.  */
#define CACHE_PRUNE_INTERVAL    15


/* Initial number of worker threads; -1 means "not configured", in
   which case nscd_init picks a default.  */
int nthreads = -1;
/* Upper bound for the number of worker threads.  */
int max_nthreads = 32;

/* Listening socket for incoming client connections.  */
static int sock;

/* Counts how often clients had to wait.  */
unsigned long int client_queued;
/* Send all LEN bytes at BUF over the socket FD, retrying after
   interrupts and short sends.  Returns the (negative) result of the
   failing send call on error, otherwise the number of bytes actually
   sent, which is less than LEN only if send reported zero progress.
   send is always called at least once, even for LEN == 0.  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cursor = buf;
  size_t pending = len;

  for (;;)
    {
      ssize_t sent = TEMP_FAILURE_RETRY (send (fd, cursor, pending,
                                               MSG_NOSIGNAL));
      if (sent < 0)
        return sent;
      if (sent == 0)
        break;

      cursor += sent;
      pending -= sent;
      if (pending == 0)
        break;
    }

  return len - pending;
}
#ifdef HAVE_SENDFILE
/* Copy LEN bytes starting at offset OFF of FROMFD to TOFD with
   sendfile, retrying after interrupts and partial transfers.  Returns
   the (negative) result of the failing sendfile call on error,
   otherwise the number of bytes transferred.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t left = len;

  for (;;)
    {
      ssize_t moved = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off,
                                                    left));
      if (moved < 0)
        return moved;
      if (moved == 0)
        break;

      left -= moved;
      if (left <= 0)
        break;
    }

  return len - left;
}
#endif
/* Marker values stored in the usage map built while verifying a
   persistent database.  The low bits name the kind of object
   occupying a byte, the high bits flag the first and last byte of
   that object.  */
enum usekey
  {
    use_not = 0,

    /* Flag bits; not really used on their own, they only serve as
       symbolic constants for the combinations below.  */
    use_first = 16,
    use_begin = 32,
    use_end = 64,

    /* Hash entries.  */
    use_he = 1,
    use_he_begin = use_he | use_begin,
    use_he_end = use_he | use_end,

#if SEPARATE_KEY
    /* Keys stored apart from their data record.  */
    use_key = 2,
    use_key_begin = use_key | use_begin,
    use_key_end = use_key | use_end,
    use_key_first = use_key_begin | use_first,
#endif

    /* Data records.  */
    use_data = 3,
    use_data_begin = use_data | use_begin,
    use_data_end = use_data | use_end,
    use_data_first = use_data_begin | use_first
  };
256 static int
257 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
258 enum usekey use, ref_t start, size_t len)
260 assert (len >= 2);
262 if (start > first_free || start + len > first_free
263 || (start & BLOCK_ALIGN_M1))
264 return 0;
266 if (usemap[start] == use_not)
268 /* Add the start marker. */
269 usemap[start] = use | use_begin;
270 use &= ~use_first;
272 while (--len > 0)
273 if (usemap[++start] != use_not)
274 return 0;
275 else
276 usemap[start] = use;
278 /* Add the end marker. */
279 usemap[start] = use | use_end;
281 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
283 /* Hash entries can't be shared. */
284 if (use == use_he)
285 return 0;
287 usemap[start] |= (use & use_first);
288 use &= ~use_first;
290 while (--len > 1)
291 if (usemap[++start] != use)
292 return 0;
294 if (usemap[++start] != (use | use_end))
295 return 0;
297 else
298 /* Points to a wrong object or somewhere in the middle. */
299 return 0;
301 return 1;
305 /* Verify data in persistent database. */
306 static int
307 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
309 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb);
311 time_t now = time (NULL);
313 struct database_pers_head *head = mem;
314 struct database_pers_head head_copy = *head;
316 /* Check that the header that was read matches the head in the database. */
317 if (readhead != NULL && memcmp (head, readhead, sizeof (*head)) != 0)
318 return 0;
320 /* First some easy tests: make sure the database header is sane. */
321 if (head->version != DB_VERSION
322 || head->header_size != sizeof (*head)
323 /* We allow a timestamp to be one hour ahead of the current time.
324 This should cover daylight saving time changes. */
325 || head->timestamp > now + 60 * 60 + 60
326 || (head->gc_cycle & 1)
327 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
328 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
329 || head->first_free < 0
330 || head->first_free > head->data_size
331 || (head->first_free & BLOCK_ALIGN_M1) != 0
332 || head->maxnentries < 0
333 || head->maxnsearched < 0)
334 return 0;
336 uint8_t *usemap = calloc (head->first_free, 1);
337 if (usemap == NULL)
338 return 0;
340 const char *data = (char *) &head->array[roundup (head->module,
341 ALIGN / sizeof (ref_t))];
343 nscd_ssize_t he_cnt = 0;
344 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
346 ref_t work = head->array[cnt];
348 while (work != ENDREF)
350 if (! check_use (data, head->first_free, usemap, use_he, work,
351 sizeof (struct hashentry)))
352 goto fail;
354 /* Now we know we can dereference the record. */
355 struct hashentry *here = (struct hashentry *) (data + work);
357 ++he_cnt;
359 /* Make sure the record is for this type of service. */
360 if (here->type >= LASTREQ
361 || serv2db[here->type] != &dbs[dbnr])
362 goto fail;
364 /* Validate boolean field value. */
365 if (here->first != false && here->first != true)
366 goto fail;
368 if (here->len < 0)
369 goto fail;
371 /* Now the data. */
372 if (here->packet < 0
373 || here->packet > head->first_free
374 || here->packet + sizeof (struct datahead) > head->first_free)
375 goto fail;
377 struct datahead *dh = (struct datahead *) (data + here->packet);
379 if (! check_use (data, head->first_free, usemap,
380 use_data | (here->first ? use_first : 0),
381 here->packet, dh->allocsize))
382 goto fail;
384 if (dh->allocsize < sizeof (struct datahead)
385 || dh->recsize > dh->allocsize
386 || (dh->notfound != false && dh->notfound != true)
387 || (dh->usable != false && dh->usable != true))
388 goto fail;
390 if (here->key < here->packet + sizeof (struct datahead)
391 || here->key > here->packet + dh->allocsize
392 || here->key + here->len > here->packet + dh->allocsize)
394 #if SEPARATE_KEY
395 /* If keys can appear outside of data, this should be done
396 instead. But gc doesn't mark the data in that case. */
397 if (! check_use (data, head->first_free, usemap,
398 use_key | (here->first ? use_first : 0),
399 here->key, here->len))
400 #endif
401 goto fail;
404 work = here->next;
408 if (he_cnt != head->nentries)
409 goto fail;
411 /* See if all data and keys had at least one reference from
412 he->first == true hashentry. */
413 for (ref_t idx = 0; idx < head->first_free; ++idx)
415 #if SEPARATE_KEY
416 if (usemap[idx] == use_key_begin)
417 goto fail;
418 #endif
419 if (usemap[idx] == use_data_begin)
420 goto fail;
423 /* Finally, make sure the database hasn't changed since the first test. */
424 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
425 goto fail;
427 free (usemap);
428 return 1;
430 fail:
431 free (usemap);
432 return 0;
436 /* Initialize database information structures. */
437 void
438 nscd_init (void)
440 /* Look up unprivileged uid/gid/groups before we start listening on the
441 socket */
442 if (server_user != NULL)
443 begin_drop_privileges ();
445 if (nthreads == -1)
446 /* No configuration for this value, assume a default. */
447 nthreads = 2 * lastdb;
449 for (size_t cnt = 0; cnt < lastdb; ++cnt)
450 if (dbs[cnt].enabled)
452 pthread_rwlock_init (&dbs[cnt].lock, NULL);
453 pthread_mutex_init (&dbs[cnt].memlock, NULL);
455 if (dbs[cnt].persistent)
457 /* Try to open the appropriate file on disk. */
458 int fd = open (dbs[cnt].db_filename, O_RDWR);
459 if (fd != -1)
461 struct stat64 st;
462 void *mem;
463 size_t total;
464 struct database_pers_head head;
465 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
466 sizeof (head)));
467 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
469 fail_db:
470 dbg_log (_("invalid persistent database file \"%s\": %s"),
471 dbs[cnt].db_filename, strerror (errno));
472 unlink (dbs[cnt].db_filename);
474 else if (head.module == 0 && head.data_size == 0)
476 /* The file has been created, but the head has not been
477 initialized yet. Remove the old file. */
478 unlink (dbs[cnt].db_filename);
480 else if (head.header_size != (int) sizeof (head))
482 dbg_log (_("invalid persistent database file \"%s\": %s"),
483 dbs[cnt].db_filename,
484 _("header size does not match"));
485 unlink (dbs[cnt].db_filename);
487 else if ((total = (sizeof (head)
488 + roundup (head.module * sizeof (ref_t),
489 ALIGN)
490 + head.data_size))
491 > st.st_size
492 || total < sizeof (head))
494 dbg_log (_("invalid persistent database file \"%s\": %s"),
495 dbs[cnt].db_filename,
496 _("file size does not match"));
497 unlink (dbs[cnt].db_filename);
499 /* Note we map with the maximum size allowed for the
500 database. This is likely much larger than the
501 actual file size. This is OK on most OSes since
502 extensions of the underlying file will
503 automatically translate more pages available for
504 memory access. */
505 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
506 PROT_READ | PROT_WRITE,
507 MAP_SHARED, fd, 0))
508 == MAP_FAILED)
509 goto fail_db;
510 else if (!verify_persistent_db (mem, &head, cnt))
512 munmap (mem, total);
513 dbg_log (_("invalid persistent database file \"%s\": %s"),
514 dbs[cnt].db_filename,
515 _("verification failed"));
516 unlink (dbs[cnt].db_filename);
518 else
520 /* Success. We have the database. */
521 dbs[cnt].head = mem;
522 dbs[cnt].memsize = total;
523 dbs[cnt].data = (char *)
524 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
525 ALIGN / sizeof (ref_t))];
526 dbs[cnt].mmap_used = true;
528 if (dbs[cnt].suggested_module > head.module)
529 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
530 dbnames[cnt]);
532 dbs[cnt].wr_fd = fd;
533 fd = -1;
534 /* We also need a read-only descriptor. */
535 if (dbs[cnt].shared)
537 dbs[cnt].ro_fd = open (dbs[cnt].db_filename, O_RDONLY);
538 if (dbs[cnt].ro_fd == -1)
539 dbg_log (_("\
540 cannot create read-only descriptor for \"%s\"; no mmap"),
541 dbs[cnt].db_filename);
544 // XXX Shall we test whether the descriptors actually
545 // XXX point to the same file?
548 /* Close the file descriptors in case something went
549 wrong in which case the variable have not been
550 assigned -1. */
551 if (fd != -1)
552 close (fd);
556 if (dbs[cnt].head == NULL)
558 /* No database loaded. Allocate the data structure,
559 possibly on disk. */
560 struct database_pers_head head;
561 size_t total = (sizeof (head)
562 + roundup (dbs[cnt].suggested_module
563 * sizeof (ref_t), ALIGN)
564 + (dbs[cnt].suggested_module
565 * DEFAULT_DATASIZE_PER_BUCKET));
567 /* Try to create the database. If we do not need a
568 persistent database create a temporary file. */
569 int fd;
570 int ro_fd = -1;
571 if (dbs[cnt].persistent)
573 fd = open (dbs[cnt].db_filename,
574 O_RDWR | O_CREAT | O_EXCL | O_TRUNC,
575 S_IRUSR | S_IWUSR);
576 if (fd != -1 && dbs[cnt].shared)
577 ro_fd = open (dbs[cnt].db_filename, O_RDONLY);
579 else
581 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
582 fd = mkstemp (fname);
584 /* We do not need the file name anymore after we
585 opened another file descriptor in read-only mode. */
586 if (fd != -1)
588 if (dbs[cnt].shared)
589 ro_fd = open (fname, O_RDONLY);
591 unlink (fname);
595 if (fd == -1)
597 if (errno == EEXIST)
599 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
600 dbnames[cnt], dbs[cnt].db_filename);
601 // XXX Correct way to terminate?
602 exit (1);
605 if (dbs[cnt].persistent)
606 dbg_log (_("cannot create %s; no persistent database used"),
607 dbs[cnt].db_filename);
608 else
609 dbg_log (_("cannot create %s; no sharing possible"),
610 dbs[cnt].db_filename);
612 dbs[cnt].persistent = 0;
613 // XXX remember: no mmap
615 else
617 /* Tell the user if we could not create the read-only
618 descriptor. */
619 if (ro_fd == -1 && dbs[cnt].shared)
620 dbg_log (_("\
621 cannot create read-only descriptor for \"%s\"; no mmap"),
622 dbs[cnt].db_filename);
624 /* Before we create the header, initialiye the hash
625 table. So that if we get interrupted if writing
626 the header we can recognize a partially initialized
627 database. */
628 size_t ps = sysconf (_SC_PAGESIZE);
629 char tmpbuf[ps];
630 assert (~ENDREF == 0);
631 memset (tmpbuf, '\xff', ps);
633 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
634 off_t offset = sizeof (head);
636 size_t towrite;
637 if (offset % ps != 0)
639 towrite = MIN (remaining, ps - (offset % ps));
640 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
641 goto write_fail;
642 offset += towrite;
643 remaining -= towrite;
646 while (remaining > ps)
648 if (pwrite (fd, tmpbuf, ps, offset) == -1)
649 goto write_fail;
650 offset += ps;
651 remaining -= ps;
654 if (remaining > 0
655 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
656 goto write_fail;
658 /* Create the header of the file. */
659 struct database_pers_head head =
661 .version = DB_VERSION,
662 .header_size = sizeof (head),
663 .module = dbs[cnt].suggested_module,
664 .data_size = (dbs[cnt].suggested_module
665 * DEFAULT_DATASIZE_PER_BUCKET),
666 .first_free = 0
668 void *mem;
670 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
671 != sizeof (head))
672 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
673 != 0)
674 || (mem = mmap (NULL, dbs[cnt].max_db_size,
675 PROT_READ | PROT_WRITE,
676 MAP_SHARED, fd, 0)) == MAP_FAILED)
678 write_fail:
679 unlink (dbs[cnt].db_filename);
680 dbg_log (_("cannot write to database file %s: %s"),
681 dbs[cnt].db_filename, strerror (errno));
682 dbs[cnt].persistent = 0;
684 else
686 /* Success. */
687 dbs[cnt].head = mem;
688 dbs[cnt].data = (char *)
689 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
690 ALIGN / sizeof (ref_t))];
691 dbs[cnt].memsize = total;
692 dbs[cnt].mmap_used = true;
694 /* Remember the descriptors. */
695 dbs[cnt].wr_fd = fd;
696 dbs[cnt].ro_fd = ro_fd;
697 fd = -1;
698 ro_fd = -1;
701 if (fd != -1)
702 close (fd);
703 if (ro_fd != -1)
704 close (ro_fd);
708 if (paranoia
709 && ((dbs[cnt].wr_fd != -1
710 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
711 || (dbs[cnt].ro_fd != -1
712 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
714 dbg_log (_("\
715 cannot set socket to close on exec: %s; disabling paranoia mode"),
716 strerror (errno));
717 paranoia = 0;
720 if (dbs[cnt].head == NULL)
722 /* We do not use the persistent database. Just
723 create an in-memory data structure. */
724 assert (! dbs[cnt].persistent);
726 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
727 + (dbs[cnt].suggested_module
728 * sizeof (ref_t)));
729 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
730 assert (~ENDREF == 0);
731 memset (dbs[cnt].head->array, '\xff',
732 dbs[cnt].suggested_module * sizeof (ref_t));
733 dbs[cnt].head->module = dbs[cnt].suggested_module;
734 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
735 * dbs[cnt].head->module);
736 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
737 dbs[cnt].head->first_free = 0;
739 dbs[cnt].shared = 0;
740 assert (dbs[cnt].ro_fd == -1);
743 if (dbs[cnt].check_file)
745 /* We need the modification date of the file. */
746 struct stat64 st;
748 if (stat64 (dbs[cnt].filename, &st) < 0)
750 /* We cannot stat() the file, disable file checking. */
751 dbg_log (_("cannot stat() file `%s': %s"),
752 dbs[cnt].filename, strerror (errno));
753 dbs[cnt].check_file = 0;
755 else
756 dbs[cnt].file_mtime = st.st_mtime;
760 /* Create the socket. */
761 sock = socket (AF_UNIX, SOCK_STREAM, 0);
762 if (sock < 0)
764 dbg_log (_("cannot open socket: %s"), strerror (errno));
765 exit (errno == EACCES ? 4 : 1);
767 /* Bind a name to the socket. */
768 struct sockaddr_un sock_addr;
769 sock_addr.sun_family = AF_UNIX;
770 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
771 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
773 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
774 exit (errno == EACCES ? 4 : 1);
777 /* We don't want to get stuck on accept. */
778 int fl = fcntl (sock, F_GETFL);
779 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
781 dbg_log (_("cannot change socket to nonblocking mode: %s"),
782 strerror (errno));
783 exit (1);
786 /* The descriptor needs to be closed on exec. */
787 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
789 dbg_log (_("cannot set socket to close on exec: %s"),
790 strerror (errno));
791 exit (1);
794 /* Set permissions for the socket. */
795 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
797 /* Set the socket up to accept connections. */
798 if (listen (sock, SOMAXCONN) < 0)
800 dbg_log (_("cannot enable socket to accept connections: %s"),
801 strerror (errno));
802 exit (1);
805 /* Change to unprivileged uid/gid/groups if specifed in config file */
806 if (server_user != NULL)
807 finish_drop_privileges ();
811 /* Close the connections. */
812 void
813 close_sockets (void)
815 close (sock);
819 static void
820 invalidate_cache (char *key, int fd)
822 dbtype number;
823 int32_t resp;
825 if (strcmp (key, "passwd") == 0)
826 number = pwddb;
827 else if (strcmp (key, "group") == 0)
828 number = grpdb;
829 else if (__builtin_expect (strcmp (key, "hosts"), 0) == 0)
831 number = hstdb;
833 /* Re-initialize the resolver. resolv.conf might have changed. */
834 res_init ();
836 else
838 resp = EINVAL;
839 writeall (fd, &resp, sizeof (resp));
840 return;
843 if (dbs[number].enabled)
844 prune_cache (&dbs[number], LONG_MAX, fd);
845 else
847 resp = 0;
848 writeall (fd, &resp, sizeof (resp));
853 #ifdef SCM_RIGHTS
854 static void
855 send_ro_fd (struct database_dyn *db, char *key, int fd)
857 /* If we do not have an read-only file descriptor do nothing. */
858 if (db->ro_fd == -1)
859 return;
861 /* We need to send some data along with the descriptor. */
862 struct iovec iov[1];
863 iov[0].iov_base = key;
864 iov[0].iov_len = strlen (key) + 1;
866 /* Prepare the control message to transfer the descriptor. */
867 union
869 struct cmsghdr hdr;
870 char bytes[CMSG_SPACE (sizeof (int))];
871 } buf;
872 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 1,
873 .msg_control = buf.bytes,
874 .msg_controllen = sizeof (buf) };
875 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
877 cmsg->cmsg_level = SOL_SOCKET;
878 cmsg->cmsg_type = SCM_RIGHTS;
879 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
881 *(int *) CMSG_DATA (cmsg) = db->ro_fd;
883 msg.msg_controllen = cmsg->cmsg_len;
885 /* Send the control message. We repeat when we are interrupted but
886 everything else is ignored. */
887 #ifndef MSG_NOSIGNAL
888 # define MSG_NOSIGNAL 0
889 #endif
890 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
892 if (__builtin_expect (debug_level > 0, 0))
893 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
895 #endif /* SCM_RIGHTS */
898 /* Handle new request. */
899 static void
900 handle_request (int fd, request_header *req, void *key, uid_t uid)
902 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
904 if (debug_level > 0)
905 dbg_log (_("\
906 cannot handle old request version %d; current version is %d"),
907 req->version, NSCD_VERSION);
908 return;
911 /* Make the SELinux check before we go on to the standard checks. We
912 need to verify that the request type is valid, since it has not
913 yet been checked at this point. */
914 if (selinux_enabled
915 && __builtin_expect (req->type, GETPWBYNAME) >= GETPWBYNAME
916 && __builtin_expect (req->type, LASTREQ) < LASTREQ
917 && nscd_request_avc_has_perm (fd, req->type) != 0)
918 return;
920 struct database_dyn *db = serv2db[req->type];
922 // XXX Clean up so that each new command need not introduce a
923 // XXX new conditional.
924 if ((__builtin_expect (req->type, GETPWBYNAME) >= GETPWBYNAME
925 && __builtin_expect (req->type, LASTDBREQ) <= LASTDBREQ)
926 || req->type == GETAI || req->type == INITGROUPS)
928 if (__builtin_expect (debug_level, 0) > 0)
930 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
932 char buf[INET6_ADDRSTRLEN];
934 dbg_log ("\t%s (%s)", serv2str[req->type],
935 inet_ntop (req->type == GETHOSTBYADDR
936 ? AF_INET : AF_INET6,
937 key, buf, sizeof (buf)));
939 else
940 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
943 /* Is this service enabled? */
944 if (!db->enabled)
946 /* No, sent the prepared record. */
947 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
948 db->disabled_iov->iov_len,
949 MSG_NOSIGNAL))
950 != (ssize_t) db->disabled_iov->iov_len
951 && __builtin_expect (debug_level, 0) > 0)
953 /* We have problems sending the result. */
954 char buf[256];
955 dbg_log (_("cannot write result: %s"),
956 strerror_r (errno, buf, sizeof (buf)));
959 return;
962 /* Be sure we can read the data. */
963 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
965 ++db->head->rdlockdelayed;
966 pthread_rwlock_rdlock (&db->lock);
969 /* See whether we can handle it from the cache. */
970 struct datahead *cached;
971 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
972 db, uid);
973 if (cached != NULL)
975 /* Hurray it's in the cache. */
976 ssize_t nwritten;
978 #ifdef HAVE_SENDFILE
979 if (db->mmap_used || !cached->notfound)
981 assert (db->wr_fd != -1);
982 assert ((char *) cached->data > (char *) db->data);
983 assert ((char *) cached->data - (char *) db->head
984 + cached->recsize
985 <= (sizeof (struct database_pers_head)
986 + db->head->module * sizeof (ref_t)
987 + db->head->data_size));
988 nwritten = sendfileall (fd, db->wr_fd,
989 (char *) cached->data
990 - (char *) db->head, cached->recsize);
991 # ifndef __ASSUME_SENDFILE
992 if (nwritten == -1 && errno == ENOSYS)
993 goto use_write;
994 # endif
996 else
997 # ifndef __ASSUME_SENDFILE
998 use_write:
999 # endif
1000 #endif
1001 nwritten = writeall (fd, cached->data, cached->recsize);
1003 if (nwritten != cached->recsize
1004 && __builtin_expect (debug_level, 0) > 0)
1006 /* We have problems sending the result. */
1007 char buf[256];
1008 dbg_log (_("cannot write result: %s"),
1009 strerror_r (errno, buf, sizeof (buf)));
1012 pthread_rwlock_unlock (&db->lock);
1014 return;
1017 pthread_rwlock_unlock (&db->lock);
1019 else if (__builtin_expect (debug_level, 0) > 0)
1021 if (req->type == INVALIDATE)
1022 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1023 else
1024 dbg_log ("\t%s", serv2str[req->type]);
1027 /* Handle the request. */
1028 switch (req->type)
1030 case GETPWBYNAME:
1031 addpwbyname (db, fd, req, key, uid);
1032 break;
1034 case GETPWBYUID:
1035 addpwbyuid (db, fd, req, key, uid);
1036 break;
1038 case GETGRBYNAME:
1039 addgrbyname (db, fd, req, key, uid);
1040 break;
1042 case GETGRBYGID:
1043 addgrbygid (db, fd, req, key, uid);
1044 break;
1046 case GETHOSTBYNAME:
1047 addhstbyname (db, fd, req, key, uid);
1048 break;
1050 case GETHOSTBYNAMEv6:
1051 addhstbynamev6 (db, fd, req, key, uid);
1052 break;
1054 case GETHOSTBYADDR:
1055 addhstbyaddr (db, fd, req, key, uid);
1056 break;
1058 case GETHOSTBYADDRv6:
1059 addhstbyaddrv6 (db, fd, req, key, uid);
1060 break;
1062 case GETAI:
1063 addhstai (db, fd, req, key, uid);
1064 break;
1066 case INITGROUPS:
1067 addinitgroups (db, fd, req, key, uid);
1068 break;
1070 case GETSTAT:
1071 case SHUTDOWN:
1072 case INVALIDATE:
1074 /* Get the callers credentials. */
1075 #ifdef SO_PEERCRED
1076 struct ucred caller;
1077 socklen_t optlen = sizeof (caller);
1079 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1081 char buf[256];
1083 dbg_log (_("error getting caller's id: %s"),
1084 strerror_r (errno, buf, sizeof (buf)));
1085 break;
1088 uid = caller.uid;
1089 #else
1090 /* Some systems have no SO_PEERCRED implementation. They don't
1091 care about security so we don't as well. */
1092 uid = 0;
1093 #endif
1096 /* Accept shutdown, getstat and invalidate only from root. For
1097 the stat call also allow the user specified in the config file. */
1098 if (req->type == GETSTAT)
1100 if (uid == 0 || uid == stat_uid)
1101 send_stats (fd, dbs);
1103 else if (uid == 0)
1105 if (req->type == INVALIDATE)
1106 invalidate_cache (key, fd);
1107 else
1108 termination_handler (0);
1110 break;
1112 case GETFDPW:
1113 case GETFDGR:
1114 case GETFDHST:
1115 #ifdef SCM_RIGHTS
1116 send_ro_fd (serv2db[req->type], key, fd);
1117 #endif
1118 break;
1120 default:
1121 /* Ignore the command, it's nothing we know. */
1122 break;
1127 /* Restart the process. */
1128 static void
1129 restart (void)
1131 /* First determine the parameters. We do not use the parameters
1132 passed to main() since in case nscd is started by running the
1133 dynamic linker this will not work. Yes, this is not the usual
1134 case but nscd is part of glibc and we occasionally do this. */
1135 size_t buflen = 1024;
1136 char *buf = alloca (buflen);
1137 size_t readlen = 0;
1138 int fd = open ("/proc/self/cmdline", O_RDONLY);
1139 if (fd == -1)
1141 dbg_log (_("\
1142 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1143 strerror (errno));
1145 paranoia = 0;
1146 return;
1149 while (1)
1151 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1152 buflen - readlen));
1153 if (n == -1)
1155 dbg_log (_("\
1156 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1157 strerror (errno));
1159 close (fd);
1160 paranoia = 0;
1161 return;
1164 readlen += n;
1166 if (readlen < buflen)
1167 break;
1169 /* We might have to extend the buffer. */
1170 size_t old_buflen = buflen;
1171 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1172 buf = memmove (newp, buf, old_buflen);
1175 close (fd);
1177 /* Parse the command line. Worst case scenario: every two
1178 characters form one parameter (one character plus NUL). */
1179 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1180 int argc = 0;
1182 char *cp = buf;
1183 while (cp < buf + readlen)
1185 argv[argc++] = cp;
1186 cp = (char *) rawmemchr (cp, '\0') + 1;
1188 argv[argc] = NULL;
1190 /* Second, change back to the old user if we changed it. */
1191 if (server_user != NULL)
1193 if (setresuid (old_uid, old_uid, old_uid) != 0)
1195 dbg_log (_("\
1196 cannot change to old UID: %s; disabling paranoia mode"),
1197 strerror (errno));
1199 paranoia = 0;
1200 return;
1203 if (setresgid (old_gid, old_gid, old_gid) != 0)
1205 dbg_log (_("\
1206 cannot change to old GID: %s; disabling paranoia mode"),
1207 strerror (errno));
1209 setuid (server_uid);
1210 paranoia = 0;
1211 return;
1215 /* Next change back to the old working directory. */
1216 if (chdir (oldcwd) == -1)
1218 dbg_log (_("\
1219 cannot change to old working directory: %s; disabling paranoia mode"),
1220 strerror (errno));
1222 if (server_user != NULL)
1224 setuid (server_uid);
1225 setgid (server_gid);
1227 paranoia = 0;
1228 return;
1231 /* Synchronize memory. */
1232 for (int cnt = 0; cnt < lastdb; ++cnt)
1234 /* Make sure nobody keeps using the database. */
1235 dbs[cnt].head->timestamp = 0;
1237 if (dbs[cnt].persistent)
1238 // XXX async OK?
1239 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1242 /* The preparations are done. */
1243 execv ("/proc/self/exe", argv);
1245 /* If we come here, we will never be able to re-exec. */
1246 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1247 strerror (errno));
1249 if (server_user != NULL)
1251 setuid (server_uid);
1252 setgid (server_gid);
1254 if (chdir ("/") != 0)
1255 dbg_log (_("cannot change current working directory to \"/\": %s"),
1256 strerror (errno));
1257 paranoia = 0;
/* Node of a singly linked list of file descriptors.  */
struct fdlist
{
  int fd;
  struct fdlist *next;
};
1267 /* Memory allocated for the list. */
1268 static struct fdlist *fdlist;
1269 /* List of currently ready-to-read file descriptors. */
1270 static struct fdlist *readylist;
1272 /* Conditional variable and mutex to signal availability of entries in
1273 READYLIST. The condvar is initialized dynamically since we might
1274 use a different clock depending on availability. */
1275 static pthread_cond_t readylist_cond;
1276 static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1278 /* The clock to use with the condvar. */
1279 static clockid_t timeout_clock = CLOCK_REALTIME;
1281 /* Number of threads ready to handle the READYLIST. */
1282 static unsigned long int nready;
1285 /* This is the main loop. It is replicated in different threads but the
1286 `poll' call makes sure only one thread handles an incoming connection. */
1287 static void *
1288 __attribute__ ((__noreturn__))
1289 nscd_run (void *p)
1291 const long int my_number = (long int) p;
1292 const int run_prune = my_number < lastdb && dbs[my_number].enabled;
1293 struct timespec prune_ts;
1294 int to = 0;
1295 char buf[256];
1297 if (run_prune)
1299 setup_thread (&dbs[my_number]);
1301 /* We are running. */
1302 dbs[my_number].head->timestamp = time (NULL);
1304 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1305 /* Should never happen. */
1306 abort ();
1308 /* Compute timeout time. */
1309 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1312 /* Initial locking. */
1313 pthread_mutex_lock (&readylist_lock);
1315 /* One more thread available. */
1316 ++nready;
1318 while (1)
1320 while (readylist == NULL)
1322 if (run_prune)
1324 /* Wait, but not forever. */
1325 to = pthread_cond_timedwait (&readylist_cond, &readylist_lock,
1326 &prune_ts);
1328 /* If we were woken and there is no work to be done,
1329 just start pruning. */
1330 if (readylist == NULL && to == ETIMEDOUT)
1332 --nready;
1334 if (sighup_pending)
1335 goto sighup_prune;
1337 pthread_mutex_unlock (&readylist_lock);
1338 goto only_prune;
1341 else
1342 /* No need to timeout. */
1343 pthread_cond_wait (&readylist_cond, &readylist_lock);
1346 if (sighup_pending)
1348 --nready;
1349 pthread_cond_signal (&readylist_cond);
1350 sighup_prune:
1351 sighup_pending = 0;
1352 pthread_mutex_unlock (&readylist_lock);
1354 /* Prune the password database. */
1355 if (dbs[pwddb].enabled)
1356 prune_cache (&dbs[pwddb], LONG_MAX, -1);
1358 /* Prune the group database. */
1359 if (dbs[grpdb].enabled)
1360 prune_cache (&dbs[grpdb], LONG_MAX, -1);
1362 /* Prune the host database. */
1363 if (dbs[hstdb].enabled)
1364 prune_cache (&dbs[hstdb], LONG_MAX, -1);
1366 /* Re-locking. */
1367 pthread_mutex_lock (&readylist_lock);
1369 /* One more thread available. */
1370 ++nready;
1371 continue;
1374 struct fdlist *it = readylist->next;
1375 if (readylist->next == readylist)
1376 /* Just one entry on the list. */
1377 readylist = NULL;
1378 else
1379 readylist->next = it->next;
1381 /* Extract the information and mark the record ready to be used
1382 again. */
1383 int fd = it->fd;
1384 it->next = NULL;
1386 /* One more thread available. */
1387 --nready;
1389 /* We are done with the list. */
1390 pthread_mutex_unlock (&readylist_lock);
1392 /* We do not want to block on a short read or so. */
1393 int fl = fcntl (fd, F_GETFL);
1394 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1395 goto close_and_out;
1397 /* Now read the request. */
1398 request_header req;
1399 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1400 != sizeof (req), 0))
1402 /* We failed to read data. Note that this also might mean we
1403 failed because we would have blocked. */
1404 if (debug_level > 0)
1405 dbg_log (_("short read while reading request: %s"),
1406 strerror_r (errno, buf, sizeof (buf)));
1407 goto close_and_out;
1410 /* Check whether this is a valid request type. */
1411 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1412 goto close_and_out;
1414 /* Some systems have no SO_PEERCRED implementation. They don't
1415 care about security so we don't as well. */
1416 uid_t uid = -1;
1417 #ifdef SO_PEERCRED
1418 pid_t pid = 0;
1420 if (__builtin_expect (debug_level > 0, 0))
1422 struct ucred caller;
1423 socklen_t optlen = sizeof (caller);
1425 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1426 pid = caller.pid;
1428 #endif
1430 /* It should not be possible to crash the nscd with a silly
1431 request (i.e., a terribly large key). We limit the size to 1kb. */
1432 #define MAXKEYLEN 1024
1433 if (__builtin_expect (req.key_len, 1) < 0
1434 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1436 if (debug_level > 0)
1437 dbg_log (_("key length in request too long: %d"), req.key_len);
1439 else
1441 /* Get the key. */
1442 char keybuf[MAXKEYLEN];
1444 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1445 req.key_len))
1446 != req.key_len, 0))
1448 /* Again, this can also mean we would have blocked. */
1449 if (debug_level > 0)
1450 dbg_log (_("short read while reading request key: %s"),
1451 strerror_r (errno, buf, sizeof (buf)));
1452 goto close_and_out;
1455 if (__builtin_expect (debug_level, 0) > 0)
1457 #ifdef SO_PEERCRED
1458 if (pid != 0)
1459 dbg_log (_("\
1460 handle_request: request received (Version = %d) from PID %ld"),
1461 req.version, (long int) pid);
1462 else
1463 #endif
1464 dbg_log (_("\
1465 handle_request: request received (Version = %d)"), req.version);
1468 /* Phew, we got all the data, now process it. */
1469 handle_request (fd, &req, keybuf, uid);
1472 close_and_out:
1473 /* We are done. */
1474 close (fd);
1476 /* Check whether we should be pruning the cache. */
1477 assert (run_prune || to == 0);
1478 if (to == ETIMEDOUT)
1480 only_prune:
1481 /* The pthread_cond_timedwait() call timed out. It is time
1482 to clean up the cache. */
1483 assert (my_number < lastdb);
1484 prune_cache (&dbs[my_number], time (NULL), -1);
1486 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1487 /* Should never happen. */
1488 abort ();
1490 /* Compute next timeout time. */
1491 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1493 /* In case the list is emtpy we do not want to run the prune
1494 code right away again. */
1495 to = 0;
1498 /* Re-locking. */
1499 pthread_mutex_lock (&readylist_lock);
1501 /* One more thread available. */
1502 ++nready;
1507 static unsigned int nconns;
1509 static void
1510 fd_ready (int fd)
1512 pthread_mutex_lock (&readylist_lock);
1514 /* Find an empty entry in FDLIST. */
1515 size_t inner;
1516 for (inner = 0; inner < nconns; ++inner)
1517 if (fdlist[inner].next == NULL)
1518 break;
1519 assert (inner < nconns);
1521 fdlist[inner].fd = fd;
1523 if (readylist == NULL)
1524 readylist = fdlist[inner].next = &fdlist[inner];
1525 else
1527 fdlist[inner].next = readylist->next;
1528 readylist = readylist->next = &fdlist[inner];
1531 bool do_signal = true;
1532 if (__builtin_expect (nready == 0, 0))
1534 ++client_queued;
1535 do_signal = false;
1537 /* Try to start another thread to help out. */
1538 pthread_t th;
1539 if (nthreads < max_nthreads
1540 && pthread_create (&th, &attr, nscd_run,
1541 (void *) (long int) nthreads) == 0)
1543 /* We got another thread. */
1544 ++nthreads;
1545 /* The new thread might need a kick. */
1546 do_signal = true;
1551 pthread_mutex_unlock (&readylist_lock);
1553 /* Tell one of the worker threads there is work to do. */
1554 if (do_signal)
1555 pthread_cond_signal (&readylist_cond);
1559 /* Check whether restarting should happen. */
1560 static inline int
1561 restart_p (time_t now)
1563 return (paranoia && readylist == NULL && nready == nthreads
1564 && now >= restart_time);
1568 /* Array for times a connection was accepted. */
1569 static time_t *starttime;
1572 static void
1573 __attribute__ ((__noreturn__))
1574 main_loop_poll (void)
1576 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1577 * sizeof (conns[0]));
1579 conns[0].fd = sock;
1580 conns[0].events = POLLRDNORM;
1581 size_t nused = 1;
1582 size_t firstfree = 1;
1584 while (1)
1586 /* Wait for any event. We wait at most a couple of seconds so
1587 that we can check whether we should close any of the accepted
1588 connections since we have not received a request. */
1589 #define MAX_ACCEPT_TIMEOUT 30
1590 #define MIN_ACCEPT_TIMEOUT 5
1591 #define MAIN_THREAD_TIMEOUT \
1592 (MAX_ACCEPT_TIMEOUT * 1000 \
1593 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1595 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1597 time_t now = time (NULL);
1599 /* If there is a descriptor ready for reading or there is a new
1600 connection, process this now. */
1601 if (n > 0)
1603 if (conns[0].revents != 0)
1605 /* We have a new incoming connection. Accept the connection. */
1606 int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
1608 /* Use the descriptor if we have not reached the limit. */
1609 if (fd >= 0)
1611 if (firstfree < nconns)
1613 conns[firstfree].fd = fd;
1614 conns[firstfree].events = POLLRDNORM;
1615 starttime[firstfree] = now;
1616 if (firstfree >= nused)
1617 nused = firstfree + 1;
1620 ++firstfree;
1621 while (firstfree < nused && conns[firstfree].fd != -1);
1623 else
1624 /* We cannot use the connection so close it. */
1625 close (fd);
1628 --n;
1631 for (size_t cnt = 1; cnt < nused && n > 0; ++cnt)
1632 if (conns[cnt].revents != 0)
1634 fd_ready (conns[cnt].fd);
1636 /* Clean up the CONNS array. */
1637 conns[cnt].fd = -1;
1638 if (cnt < firstfree)
1639 firstfree = cnt;
1640 if (cnt == nused - 1)
1642 --nused;
1643 while (conns[nused - 1].fd == -1);
1645 --n;
1649 /* Now find entries which have timed out. */
1650 assert (nused > 0);
1652 /* We make the timeout length depend on the number of file
1653 descriptors currently used. */
1654 #define ACCEPT_TIMEOUT \
1655 (MAX_ACCEPT_TIMEOUT \
1656 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
1657 time_t laststart = now - ACCEPT_TIMEOUT;
1659 for (size_t cnt = nused - 1; cnt > 0; --cnt)
1661 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
1663 /* Remove the entry, it timed out. */
1664 (void) close (conns[cnt].fd);
1665 conns[cnt].fd = -1;
1667 if (cnt < firstfree)
1668 firstfree = cnt;
1669 if (cnt == nused - 1)
1671 --nused;
1672 while (conns[nused - 1].fd == -1);
1676 if (restart_p (now))
1677 restart ();
#ifdef HAVE_EPOLL
/* Main loop of the initial thread, implemented with epoll.  EFD is the
   epoll descriptor to use.  The loop accepts connections on SOCK,
   passes descriptors with pending events to fd_ready, closes
   connections on which nothing arrived within ACCEPT_TIMEOUT seconds,
   and calls restart when restart_p says so.

   The function returns only if SOCK cannot be added to EFD; the caller
   then falls back to main_loop_poll.

   MAIN_THREAD_TIMEOUT and ACCEPT_TIMEOUT are the macros defined inside
   main_loop_poll.  They expand to expressions using the local variable
   NUSED, which here counts the descriptors registered with EFD.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  /* Registered descriptors, the listening socket included.  */
  int nused = 1;
  /* Upper bound for the descriptors with a nonzero STARTTIME entry.  */
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));

            if (fd >= 0)
              {
                /* Try to add the new descriptor.  STARTTIME is indexed
                   by descriptor, so the descriptor must be below
                   NCONNS.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
        else
          {
            /* Remove the descriptor from the epoll descriptor.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors of accepted connections on which no
         request arrived for too long.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (cnt != sock && starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;

            /* The connection is gone, so it must no longer be counted,
               just as when a descriptor is handed to fd_ready above.
               Otherwise NUSED only ever grows with each timed-out
               connection and distorts both timeout computations.  */
            --nused;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
1776 /* Start all the threads we want. The initial process is thread no. 1. */
1777 void
1778 start_threads (void)
1780 /* Initialize the conditional variable we will use. The only
1781 non-standard attribute we might use is the clock selection. */
1782 pthread_condattr_t condattr;
1783 pthread_condattr_init (&condattr);
1785 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
1786 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
1787 /* Determine whether the monotonous clock is available. */
1788 struct timespec dummy;
1789 # if _POSIX_MONOTONIC_CLOCK == 0
1790 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
1791 # endif
1792 # if _POSIX_CLOCK_SELECTION == 0
1793 if (sysconf (_SC_CLOCK_SELECTION) > 0)
1794 # endif
1795 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
1796 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
1797 timeout_clock = CLOCK_MONOTONIC;
1798 #endif
1800 pthread_cond_init (&readylist_cond, &condattr);
1801 pthread_condattr_destroy (&condattr);
1804 /* Create the attribute for the threads. They are all created
1805 detached. */
1806 pthread_attr_init (&attr);
1807 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
1808 /* Use 1MB stacks, twice as much for 64-bit architectures. */
1809 pthread_attr_setstacksize (&attr, 1024 * 1024 * (sizeof (void *) / 4));
1811 /* We allow less than LASTDB threads only for debugging. */
1812 if (debug_level == 0)
1813 nthreads = MAX (nthreads, lastdb);
1815 int nfailed = 0;
1816 for (long int i = 0; i < nthreads; ++i)
1818 pthread_t th;
1819 if (pthread_create (&th, &attr, nscd_run, (void *) (i - nfailed)) != 0)
1820 ++nfailed;
1822 if (nthreads - nfailed < lastdb)
1824 /* We could not start enough threads. */
1825 dbg_log (_("could only start %d threads; terminating"),
1826 nthreads - nfailed);
1827 exit (1);
1830 /* Determine how much room for descriptors we should initially
1831 allocate. This might need to change later if we cap the number
1832 with MAXCONN. */
1833 const long int nfds = sysconf (_SC_OPEN_MAX);
1834 #define MINCONN 32
1835 #define MAXCONN 16384
1836 if (nfds == -1 || nfds > MAXCONN)
1837 nconns = MAXCONN;
1838 else if (nfds < MINCONN)
1839 nconns = MINCONN;
1840 else
1841 nconns = nfds;
1843 /* We need memory to pass descriptors on to the worker threads. */
1844 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
1845 /* Array to keep track when connection was accepted. */
1846 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
1848 /* In the main thread we execute the loop which handles incoming
1849 connections. */
1850 #ifdef HAVE_EPOLL
1851 int efd = epoll_create (100);
1852 if (efd != -1)
1854 main_loop_epoll (efd);
1855 close (efd);
1857 #endif
1859 main_loop_poll ();
1863 /* Look up the uid, gid, and supplementary groups to run nscd as. When
1864 this function is called, we are not listening on the nscd socket yet so
1865 we can just use the ordinary lookup functions without causing a lockup */
1866 static void
1867 begin_drop_privileges (void)
1869 struct passwd *pwd = getpwnam (server_user);
1871 if (pwd == NULL)
1873 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1874 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
1875 server_user);
1878 server_uid = pwd->pw_uid;
1879 server_gid = pwd->pw_gid;
1881 /* Save the old UID/GID if we have to change back. */
1882 if (paranoia)
1884 old_uid = getuid ();
1885 old_gid = getgid ();
1888 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
1890 /* This really must never happen. */
1891 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1892 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
1895 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
1897 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
1898 == -1)
1900 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1901 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
1906 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
1907 run nscd as the user specified in the configuration file. */
1908 static void
1909 finish_drop_privileges (void)
1911 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
1912 /* We need to preserve the capabilities to connect to the audit daemon. */
1913 cap_t new_caps = preserve_capabilities ();
1914 #endif
1916 if (setgroups (server_ngroups, server_groups) == -1)
1918 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1919 error (EXIT_FAILURE, errno, _("setgroups failed"));
1922 int res;
1923 if (paranoia)
1924 res = setresgid (server_gid, server_gid, old_gid);
1925 else
1926 res = setgid (server_gid);
1927 if (res == -1)
1929 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1930 perror ("setgid");
1931 exit (4);
1934 if (paranoia)
1935 res = setresuid (server_uid, server_uid, old_uid);
1936 else
1937 res = setuid (server_uid);
1938 if (res == -1)
1940 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1941 perror ("setuid");
1942 exit (4);
1945 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
1946 /* Remove the temporary capabilities. */
1947 install_real_capabilities (new_caps);
1948 #endif
1951 /* Handle the HUP signal which will force a dump of the cache */
1952 void
1953 sighup_handler (int signum)
1955 sighup_pending = 1;