* sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Make sure
[glibc.git] / nscd / connections.c
blob26d75d297866bb6d274a160178819c60874c030a
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20 #include <alloca.h>
21 #include <assert.h>
22 #include <atomic.h>
23 #include <error.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <grp.h>
27 #include <libintl.h>
28 #include <pthread.h>
29 #include <pwd.h>
30 #include <resolv.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <arpa/inet.h>
35 #ifdef HAVE_EPOLL
36 # include <sys/epoll.h>
37 #endif
38 #include <sys/mman.h>
39 #include <sys/param.h>
40 #include <sys/poll.h>
41 #ifdef HAVE_SENDFILE
42 # include <sys/sendfile.h>
43 #endif
44 #include <sys/socket.h>
45 #include <sys/stat.h>
46 #include <sys/un.h>
48 #include "nscd.h"
49 #include "dbg_log.h"
50 #include "selinux.h"
51 #ifdef HAVE_SENDFILE
52 # include <kernel-features.h>
53 #endif
/* Wrapper functions with error checking for standard functions.  */
extern void *xmalloc (size_t n);
extern void *xcalloc (size_t n, size_t s);
extern void *xrealloc (void *o, size_t n);

/* Support to run nscd as an unprivileged user.  When SERVER_USER is
   non-NULL, nscd_init calls begin_drop_privileges before creating the
   socket and finish_drop_privileges once it is listening.  */
const char *server_user;
static uid_t server_uid;
static gid_t server_gid;
/* User who, besides root, may issue GETSTAT requests (handle_request
   compares the caller's uid against STAT_UID).  */
const char *stat_user;
uid_t stat_uid;
static gid_t *server_groups;
#ifndef NGROUPS
# define NGROUPS 32
#endif
static int server_ngroups;

static pthread_attr_t attr;

static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
78 /* Map request type to a string. */
79 const char *const serv2str[LASTREQ] =
81 [GETPWBYNAME] = "GETPWBYNAME",
82 [GETPWBYUID] = "GETPWBYUID",
83 [GETGRBYNAME] = "GETGRBYNAME",
84 [GETGRBYGID] = "GETGRBYGID",
85 [GETHOSTBYNAME] = "GETHOSTBYNAME",
86 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
87 [GETHOSTBYADDR] = "GETHOSTBYADDR",
88 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
89 [SHUTDOWN] = "SHUTDOWN",
90 [GETSTAT] = "GETSTAT",
91 [INVALIDATE] = "INVALIDATE",
92 [GETFDPW] = "GETFDPW",
93 [GETFDGR] = "GETFDGR",
94 [GETFDHST] = "GETFDHST",
95 [GETAI] = "GETAI",
96 [INITGROUPS] = "INITGROUPS",
97 [GETSERVBYNAME] = "GETSERVBYNAME",
98 [GETSERVBYPORT] = "GETSERVBYPORT",
99 [GETFDSERV] = "GETFDSERV"
102 /* The control data structures for the services. */
103 struct database_dyn dbs[lastdb] =
105 [pwddb] = {
106 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
107 .prunelock = PTHREAD_MUTEX_INITIALIZER,
108 .enabled = 0,
109 .check_file = 1,
110 .persistent = 0,
111 .propagate = 1,
112 .shared = 0,
113 .max_db_size = DEFAULT_MAX_DB_SIZE,
114 .reset_res = 0,
115 .filename = "/etc/passwd",
116 .db_filename = _PATH_NSCD_PASSWD_DB,
117 .disabled_iov = &pwd_iov_disabled,
118 .postimeout = 3600,
119 .negtimeout = 20,
120 .wr_fd = -1,
121 .ro_fd = -1,
122 .mmap_used = false
124 [grpdb] = {
125 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
126 .prunelock = PTHREAD_MUTEX_INITIALIZER,
127 .enabled = 0,
128 .check_file = 1,
129 .persistent = 0,
130 .propagate = 1,
131 .shared = 0,
132 .max_db_size = DEFAULT_MAX_DB_SIZE,
133 .reset_res = 0,
134 .filename = "/etc/group",
135 .db_filename = _PATH_NSCD_GROUP_DB,
136 .disabled_iov = &grp_iov_disabled,
137 .postimeout = 3600,
138 .negtimeout = 60,
139 .wr_fd = -1,
140 .ro_fd = -1,
141 .mmap_used = false
143 [hstdb] = {
144 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
145 .prunelock = PTHREAD_MUTEX_INITIALIZER,
146 .enabled = 0,
147 .check_file = 1,
148 .persistent = 0,
149 .propagate = 0, /* Not used. */
150 .shared = 0,
151 .max_db_size = DEFAULT_MAX_DB_SIZE,
152 .reset_res = 1,
153 .filename = "/etc/hosts",
154 .db_filename = _PATH_NSCD_HOSTS_DB,
155 .disabled_iov = &hst_iov_disabled,
156 .postimeout = 3600,
157 .negtimeout = 20,
158 .wr_fd = -1,
159 .ro_fd = -1,
160 .mmap_used = false
162 [servdb] = {
163 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
164 .prunelock = PTHREAD_MUTEX_INITIALIZER,
165 .enabled = 0,
166 .check_file = 1,
167 .persistent = 0,
168 .propagate = 0, /* Not used. */
169 .shared = 0,
170 .max_db_size = DEFAULT_MAX_DB_SIZE,
171 .reset_res = 0,
172 .filename = "/etc/services",
173 .db_filename = _PATH_NSCD_SERVICES_DB,
174 .disabled_iov = &serv_iov_disabled,
175 .postimeout = 28800,
176 .negtimeout = 20,
177 .wr_fd = -1,
178 .ro_fd = -1,
179 .mmap_used = false
184 /* Mapping of request type to database. */
185 static struct
187 bool data_request;
188 struct database_dyn *db;
189 } const reqinfo[LASTREQ] =
191 [GETPWBYNAME] = { true, &dbs[pwddb] },
192 [GETPWBYUID] = { true, &dbs[pwddb] },
193 [GETGRBYNAME] = { true, &dbs[grpdb] },
194 [GETGRBYGID] = { true, &dbs[grpdb] },
195 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
196 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
197 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
198 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
199 [SHUTDOWN] = { false, NULL },
200 [GETSTAT] = { false, NULL },
201 [SHUTDOWN] = { false, NULL },
202 [GETFDPW] = { false, &dbs[pwddb] },
203 [GETFDGR] = { false, &dbs[grpdb] },
204 [GETFDHST] = { false, &dbs[hstdb] },
205 [GETAI] = { true, &dbs[hstdb] },
206 [INITGROUPS] = { true, &dbs[grpdb] },
207 [GETSERVBYNAME] = { true, &dbs[servdb] },
208 [GETSERVBYPORT] = { true, &dbs[servdb] },
209 [GETFDSERV] = { false, &dbs[servdb] }
/* Number of seconds between two cache pruning runs.  */
#define CACHE_PRUNE_INTERVAL 15


/* Initial number of threads to use.  -1 means "not configured";
   nscd_init then substitutes 2 * lastdb.  */
int nthreads = -1;
/* Maximum number of threads to use.  */
int max_nthreads = 32;

/* Socket for incoming connections.  */
static int sock;

/* Number of times clients had to wait.  */
unsigned long int client_queued;
/* Send all LEN bytes of BUF over the socket FD, retrying after
   interruptions and partial sends.  SIGPIPE is suppressed through
   MSG_NOSIGNAL.

   Returns the negative result of send if it reported an error,
   otherwise the number of bytes actually sent (less than LEN only if
   send returned 0 before everything was transferred).  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  size_t n = len;
  ssize_t ret;

  do
    {
      ret = TEMP_FAILURE_RETRY (send (fd, buf, n, MSG_NOSIGNAL));
      if (ret <= 0)
        break;
      /* Skip what has been sent already.  */
      buf = (const char *) buf + ret;
      n -= ret;
    }
  while (n > 0);

  return ret < 0 ? ret : len - n;
}
#ifdef HAVE_SENDFILE
/* Copy LEN bytes starting at offset OFF of FROMFD to TOFD with
   sendfile, retrying after interruptions and partial transfers.

   Returns the negative result of sendfile if it reported an error,
   otherwise the number of bytes transferred (less than LEN only if
   sendfile returned 0 early).  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t n = len;
  ssize_t ret;

  do
    {
      /* sendfile advances OFF itself, only the count needs updating.  */
      ret = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, n));
      if (ret <= 0)
        break;
      n -= ret;
    }
  while (n > 0);

  return ret < 0 ? ret : len - n;
}
#endif
/* Markers stored in the per-byte usage map that check_use builds
   while a persistent database is verified.  The low bits name the kind
   of object occupying a byte, the high bits are flags.  */
enum usekey
  {
    use_not = 0,
    /* Flag bits that are OR'ed onto the kind values below:
       use_begin/use_end mark the first/last byte of an object,
       use_first records a reference from a hash entry whose `first'
       member is set.  */
    use_first = 16,
    use_begin = 32,
    use_end = 64,

    use_he = 1,
    use_he_begin = use_he | use_begin,
    use_he_end = use_he | use_end,
#if SEPARATE_KEY
    use_key = 2,
    use_key_begin = use_key | use_begin,
    use_key_end = use_key | use_end,
    use_key_first = use_key_begin | use_first,
#endif
    use_data = 3,
    use_data_begin = use_data | use_begin,
    use_data_end = use_data | use_end,
    use_data_first = use_data_begin | use_first
  };
291 static int
292 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
293 enum usekey use, ref_t start, size_t len)
295 assert (len >= 2);
297 if (start > first_free || start + len > first_free
298 || (start & BLOCK_ALIGN_M1))
299 return 0;
301 if (usemap[start] == use_not)
303 /* Add the start marker. */
304 usemap[start] = use | use_begin;
305 use &= ~use_first;
307 while (--len > 0)
308 if (usemap[++start] != use_not)
309 return 0;
310 else
311 usemap[start] = use;
313 /* Add the end marker. */
314 usemap[start] = use | use_end;
316 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
318 /* Hash entries can't be shared. */
319 if (use == use_he)
320 return 0;
322 usemap[start] |= (use & use_first);
323 use &= ~use_first;
325 while (--len > 1)
326 if (usemap[++start] != use)
327 return 0;
329 if (usemap[++start] != (use | use_end))
330 return 0;
332 else
333 /* Points to a wrong object or somewhere in the middle. */
334 return 0;
336 return 1;
340 /* Verify data in persistent database. */
341 static int
342 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
344 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb);
346 time_t now = time (NULL);
348 struct database_pers_head *head = mem;
349 struct database_pers_head head_copy = *head;
351 /* Check that the header that was read matches the head in the database. */
352 if (readhead != NULL && memcmp (head, readhead, sizeof (*head)) != 0)
353 return 0;
355 /* First some easy tests: make sure the database header is sane. */
356 if (head->version != DB_VERSION
357 || head->header_size != sizeof (*head)
358 /* We allow a timestamp to be one hour ahead of the current time.
359 This should cover daylight saving time changes. */
360 || head->timestamp > now + 60 * 60 + 60
361 || (head->gc_cycle & 1)
362 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
363 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
364 || head->first_free < 0
365 || head->first_free > head->data_size
366 || (head->first_free & BLOCK_ALIGN_M1) != 0
367 || head->maxnentries < 0
368 || head->maxnsearched < 0)
369 return 0;
371 uint8_t *usemap = calloc (head->first_free, 1);
372 if (usemap == NULL)
373 return 0;
375 const char *data = (char *) &head->array[roundup (head->module,
376 ALIGN / sizeof (ref_t))];
378 nscd_ssize_t he_cnt = 0;
379 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
381 ref_t trail = head->array[cnt];
382 ref_t work = trail;
383 int tick = 0;
385 while (work != ENDREF)
387 if (! check_use (data, head->first_free, usemap, use_he, work,
388 sizeof (struct hashentry)))
389 goto fail;
391 /* Now we know we can dereference the record. */
392 struct hashentry *here = (struct hashentry *) (data + work);
394 ++he_cnt;
396 /* Make sure the record is for this type of service. */
397 if (here->type >= LASTREQ
398 || reqinfo[here->type].db != &dbs[dbnr])
399 goto fail;
401 /* Validate boolean field value. */
402 if (here->first != false && here->first != true)
403 goto fail;
405 if (here->len < 0)
406 goto fail;
408 /* Now the data. */
409 if (here->packet < 0
410 || here->packet > head->first_free
411 || here->packet + sizeof (struct datahead) > head->first_free)
412 goto fail;
414 struct datahead *dh = (struct datahead *) (data + here->packet);
416 if (! check_use (data, head->first_free, usemap,
417 use_data | (here->first ? use_first : 0),
418 here->packet, dh->allocsize))
419 goto fail;
421 if (dh->allocsize < sizeof (struct datahead)
422 || dh->recsize > dh->allocsize
423 || (dh->notfound != false && dh->notfound != true)
424 || (dh->usable != false && dh->usable != true))
425 goto fail;
427 if (here->key < here->packet + sizeof (struct datahead)
428 || here->key > here->packet + dh->allocsize
429 || here->key + here->len > here->packet + dh->allocsize)
431 #if SEPARATE_KEY
432 /* If keys can appear outside of data, this should be done
433 instead. But gc doesn't mark the data in that case. */
434 if (! check_use (data, head->first_free, usemap,
435 use_key | (here->first ? use_first : 0),
436 here->key, here->len))
437 #endif
438 goto fail;
441 work = here->next;
443 if (work == trail)
444 /* A circular list, this must not happen. */
445 goto fail;
446 if (tick)
447 trail = ((struct hashentry *) (data + trail))->next;
448 tick = 1 - tick;
452 if (he_cnt != head->nentries)
453 goto fail;
455 /* See if all data and keys had at least one reference from
456 he->first == true hashentry. */
457 for (ref_t idx = 0; idx < head->first_free; ++idx)
459 #if SEPARATE_KEY
460 if (usemap[idx] == use_key_begin)
461 goto fail;
462 #endif
463 if (usemap[idx] == use_data_begin)
464 goto fail;
467 /* Finally, make sure the database hasn't changed since the first test. */
468 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
469 goto fail;
471 free (usemap);
472 return 1;
474 fail:
475 free (usemap);
476 return 0;
/* Additional open flags: request close-on-exec atomically where the
   system headers provide O_CLOEXEC.  */
#ifdef O_CLOEXEC
# define EXTRA_O_FLAGS O_CLOEXEC
#else
# define EXTRA_O_FLAGS 0
#endif
487 /* Initialize database information structures. */
488 void
489 nscd_init (void)
491 /* Look up unprivileged uid/gid/groups before we start listening on the
492 socket */
493 if (server_user != NULL)
494 begin_drop_privileges ();
496 if (nthreads == -1)
497 /* No configuration for this value, assume a default. */
498 nthreads = 2 * lastdb;
500 for (size_t cnt = 0; cnt < lastdb; ++cnt)
501 if (dbs[cnt].enabled)
503 pthread_rwlock_init (&dbs[cnt].lock, NULL);
504 pthread_mutex_init (&dbs[cnt].memlock, NULL);
506 if (dbs[cnt].persistent)
508 /* Try to open the appropriate file on disk. */
509 int fd = open (dbs[cnt].db_filename, O_RDWR | EXTRA_O_FLAGS);
510 if (fd != -1)
512 struct stat64 st;
513 void *mem;
514 size_t total;
515 struct database_pers_head head;
516 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
517 sizeof (head)));
518 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
520 fail_db:
521 dbg_log (_("invalid persistent database file \"%s\": %s"),
522 dbs[cnt].db_filename, strerror (errno));
523 unlink (dbs[cnt].db_filename);
525 else if (head.module == 0 && head.data_size == 0)
527 /* The file has been created, but the head has not been
528 initialized yet. Remove the old file. */
529 unlink (dbs[cnt].db_filename);
531 else if (head.header_size != (int) sizeof (head))
533 dbg_log (_("invalid persistent database file \"%s\": %s"),
534 dbs[cnt].db_filename,
535 _("header size does not match"));
536 unlink (dbs[cnt].db_filename);
538 else if ((total = (sizeof (head)
539 + roundup (head.module * sizeof (ref_t),
540 ALIGN)
541 + head.data_size))
542 > st.st_size
543 || total < sizeof (head))
545 dbg_log (_("invalid persistent database file \"%s\": %s"),
546 dbs[cnt].db_filename,
547 _("file size does not match"));
548 unlink (dbs[cnt].db_filename);
550 /* Note we map with the maximum size allowed for the
551 database. This is likely much larger than the
552 actual file size. This is OK on most OSes since
553 extensions of the underlying file will
554 automatically translate more pages available for
555 memory access. */
556 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
557 PROT_READ | PROT_WRITE,
558 MAP_SHARED, fd, 0))
559 == MAP_FAILED)
560 goto fail_db;
561 else if (!verify_persistent_db (mem, &head, cnt))
563 munmap (mem, total);
564 dbg_log (_("invalid persistent database file \"%s\": %s"),
565 dbs[cnt].db_filename,
566 _("verification failed"));
567 unlink (dbs[cnt].db_filename);
569 else
571 /* Success. We have the database. */
572 dbs[cnt].head = mem;
573 dbs[cnt].memsize = total;
574 dbs[cnt].data = (char *)
575 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
576 ALIGN / sizeof (ref_t))];
577 dbs[cnt].mmap_used = true;
579 if (dbs[cnt].suggested_module > head.module)
580 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
581 dbnames[cnt]);
583 dbs[cnt].wr_fd = fd;
584 fd = -1;
585 /* We also need a read-only descriptor. */
586 if (dbs[cnt].shared)
588 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
589 O_RDONLY | EXTRA_O_FLAGS);
590 if (dbs[cnt].ro_fd == -1)
591 dbg_log (_("\
592 cannot create read-only descriptor for \"%s\"; no mmap"),
593 dbs[cnt].db_filename);
596 // XXX Shall we test whether the descriptors actually
597 // XXX point to the same file?
600 /* Close the file descriptors in case something went
601 wrong in which case the variable have not been
602 assigned -1. */
603 if (fd != -1)
604 close (fd);
608 if (dbs[cnt].head == NULL)
610 /* No database loaded. Allocate the data structure,
611 possibly on disk. */
612 struct database_pers_head head;
613 size_t total = (sizeof (head)
614 + roundup (dbs[cnt].suggested_module
615 * sizeof (ref_t), ALIGN)
616 + (dbs[cnt].suggested_module
617 * DEFAULT_DATASIZE_PER_BUCKET));
619 /* Try to create the database. If we do not need a
620 persistent database create a temporary file. */
621 int fd;
622 int ro_fd = -1;
623 if (dbs[cnt].persistent)
625 fd = open (dbs[cnt].db_filename,
626 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | EXTRA_O_FLAGS,
627 S_IRUSR | S_IWUSR);
628 if (fd != -1 && dbs[cnt].shared)
629 ro_fd = open (dbs[cnt].db_filename,
630 O_RDONLY | EXTRA_O_FLAGS);
632 else
634 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
635 fd = mkostemp (fname, EXTRA_O_FLAGS);
637 /* We do not need the file name anymore after we
638 opened another file descriptor in read-only mode. */
639 if (fd != -1)
641 if (dbs[cnt].shared)
642 ro_fd = open (fname, O_RDONLY | EXTRA_O_FLAGS);
644 unlink (fname);
648 if (fd == -1)
650 if (errno == EEXIST)
652 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
653 dbnames[cnt], dbs[cnt].db_filename);
654 // XXX Correct way to terminate?
655 exit (1);
658 if (dbs[cnt].persistent)
659 dbg_log (_("cannot create %s; no persistent database used"),
660 dbs[cnt].db_filename);
661 else
662 dbg_log (_("cannot create %s; no sharing possible"),
663 dbs[cnt].db_filename);
665 dbs[cnt].persistent = 0;
666 // XXX remember: no mmap
668 else
670 /* Tell the user if we could not create the read-only
671 descriptor. */
672 if (ro_fd == -1 && dbs[cnt].shared)
673 dbg_log (_("\
674 cannot create read-only descriptor for \"%s\"; no mmap"),
675 dbs[cnt].db_filename);
677 /* Before we create the header, initialiye the hash
678 table. So that if we get interrupted if writing
679 the header we can recognize a partially initialized
680 database. */
681 size_t ps = sysconf (_SC_PAGESIZE);
682 char tmpbuf[ps];
683 assert (~ENDREF == 0);
684 memset (tmpbuf, '\xff', ps);
686 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
687 off_t offset = sizeof (head);
689 size_t towrite;
690 if (offset % ps != 0)
692 towrite = MIN (remaining, ps - (offset % ps));
693 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
694 goto write_fail;
695 offset += towrite;
696 remaining -= towrite;
699 while (remaining > ps)
701 if (pwrite (fd, tmpbuf, ps, offset) == -1)
702 goto write_fail;
703 offset += ps;
704 remaining -= ps;
707 if (remaining > 0
708 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
709 goto write_fail;
711 /* Create the header of the file. */
712 struct database_pers_head head =
714 .version = DB_VERSION,
715 .header_size = sizeof (head),
716 .module = dbs[cnt].suggested_module,
717 .data_size = (dbs[cnt].suggested_module
718 * DEFAULT_DATASIZE_PER_BUCKET),
719 .first_free = 0
721 void *mem;
723 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
724 != sizeof (head))
725 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
726 != 0)
727 || (mem = mmap (NULL, dbs[cnt].max_db_size,
728 PROT_READ | PROT_WRITE,
729 MAP_SHARED, fd, 0)) == MAP_FAILED)
731 write_fail:
732 unlink (dbs[cnt].db_filename);
733 dbg_log (_("cannot write to database file %s: %s"),
734 dbs[cnt].db_filename, strerror (errno));
735 dbs[cnt].persistent = 0;
737 else
739 /* Success. */
740 dbs[cnt].head = mem;
741 dbs[cnt].data = (char *)
742 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
743 ALIGN / sizeof (ref_t))];
744 dbs[cnt].memsize = total;
745 dbs[cnt].mmap_used = true;
747 /* Remember the descriptors. */
748 dbs[cnt].wr_fd = fd;
749 dbs[cnt].ro_fd = ro_fd;
750 fd = -1;
751 ro_fd = -1;
754 if (fd != -1)
755 close (fd);
756 if (ro_fd != -1)
757 close (ro_fd);
761 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
762 /* We do not check here whether the O_CLOEXEC provided to the
763 open call was successful or not. The two fcntl calls are
764 only performed once each per process start-up and therefore
765 is not noticeable at all. */
766 if (paranoia
767 && ((dbs[cnt].wr_fd != -1
768 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
769 || (dbs[cnt].ro_fd != -1
770 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
772 dbg_log (_("\
773 cannot set socket to close on exec: %s; disabling paranoia mode"),
774 strerror (errno));
775 paranoia = 0;
777 #endif
779 if (dbs[cnt].head == NULL)
781 /* We do not use the persistent database. Just
782 create an in-memory data structure. */
783 assert (! dbs[cnt].persistent);
785 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
786 + (dbs[cnt].suggested_module
787 * sizeof (ref_t)));
788 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
789 assert (~ENDREF == 0);
790 memset (dbs[cnt].head->array, '\xff',
791 dbs[cnt].suggested_module * sizeof (ref_t));
792 dbs[cnt].head->module = dbs[cnt].suggested_module;
793 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
794 * dbs[cnt].head->module);
795 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
796 dbs[cnt].head->first_free = 0;
798 dbs[cnt].shared = 0;
799 assert (dbs[cnt].ro_fd == -1);
802 if (dbs[cnt].check_file)
804 /* We need the modification date of the file. */
805 struct stat64 st;
807 if (stat64 (dbs[cnt].filename, &st) < 0)
809 /* We cannot stat() the file, disable file checking. */
810 dbg_log (_("cannot stat() file `%s': %s"),
811 dbs[cnt].filename, strerror (errno));
812 dbs[cnt].check_file = 0;
814 else
815 dbs[cnt].file_mtime = st.st_mtime;
819 /* Create the socket. */
820 sock = socket (AF_UNIX, SOCK_STREAM, 0);
821 if (sock < 0)
823 dbg_log (_("cannot open socket: %s"), strerror (errno));
824 exit (errno == EACCES ? 4 : 1);
826 /* Bind a name to the socket. */
827 struct sockaddr_un sock_addr;
828 sock_addr.sun_family = AF_UNIX;
829 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
830 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
832 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
833 exit (errno == EACCES ? 4 : 1);
836 /* We don't want to get stuck on accept. */
837 int fl = fcntl (sock, F_GETFL);
838 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
840 dbg_log (_("cannot change socket to nonblocking mode: %s"),
841 strerror (errno));
842 exit (1);
845 /* The descriptor needs to be closed on exec. */
846 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
848 dbg_log (_("cannot set socket to close on exec: %s"),
849 strerror (errno));
850 exit (1);
853 /* Set permissions for the socket. */
854 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
856 /* Set the socket up to accept connections. */
857 if (listen (sock, SOMAXCONN) < 0)
859 dbg_log (_("cannot enable socket to accept connections: %s"),
860 strerror (errno));
861 exit (1);
864 /* Change to unprivileged uid/gid/groups if specifed in config file */
865 if (server_user != NULL)
866 finish_drop_privileges ();
870 /* Close the connections. */
871 void
872 close_sockets (void)
874 close (sock);
878 static void
879 invalidate_cache (char *key, int fd)
881 dbtype number;
882 int32_t resp;
884 for (number = pwddb; number < lastdb; ++number)
885 if (strcmp (key, dbnames[number]) == 0)
887 if (dbs[number].reset_res)
888 res_init ();
890 break;
893 if (number == lastdb)
895 resp = EINVAL;
896 writeall (fd, &resp, sizeof (resp));
897 return;
900 if (dbs[number].enabled)
901 prune_cache (&dbs[number], LONG_MAX, fd);
902 else
904 resp = 0;
905 writeall (fd, &resp, sizeof (resp));
910 #ifdef SCM_RIGHTS
911 static void
912 send_ro_fd (struct database_dyn *db, char *key, int fd)
914 /* If we do not have an read-only file descriptor do nothing. */
915 if (db->ro_fd == -1)
916 return;
918 /* We need to send some data along with the descriptor. */
919 uint64_t mapsize = (db->head->data_size
920 + roundup (db->head->module * sizeof (ref_t), ALIGN)
921 + sizeof (struct database_pers_head));
922 struct iovec iov[2];
923 iov[0].iov_base = key;
924 iov[0].iov_len = strlen (key) + 1;
925 iov[1].iov_base = &mapsize;
926 iov[1].iov_len = sizeof (mapsize);
928 /* Prepare the control message to transfer the descriptor. */
929 union
931 struct cmsghdr hdr;
932 char bytes[CMSG_SPACE (sizeof (int))];
933 } buf;
934 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
935 .msg_control = buf.bytes,
936 .msg_controllen = sizeof (buf) };
937 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
939 cmsg->cmsg_level = SOL_SOCKET;
940 cmsg->cmsg_type = SCM_RIGHTS;
941 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
943 *(int *) CMSG_DATA (cmsg) = db->ro_fd;
945 msg.msg_controllen = cmsg->cmsg_len;
947 /* Send the control message. We repeat when we are interrupted but
948 everything else is ignored. */
949 #ifndef MSG_NOSIGNAL
950 # define MSG_NOSIGNAL 0
951 #endif
952 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
954 if (__builtin_expect (debug_level > 0, 0))
955 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
957 #endif /* SCM_RIGHTS */
960 /* Handle new request. */
961 static void
962 handle_request (int fd, request_header *req, void *key, uid_t uid)
964 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
966 if (debug_level > 0)
967 dbg_log (_("\
968 cannot handle old request version %d; current version is %d"),
969 req->version, NSCD_VERSION);
970 return;
973 /* Make the SELinux check before we go on to the standard checks. */
974 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
975 return;
977 struct database_dyn *db = reqinfo[req->type].db;
979 /* See whether we can service the request from the cache. */
980 if (__builtin_expect (reqinfo[req->type].data_request, true))
982 if (__builtin_expect (debug_level, 0) > 0)
984 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
986 char buf[INET6_ADDRSTRLEN];
988 dbg_log ("\t%s (%s)", serv2str[req->type],
989 inet_ntop (req->type == GETHOSTBYADDR
990 ? AF_INET : AF_INET6,
991 key, buf, sizeof (buf)));
993 else
994 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
997 /* Is this service enabled? */
998 if (__builtin_expect (!db->enabled, 0))
1000 /* No, sent the prepared record. */
1001 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1002 db->disabled_iov->iov_len,
1003 MSG_NOSIGNAL))
1004 != (ssize_t) db->disabled_iov->iov_len
1005 && __builtin_expect (debug_level, 0) > 0)
1007 /* We have problems sending the result. */
1008 char buf[256];
1009 dbg_log (_("cannot write result: %s"),
1010 strerror_r (errno, buf, sizeof (buf)));
1013 return;
1016 /* Be sure we can read the data. */
1017 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
1019 ++db->head->rdlockdelayed;
1020 pthread_rwlock_rdlock (&db->lock);
1023 /* See whether we can handle it from the cache. */
1024 struct datahead *cached;
1025 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1026 db, uid);
1027 if (cached != NULL)
1029 /* Hurray it's in the cache. */
1030 ssize_t nwritten;
1032 #ifdef HAVE_SENDFILE
1033 if (__builtin_expect (db->mmap_used, 1))
1035 assert (db->wr_fd != -1);
1036 assert ((char *) cached->data > (char *) db->data);
1037 assert ((char *) cached->data - (char *) db->head
1038 + cached->recsize
1039 <= (sizeof (struct database_pers_head)
1040 + db->head->module * sizeof (ref_t)
1041 + db->head->data_size));
1042 nwritten = sendfileall (fd, db->wr_fd,
1043 (char *) cached->data
1044 - (char *) db->head, cached->recsize);
1045 # ifndef __ASSUME_SENDFILE
1046 if (nwritten == -1 && errno == ENOSYS)
1047 goto use_write;
1048 # endif
1050 else
1051 # ifndef __ASSUME_SENDFILE
1052 use_write:
1053 # endif
1054 #endif
1055 nwritten = writeall (fd, cached->data, cached->recsize);
1057 if (nwritten != cached->recsize
1058 && __builtin_expect (debug_level, 0) > 0)
1060 /* We have problems sending the result. */
1061 char buf[256];
1062 dbg_log (_("cannot write result: %s"),
1063 strerror_r (errno, buf, sizeof (buf)));
1066 pthread_rwlock_unlock (&db->lock);
1068 return;
1071 pthread_rwlock_unlock (&db->lock);
1073 else if (__builtin_expect (debug_level, 0) > 0)
1075 if (req->type == INVALIDATE)
1076 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1077 else
1078 dbg_log ("\t%s", serv2str[req->type]);
1081 /* Handle the request. */
1082 switch (req->type)
1084 case GETPWBYNAME:
1085 addpwbyname (db, fd, req, key, uid);
1086 break;
1088 case GETPWBYUID:
1089 addpwbyuid (db, fd, req, key, uid);
1090 break;
1092 case GETGRBYNAME:
1093 addgrbyname (db, fd, req, key, uid);
1094 break;
1096 case GETGRBYGID:
1097 addgrbygid (db, fd, req, key, uid);
1098 break;
1100 case GETHOSTBYNAME:
1101 addhstbyname (db, fd, req, key, uid);
1102 break;
1104 case GETHOSTBYNAMEv6:
1105 addhstbynamev6 (db, fd, req, key, uid);
1106 break;
1108 case GETHOSTBYADDR:
1109 addhstbyaddr (db, fd, req, key, uid);
1110 break;
1112 case GETHOSTBYADDRv6:
1113 addhstbyaddrv6 (db, fd, req, key, uid);
1114 break;
1116 case GETAI:
1117 addhstai (db, fd, req, key, uid);
1118 break;
1120 case INITGROUPS:
1121 addinitgroups (db, fd, req, key, uid);
1122 break;
1124 case GETSERVBYNAME:
1125 addservbyname (db, fd, req, key, uid);
1126 break;
1128 case GETSERVBYPORT:
1129 addservbyport (db, fd, req, key, uid);
1130 break;
1132 case GETSTAT:
1133 case SHUTDOWN:
1134 case INVALIDATE:
1136 /* Get the callers credentials. */
1137 #ifdef SO_PEERCRED
1138 struct ucred caller;
1139 socklen_t optlen = sizeof (caller);
1141 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1143 char buf[256];
1145 dbg_log (_("error getting caller's id: %s"),
1146 strerror_r (errno, buf, sizeof (buf)));
1147 break;
1150 uid = caller.uid;
1151 #else
1152 /* Some systems have no SO_PEERCRED implementation. They don't
1153 care about security so we don't as well. */
1154 uid = 0;
1155 #endif
1158 /* Accept shutdown, getstat and invalidate only from root. For
1159 the stat call also allow the user specified in the config file. */
1160 if (req->type == GETSTAT)
1162 if (uid == 0 || uid == stat_uid)
1163 send_stats (fd, dbs);
1165 else if (uid == 0)
1167 if (req->type == INVALIDATE)
1168 invalidate_cache (key, fd);
1169 else
1170 termination_handler (0);
1172 break;
1174 case GETFDPW:
1175 case GETFDGR:
1176 case GETFDHST:
1177 case GETFDSERV:
1178 #ifdef SCM_RIGHTS
1179 send_ro_fd (reqinfo[req->type].db, key, fd);
1180 #endif
1181 break;
1183 default:
1184 /* Ignore the command, it's nothing we know. */
1185 break;
1190 /* Restart the process. */
1191 static void
1192 restart (void)
1194 /* First determine the parameters. We do not use the parameters
1195 passed to main() since in case nscd is started by running the
1196 dynamic linker this will not work. Yes, this is not the usual
1197 case but nscd is part of glibc and we occasionally do this. */
1198 size_t buflen = 1024;
1199 char *buf = alloca (buflen);
1200 size_t readlen = 0;
1201 int fd = open ("/proc/self/cmdline", O_RDONLY);
1202 if (fd == -1)
1204 dbg_log (_("\
1205 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1206 strerror (errno));
1208 paranoia = 0;
1209 return;
1212 while (1)
1214 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1215 buflen - readlen));
1216 if (n == -1)
1218 dbg_log (_("\
1219 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1220 strerror (errno));
1222 close (fd);
1223 paranoia = 0;
1224 return;
1227 readlen += n;
1229 if (readlen < buflen)
1230 break;
1232 /* We might have to extend the buffer. */
1233 size_t old_buflen = buflen;
1234 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1235 buf = memmove (newp, buf, old_buflen);
1238 close (fd);
1240 /* Parse the command line. Worst case scenario: every two
1241 characters form one parameter (one character plus NUL). */
1242 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1243 int argc = 0;
1245 char *cp = buf;
1246 while (cp < buf + readlen)
1248 argv[argc++] = cp;
1249 cp = (char *) rawmemchr (cp, '\0') + 1;
1251 argv[argc] = NULL;
1253 /* Second, change back to the old user if we changed it. */
1254 if (server_user != NULL)
1256 if (setresuid (old_uid, old_uid, old_uid) != 0)
1258 dbg_log (_("\
1259 cannot change to old UID: %s; disabling paranoia mode"),
1260 strerror (errno));
1262 paranoia = 0;
1263 return;
1266 if (setresgid (old_gid, old_gid, old_gid) != 0)
1268 dbg_log (_("\
1269 cannot change to old GID: %s; disabling paranoia mode"),
1270 strerror (errno));
1272 setuid (server_uid);
1273 paranoia = 0;
1274 return;
1278 /* Next change back to the old working directory. */
1279 if (chdir (oldcwd) == -1)
1281 dbg_log (_("\
1282 cannot change to old working directory: %s; disabling paranoia mode"),
1283 strerror (errno));
1285 if (server_user != NULL)
1287 setuid (server_uid);
1288 setgid (server_gid);
1290 paranoia = 0;
1291 return;
1294 /* Synchronize memory. */
1295 for (int cnt = 0; cnt < lastdb; ++cnt)
1296 if (!dbs[cnt].enabled)
1298 /* Make sure nobody keeps using the database. */
1299 dbs[cnt].head->timestamp = 0;
1301 if (dbs[cnt].persistent)
1302 // XXX async OK?
1303 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1306 /* The preparations are done. */
1307 execv ("/proc/self/exe", argv);
1309 /* If we come here, we will never be able to re-exec. */
1310 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1311 strerror (errno));
1313 if (server_user != NULL)
1315 setuid (server_uid);
1316 setgid (server_gid);
1318 if (chdir ("/") != 0)
1319 dbg_log (_("cannot change current working directory to \"/\": %s"),
1320 strerror (errno));
1321 paranoia = 0;
1325 /* List of file descriptors. */
1326 struct fdlist
1328 int fd;
1329 struct fdlist *next;
1331 /* Memory allocated for the list. */
1332 static struct fdlist *fdlist;
1333 /* List of currently ready-to-read file descriptors. */
1334 static struct fdlist *readylist;
1336 /* Conditional variable and mutex to signal availability of entries in
1337 READYLIST. The condvar is initialized dynamically since we might
1338 use a different clock depending on availability. */
1339 static pthread_cond_t readylist_cond;
1340 static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1342 /* The clock to use with the condvar. */
1343 static clockid_t timeout_clock = CLOCK_REALTIME;
1345 /* Number of threads ready to handle the READYLIST. */
1346 static unsigned long int nready;
1349 /* This is the main loop. It is replicated in different threads but the
1350 `poll' call makes sure only one thread handles an incoming connection. */
1351 static void *
1352 __attribute__ ((__noreturn__))
1353 nscd_run (void *p)
1355 const long int my_number = (long int) p;
1356 const int run_prune = my_number < lastdb && dbs[my_number].enabled;
1357 struct timespec prune_ts;
1358 int to = 0;
1359 char buf[256];
1361 if (run_prune)
1363 setup_thread (&dbs[my_number]);
1365 /* We are running. */
1366 dbs[my_number].head->timestamp = time (NULL);
1368 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1369 /* Should never happen. */
1370 abort ();
1372 /* Compute timeout time. */
1373 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1376 /* Initial locking. */
1377 pthread_mutex_lock (&readylist_lock);
1379 /* One more thread available. */
1380 ++nready;
1382 while (1)
1384 while (readylist == NULL)
1386 if (run_prune)
1388 /* Wait, but not forever. */
1389 to = pthread_cond_timedwait (&readylist_cond, &readylist_lock,
1390 &prune_ts);
1392 /* If we were woken and there is no work to be done,
1393 just start pruning. */
1394 if (readylist == NULL && to == ETIMEDOUT)
1396 --nready;
1397 pthread_mutex_unlock (&readylist_lock);
1398 goto only_prune;
1401 else
1402 /* No need to timeout. */
1403 pthread_cond_wait (&readylist_cond, &readylist_lock);
1406 struct fdlist *it = readylist->next;
1407 if (readylist->next == readylist)
1408 /* Just one entry on the list. */
1409 readylist = NULL;
1410 else
1411 readylist->next = it->next;
1413 /* Extract the information and mark the record ready to be used
1414 again. */
1415 int fd = it->fd;
1416 it->next = NULL;
1418 /* One more thread available. */
1419 --nready;
1421 /* We are done with the list. */
1422 pthread_mutex_unlock (&readylist_lock);
1424 /* We do not want to block on a short read or so. */
1425 int fl = fcntl (fd, F_GETFL);
1426 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1427 goto close_and_out;
1429 /* Now read the request. */
1430 request_header req;
1431 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1432 != sizeof (req), 0))
1434 /* We failed to read data. Note that this also might mean we
1435 failed because we would have blocked. */
1436 if (debug_level > 0)
1437 dbg_log (_("short read while reading request: %s"),
1438 strerror_r (errno, buf, sizeof (buf)));
1439 goto close_and_out;
1442 /* Check whether this is a valid request type. */
1443 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1444 goto close_and_out;
1446 /* Some systems have no SO_PEERCRED implementation. They don't
1447 care about security so we don't as well. */
1448 uid_t uid = -1;
1449 #ifdef SO_PEERCRED
1450 pid_t pid = 0;
1452 if (__builtin_expect (debug_level > 0, 0))
1454 struct ucred caller;
1455 socklen_t optlen = sizeof (caller);
1457 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1458 pid = caller.pid;
1460 #endif
1462 /* It should not be possible to crash the nscd with a silly
1463 request (i.e., a terribly large key). We limit the size to 1kb. */
1464 if (__builtin_expect (req.key_len, 1) < 0
1465 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1467 if (debug_level > 0)
1468 dbg_log (_("key length in request too long: %d"), req.key_len);
1470 else
1472 /* Get the key. */
1473 char keybuf[MAXKEYLEN];
1475 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1476 req.key_len))
1477 != req.key_len, 0))
1479 /* Again, this can also mean we would have blocked. */
1480 if (debug_level > 0)
1481 dbg_log (_("short read while reading request key: %s"),
1482 strerror_r (errno, buf, sizeof (buf)));
1483 goto close_and_out;
1486 if (__builtin_expect (debug_level, 0) > 0)
1488 #ifdef SO_PEERCRED
1489 if (pid != 0)
1490 dbg_log (_("\
1491 handle_request: request received (Version = %d) from PID %ld"),
1492 req.version, (long int) pid);
1493 else
1494 #endif
1495 dbg_log (_("\
1496 handle_request: request received (Version = %d)"), req.version);
1499 /* Phew, we got all the data, now process it. */
1500 handle_request (fd, &req, keybuf, uid);
1503 close_and_out:
1504 /* We are done. */
1505 close (fd);
1507 /* Check whether we should be pruning the cache. */
1508 assert (run_prune || to == 0);
1509 if (to == ETIMEDOUT)
1511 only_prune:
1512 /* The pthread_cond_timedwait() call timed out. It is time
1513 to clean up the cache. */
1514 assert (my_number < lastdb);
1515 prune_cache (&dbs[my_number], time (NULL), -1);
1517 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1518 /* Should never happen. */
1519 abort ();
1521 /* Compute next timeout time. */
1522 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1524 /* In case the list is emtpy we do not want to run the prune
1525 code right away again. */
1526 to = 0;
1529 /* Re-locking. */
1530 pthread_mutex_lock (&readylist_lock);
1532 /* One more thread available. */
1533 ++nready;
1538 static unsigned int nconns;
1540 static void
1541 fd_ready (int fd)
1543 pthread_mutex_lock (&readylist_lock);
1545 /* Find an empty entry in FDLIST. */
1546 size_t inner;
1547 for (inner = 0; inner < nconns; ++inner)
1548 if (fdlist[inner].next == NULL)
1549 break;
1550 assert (inner < nconns);
1552 fdlist[inner].fd = fd;
1554 if (readylist == NULL)
1555 readylist = fdlist[inner].next = &fdlist[inner];
1556 else
1558 fdlist[inner].next = readylist->next;
1559 readylist = readylist->next = &fdlist[inner];
1562 bool do_signal = true;
1563 if (__builtin_expect (nready == 0, 0))
1565 ++client_queued;
1566 do_signal = false;
1568 /* Try to start another thread to help out. */
1569 pthread_t th;
1570 if (nthreads < max_nthreads
1571 && pthread_create (&th, &attr, nscd_run,
1572 (void *) (long int) nthreads) == 0)
1574 /* We got another thread. */
1575 ++nthreads;
1576 /* The new thread might need a kick. */
1577 do_signal = true;
1582 pthread_mutex_unlock (&readylist_lock);
1584 /* Tell one of the worker threads there is work to do. */
1585 if (do_signal)
1586 pthread_cond_signal (&readylist_cond);
1590 /* Check whether restarting should happen. */
1591 static inline int
1592 restart_p (time_t now)
1594 return (paranoia && readylist == NULL && nready == nthreads
1595 && now >= restart_time);
1599 /* Array for times a connection was accepted. */
1600 static time_t *starttime;
1603 static void
1604 __attribute__ ((__noreturn__))
1605 main_loop_poll (void)
1607 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1608 * sizeof (conns[0]));
1610 conns[0].fd = sock;
1611 conns[0].events = POLLRDNORM;
1612 size_t nused = 1;
1613 size_t firstfree = 1;
1615 while (1)
1617 /* Wait for any event. We wait at most a couple of seconds so
1618 that we can check whether we should close any of the accepted
1619 connections since we have not received a request. */
1620 #define MAX_ACCEPT_TIMEOUT 30
1621 #define MIN_ACCEPT_TIMEOUT 5
1622 #define MAIN_THREAD_TIMEOUT \
1623 (MAX_ACCEPT_TIMEOUT * 1000 \
1624 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1626 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1628 time_t now = time (NULL);
1630 /* If there is a descriptor ready for reading or there is a new
1631 connection, process this now. */
1632 if (n > 0)
1634 if (conns[0].revents != 0)
1636 /* We have a new incoming connection. Accept the connection. */
1637 int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
1639 /* Use the descriptor if we have not reached the limit. */
1640 if (fd >= 0)
1642 if (firstfree < nconns)
1644 conns[firstfree].fd = fd;
1645 conns[firstfree].events = POLLRDNORM;
1646 starttime[firstfree] = now;
1647 if (firstfree >= nused)
1648 nused = firstfree + 1;
1651 ++firstfree;
1652 while (firstfree < nused && conns[firstfree].fd != -1);
1654 else
1655 /* We cannot use the connection so close it. */
1656 close (fd);
1659 --n;
1662 for (size_t cnt = 1; cnt < nused && n > 0; ++cnt)
1663 if (conns[cnt].revents != 0)
1665 fd_ready (conns[cnt].fd);
1667 /* Clean up the CONNS array. */
1668 conns[cnt].fd = -1;
1669 if (cnt < firstfree)
1670 firstfree = cnt;
1671 if (cnt == nused - 1)
1673 --nused;
1674 while (conns[nused - 1].fd == -1);
1676 --n;
1680 /* Now find entries which have timed out. */
1681 assert (nused > 0);
1683 /* We make the timeout length depend on the number of file
1684 descriptors currently used. */
1685 #define ACCEPT_TIMEOUT \
1686 (MAX_ACCEPT_TIMEOUT \
1687 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
1688 time_t laststart = now - ACCEPT_TIMEOUT;
1690 for (size_t cnt = nused - 1; cnt > 0; --cnt)
1692 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
1694 /* Remove the entry, it timed out. */
1695 (void) close (conns[cnt].fd);
1696 conns[cnt].fd = -1;
1698 if (cnt < firstfree)
1699 firstfree = cnt;
1700 if (cnt == nused - 1)
1702 --nused;
1703 while (conns[nused - 1].fd == -1);
1707 if (restart_p (now))
1708 restart ();
#ifdef HAVE_EPOLL
/* Main loop of the initial thread, based on epoll.  EFD is the epoll
   descriptor to use.  The function accepts connections on SOCK, passes
   descriptors with pending data on to the workers through fd_ready,
   closes connections which stayed silent for too long and checks
   whether the process should restart.  It returns only if SOCK cannot
   be registered with EFD, i.e., if epoll cannot be used.

   STARTTIME is indexed by descriptor number here; a zero entry means
   the descriptor is not being watched.  NUSED counts the watched
   descriptors (including SOCK) and feeds the MAIN_THREAD_TIMEOUT and
   ACCEPT_TIMEOUT computations.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  int nused = 1;
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));

            if (fd >= 0)
              {
                /* Try to add the new descriptor.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
        else
          {
            /* Remove the descriptor from the epoll descriptor.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (cnt != sock && starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;

            /* The descriptor is no longer watched.  Without this the
               count only ever grows for timed-out connections and the
               timeouts derived from it become wrong.  */
            --nused;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
1807 /* Start all the threads we want. The initial process is thread no. 1. */
1808 void
1809 start_threads (void)
1811 /* Initialize the conditional variable we will use. The only
1812 non-standard attribute we might use is the clock selection. */
1813 pthread_condattr_t condattr;
1814 pthread_condattr_init (&condattr);
1816 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
1817 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
1818 /* Determine whether the monotonous clock is available. */
1819 struct timespec dummy;
1820 # if _POSIX_MONOTONIC_CLOCK == 0
1821 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
1822 # endif
1823 # if _POSIX_CLOCK_SELECTION == 0
1824 if (sysconf (_SC_CLOCK_SELECTION) > 0)
1825 # endif
1826 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
1827 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
1828 timeout_clock = CLOCK_MONOTONIC;
1829 #endif
1831 pthread_cond_init (&readylist_cond, &condattr);
1832 pthread_condattr_destroy (&condattr);
1835 /* Create the attribute for the threads. They are all created
1836 detached. */
1837 pthread_attr_init (&attr);
1838 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
1839 /* Use 1MB stacks, twice as much for 64-bit architectures. */
1840 pthread_attr_setstacksize (&attr, 1024 * 1024 * (sizeof (void *) / 4));
1842 /* We allow less than LASTDB threads only for debugging. */
1843 if (debug_level == 0)
1844 nthreads = MAX (nthreads, lastdb);
1846 int nfailed = 0;
1847 for (long int i = 0; i < nthreads; ++i)
1849 pthread_t th;
1850 if (pthread_create (&th, &attr, nscd_run, (void *) (i - nfailed)) != 0)
1851 ++nfailed;
1853 if (nthreads - nfailed < lastdb)
1855 /* We could not start enough threads. */
1856 dbg_log (_("could only start %d threads; terminating"),
1857 nthreads - nfailed);
1858 exit (1);
1861 /* Determine how much room for descriptors we should initially
1862 allocate. This might need to change later if we cap the number
1863 with MAXCONN. */
1864 const long int nfds = sysconf (_SC_OPEN_MAX);
1865 #define MINCONN 32
1866 #define MAXCONN 16384
1867 if (nfds == -1 || nfds > MAXCONN)
1868 nconns = MAXCONN;
1869 else if (nfds < MINCONN)
1870 nconns = MINCONN;
1871 else
1872 nconns = nfds;
1874 /* We need memory to pass descriptors on to the worker threads. */
1875 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
1876 /* Array to keep track when connection was accepted. */
1877 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
1879 /* In the main thread we execute the loop which handles incoming
1880 connections. */
1881 #ifdef HAVE_EPOLL
1882 int efd = epoll_create (100);
1883 if (efd != -1)
1885 main_loop_epoll (efd);
1886 close (efd);
1888 #endif
1890 main_loop_poll ();
1894 /* Look up the uid, gid, and supplementary groups to run nscd as. When
1895 this function is called, we are not listening on the nscd socket yet so
1896 we can just use the ordinary lookup functions without causing a lockup */
1897 static void
1898 begin_drop_privileges (void)
1900 struct passwd *pwd = getpwnam (server_user);
1902 if (pwd == NULL)
1904 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1905 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
1906 server_user);
1909 server_uid = pwd->pw_uid;
1910 server_gid = pwd->pw_gid;
1912 /* Save the old UID/GID if we have to change back. */
1913 if (paranoia)
1915 old_uid = getuid ();
1916 old_gid = getgid ();
1919 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
1921 /* This really must never happen. */
1922 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1923 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
1926 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
1928 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
1929 == -1)
1931 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1932 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
1937 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
1938 run nscd as the user specified in the configuration file. */
1939 static void
1940 finish_drop_privileges (void)
1942 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
1943 /* We need to preserve the capabilities to connect to the audit daemon. */
1944 cap_t new_caps = preserve_capabilities ();
1945 #endif
1947 if (setgroups (server_ngroups, server_groups) == -1)
1949 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1950 error (EXIT_FAILURE, errno, _("setgroups failed"));
1953 int res;
1954 if (paranoia)
1955 res = setresgid (server_gid, server_gid, old_gid);
1956 else
1957 res = setgid (server_gid);
1958 if (res == -1)
1960 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1961 perror ("setgid");
1962 exit (4);
1965 if (paranoia)
1966 res = setresuid (server_uid, server_uid, old_uid);
1967 else
1968 res = setuid (server_uid);
1969 if (res == -1)
1971 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1972 perror ("setuid");
1973 exit (4);
1976 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
1977 /* Remove the temporary capabilities. */
1978 install_real_capabilities (new_caps);
1979 #endif