* sysdeps/unix/sysv/linux/bits/resource.h: Define RUSAGE_THREAD and
[glibc.git] / nscd / connections.c
blob15148bdf3dbd34fdec2b360afc9d25d6302d6ced
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2007, 2008 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20 #include <alloca.h>
21 #include <assert.h>
22 #include <atomic.h>
23 #include <error.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <grp.h>
27 #include <libintl.h>
28 #include <pthread.h>
29 #include <pwd.h>
30 #include <resolv.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <arpa/inet.h>
35 #ifdef HAVE_EPOLL
36 # include <sys/epoll.h>
37 #endif
38 #include <sys/mman.h>
39 #include <sys/param.h>
40 #include <sys/poll.h>
41 #ifdef HAVE_SENDFILE
42 # include <sys/sendfile.h>
43 #endif
44 #include <sys/socket.h>
45 #include <sys/stat.h>
46 #include <sys/un.h>
48 #include "nscd.h"
49 #include "dbg_log.h"
50 #include "selinux.h"
51 #ifdef HAVE_SENDFILE
52 # include <kernel-features.h>
53 #endif
/* Declarations of the allocation wrappers that perform error checking.  */
extern void *xmalloc (size_t n);
extern void *xcalloc (size_t n, size_t s);
extern void *xrealloc (void *o, size_t n);

/* State needed to run nscd as an unprivileged user.  */
const char *server_user;
static uid_t server_uid;
static gid_t server_gid;
const char *stat_user;
uid_t stat_uid;
static gid_t *server_groups;
#ifndef NGROUPS
# define NGROUPS 32
#endif
static int server_ngroups;

static pthread_attr_t attr;

/* Two-phase privilege drop; both are called from nscd_init.  */
static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
78 /* Map request type to a string. */
79 const char *const serv2str[LASTREQ] =
81 [GETPWBYNAME] = "GETPWBYNAME",
82 [GETPWBYUID] = "GETPWBYUID",
83 [GETGRBYNAME] = "GETGRBYNAME",
84 [GETGRBYGID] = "GETGRBYGID",
85 [GETHOSTBYNAME] = "GETHOSTBYNAME",
86 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
87 [GETHOSTBYADDR] = "GETHOSTBYADDR",
88 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
89 [SHUTDOWN] = "SHUTDOWN",
90 [GETSTAT] = "GETSTAT",
91 [INVALIDATE] = "INVALIDATE",
92 [GETFDPW] = "GETFDPW",
93 [GETFDGR] = "GETFDGR",
94 [GETFDHST] = "GETFDHST",
95 [GETAI] = "GETAI",
96 [INITGROUPS] = "INITGROUPS",
97 [GETSERVBYNAME] = "GETSERVBYNAME",
98 [GETSERVBYPORT] = "GETSERVBYPORT",
99 [GETFDSERV] = "GETFDSERV"
102 /* The control data structures for the services. */
103 struct database_dyn dbs[lastdb] =
105 [pwddb] = {
106 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
107 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
108 .enabled = 0,
109 .check_file = 1,
110 .persistent = 0,
111 .propagate = 1,
112 .shared = 0,
113 .max_db_size = DEFAULT_MAX_DB_SIZE,
114 .suggested_module = DEFAULT_SUGGESTED_MODULE,
115 .reset_res = 0,
116 .filename = "/etc/passwd",
117 .db_filename = _PATH_NSCD_PASSWD_DB,
118 .disabled_iov = &pwd_iov_disabled,
119 .postimeout = 3600,
120 .negtimeout = 20,
121 .wr_fd = -1,
122 .ro_fd = -1,
123 .mmap_used = false
125 [grpdb] = {
126 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
127 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
128 .enabled = 0,
129 .check_file = 1,
130 .persistent = 0,
131 .propagate = 1,
132 .shared = 0,
133 .max_db_size = DEFAULT_MAX_DB_SIZE,
134 .suggested_module = DEFAULT_SUGGESTED_MODULE,
135 .reset_res = 0,
136 .filename = "/etc/group",
137 .db_filename = _PATH_NSCD_GROUP_DB,
138 .disabled_iov = &grp_iov_disabled,
139 .postimeout = 3600,
140 .negtimeout = 60,
141 .wr_fd = -1,
142 .ro_fd = -1,
143 .mmap_used = false
145 [hstdb] = {
146 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
147 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
148 .enabled = 0,
149 .check_file = 1,
150 .persistent = 0,
151 .propagate = 0, /* Not used. */
152 .shared = 0,
153 .max_db_size = DEFAULT_MAX_DB_SIZE,
154 .suggested_module = DEFAULT_SUGGESTED_MODULE,
155 .reset_res = 1,
156 .filename = "/etc/hosts",
157 .db_filename = _PATH_NSCD_HOSTS_DB,
158 .disabled_iov = &hst_iov_disabled,
159 .postimeout = 3600,
160 .negtimeout = 20,
161 .wr_fd = -1,
162 .ro_fd = -1,
163 .mmap_used = false
165 [servdb] = {
166 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
167 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
168 .enabled = 0,
169 .check_file = 1,
170 .persistent = 0,
171 .propagate = 0, /* Not used. */
172 .shared = 0,
173 .max_db_size = DEFAULT_MAX_DB_SIZE,
174 .suggested_module = DEFAULT_SUGGESTED_MODULE,
175 .reset_res = 0,
176 .filename = "/etc/services",
177 .db_filename = _PATH_NSCD_SERVICES_DB,
178 .disabled_iov = &serv_iov_disabled,
179 .postimeout = 28800,
180 .negtimeout = 20,
181 .wr_fd = -1,
182 .ro_fd = -1,
183 .mmap_used = false
188 /* Mapping of request type to database. */
189 static struct
191 bool data_request;
192 struct database_dyn *db;
193 } const reqinfo[LASTREQ] =
195 [GETPWBYNAME] = { true, &dbs[pwddb] },
196 [GETPWBYUID] = { true, &dbs[pwddb] },
197 [GETGRBYNAME] = { true, &dbs[grpdb] },
198 [GETGRBYGID] = { true, &dbs[grpdb] },
199 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
200 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
201 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
202 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
203 [SHUTDOWN] = { false, NULL },
204 [GETSTAT] = { false, NULL },
205 [SHUTDOWN] = { false, NULL },
206 [GETFDPW] = { false, &dbs[pwddb] },
207 [GETFDGR] = { false, &dbs[grpdb] },
208 [GETFDHST] = { false, &dbs[hstdb] },
209 [GETAI] = { true, &dbs[hstdb] },
210 [INITGROUPS] = { true, &dbs[grpdb] },
211 [GETSERVBYNAME] = { true, &dbs[servdb] },
212 [GETSERVBYPORT] = { true, &dbs[servdb] },
213 [GETFDSERV] = { false, &dbs[servdb] }
217 /* Initial number of threads to use. */
218 int nthreads = -1;
219 /* Maximum number of threads to use. */
220 int max_nthreads = 32;
222 /* Socket for incoming connections. */
223 static int sock;
225 /* Number of times clients had to wait. */
226 unsigned long int client_queued;
228 /* Data structure for recording in-flight memory allocation. */
229 __thread struct mem_in_flight mem_in_flight attribute_tls_model_ie;
230 /* Global list of the mem_in_flight variables of all the threads. */
231 struct mem_in_flight *mem_in_flight_list;
/* Send all LEN bytes of BUF over the socket FD, restarting after
   interruptions and short writes.  Returns the negative result of
   send if it fails, otherwise the number of bytes actually sent
   (which is less than LEN only if send reported zero progress).  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cursor = buf;
  size_t pending = len;

  for (;;)
    {
      ssize_t sent = TEMP_FAILURE_RETRY (send (fd, cursor, pending,
                                               MSG_NOSIGNAL));

      if (sent < 0)
        return sent;
      if (sent == 0)
        break;

      cursor += sent;
      pending -= sent;
      if (pending == 0)
        break;
    }

  return len - pending;
}
#ifdef HAVE_SENDFILE
/* Transfer LEN bytes starting at offset OFF of FROMFD to TOFD with
   sendfile, restarting after interruptions and short transfers.
   Returns the negative result of sendfile if it fails, otherwise the
   number of bytes transferred.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t pending = len;

  for (;;)
    {
      ssize_t moved = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off,
                                                    pending));

      if (moved < 0)
        return moved;
      if (moved == 0)
        break;

      pending -= moved;
      if (pending <= 0)
        break;
    }

  return len - pending;
}
#endif
/* Markers stored in the usage map while a persistent database is
   verified.  The low bits name the kind of object occupying a byte,
   the high bits flag the first/last byte of an object and whether a
   "first" hash entry references it.  */
enum usekey
{
  use_not = 0,
  /* The following three are not really used, they are symbolic
     constants.  */
  use_first = 1 << 4,
  use_begin = 1 << 5,
  use_end = 1 << 6,

  /* Hash entry.  */
  use_he = 1,
  use_he_begin = use_begin | use_he,
  use_he_end = use_end | use_he,
#if SEPARATE_KEY
  /* Key stored outside of its data record.  */
  use_key = 2,
  use_key_begin = use_begin | use_key,
  use_key_end = use_end | use_key,
  use_key_first = use_first | use_key_begin,
#endif
  /* Data record.  */
  use_data = 3,
  use_data_begin = use_begin | use_data,
  use_data_end = use_end | use_data,
  use_data_first = use_first | use_data_begin
};
296 static int
297 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
298 enum usekey use, ref_t start, size_t len)
300 assert (len >= 2);
302 if (start > first_free || start + len > first_free
303 || (start & BLOCK_ALIGN_M1))
304 return 0;
306 if (usemap[start] == use_not)
308 /* Add the start marker. */
309 usemap[start] = use | use_begin;
310 use &= ~use_first;
312 while (--len > 0)
313 if (usemap[++start] != use_not)
314 return 0;
315 else
316 usemap[start] = use;
318 /* Add the end marker. */
319 usemap[start] = use | use_end;
321 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
323 /* Hash entries can't be shared. */
324 if (use == use_he)
325 return 0;
327 usemap[start] |= (use & use_first);
328 use &= ~use_first;
330 while (--len > 1)
331 if (usemap[++start] != use)
332 return 0;
334 if (usemap[++start] != (use | use_end))
335 return 0;
337 else
338 /* Points to a wrong object or somewhere in the middle. */
339 return 0;
341 return 1;
345 /* Verify data in persistent database. */
346 static int
347 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
349 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb);
351 time_t now = time (NULL);
353 struct database_pers_head *head = mem;
354 struct database_pers_head head_copy = *head;
356 /* Check that the header that was read matches the head in the database. */
357 if (memcmp (head, readhead, sizeof (*head)) != 0)
358 return 0;
360 /* First some easy tests: make sure the database header is sane. */
361 if (head->version != DB_VERSION
362 || head->header_size != sizeof (*head)
363 /* We allow a timestamp to be one hour ahead of the current time.
364 This should cover daylight saving time changes. */
365 || head->timestamp > now + 60 * 60 + 60
366 || (head->gc_cycle & 1)
367 || head->module == 0
368 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
369 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
370 || head->first_free < 0
371 || head->first_free > head->data_size
372 || (head->first_free & BLOCK_ALIGN_M1) != 0
373 || head->maxnentries < 0
374 || head->maxnsearched < 0)
375 return 0;
377 uint8_t *usemap = calloc (head->first_free, 1);
378 if (usemap == NULL)
379 return 0;
381 const char *data = (char *) &head->array[roundup (head->module,
382 ALIGN / sizeof (ref_t))];
384 nscd_ssize_t he_cnt = 0;
385 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
387 ref_t trail = head->array[cnt];
388 ref_t work = trail;
389 int tick = 0;
391 while (work != ENDREF)
393 if (! check_use (data, head->first_free, usemap, use_he, work,
394 sizeof (struct hashentry)))
395 goto fail;
397 /* Now we know we can dereference the record. */
398 struct hashentry *here = (struct hashentry *) (data + work);
400 ++he_cnt;
402 /* Make sure the record is for this type of service. */
403 if (here->type >= LASTREQ
404 || reqinfo[here->type].db != &dbs[dbnr])
405 goto fail;
407 /* Validate boolean field value. */
408 if (here->first != false && here->first != true)
409 goto fail;
411 if (here->len < 0)
412 goto fail;
414 /* Now the data. */
415 if (here->packet < 0
416 || here->packet > head->first_free
417 || here->packet + sizeof (struct datahead) > head->first_free)
418 goto fail;
420 struct datahead *dh = (struct datahead *) (data + here->packet);
422 if (! check_use (data, head->first_free, usemap,
423 use_data | (here->first ? use_first : 0),
424 here->packet, dh->allocsize))
425 goto fail;
427 if (dh->allocsize < sizeof (struct datahead)
428 || dh->recsize > dh->allocsize
429 || (dh->notfound != false && dh->notfound != true)
430 || (dh->usable != false && dh->usable != true))
431 goto fail;
433 if (here->key < here->packet + sizeof (struct datahead)
434 || here->key > here->packet + dh->allocsize
435 || here->key + here->len > here->packet + dh->allocsize)
437 #if SEPARATE_KEY
438 /* If keys can appear outside of data, this should be done
439 instead. But gc doesn't mark the data in that case. */
440 if (! check_use (data, head->first_free, usemap,
441 use_key | (here->first ? use_first : 0),
442 here->key, here->len))
443 #endif
444 goto fail;
447 work = here->next;
449 if (work == trail)
450 /* A circular list, this must not happen. */
451 goto fail;
452 if (tick)
453 trail = ((struct hashentry *) (data + trail))->next;
454 tick = 1 - tick;
458 if (he_cnt != head->nentries)
459 goto fail;
461 /* See if all data and keys had at least one reference from
462 he->first == true hashentry. */
463 for (ref_t idx = 0; idx < head->first_free; ++idx)
465 #if SEPARATE_KEY
466 if (usemap[idx] == use_key_begin)
467 goto fail;
468 #endif
469 if (usemap[idx] == use_data_begin)
470 goto fail;
473 /* Finally, make sure the database hasn't changed since the first test. */
474 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
475 goto fail;
477 free (usemap);
478 return 1;
480 fail:
481 free (usemap);
482 return 0;
486 #ifdef O_CLOEXEC
487 # define EXTRA_O_FLAGS O_CLOEXEC
488 #else
489 # define EXTRA_O_FLAGS 0
490 #endif
493 /* Initialize database information structures. */
494 void
495 nscd_init (void)
497 /* Look up unprivileged uid/gid/groups before we start listening on the
498 socket */
499 if (server_user != NULL)
500 begin_drop_privileges ();
502 if (nthreads == -1)
503 /* No configuration for this value, assume a default. */
504 nthreads = 4;
506 for (size_t cnt = 0; cnt < lastdb; ++cnt)
507 if (dbs[cnt].enabled)
509 pthread_rwlock_init (&dbs[cnt].lock, NULL);
510 pthread_mutex_init (&dbs[cnt].memlock, NULL);
512 if (dbs[cnt].persistent)
514 /* Try to open the appropriate file on disk. */
515 int fd = open (dbs[cnt].db_filename, O_RDWR | EXTRA_O_FLAGS);
516 if (fd != -1)
518 char *msg = NULL;
519 struct stat64 st;
520 void *mem;
521 size_t total;
522 struct database_pers_head head;
523 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
524 sizeof (head)));
525 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
527 fail_db_errno:
528 /* The code is single-threaded at this point so
529 using strerror is just fine. */
530 msg = strerror (errno);
531 fail_db:
532 dbg_log (_("invalid persistent database file \"%s\": %s"),
533 dbs[cnt].db_filename, msg);
534 unlink (dbs[cnt].db_filename);
536 else if (head.module == 0 && head.data_size == 0)
538 /* The file has been created, but the head has not
539 been initialized yet. */
540 msg = _("uninitialized header");
541 goto fail_db;
543 else if (head.header_size != (int) sizeof (head))
545 msg = _("header size does not match");
546 goto fail_db;
548 else if ((total = (sizeof (head)
549 + roundup (head.module * sizeof (ref_t),
550 ALIGN)
551 + head.data_size))
552 > st.st_size
553 || total < sizeof (head))
555 msg = _("file size does not match");
556 goto fail_db;
558 /* Note we map with the maximum size allowed for the
559 database. This is likely much larger than the
560 actual file size. This is OK on most OSes since
561 extensions of the underlying file will
562 automatically translate more pages available for
563 memory access. */
564 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
565 PROT_READ | PROT_WRITE,
566 MAP_SHARED, fd, 0))
567 == MAP_FAILED)
568 goto fail_db_errno;
569 else if (!verify_persistent_db (mem, &head, cnt))
571 munmap (mem, total);
572 msg = _("verification failed");
573 goto fail_db;
575 else
577 /* Success. We have the database. */
578 dbs[cnt].head = mem;
579 dbs[cnt].memsize = total;
580 dbs[cnt].data = (char *)
581 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
582 ALIGN / sizeof (ref_t))];
583 dbs[cnt].mmap_used = true;
585 if (dbs[cnt].suggested_module > head.module)
586 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
587 dbnames[cnt]);
589 dbs[cnt].wr_fd = fd;
590 fd = -1;
591 /* We also need a read-only descriptor. */
592 if (dbs[cnt].shared)
594 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
595 O_RDONLY | EXTRA_O_FLAGS);
596 if (dbs[cnt].ro_fd == -1)
597 dbg_log (_("\
598 cannot create read-only descriptor for \"%s\"; no mmap"),
599 dbs[cnt].db_filename);
602 // XXX Shall we test whether the descriptors actually
603 // XXX point to the same file?
606 /* Close the file descriptors in case something went
607 wrong in which case the variable have not been
608 assigned -1. */
609 if (fd != -1)
610 close (fd);
614 if (dbs[cnt].head == NULL)
616 /* No database loaded. Allocate the data structure,
617 possibly on disk. */
618 struct database_pers_head head;
619 size_t total = (sizeof (head)
620 + roundup (dbs[cnt].suggested_module
621 * sizeof (ref_t), ALIGN)
622 + (dbs[cnt].suggested_module
623 * DEFAULT_DATASIZE_PER_BUCKET));
625 /* Try to create the database. If we do not need a
626 persistent database create a temporary file. */
627 int fd;
628 int ro_fd = -1;
629 if (dbs[cnt].persistent)
631 fd = open (dbs[cnt].db_filename,
632 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | EXTRA_O_FLAGS,
633 S_IRUSR | S_IWUSR);
634 if (fd != -1 && dbs[cnt].shared)
635 ro_fd = open (dbs[cnt].db_filename,
636 O_RDONLY | EXTRA_O_FLAGS);
638 else
640 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
641 fd = mkostemp (fname, EXTRA_O_FLAGS);
643 /* We do not need the file name anymore after we
644 opened another file descriptor in read-only mode. */
645 if (fd != -1)
647 if (dbs[cnt].shared)
648 ro_fd = open (fname, O_RDONLY | EXTRA_O_FLAGS);
650 unlink (fname);
654 if (fd == -1)
656 if (errno == EEXIST)
658 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
659 dbnames[cnt], dbs[cnt].db_filename);
660 // XXX Correct way to terminate?
661 exit (1);
664 if (dbs[cnt].persistent)
665 dbg_log (_("cannot create %s; no persistent database used"),
666 dbs[cnt].db_filename);
667 else
668 dbg_log (_("cannot create %s; no sharing possible"),
669 dbs[cnt].db_filename);
671 dbs[cnt].persistent = 0;
672 // XXX remember: no mmap
674 else
676 /* Tell the user if we could not create the read-only
677 descriptor. */
678 if (ro_fd == -1 && dbs[cnt].shared)
679 dbg_log (_("\
680 cannot create read-only descriptor for \"%s\"; no mmap"),
681 dbs[cnt].db_filename);
683 /* Before we create the header, initialiye the hash
684 table. So that if we get interrupted if writing
685 the header we can recognize a partially initialized
686 database. */
687 size_t ps = sysconf (_SC_PAGESIZE);
688 char tmpbuf[ps];
689 assert (~ENDREF == 0);
690 memset (tmpbuf, '\xff', ps);
692 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
693 off_t offset = sizeof (head);
695 size_t towrite;
696 if (offset % ps != 0)
698 towrite = MIN (remaining, ps - (offset % ps));
699 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
700 goto write_fail;
701 offset += towrite;
702 remaining -= towrite;
705 while (remaining > ps)
707 if (pwrite (fd, tmpbuf, ps, offset) == -1)
708 goto write_fail;
709 offset += ps;
710 remaining -= ps;
713 if (remaining > 0
714 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
715 goto write_fail;
717 /* Create the header of the file. */
718 struct database_pers_head head =
720 .version = DB_VERSION,
721 .header_size = sizeof (head),
722 .module = dbs[cnt].suggested_module,
723 .data_size = (dbs[cnt].suggested_module
724 * DEFAULT_DATASIZE_PER_BUCKET),
725 .first_free = 0
727 void *mem;
729 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
730 != sizeof (head))
731 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
732 != 0)
733 || (mem = mmap (NULL, dbs[cnt].max_db_size,
734 PROT_READ | PROT_WRITE,
735 MAP_SHARED, fd, 0)) == MAP_FAILED)
737 write_fail:
738 unlink (dbs[cnt].db_filename);
739 dbg_log (_("cannot write to database file %s: %s"),
740 dbs[cnt].db_filename, strerror (errno));
741 dbs[cnt].persistent = 0;
743 else
745 /* Success. */
746 dbs[cnt].head = mem;
747 dbs[cnt].data = (char *)
748 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
749 ALIGN / sizeof (ref_t))];
750 dbs[cnt].memsize = total;
751 dbs[cnt].mmap_used = true;
753 /* Remember the descriptors. */
754 dbs[cnt].wr_fd = fd;
755 dbs[cnt].ro_fd = ro_fd;
756 fd = -1;
757 ro_fd = -1;
760 if (fd != -1)
761 close (fd);
762 if (ro_fd != -1)
763 close (ro_fd);
767 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
768 /* We do not check here whether the O_CLOEXEC provided to the
769 open call was successful or not. The two fcntl calls are
770 only performed once each per process start-up and therefore
771 is not noticeable at all. */
772 if (paranoia
773 && ((dbs[cnt].wr_fd != -1
774 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
775 || (dbs[cnt].ro_fd != -1
776 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
778 dbg_log (_("\
779 cannot set socket to close on exec: %s; disabling paranoia mode"),
780 strerror (errno));
781 paranoia = 0;
783 #endif
785 if (dbs[cnt].head == NULL)
787 /* We do not use the persistent database. Just
788 create an in-memory data structure. */
789 assert (! dbs[cnt].persistent);
791 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
792 + (dbs[cnt].suggested_module
793 * sizeof (ref_t)));
794 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
795 assert (~ENDREF == 0);
796 memset (dbs[cnt].head->array, '\xff',
797 dbs[cnt].suggested_module * sizeof (ref_t));
798 dbs[cnt].head->module = dbs[cnt].suggested_module;
799 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
800 * dbs[cnt].head->module);
801 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
802 dbs[cnt].head->first_free = 0;
804 dbs[cnt].shared = 0;
805 assert (dbs[cnt].ro_fd == -1);
808 if (dbs[cnt].check_file)
810 /* We need the modification date of the file. */
811 struct stat64 st;
813 if (stat64 (dbs[cnt].filename, &st) < 0)
815 /* We cannot stat() the file, disable file checking. */
816 dbg_log (_("cannot stat() file `%s': %s"),
817 dbs[cnt].filename, strerror (errno));
818 dbs[cnt].check_file = 0;
820 else
821 dbs[cnt].file_mtime = st.st_mtime;
825 /* Create the socket. */
826 sock = socket (AF_UNIX, SOCK_STREAM, 0);
827 if (sock < 0)
829 dbg_log (_("cannot open socket: %s"), strerror (errno));
830 exit (errno == EACCES ? 4 : 1);
832 /* Bind a name to the socket. */
833 struct sockaddr_un sock_addr;
834 sock_addr.sun_family = AF_UNIX;
835 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
836 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
838 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
839 exit (errno == EACCES ? 4 : 1);
842 /* We don't want to get stuck on accept. */
843 int fl = fcntl (sock, F_GETFL);
844 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
846 dbg_log (_("cannot change socket to nonblocking mode: %s"),
847 strerror (errno));
848 exit (1);
851 /* The descriptor needs to be closed on exec. */
852 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
854 dbg_log (_("cannot set socket to close on exec: %s"),
855 strerror (errno));
856 exit (1);
859 /* Set permissions for the socket. */
860 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
862 /* Set the socket up to accept connections. */
863 if (listen (sock, SOMAXCONN) < 0)
865 dbg_log (_("cannot enable socket to accept connections: %s"),
866 strerror (errno));
867 exit (1);
870 /* Change to unprivileged uid/gid/groups if specifed in config file */
871 if (server_user != NULL)
872 finish_drop_privileges ();
876 /* Close the connections. */
877 void
878 close_sockets (void)
880 close (sock);
884 static void
885 invalidate_cache (char *key, int fd)
887 dbtype number;
888 int32_t resp;
890 for (number = pwddb; number < lastdb; ++number)
891 if (strcmp (key, dbnames[number]) == 0)
893 if (dbs[number].reset_res)
894 res_init ();
896 break;
899 if (number == lastdb)
901 resp = EINVAL;
902 writeall (fd, &resp, sizeof (resp));
903 return;
906 if (dbs[number].enabled)
908 pthread_mutex_lock (&dbs[number].prune_lock);
909 prune_cache (&dbs[number], LONG_MAX, fd);
910 pthread_mutex_unlock (&dbs[number].prune_lock);
912 else
914 resp = 0;
915 writeall (fd, &resp, sizeof (resp));
920 #ifdef SCM_RIGHTS
921 static void
922 send_ro_fd (struct database_dyn *db, char *key, int fd)
924 /* If we do not have an read-only file descriptor do nothing. */
925 if (db->ro_fd == -1)
926 return;
928 /* We need to send some data along with the descriptor. */
929 uint64_t mapsize = (db->head->data_size
930 + roundup (db->head->module * sizeof (ref_t), ALIGN)
931 + sizeof (struct database_pers_head));
932 struct iovec iov[2];
933 iov[0].iov_base = key;
934 iov[0].iov_len = strlen (key) + 1;
935 iov[1].iov_base = &mapsize;
936 iov[1].iov_len = sizeof (mapsize);
938 /* Prepare the control message to transfer the descriptor. */
939 union
941 struct cmsghdr hdr;
942 char bytes[CMSG_SPACE (sizeof (int))];
943 } buf;
944 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
945 .msg_control = buf.bytes,
946 .msg_controllen = sizeof (buf) };
947 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
949 cmsg->cmsg_level = SOL_SOCKET;
950 cmsg->cmsg_type = SCM_RIGHTS;
951 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
953 *(int *) CMSG_DATA (cmsg) = db->ro_fd;
955 msg.msg_controllen = cmsg->cmsg_len;
957 /* Send the control message. We repeat when we are interrupted but
958 everything else is ignored. */
959 #ifndef MSG_NOSIGNAL
960 # define MSG_NOSIGNAL 0
961 #endif
962 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
964 if (__builtin_expect (debug_level > 0, 0))
965 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
967 #endif /* SCM_RIGHTS */
970 /* Handle new request. */
971 static void
972 handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
974 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
976 if (debug_level > 0)
977 dbg_log (_("\
978 cannot handle old request version %d; current version is %d"),
979 req->version, NSCD_VERSION);
980 return;
983 /* Perform the SELinux check before we go on to the standard checks. */
984 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
986 if (debug_level > 0)
988 #ifdef SO_PEERCRED
989 # ifdef PATH_MAX
990 char buf[PATH_MAX];
991 # else
992 char buf[4096];
993 # endif
995 snprintf (buf, sizeof (buf), "/proc/%ld/exe", (long int) pid);
996 ssize_t n = readlink (buf, buf, sizeof (buf) - 1);
998 if (n <= 0)
999 dbg_log (_("\
1000 request from %ld not handled due to missing permission"), (long int) pid);
1001 else
1003 buf[n] = '\0';
1004 dbg_log (_("\
1005 request from '%s' [%ld] not handled due to missing permission"),
1006 buf, (long int) pid);
1008 #else
1009 dbg_log (_("request not handled due to missing permission"));
1010 #endif
1012 return;
1015 struct database_dyn *db = reqinfo[req->type].db;
1017 /* See whether we can service the request from the cache. */
1018 if (__builtin_expect (reqinfo[req->type].data_request, true))
1020 if (__builtin_expect (debug_level, 0) > 0)
1022 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
1024 char buf[INET6_ADDRSTRLEN];
1026 dbg_log ("\t%s (%s)", serv2str[req->type],
1027 inet_ntop (req->type == GETHOSTBYADDR
1028 ? AF_INET : AF_INET6,
1029 key, buf, sizeof (buf)));
1031 else
1032 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1035 /* Is this service enabled? */
1036 if (__builtin_expect (!db->enabled, 0))
1038 /* No, sent the prepared record. */
1039 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1040 db->disabled_iov->iov_len,
1041 MSG_NOSIGNAL))
1042 != (ssize_t) db->disabled_iov->iov_len
1043 && __builtin_expect (debug_level, 0) > 0)
1045 /* We have problems sending the result. */
1046 char buf[256];
1047 dbg_log (_("cannot write result: %s"),
1048 strerror_r (errno, buf, sizeof (buf)));
1051 return;
1054 /* Be sure we can read the data. */
1055 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
1057 ++db->head->rdlockdelayed;
1058 pthread_rwlock_rdlock (&db->lock);
1061 /* See whether we can handle it from the cache. */
1062 struct datahead *cached;
1063 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1064 db, uid);
1065 if (cached != NULL)
1067 /* Hurray it's in the cache. */
1068 ssize_t nwritten;
1070 #ifdef HAVE_SENDFILE
1071 if (__builtin_expect (db->mmap_used, 1))
1073 assert (db->wr_fd != -1);
1074 assert ((char *) cached->data > (char *) db->data);
1075 assert ((char *) cached->data - (char *) db->head
1076 + cached->recsize
1077 <= (sizeof (struct database_pers_head)
1078 + db->head->module * sizeof (ref_t)
1079 + db->head->data_size));
1080 nwritten = sendfileall (fd, db->wr_fd,
1081 (char *) cached->data
1082 - (char *) db->head, cached->recsize);
1083 # ifndef __ASSUME_SENDFILE
1084 if (nwritten == -1 && errno == ENOSYS)
1085 goto use_write;
1086 # endif
1088 else
1089 # ifndef __ASSUME_SENDFILE
1090 use_write:
1091 # endif
1092 #endif
1093 nwritten = writeall (fd, cached->data, cached->recsize);
1095 if (nwritten != cached->recsize
1096 && __builtin_expect (debug_level, 0) > 0)
1098 /* We have problems sending the result. */
1099 char buf[256];
1100 dbg_log (_("cannot write result: %s"),
1101 strerror_r (errno, buf, sizeof (buf)));
1104 pthread_rwlock_unlock (&db->lock);
1106 return;
1109 pthread_rwlock_unlock (&db->lock);
1111 else if (__builtin_expect (debug_level, 0) > 0)
1113 if (req->type == INVALIDATE)
1114 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1115 else
1116 dbg_log ("\t%s", serv2str[req->type]);
1119 /* Handle the request. */
1120 switch (req->type)
1122 case GETPWBYNAME:
1123 addpwbyname (db, fd, req, key, uid);
1124 break;
1126 case GETPWBYUID:
1127 addpwbyuid (db, fd, req, key, uid);
1128 break;
1130 case GETGRBYNAME:
1131 addgrbyname (db, fd, req, key, uid);
1132 break;
1134 case GETGRBYGID:
1135 addgrbygid (db, fd, req, key, uid);
1136 break;
1138 case GETHOSTBYNAME:
1139 addhstbyname (db, fd, req, key, uid);
1140 break;
1142 case GETHOSTBYNAMEv6:
1143 addhstbynamev6 (db, fd, req, key, uid);
1144 break;
1146 case GETHOSTBYADDR:
1147 addhstbyaddr (db, fd, req, key, uid);
1148 break;
1150 case GETHOSTBYADDRv6:
1151 addhstbyaddrv6 (db, fd, req, key, uid);
1152 break;
1154 case GETAI:
1155 addhstai (db, fd, req, key, uid);
1156 break;
1158 case INITGROUPS:
1159 addinitgroups (db, fd, req, key, uid);
1160 break;
1162 case GETSERVBYNAME:
1163 addservbyname (db, fd, req, key, uid);
1164 break;
1166 case GETSERVBYPORT:
1167 addservbyport (db, fd, req, key, uid);
1168 break;
1170 case GETSTAT:
1171 case SHUTDOWN:
1172 case INVALIDATE:
1174 /* Get the callers credentials. */
1175 #ifdef SO_PEERCRED
1176 struct ucred caller;
1177 socklen_t optlen = sizeof (caller);
1179 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1181 char buf[256];
1183 dbg_log (_("error getting caller's id: %s"),
1184 strerror_r (errno, buf, sizeof (buf)));
1185 break;
1188 uid = caller.uid;
1189 #else
1190 /* Some systems have no SO_PEERCRED implementation. They don't
1191 care about security so we don't as well. */
1192 uid = 0;
1193 #endif
1196 /* Accept shutdown, getstat and invalidate only from root. For
1197 the stat call also allow the user specified in the config file. */
1198 if (req->type == GETSTAT)
1200 if (uid == 0 || uid == stat_uid)
1201 send_stats (fd, dbs);
1203 else if (uid == 0)
1205 if (req->type == INVALIDATE)
1206 invalidate_cache (key, fd);
1207 else
1208 termination_handler (0);
1210 break;
1212 case GETFDPW:
1213 case GETFDGR:
1214 case GETFDHST:
1215 case GETFDSERV:
1216 #ifdef SCM_RIGHTS
1217 send_ro_fd (reqinfo[req->type].db, key, fd);
1218 #endif
1219 break;
1221 default:
1222 /* Ignore the command, it's nothing we know. */
1223 break;
1228 /* Restart the process. */
1229 static void
1230 restart (void)
1232 /* First determine the parameters. We do not use the parameters
1233 passed to main() since in case nscd is started by running the
1234 dynamic linker this will not work. Yes, this is not the usual
1235 case but nscd is part of glibc and we occasionally do this. */
1236 size_t buflen = 1024;
1237 char *buf = alloca (buflen);
1238 size_t readlen = 0;
1239 int fd = open ("/proc/self/cmdline", O_RDONLY);
1240 if (fd == -1)
1242 dbg_log (_("\
1243 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1244 strerror (errno));
1246 paranoia = 0;
1247 return;
1250 while (1)
1252 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1253 buflen - readlen));
1254 if (n == -1)
1256 dbg_log (_("\
1257 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1258 strerror (errno));
1260 close (fd);
1261 paranoia = 0;
1262 return;
1265 readlen += n;
1267 if (readlen < buflen)
1268 break;
1270 /* We might have to extend the buffer. */
1271 size_t old_buflen = buflen;
1272 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1273 buf = memmove (newp, buf, old_buflen);
1276 close (fd);
1278 /* Parse the command line. Worst case scenario: every two
1279 characters form one parameter (one character plus NUL). */
1280 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1281 int argc = 0;
1283 char *cp = buf;
1284 while (cp < buf + readlen)
1286 argv[argc++] = cp;
1287 cp = (char *) rawmemchr (cp, '\0') + 1;
1289 argv[argc] = NULL;
1291 /* Second, change back to the old user if we changed it. */
1292 if (server_user != NULL)
1294 if (setresuid (old_uid, old_uid, old_uid) != 0)
1296 dbg_log (_("\
1297 cannot change to old UID: %s; disabling paranoia mode"),
1298 strerror (errno));
1300 paranoia = 0;
1301 return;
1304 if (setresgid (old_gid, old_gid, old_gid) != 0)
1306 dbg_log (_("\
1307 cannot change to old GID: %s; disabling paranoia mode"),
1308 strerror (errno));
1310 setuid (server_uid);
1311 paranoia = 0;
1312 return;
1316 /* Next change back to the old working directory. */
1317 if (chdir (oldcwd) == -1)
1319 dbg_log (_("\
1320 cannot change to old working directory: %s; disabling paranoia mode"),
1321 strerror (errno));
1323 if (server_user != NULL)
1325 setuid (server_uid);
1326 setgid (server_gid);
1328 paranoia = 0;
1329 return;
1332 /* Synchronize memory. */
1333 for (int cnt = 0; cnt < lastdb; ++cnt)
1334 if (dbs[cnt].enabled)
1336 /* Make sure nobody keeps using the database. */
1337 dbs[cnt].head->timestamp = 0;
1339 if (dbs[cnt].persistent)
1340 // XXX async OK?
1341 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1344 /* The preparations are done. */
1345 execv ("/proc/self/exe", argv);
1347 /* If we come here, we will never be able to re-exec. */
1348 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1349 strerror (errno));
1351 if (server_user != NULL)
1353 setuid (server_uid);
1354 setgid (server_gid);
1356 if (chdir ("/") != 0)
1357 dbg_log (_("cannot change current working directory to \"/\": %s"),
1358 strerror (errno));
1359 paranoia = 0;
/* List of file descriptors.  One node per accepted connection that is
   waiting for a worker; NEXT == NULL marks a node that is not in use
   (fd_ready searches for such a node, nscd_run_worker resets NEXT to
   NULL after taking the descriptor).  */
struct fdlist
{
  int fd;
  struct fdlist *next;
};
/* Memory allocated for the list.  Allocated in start_threads with
   NCONNS elements.  */
static struct fdlist *fdlist;
/* List of currently ready-to-read file descriptors.  This is a
   circular list; READYLIST points at the most recently added node and
   READYLIST->next is the node that is handed out next.  NULL when the
   list is empty.  */
static struct fdlist *readylist;

/* Conditional variable and mutex to signal availability of entries in
   READYLIST.  Both are initialized statically; the worker threads wait
   on the condvar without a timeout.  (The clock selection made in
   start_threads is applied to the per-database prune condvars, not to
   this one.)  */
static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;

/* The clock to use for computing the timeouts of the prune condvars.
   start_threads switches this to CLOCK_MONOTONIC when available.  */
static clockid_t timeout_clock = CLOCK_REALTIME;

/* Number of threads ready to handle the READYLIST.  Modified by the
   worker threads while they hold READYLIST_LOCK.  */
static unsigned long int nready;
1387 /* Function for the clean-up threads. */
1388 static void *
1389 __attribute__ ((__noreturn__))
1390 nscd_run_prune (void *p)
1392 const long int my_number = (long int) p;
1393 assert (dbs[my_number].enabled);
1395 int dont_need_update = setup_thread (&dbs[my_number]);
1397 /* We are running. */
1398 dbs[my_number].head->timestamp = time (NULL);
1400 struct timespec prune_ts;
1401 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1402 /* Should never happen. */
1403 abort ();
1405 /* Compute the initial timeout time. Prevent all the timers to go
1406 off at the same time by adding a db-based value. */
1407 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1409 pthread_mutex_lock (&dbs[my_number].prune_lock);
1410 while (1)
1412 /* Wait, but not forever. */
1413 int e = pthread_cond_timedwait (&dbs[my_number].prune_cond,
1414 &dbs[my_number].prune_lock,
1415 &prune_ts);
1416 assert (e == 0 || e == ETIMEDOUT);
1418 time_t next_wait;
1419 time_t now = time (NULL);
1420 if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time)
1422 next_wait = prune_cache (&dbs[my_number], now, -1);
1423 next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1424 /* If clients cannot determine for sure whether nscd is running
1425 we need to wake up occasionally to update the timestamp.
1426 Wait 90% of the update period. */
1427 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1428 if (__builtin_expect (! dont_need_update, 0))
1429 next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1431 /* Make it known when we will wake up again. */
1432 dbs[my_number].wakeup_time = now + next_wait;
1434 else
1435 /* The cache was just pruned. Do not do it again now. Just
1436 use the new timeout value. */
1437 next_wait = dbs[my_number].wakeup_time - now;
1439 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1440 /* Should never happen. */
1441 abort ();
1443 /* Compute next timeout time. */
1444 prune_ts.tv_sec += next_wait;
1449 /* This is the main loop. It is replicated in different threads but
1450 the the use of the ready list makes sure only one thread handles an
1451 incoming connection. */
1452 static void *
1453 __attribute__ ((__noreturn__))
1454 nscd_run_worker (void *p)
1456 char buf[256];
1458 /* Initialize the memory-in-flight list. */
1459 for (enum in_flight idx = 0; idx < IDX_last; ++idx)
1460 mem_in_flight.block[idx].dbidx = -1;
1461 /* And queue this threads structure. */
1463 mem_in_flight.next = mem_in_flight_list;
1464 while (atomic_compare_and_exchange_bool_acq (&mem_in_flight_list,
1465 &mem_in_flight,
1466 mem_in_flight.next) != 0);
1468 /* Initial locking. */
1469 pthread_mutex_lock (&readylist_lock);
1471 /* One more thread available. */
1472 ++nready;
1474 while (1)
1476 while (readylist == NULL)
1477 pthread_cond_wait (&readylist_cond, &readylist_lock);
1479 struct fdlist *it = readylist->next;
1480 if (readylist->next == readylist)
1481 /* Just one entry on the list. */
1482 readylist = NULL;
1483 else
1484 readylist->next = it->next;
1486 /* Extract the information and mark the record ready to be used
1487 again. */
1488 int fd = it->fd;
1489 it->next = NULL;
1491 /* One more thread available. */
1492 --nready;
1494 /* We are done with the list. */
1495 pthread_mutex_unlock (&readylist_lock);
1497 /* We do not want to block on a short read or so. */
1498 int fl = fcntl (fd, F_GETFL);
1499 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1500 goto close_and_out;
1502 /* Now read the request. */
1503 request_header req;
1504 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1505 != sizeof (req), 0))
1507 /* We failed to read data. Note that this also might mean we
1508 failed because we would have blocked. */
1509 if (debug_level > 0)
1510 dbg_log (_("short read while reading request: %s"),
1511 strerror_r (errno, buf, sizeof (buf)));
1512 goto close_and_out;
1515 /* Check whether this is a valid request type. */
1516 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1517 goto close_and_out;
1519 /* Some systems have no SO_PEERCRED implementation. They don't
1520 care about security so we don't as well. */
1521 uid_t uid = -1;
1522 #ifdef SO_PEERCRED
1523 pid_t pid = 0;
1525 if (__builtin_expect (debug_level > 0, 0))
1527 struct ucred caller;
1528 socklen_t optlen = sizeof (caller);
1530 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1531 pid = caller.pid;
1533 #else
1534 const pid_t pid = 0;
1535 #endif
1537 /* It should not be possible to crash the nscd with a silly
1538 request (i.e., a terribly large key). We limit the size to 1kb. */
1539 if (__builtin_expect (req.key_len, 1) < 0
1540 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1542 if (debug_level > 0)
1543 dbg_log (_("key length in request too long: %d"), req.key_len);
1545 else
1547 /* Get the key. */
1548 char keybuf[MAXKEYLEN];
1550 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1551 req.key_len))
1552 != req.key_len, 0))
1554 /* Again, this can also mean we would have blocked. */
1555 if (debug_level > 0)
1556 dbg_log (_("short read while reading request key: %s"),
1557 strerror_r (errno, buf, sizeof (buf)));
1558 goto close_and_out;
1561 if (__builtin_expect (debug_level, 0) > 0)
1563 #ifdef SO_PEERCRED
1564 if (pid != 0)
1565 dbg_log (_("\
1566 handle_request: request received (Version = %d) from PID %ld"),
1567 req.version, (long int) pid);
1568 else
1569 #endif
1570 dbg_log (_("\
1571 handle_request: request received (Version = %d)"), req.version);
1574 /* Phew, we got all the data, now process it. */
1575 handle_request (fd, &req, keybuf, uid, pid);
1578 close_and_out:
1579 /* We are done. */
1580 close (fd);
1582 /* Re-locking. */
1583 pthread_mutex_lock (&readylist_lock);
1585 /* One more thread available. */
1586 ++nready;
1591 static unsigned int nconns;
1593 static void
1594 fd_ready (int fd)
1596 pthread_mutex_lock (&readylist_lock);
1598 /* Find an empty entry in FDLIST. */
1599 size_t inner;
1600 for (inner = 0; inner < nconns; ++inner)
1601 if (fdlist[inner].next == NULL)
1602 break;
1603 assert (inner < nconns);
1605 fdlist[inner].fd = fd;
1607 if (readylist == NULL)
1608 readylist = fdlist[inner].next = &fdlist[inner];
1609 else
1611 fdlist[inner].next = readylist->next;
1612 readylist = readylist->next = &fdlist[inner];
1615 bool do_signal = true;
1616 if (__builtin_expect (nready == 0, 0))
1618 ++client_queued;
1619 do_signal = false;
1621 /* Try to start another thread to help out. */
1622 pthread_t th;
1623 if (nthreads < max_nthreads
1624 && pthread_create (&th, &attr, nscd_run_worker,
1625 (void *) (long int) nthreads) == 0)
1627 /* We got another thread. */
1628 ++nthreads;
1629 /* The new thread might need a kick. */
1630 do_signal = true;
1635 pthread_mutex_unlock (&readylist_lock);
1637 /* Tell one of the worker threads there is work to do. */
1638 if (do_signal)
1639 pthread_cond_signal (&readylist_cond);
1643 /* Check whether restarting should happen. */
1644 static inline int
1645 restart_p (time_t now)
1647 return (paranoia && readylist == NULL && nready == nthreads
1648 && now >= restart_time);
1652 /* Array for times a connection was accepted. */
1653 static time_t *starttime;
1656 static void
1657 __attribute__ ((__noreturn__))
1658 main_loop_poll (void)
1660 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1661 * sizeof (conns[0]));
1663 conns[0].fd = sock;
1664 conns[0].events = POLLRDNORM;
1665 size_t nused = 1;
1666 size_t firstfree = 1;
1668 while (1)
1670 /* Wait for any event. We wait at most a couple of seconds so
1671 that we can check whether we should close any of the accepted
1672 connections since we have not received a request. */
1673 #define MAX_ACCEPT_TIMEOUT 30
1674 #define MIN_ACCEPT_TIMEOUT 5
1675 #define MAIN_THREAD_TIMEOUT \
1676 (MAX_ACCEPT_TIMEOUT * 1000 \
1677 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1679 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1681 time_t now = time (NULL);
1683 /* If there is a descriptor ready for reading or there is a new
1684 connection, process this now. */
1685 if (n > 0)
1687 if (conns[0].revents != 0)
1689 /* We have a new incoming connection. Accept the connection. */
1690 int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
1692 /* Use the descriptor if we have not reached the limit. */
1693 if (fd >= 0)
1695 if (firstfree < nconns)
1697 conns[firstfree].fd = fd;
1698 conns[firstfree].events = POLLRDNORM;
1699 starttime[firstfree] = now;
1700 if (firstfree >= nused)
1701 nused = firstfree + 1;
1704 ++firstfree;
1705 while (firstfree < nused && conns[firstfree].fd != -1);
1707 else
1708 /* We cannot use the connection so close it. */
1709 close (fd);
1712 --n;
1715 for (size_t cnt = 1; cnt < nused && n > 0; ++cnt)
1716 if (conns[cnt].revents != 0)
1718 fd_ready (conns[cnt].fd);
1720 /* Clean up the CONNS array. */
1721 conns[cnt].fd = -1;
1722 if (cnt < firstfree)
1723 firstfree = cnt;
1724 if (cnt == nused - 1)
1726 --nused;
1727 while (conns[nused - 1].fd == -1);
1729 --n;
1733 /* Now find entries which have timed out. */
1734 assert (nused > 0);
1736 /* We make the timeout length depend on the number of file
1737 descriptors currently used. */
1738 #define ACCEPT_TIMEOUT \
1739 (MAX_ACCEPT_TIMEOUT \
1740 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
1741 time_t laststart = now - ACCEPT_TIMEOUT;
1743 for (size_t cnt = nused - 1; cnt > 0; --cnt)
1745 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
1747 /* Remove the entry, it timed out. */
1748 (void) close (conns[cnt].fd);
1749 conns[cnt].fd = -1;
1751 if (cnt < firstfree)
1752 firstfree = cnt;
1753 if (cnt == nused - 1)
1755 --nused;
1756 while (conns[nused - 1].fd == -1);
1760 if (restart_p (now))
1761 restart ();
#ifdef HAVE_EPOLL
/* Main loop of the main thread based on epoll, using the epoll
   descriptor EFD.  Returns only if the listening socket cannot be
   added to EFD, in which case the caller falls back to
   main_loop_poll.

   STARTTIME is indexed by descriptor number here; a value of zero
   means "no pending connection on this descriptor".  HIGHEST tracks
   the largest descriptor that may have a nonzero entry.  NUSED is one
   (for the listening socket) plus the number of accepted connections
   still waiting for their request; the MAIN_THREAD_TIMEOUT and
   ACCEPT_TIMEOUT macros (defined in main_loop_poll) read it by name.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  int nused = 1;
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));

            if (fd >= 0)
              {
                /* Try to add the new descriptor.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
        else
          {
            /* Remove the descriptor from the epoll descriptor.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (cnt != sock && starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;

            /* The connection is gone, so it must no longer be counted.
               Without this NUSED only ever grows for timed-out
               connections and skews both timeout computations.  */
            --nused;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
1860 /* Start all the threads we want. The initial process is thread no. 1. */
1861 void
1862 start_threads (void)
1864 /* Initialize the conditional variable we will use. The only
1865 non-standard attribute we might use is the clock selection. */
1866 pthread_condattr_t condattr;
1867 pthread_condattr_init (&condattr);
1869 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
1870 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
1871 /* Determine whether the monotonous clock is available. */
1872 struct timespec dummy;
1873 # if _POSIX_MONOTONIC_CLOCK == 0
1874 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
1875 # endif
1876 # if _POSIX_CLOCK_SELECTION == 0
1877 if (sysconf (_SC_CLOCK_SELECTION) > 0)
1878 # endif
1879 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
1880 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
1881 timeout_clock = CLOCK_MONOTONIC;
1882 #endif
1884 /* Create the attribute for the threads. They are all created
1885 detached. */
1886 pthread_attr_init (&attr);
1887 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
1888 /* Use 1MB stacks, twice as much for 64-bit architectures. */
1889 pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
1891 /* We allow less than LASTDB threads only for debugging. */
1892 if (debug_level == 0)
1893 nthreads = MAX (nthreads, lastdb);
1895 /* Create the threads which prune the databases. */
1896 // XXX Ideally this work would be done by some of the worker threads.
1897 // XXX But this is problematic since we would need to be able to wake
1898 // XXX them up explicitly as well as part of the group handling the
1899 // XXX ready-list. This requires an operation where we can wait on
1900 // XXX two conditional variables at the same time. This operation
1901 // XXX does not exist (yet).
1902 for (long int i = 0; i < lastdb; ++i)
1904 /* Initialize the conditional variable. */
1905 if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
1907 dbg_log (_("could not initialize conditional variable"));
1908 exit (1);
1911 pthread_t th;
1912 if (dbs[i].enabled
1913 && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
1915 dbg_log (_("could not start clean-up thread; terminating"));
1916 exit (1);
1920 pthread_condattr_destroy (&condattr);
1922 for (long int i = 0; i < nthreads; ++i)
1924 pthread_t th;
1925 if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
1927 if (i == 0)
1929 dbg_log (_("could not start any worker thread; terminating"));
1930 exit (1);
1933 break;
1937 /* Determine how much room for descriptors we should initially
1938 allocate. This might need to change later if we cap the number
1939 with MAXCONN. */
1940 const long int nfds = sysconf (_SC_OPEN_MAX);
1941 #define MINCONN 32
1942 #define MAXCONN 16384
1943 if (nfds == -1 || nfds > MAXCONN)
1944 nconns = MAXCONN;
1945 else if (nfds < MINCONN)
1946 nconns = MINCONN;
1947 else
1948 nconns = nfds;
1950 /* We need memory to pass descriptors on to the worker threads. */
1951 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
1952 /* Array to keep track when connection was accepted. */
1953 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
1955 /* In the main thread we execute the loop which handles incoming
1956 connections. */
1957 #ifdef HAVE_EPOLL
1958 int efd = epoll_create (100);
1959 if (efd != -1)
1961 main_loop_epoll (efd);
1962 close (efd);
1964 #endif
1966 main_loop_poll ();
1970 /* Look up the uid, gid, and supplementary groups to run nscd as. When
1971 this function is called, we are not listening on the nscd socket yet so
1972 we can just use the ordinary lookup functions without causing a lockup */
1973 static void
1974 begin_drop_privileges (void)
1976 struct passwd *pwd = getpwnam (server_user);
1978 if (pwd == NULL)
1980 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1981 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
1982 server_user);
1985 server_uid = pwd->pw_uid;
1986 server_gid = pwd->pw_gid;
1988 /* Save the old UID/GID if we have to change back. */
1989 if (paranoia)
1991 old_uid = getuid ();
1992 old_gid = getgid ();
1995 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
1997 /* This really must never happen. */
1998 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1999 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
2002 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
2004 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
2005 == -1)
2007 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2008 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
2013 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2014 run nscd as the user specified in the configuration file. */
2015 static void
2016 finish_drop_privileges (void)
2018 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2019 /* We need to preserve the capabilities to connect to the audit daemon. */
2020 cap_t new_caps = preserve_capabilities ();
2021 #endif
2023 if (setgroups (server_ngroups, server_groups) == -1)
2025 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2026 error (EXIT_FAILURE, errno, _("setgroups failed"));
2029 int res;
2030 if (paranoia)
2031 res = setresgid (server_gid, server_gid, old_gid);
2032 else
2033 res = setgid (server_gid);
2034 if (res == -1)
2036 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2037 perror ("setgid");
2038 exit (4);
2041 if (paranoia)
2042 res = setresuid (server_uid, server_uid, old_uid);
2043 else
2044 res = setuid (server_uid);
2045 if (res == -1)
2047 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2048 perror ("setuid");
2049 exit (4);
2052 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2053 /* Remove the temporary capabilities. */
2054 install_real_capabilities (new_caps);
2055 #endif