* elf/soinit.c (__EH_FRAME_BEGIN__): Remove.
[glibc/pb-stable.git] / nscd / connections.c
blobe6a2c946ae19e5b90966fd4ae7a85d7d220cd603
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2007, 2008 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20 #include <alloca.h>
21 #include <assert.h>
22 #include <atomic.h>
23 #include <error.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <grp.h>
27 #include <libintl.h>
28 #include <pthread.h>
29 #include <pwd.h>
30 #include <resolv.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <arpa/inet.h>
35 #ifdef HAVE_EPOLL
36 # include <sys/epoll.h>
37 #endif
38 #include <sys/mman.h>
39 #include <sys/param.h>
40 #include <sys/poll.h>
41 #ifdef HAVE_SENDFILE
42 # include <sys/sendfile.h>
43 #endif
44 #include <sys/socket.h>
45 #include <sys/stat.h>
46 #include <sys/un.h>
48 #include "nscd.h"
49 #include "dbg_log.h"
50 #include "selinux.h"
51 #ifdef HAVE_SENDFILE
52 # include <kernel-features.h>
53 #endif
56 /* Wrapper functions with error checking for standard functions. */
57 extern void *xmalloc (size_t n);
58 extern void *xcalloc (size_t n, size_t s);
59 extern void *xrealloc (void *o, size_t n);
61 /* Support to run nscd as an unprivileged user */
/* When server_user is non-NULL, nscd_init calls begin_drop_privileges
   before the socket is set up and finish_drop_privileges afterwards.  */
62 const char *server_user;
/* restart() falls back to these IDs via setuid/setgid when it cannot
   complete the switch back to the old user.  */
63 static uid_t server_uid;
64 static gid_t server_gid;
65 const char *stat_user;
/* Besides uid 0, a caller with this uid is allowed to issue GETSTAT
   (see handle_request).  */
66 uid_t stat_uid;
67 static gid_t *server_groups;
/* Fallback value used only when no header provided NGROUPS.  */
68 #ifndef NGROUPS
69 # define NGROUPS 32
70 #endif
71 static int server_ngroups;
73 static pthread_attr_t attr;
75 static void begin_drop_privileges (void);
76 static void finish_drop_privileges (void);
78 /* Map request type to a string. */
79 const char *const serv2str[LASTREQ] =
81 [GETPWBYNAME] = "GETPWBYNAME",
82 [GETPWBYUID] = "GETPWBYUID",
83 [GETGRBYNAME] = "GETGRBYNAME",
84 [GETGRBYGID] = "GETGRBYGID",
85 [GETHOSTBYNAME] = "GETHOSTBYNAME",
86 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
87 [GETHOSTBYADDR] = "GETHOSTBYADDR",
88 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
89 [SHUTDOWN] = "SHUTDOWN",
90 [GETSTAT] = "GETSTAT",
91 [INVALIDATE] = "INVALIDATE",
92 [GETFDPW] = "GETFDPW",
93 [GETFDGR] = "GETFDGR",
94 [GETFDHST] = "GETFDHST",
95 [GETAI] = "GETAI",
96 [INITGROUPS] = "INITGROUPS",
97 [GETSERVBYNAME] = "GETSERVBYNAME",
98 [GETSERVBYPORT] = "GETSERVBYPORT",
99 [GETFDSERV] = "GETFDSERV"
102 /* The control data structures for the services. */
103 struct database_dyn dbs[lastdb] =
105 [pwddb] = {
106 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
107 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
108 .enabled = 0,
109 .check_file = 1,
110 .persistent = 0,
111 .propagate = 1,
112 .shared = 0,
113 .max_db_size = DEFAULT_MAX_DB_SIZE,
114 .suggested_module = DEFAULT_SUGGESTED_MODULE,
115 .reset_res = 0,
116 .filename = "/etc/passwd",
117 .db_filename = _PATH_NSCD_PASSWD_DB,
118 .disabled_iov = &pwd_iov_disabled,
119 .postimeout = 3600,
120 .negtimeout = 20,
121 .wr_fd = -1,
122 .ro_fd = -1,
123 .mmap_used = false
125 [grpdb] = {
126 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
127 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
128 .enabled = 0,
129 .check_file = 1,
130 .persistent = 0,
131 .propagate = 1,
132 .shared = 0,
133 .max_db_size = DEFAULT_MAX_DB_SIZE,
134 .suggested_module = DEFAULT_SUGGESTED_MODULE,
135 .reset_res = 0,
136 .filename = "/etc/group",
137 .db_filename = _PATH_NSCD_GROUP_DB,
138 .disabled_iov = &grp_iov_disabled,
139 .postimeout = 3600,
140 .negtimeout = 60,
141 .wr_fd = -1,
142 .ro_fd = -1,
143 .mmap_used = false
145 [hstdb] = {
146 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
147 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
148 .enabled = 0,
149 .check_file = 1,
150 .persistent = 0,
151 .propagate = 0, /* Not used. */
152 .shared = 0,
153 .max_db_size = DEFAULT_MAX_DB_SIZE,
154 .suggested_module = DEFAULT_SUGGESTED_MODULE,
155 .reset_res = 1,
156 .filename = "/etc/hosts",
157 .db_filename = _PATH_NSCD_HOSTS_DB,
158 .disabled_iov = &hst_iov_disabled,
159 .postimeout = 3600,
160 .negtimeout = 20,
161 .wr_fd = -1,
162 .ro_fd = -1,
163 .mmap_used = false
165 [servdb] = {
166 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
167 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
168 .enabled = 0,
169 .check_file = 1,
170 .persistent = 0,
171 .propagate = 0, /* Not used. */
172 .shared = 0,
173 .max_db_size = DEFAULT_MAX_DB_SIZE,
174 .suggested_module = DEFAULT_SUGGESTED_MODULE,
175 .reset_res = 0,
176 .filename = "/etc/services",
177 .db_filename = _PATH_NSCD_SERVICES_DB,
178 .disabled_iov = &serv_iov_disabled,
179 .postimeout = 28800,
180 .negtimeout = 20,
181 .wr_fd = -1,
182 .ro_fd = -1,
183 .mmap_used = false
188 /* Mapping of request type to database. */
189 static struct
191 bool data_request;
192 struct database_dyn *db;
193 } const reqinfo[LASTREQ] =
195 [GETPWBYNAME] = { true, &dbs[pwddb] },
196 [GETPWBYUID] = { true, &dbs[pwddb] },
197 [GETGRBYNAME] = { true, &dbs[grpdb] },
198 [GETGRBYGID] = { true, &dbs[grpdb] },
199 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
200 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
201 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
202 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
203 [SHUTDOWN] = { false, NULL },
204 [GETSTAT] = { false, NULL },
205 [SHUTDOWN] = { false, NULL },
206 [GETFDPW] = { false, &dbs[pwddb] },
207 [GETFDGR] = { false, &dbs[grpdb] },
208 [GETFDHST] = { false, &dbs[hstdb] },
209 [GETAI] = { true, &dbs[hstdb] },
210 [INITGROUPS] = { true, &dbs[grpdb] },
211 [GETSERVBYNAME] = { true, &dbs[servdb] },
212 [GETSERVBYPORT] = { true, &dbs[servdb] },
213 [GETFDSERV] = { false, &dbs[servdb] }
217 /* Initial number of threads to use. */
/* -1 means "not configured"; nscd_init replaces it with 4.  */
218 int nthreads = -1;
219 /* Maximum number of threads to use. */
220 int max_nthreads = 32;
222 /* Socket for incoming connections. */
/* Created, bound and put into listening state by nscd_init; closed by
   close_sockets.  */
223 static int sock;
225 /* Number of times clients had to wait. */
226 unsigned long int client_queued;
228 /* Data structure for recording in-flight memory allocation. */
229 __thread struct mem_in_flight mem_in_flight attribute_tls_model_ie;
230 /* Global list of the mem_in_flight variables of all the threads. */
231 struct mem_in_flight *mem_in_flight_list;
/* Send all LEN bytes starting at BUF over the socket FD, continuing
   after short sends and restarting calls interrupted by a signal.

   Returns a negative value if a send call failed, otherwise the number
   of bytes actually transmitted (less than LEN only if send reported
   zero bytes).  The first send is attempted even when LEN is zero.  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cursor = buf;
  size_t pending = len;
  ssize_t sent;

  for (;;)
    {
      /* Repeat the call for as long as it is merely interrupted.  */
      do
        sent = send (fd, cursor, pending, MSG_NOSIGNAL);
      while (sent == -1 && errno == EINTR);

      if (sent <= 0)
        break;

      cursor += sent;
      pending -= sent;
      if (pending == 0)
        break;
    }

  if (sent < 0)
    return sent;
  return len - pending;
}
252 #ifdef HAVE_SENDFILE
/* Copy LEN bytes from FROMFD, starting at offset OFF, to TOFD using
   sendfile, continuing after partial transfers and restarting calls
   interrupted by a signal.

   Returns a negative value if a sendfile call failed, otherwise the
   number of bytes transferred (less than LEN only if sendfile reported
   zero bytes).  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t pending = len;
  ssize_t moved;

  for (;;)
    {
      /* sendfile advances OFF itself; only the count needs tracking.  */
      do
        moved = sendfile (tofd, fromfd, &off, pending);
      while (moved == -1 && errno == EINTR);

      if (moved <= 0)
        break;

      pending -= moved;
      if (pending <= 0)
        break;
    }

  if (moved < 0)
    return moved;
  return len - pending;
}
269 #endif
/* Markers stored in the usage map while a persistent database is being
   verified.  The low bits name the kind of object occupying a byte; the
   flag bits mark the first and last byte of an object and whether a
   "first" reference to it has been seen.  */
enum usekey
{
  use_not = 0,

  /* Flag bits.  These three are not used on their own, they are only
     symbolic constants combined with the kinds below.  */
  use_first = 1 << 4,
  use_begin = 1 << 5,
  use_end = 1 << 6,

  /* Hash entry.  */
  use_he = 1,
  use_he_begin = use_he | use_begin,
  use_he_end = use_he | use_end,

#if SEPARATE_KEY
  /* Key stored outside of its data record.  */
  use_key = 2,
  use_key_begin = use_key | use_begin,
  use_key_end = use_key | use_end,
  use_key_first = use_key_begin | use_first,
#endif

  /* Data record.  */
  use_data = 3,
  use_data_begin = use_data | use_begin,
  use_data_end = use_data | use_end,
  use_data_first = use_data_begin | use_first
};
296 static int
297 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
298 enum usekey use, ref_t start, size_t len)
300 assert (len >= 2);
302 if (start > first_free || start + len > first_free
303 || (start & BLOCK_ALIGN_M1))
304 return 0;
306 if (usemap[start] == use_not)
308 /* Add the start marker. */
309 usemap[start] = use | use_begin;
310 use &= ~use_first;
312 while (--len > 0)
313 if (usemap[++start] != use_not)
314 return 0;
315 else
316 usemap[start] = use;
318 /* Add the end marker. */
319 usemap[start] = use | use_end;
321 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
323 /* Hash entries can't be shared. */
324 if (use == use_he)
325 return 0;
327 usemap[start] |= (use & use_first);
328 use &= ~use_first;
330 while (--len > 1)
331 if (usemap[++start] != use)
332 return 0;
334 if (usemap[++start] != (use | use_end))
335 return 0;
337 else
338 /* Points to a wrong object or somewhere in the middle. */
339 return 0;
341 return 1;
345 /* Verify data in persistent database. */
346 static int
347 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
349 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb);
351 time_t now = time (NULL);
353 struct database_pers_head *head = mem;
354 struct database_pers_head head_copy = *head;
356 /* Check that the header that was read matches the head in the database. */
357 if (memcmp (head, readhead, sizeof (*head)) != 0)
358 return 0;
360 /* First some easy tests: make sure the database header is sane. */
361 if (head->version != DB_VERSION
362 || head->header_size != sizeof (*head)
363 /* We allow a timestamp to be one hour ahead of the current time.
364 This should cover daylight saving time changes. */
365 || head->timestamp > now + 60 * 60 + 60
366 || (head->gc_cycle & 1)
367 || head->module == 0
368 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
369 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
370 || head->first_free < 0
371 || head->first_free > head->data_size
372 || (head->first_free & BLOCK_ALIGN_M1) != 0
373 || head->maxnentries < 0
374 || head->maxnsearched < 0)
375 return 0;
377 uint8_t *usemap = calloc (head->first_free, 1);
378 if (usemap == NULL)
379 return 0;
381 const char *data = (char *) &head->array[roundup (head->module,
382 ALIGN / sizeof (ref_t))];
384 nscd_ssize_t he_cnt = 0;
385 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
387 ref_t trail = head->array[cnt];
388 ref_t work = trail;
389 int tick = 0;
391 while (work != ENDREF)
393 if (! check_use (data, head->first_free, usemap, use_he, work,
394 sizeof (struct hashentry)))
395 goto fail;
397 /* Now we know we can dereference the record. */
398 struct hashentry *here = (struct hashentry *) (data + work);
400 ++he_cnt;
402 /* Make sure the record is for this type of service. */
403 if (here->type >= LASTREQ
404 || reqinfo[here->type].db != &dbs[dbnr])
405 goto fail;
407 /* Validate boolean field value. */
408 if (here->first != false && here->first != true)
409 goto fail;
411 if (here->len < 0)
412 goto fail;
414 /* Now the data. */
415 if (here->packet < 0
416 || here->packet > head->first_free
417 || here->packet + sizeof (struct datahead) > head->first_free)
418 goto fail;
420 struct datahead *dh = (struct datahead *) (data + here->packet);
422 if (! check_use (data, head->first_free, usemap,
423 use_data | (here->first ? use_first : 0),
424 here->packet, dh->allocsize))
425 goto fail;
427 if (dh->allocsize < sizeof (struct datahead)
428 || dh->recsize > dh->allocsize
429 || (dh->notfound != false && dh->notfound != true)
430 || (dh->usable != false && dh->usable != true))
431 goto fail;
433 if (here->key < here->packet + sizeof (struct datahead)
434 || here->key > here->packet + dh->allocsize
435 || here->key + here->len > here->packet + dh->allocsize)
437 #if SEPARATE_KEY
438 /* If keys can appear outside of data, this should be done
439 instead. But gc doesn't mark the data in that case. */
440 if (! check_use (data, head->first_free, usemap,
441 use_key | (here->first ? use_first : 0),
442 here->key, here->len))
443 #endif
444 goto fail;
447 work = here->next;
449 if (work == trail)
450 /* A circular list, this must not happen. */
451 goto fail;
452 if (tick)
453 trail = ((struct hashentry *) (data + trail))->next;
454 tick = 1 - tick;
458 if (he_cnt != head->nentries)
459 goto fail;
461 /* See if all data and keys had at least one reference from
462 he->first == true hashentry. */
463 for (ref_t idx = 0; idx < head->first_free; ++idx)
465 #if SEPARATE_KEY
466 if (usemap[idx] == use_key_begin)
467 goto fail;
468 #endif
469 if (usemap[idx] == use_data_begin)
470 goto fail;
473 /* Finally, make sure the database hasn't changed since the first test. */
474 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
475 goto fail;
477 free (usemap);
478 return 1;
480 fail:
481 free (usemap);
482 return 0;
/* Additional flag for the open calls below: O_CLOEXEC where the
   platform defines it, nothing otherwise.  */
#ifndef O_CLOEXEC
# define EXTRA_O_FLAGS 0
#else
# define EXTRA_O_FLAGS O_CLOEXEC
#endif
493 /* Initialize database information structures. */
494 void
495 nscd_init (void)
497 /* Look up unprivileged uid/gid/groups before we start listening on the
498 socket */
499 if (server_user != NULL)
500 begin_drop_privileges ();
502 if (nthreads == -1)
503 /* No configuration for this value, assume a default. */
504 nthreads = 4;
506 for (size_t cnt = 0; cnt < lastdb; ++cnt)
507 if (dbs[cnt].enabled)
509 pthread_rwlock_init (&dbs[cnt].lock, NULL);
510 pthread_mutex_init (&dbs[cnt].memlock, NULL);
512 if (dbs[cnt].persistent)
514 /* Try to open the appropriate file on disk. */
515 int fd = open (dbs[cnt].db_filename, O_RDWR | EXTRA_O_FLAGS);
516 if (fd != -1)
518 char *msg = NULL;
519 struct stat64 st;
520 void *mem;
521 size_t total;
522 struct database_pers_head head;
523 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
524 sizeof (head)));
525 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
527 fail_db_errno:
528 /* The code is single-threaded at this point so
529 using strerror is just fine. */
530 msg = strerror (errno);
531 fail_db:
532 dbg_log (_("invalid persistent database file \"%s\": %s"),
533 dbs[cnt].db_filename, msg);
534 unlink (dbs[cnt].db_filename);
536 else if (head.module == 0 && head.data_size == 0)
538 /* The file has been created, but the head has not
539 been initialized yet. */
540 msg = _("uninitialized header");
541 goto fail_db;
543 else if (head.header_size != (int) sizeof (head))
545 msg = _("header size does not match");
546 goto fail_db;
548 else if ((total = (sizeof (head)
549 + roundup (head.module * sizeof (ref_t),
550 ALIGN)
551 + head.data_size))
552 > st.st_size
553 || total < sizeof (head))
555 msg = _("file size does not match");
556 goto fail_db;
558 /* Note we map with the maximum size allowed for the
559 database. This is likely much larger than the
560 actual file size. This is OK on most OSes since
561 extensions of the underlying file will
562 automatically translate more pages available for
563 memory access. */
564 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
565 PROT_READ | PROT_WRITE,
566 MAP_SHARED, fd, 0))
567 == MAP_FAILED)
568 goto fail_db_errno;
569 else if (!verify_persistent_db (mem, &head, cnt))
571 munmap (mem, total);
572 msg = _("verification failed");
573 goto fail_db;
575 else
577 /* Success. We have the database. */
578 dbs[cnt].head = mem;
579 dbs[cnt].memsize = total;
580 dbs[cnt].data = (char *)
581 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
582 ALIGN / sizeof (ref_t))];
583 dbs[cnt].mmap_used = true;
585 if (dbs[cnt].suggested_module > head.module)
586 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
587 dbnames[cnt]);
589 dbs[cnt].wr_fd = fd;
590 fd = -1;
591 /* We also need a read-only descriptor. */
592 if (dbs[cnt].shared)
594 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
595 O_RDONLY | EXTRA_O_FLAGS);
596 if (dbs[cnt].ro_fd == -1)
597 dbg_log (_("\
598 cannot create read-only descriptor for \"%s\"; no mmap"),
599 dbs[cnt].db_filename);
602 // XXX Shall we test whether the descriptors actually
603 // XXX point to the same file?
606 /* Close the file descriptors in case something went
607 wrong in which case the variable have not been
608 assigned -1. */
609 if (fd != -1)
610 close (fd);
614 if (dbs[cnt].head == NULL)
616 /* No database loaded. Allocate the data structure,
617 possibly on disk. */
618 struct database_pers_head head;
619 size_t total = (sizeof (head)
620 + roundup (dbs[cnt].suggested_module
621 * sizeof (ref_t), ALIGN)
622 + (dbs[cnt].suggested_module
623 * DEFAULT_DATASIZE_PER_BUCKET));
625 /* Try to create the database. If we do not need a
626 persistent database create a temporary file. */
627 int fd;
628 int ro_fd = -1;
629 if (dbs[cnt].persistent)
631 fd = open (dbs[cnt].db_filename,
632 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | EXTRA_O_FLAGS,
633 S_IRUSR | S_IWUSR);
634 if (fd != -1 && dbs[cnt].shared)
635 ro_fd = open (dbs[cnt].db_filename,
636 O_RDONLY | EXTRA_O_FLAGS);
638 else
640 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
641 fd = mkostemp (fname, EXTRA_O_FLAGS);
643 /* We do not need the file name anymore after we
644 opened another file descriptor in read-only mode. */
645 if (fd != -1)
647 if (dbs[cnt].shared)
648 ro_fd = open (fname, O_RDONLY | EXTRA_O_FLAGS);
650 unlink (fname);
654 if (fd == -1)
656 if (errno == EEXIST)
658 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
659 dbnames[cnt], dbs[cnt].db_filename);
660 // XXX Correct way to terminate?
661 exit (1);
664 if (dbs[cnt].persistent)
665 dbg_log (_("cannot create %s; no persistent database used"),
666 dbs[cnt].db_filename);
667 else
668 dbg_log (_("cannot create %s; no sharing possible"),
669 dbs[cnt].db_filename);
671 dbs[cnt].persistent = 0;
672 // XXX remember: no mmap
674 else
676 /* Tell the user if we could not create the read-only
677 descriptor. */
678 if (ro_fd == -1 && dbs[cnt].shared)
679 dbg_log (_("\
680 cannot create read-only descriptor for \"%s\"; no mmap"),
681 dbs[cnt].db_filename);
683 /* Before we create the header, initialiye the hash
684 table. So that if we get interrupted if writing
685 the header we can recognize a partially initialized
686 database. */
687 size_t ps = sysconf (_SC_PAGESIZE);
688 char tmpbuf[ps];
689 assert (~ENDREF == 0);
690 memset (tmpbuf, '\xff', ps);
692 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
693 off_t offset = sizeof (head);
695 size_t towrite;
696 if (offset % ps != 0)
698 towrite = MIN (remaining, ps - (offset % ps));
699 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
700 goto write_fail;
701 offset += towrite;
702 remaining -= towrite;
705 while (remaining > ps)
707 if (pwrite (fd, tmpbuf, ps, offset) == -1)
708 goto write_fail;
709 offset += ps;
710 remaining -= ps;
713 if (remaining > 0
714 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
715 goto write_fail;
717 /* Create the header of the file. */
718 struct database_pers_head head =
720 .version = DB_VERSION,
721 .header_size = sizeof (head),
722 .module = dbs[cnt].suggested_module,
723 .data_size = (dbs[cnt].suggested_module
724 * DEFAULT_DATASIZE_PER_BUCKET),
725 .first_free = 0
727 void *mem;
729 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
730 != sizeof (head))
731 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
732 != 0)
733 || (mem = mmap (NULL, dbs[cnt].max_db_size,
734 PROT_READ | PROT_WRITE,
735 MAP_SHARED, fd, 0)) == MAP_FAILED)
737 write_fail:
738 unlink (dbs[cnt].db_filename);
739 dbg_log (_("cannot write to database file %s: %s"),
740 dbs[cnt].db_filename, strerror (errno));
741 dbs[cnt].persistent = 0;
743 else
745 /* Success. */
746 dbs[cnt].head = mem;
747 dbs[cnt].data = (char *)
748 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
749 ALIGN / sizeof (ref_t))];
750 dbs[cnt].memsize = total;
751 dbs[cnt].mmap_used = true;
753 /* Remember the descriptors. */
754 dbs[cnt].wr_fd = fd;
755 dbs[cnt].ro_fd = ro_fd;
756 fd = -1;
757 ro_fd = -1;
760 if (fd != -1)
761 close (fd);
762 if (ro_fd != -1)
763 close (ro_fd);
767 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
768 /* We do not check here whether the O_CLOEXEC provided to the
769 open call was successful or not. The two fcntl calls are
770 only performed once each per process start-up and therefore
771 is not noticeable at all. */
772 if (paranoia
773 && ((dbs[cnt].wr_fd != -1
774 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
775 || (dbs[cnt].ro_fd != -1
776 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
778 dbg_log (_("\
779 cannot set socket to close on exec: %s; disabling paranoia mode"),
780 strerror (errno));
781 paranoia = 0;
783 #endif
785 if (dbs[cnt].head == NULL)
787 /* We do not use the persistent database. Just
788 create an in-memory data structure. */
789 assert (! dbs[cnt].persistent);
791 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
792 + (dbs[cnt].suggested_module
793 * sizeof (ref_t)));
794 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
795 assert (~ENDREF == 0);
796 memset (dbs[cnt].head->array, '\xff',
797 dbs[cnt].suggested_module * sizeof (ref_t));
798 dbs[cnt].head->module = dbs[cnt].suggested_module;
799 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
800 * dbs[cnt].head->module);
801 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
802 dbs[cnt].head->first_free = 0;
804 dbs[cnt].shared = 0;
805 assert (dbs[cnt].ro_fd == -1);
808 if (dbs[cnt].check_file)
810 /* We need the modification date of the file. */
811 struct stat64 st;
813 if (stat64 (dbs[cnt].filename, &st) < 0)
815 /* We cannot stat() the file, disable file checking. */
816 dbg_log (_("cannot stat() file `%s': %s"),
817 dbs[cnt].filename, strerror (errno));
818 dbs[cnt].check_file = 0;
820 else
821 dbs[cnt].file_mtime = st.st_mtime;
825 /* Create the socket. */
826 sock = socket (AF_UNIX, SOCK_STREAM, 0);
827 if (sock < 0)
829 dbg_log (_("cannot open socket: %s"), strerror (errno));
830 exit (errno == EACCES ? 4 : 1);
832 /* Bind a name to the socket. */
833 struct sockaddr_un sock_addr;
834 sock_addr.sun_family = AF_UNIX;
835 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
836 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
838 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
839 exit (errno == EACCES ? 4 : 1);
842 /* We don't want to get stuck on accept. */
843 int fl = fcntl (sock, F_GETFL);
844 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
846 dbg_log (_("cannot change socket to nonblocking mode: %s"),
847 strerror (errno));
848 exit (1);
851 /* The descriptor needs to be closed on exec. */
852 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
854 dbg_log (_("cannot set socket to close on exec: %s"),
855 strerror (errno));
856 exit (1);
859 /* Set permissions for the socket. */
860 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
862 /* Set the socket up to accept connections. */
863 if (listen (sock, SOMAXCONN) < 0)
865 dbg_log (_("cannot enable socket to accept connections: %s"),
866 strerror (errno));
867 exit (1);
870 /* Change to unprivileged uid/gid/groups if specifed in config file */
871 if (server_user != NULL)
872 finish_drop_privileges ();
876 /* Close the connections. */
877 void
878 close_sockets (void)
880 close (sock);
884 static void
885 invalidate_cache (char *key, int fd)
887 dbtype number;
888 int32_t resp;
890 for (number = pwddb; number < lastdb; ++number)
891 if (strcmp (key, dbnames[number]) == 0)
893 if (dbs[number].reset_res)
894 res_init ();
896 break;
899 if (number == lastdb)
901 resp = EINVAL;
902 writeall (fd, &resp, sizeof (resp));
903 return;
906 if (dbs[number].enabled)
908 pthread_mutex_lock (&dbs[number].prune_lock);
909 prune_cache (&dbs[number], LONG_MAX, fd);
910 pthread_mutex_unlock (&dbs[number].prune_lock);
912 else
914 resp = 0;
915 writeall (fd, &resp, sizeof (resp));
920 #ifdef SCM_RIGHTS
921 static void
922 send_ro_fd (struct database_dyn *db, char *key, int fd)
924 /* If we do not have an read-only file descriptor do nothing. */
925 if (db->ro_fd == -1)
926 return;
928 /* We need to send some data along with the descriptor. */
929 uint64_t mapsize = (db->head->data_size
930 + roundup (db->head->module * sizeof (ref_t), ALIGN)
931 + sizeof (struct database_pers_head));
932 struct iovec iov[2];
933 iov[0].iov_base = key;
934 iov[0].iov_len = strlen (key) + 1;
935 iov[1].iov_base = &mapsize;
936 iov[1].iov_len = sizeof (mapsize);
938 /* Prepare the control message to transfer the descriptor. */
939 union
941 struct cmsghdr hdr;
942 char bytes[CMSG_SPACE (sizeof (int))];
943 } buf;
944 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
945 .msg_control = buf.bytes,
946 .msg_controllen = sizeof (buf) };
947 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
949 cmsg->cmsg_level = SOL_SOCKET;
950 cmsg->cmsg_type = SCM_RIGHTS;
951 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
953 *(int *) CMSG_DATA (cmsg) = db->ro_fd;
955 msg.msg_controllen = cmsg->cmsg_len;
957 /* Send the control message. We repeat when we are interrupted but
958 everything else is ignored. */
959 #ifndef MSG_NOSIGNAL
960 # define MSG_NOSIGNAL 0
961 #endif
962 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
964 if (__builtin_expect (debug_level > 0, 0))
965 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
967 #endif /* SCM_RIGHTS */
970 /* Handle new request. */
971 static void
972 handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
974 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
976 if (debug_level > 0)
977 dbg_log (_("\
978 cannot handle old request version %d; current version is %d"),
979 req->version, NSCD_VERSION);
980 return;
983 /* Perform the SELinux check before we go on to the standard checks. */
984 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
986 if (debug_level > 0)
988 #ifdef SO_PEERCRED
989 # ifdef PATH_MAX
990 char buf[PATH_MAX];
991 # else
992 char buf[4096];
993 # endif
995 snprintf (buf, sizeof (buf), "/proc/%ld/exe", (long int) pid);
996 ssize_t n = readlink (buf, buf, sizeof (buf) - 1);
998 if (n <= 0)
999 dbg_log (_("\
1000 request from %ld not handled due to missing permission"), (long int) pid);
1001 else
1003 buf[n] = '\0';
1004 dbg_log (_("\
1005 request from '%s' [%ld] not handled due to missing permission"),
1006 buf, (long int) pid);
1008 #else
1009 dbg_log (_("request not handled due to missing permission"));
1010 #endif
1012 return;
1015 struct database_dyn *db = reqinfo[req->type].db;
1017 /* See whether we can service the request from the cache. */
1018 if (__builtin_expect (reqinfo[req->type].data_request, true))
1020 if (__builtin_expect (debug_level, 0) > 0)
1022 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
1024 char buf[INET6_ADDRSTRLEN];
1026 dbg_log ("\t%s (%s)", serv2str[req->type],
1027 inet_ntop (req->type == GETHOSTBYADDR
1028 ? AF_INET : AF_INET6,
1029 key, buf, sizeof (buf)));
1031 else
1032 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1035 /* Is this service enabled? */
1036 if (__builtin_expect (!db->enabled, 0))
1038 /* No, sent the prepared record. */
1039 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1040 db->disabled_iov->iov_len,
1041 MSG_NOSIGNAL))
1042 != (ssize_t) db->disabled_iov->iov_len
1043 && __builtin_expect (debug_level, 0) > 0)
1045 /* We have problems sending the result. */
1046 char buf[256];
1047 dbg_log (_("cannot write result: %s"),
1048 strerror_r (errno, buf, sizeof (buf)));
1051 return;
1054 /* Be sure we can read the data. */
1055 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
1057 ++db->head->rdlockdelayed;
1058 pthread_rwlock_rdlock (&db->lock);
1061 /* See whether we can handle it from the cache. */
1062 struct datahead *cached;
1063 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1064 db, uid);
1065 if (cached != NULL)
1067 /* Hurray it's in the cache. */
1068 ssize_t nwritten;
1070 #ifdef HAVE_SENDFILE
1071 if (__builtin_expect (db->mmap_used, 1))
1073 assert (db->wr_fd != -1);
1074 assert ((char *) cached->data > (char *) db->data);
1075 assert ((char *) cached->data - (char *) db->head
1076 + cached->recsize
1077 <= (sizeof (struct database_pers_head)
1078 + db->head->module * sizeof (ref_t)
1079 + db->head->data_size));
1080 nwritten = sendfileall (fd, db->wr_fd,
1081 (char *) cached->data
1082 - (char *) db->head, cached->recsize);
1083 # ifndef __ASSUME_SENDFILE
1084 if (nwritten == -1 && errno == ENOSYS)
1085 goto use_write;
1086 # endif
1088 else
1089 # ifndef __ASSUME_SENDFILE
1090 use_write:
1091 # endif
1092 #endif
1093 nwritten = writeall (fd, cached->data, cached->recsize);
1095 if (nwritten != cached->recsize
1096 && __builtin_expect (debug_level, 0) > 0)
1098 /* We have problems sending the result. */
1099 char buf[256];
1100 dbg_log (_("cannot write result: %s"),
1101 strerror_r (errno, buf, sizeof (buf)));
1104 pthread_rwlock_unlock (&db->lock);
1106 return;
1109 pthread_rwlock_unlock (&db->lock);
1111 else if (__builtin_expect (debug_level, 0) > 0)
1113 if (req->type == INVALIDATE)
1114 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1115 else
1116 dbg_log ("\t%s", serv2str[req->type]);
1119 /* Handle the request. */
1120 switch (req->type)
1122 case GETPWBYNAME:
1123 addpwbyname (db, fd, req, key, uid);
1124 break;
1126 case GETPWBYUID:
1127 addpwbyuid (db, fd, req, key, uid);
1128 break;
1130 case GETGRBYNAME:
1131 addgrbyname (db, fd, req, key, uid);
1132 break;
1134 case GETGRBYGID:
1135 addgrbygid (db, fd, req, key, uid);
1136 break;
1138 case GETHOSTBYNAME:
1139 addhstbyname (db, fd, req, key, uid);
1140 break;
1142 case GETHOSTBYNAMEv6:
1143 addhstbynamev6 (db, fd, req, key, uid);
1144 break;
1146 case GETHOSTBYADDR:
1147 addhstbyaddr (db, fd, req, key, uid);
1148 break;
1150 case GETHOSTBYADDRv6:
1151 addhstbyaddrv6 (db, fd, req, key, uid);
1152 break;
1154 case GETAI:
1155 addhstai (db, fd, req, key, uid);
1156 break;
1158 case INITGROUPS:
1159 addinitgroups (db, fd, req, key, uid);
1160 break;
1162 case GETSERVBYNAME:
1163 addservbyname (db, fd, req, key, uid);
1164 break;
1166 case GETSERVBYPORT:
1167 addservbyport (db, fd, req, key, uid);
1168 break;
1170 case GETSTAT:
1171 case SHUTDOWN:
1172 case INVALIDATE:
1174 /* Get the callers credentials. */
1175 #ifdef SO_PEERCRED
1176 struct ucred caller;
1177 socklen_t optlen = sizeof (caller);
1179 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1181 char buf[256];
1183 dbg_log (_("error getting caller's id: %s"),
1184 strerror_r (errno, buf, sizeof (buf)));
1185 break;
1188 uid = caller.uid;
1189 #else
1190 /* Some systems have no SO_PEERCRED implementation. They don't
1191 care about security so we don't as well. */
1192 uid = 0;
1193 #endif
1196 /* Accept shutdown, getstat and invalidate only from root. For
1197 the stat call also allow the user specified in the config file. */
1198 if (req->type == GETSTAT)
1200 if (uid == 0 || uid == stat_uid)
1201 send_stats (fd, dbs);
1203 else if (uid == 0)
1205 if (req->type == INVALIDATE)
1206 invalidate_cache (key, fd);
1207 else
1208 termination_handler (0);
1210 break;
1212 case GETFDPW:
1213 case GETFDGR:
1214 case GETFDHST:
1215 case GETFDSERV:
1216 #ifdef SCM_RIGHTS
1217 send_ro_fd (reqinfo[req->type].db, key, fd);
1218 #endif
1219 break;
1221 default:
1222 /* Ignore the command, it's nothing we know. */
1223 break;
1228 /* Restart the process. */
1229 static void
1230 restart (void)
1232 /* First determine the parameters. We do not use the parameters
1233 passed to main() since in case nscd is started by running the
1234 dynamic linker this will not work. Yes, this is not the usual
1235 case but nscd is part of glibc and we occasionally do this. */
1236 size_t buflen = 1024;
1237 char *buf = alloca (buflen);
1238 size_t readlen = 0;
1239 int fd = open ("/proc/self/cmdline", O_RDONLY);
1240 if (fd == -1)
1242 dbg_log (_("\
1243 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1244 strerror (errno));
1246 paranoia = 0;
1247 return;
1250 while (1)
1252 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1253 buflen - readlen));
1254 if (n == -1)
1256 dbg_log (_("\
1257 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1258 strerror (errno));
1260 close (fd);
1261 paranoia = 0;
1262 return;
1265 readlen += n;
1267 if (readlen < buflen)
1268 break;
1270 /* We might have to extend the buffer. */
1271 size_t old_buflen = buflen;
1272 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1273 buf = memmove (newp, buf, old_buflen);
1276 close (fd);
1278 /* Parse the command line. Worst case scenario: every two
1279 characters form one parameter (one character plus NUL). */
1280 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1281 int argc = 0;
1283 char *cp = buf;
1284 while (cp < buf + readlen)
1286 argv[argc++] = cp;
1287 cp = (char *) rawmemchr (cp, '\0') + 1;
1289 argv[argc] = NULL;
1291 /* Second, change back to the old user if we changed it. */
1292 if (server_user != NULL)
1294 if (setresuid (old_uid, old_uid, old_uid) != 0)
1296 dbg_log (_("\
1297 cannot change to old UID: %s; disabling paranoia mode"),
1298 strerror (errno));
1300 paranoia = 0;
1301 return;
1304 if (setresgid (old_gid, old_gid, old_gid) != 0)
1306 dbg_log (_("\
1307 cannot change to old GID: %s; disabling paranoia mode"),
1308 strerror (errno));
1310 setuid (server_uid);
1311 paranoia = 0;
1312 return;
1316 /* Next change back to the old working directory. */
1317 if (chdir (oldcwd) == -1)
1319 dbg_log (_("\
1320 cannot change to old working directory: %s; disabling paranoia mode"),
1321 strerror (errno));
1323 if (server_user != NULL)
1325 setuid (server_uid);
1326 setgid (server_gid);
1328 paranoia = 0;
1329 return;
1332 /* Synchronize memory. */
1333 int32_t certainly[lastdb];
1334 for (int cnt = 0; cnt < lastdb; ++cnt)
1335 if (dbs[cnt].enabled)
1337 /* Make sure nobody keeps using the database. */
1338 dbs[cnt].head->timestamp = 0;
1339 certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
1340 dbs[cnt].head->nscd_certainly_running = 0;
1342 if (dbs[cnt].persistent)
1343 // XXX async OK?
1344 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1347 /* The preparations are done. */
1348 execv ("/proc/self/exe", argv);
1350 /* If we come here, we will never be able to re-exec. */
1351 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1352 strerror (errno));
1354 if (server_user != NULL)
1356 setuid (server_uid);
1357 setgid (server_gid);
1359 if (chdir ("/") != 0)
1360 dbg_log (_("cannot change current working directory to \"/\": %s"),
1361 strerror (errno));
1362 paranoia = 0;
1364 /* Reenable the databases. */
1365 time_t now = time (NULL);
1366 for (int cnt = 0; cnt < lastdb; ++cnt)
1367 if (dbs[cnt].enabled)
1369 dbs[cnt].head->timestamp = now;
1370 dbs[cnt].head->nscd_certainly_running = certainly[cnt];
/* Element of the list of file descriptors.  */
struct fdlist
{
  /* The file descriptor stored in this element.  */
  int fd;
  /* The following element of the list.  */
  struct fdlist *next;
};
/* Memory allocated for the list.  */
static struct fdlist *fdlist;
/* List of currently ready-to-read file descriptors.  */
static struct fdlist *readylist;

/* Condition variable and mutex to signal availability of entries in
   READYLIST.  Both are initialized statically.  */
static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;

/* The clock used to compute the timeouts of the clean-up threads.
   start_threads switches it to CLOCK_MONOTONIC when that clock can be
   selected for the condition variables it initializes.  */
static clockid_t timeout_clock = CLOCK_REALTIME;

/* Number of threads ready to handle the READYLIST.  */
static unsigned long int nready;
1399 /* Function for the clean-up threads. */
1400 static void *
1401 __attribute__ ((__noreturn__))
1402 nscd_run_prune (void *p)
1404 const long int my_number = (long int) p;
1405 assert (dbs[my_number].enabled);
1407 int dont_need_update = setup_thread (&dbs[my_number]);
1409 time_t now = time (NULL);
1411 /* We are running. */
1412 dbs[my_number].head->timestamp = now;
1414 struct timespec prune_ts;
1415 if (__builtin_expect (clock_gettime (timeout_clock, &prune_ts) == -1, 0))
1416 /* Should never happen. */
1417 abort ();
1419 /* Compute the initial timeout time. Prevent all the timers to go
1420 off at the same time by adding a db-based value. */
1421 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1422 dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;
1424 pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
1425 pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;
1427 pthread_mutex_lock (prune_lock);
1428 while (1)
1430 /* Wait, but not forever. */
1431 int e = pthread_cond_timedwait (prune_cond, prune_lock, &prune_ts);
1432 assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));
1434 time_t next_wait;
1435 now = time (NULL);
1436 if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time)
1438 /* We will determine the new timout values based on the
1439 cache content. Should there be concurrent additions to
1440 the cache which are not accounted for in the cache
1441 pruning we want to know about it. Therefore set the
1442 timeout to the maximum. It will be descreased when adding
1443 new entries to the cache, if necessary. */
1444 if (sizeof (time_t) == sizeof (long int))
1445 dbs[my_number].wakeup_time = LONG_MAX;
1446 else
1447 dbs[my_number].wakeup_time = INT_MAX;
1449 pthread_mutex_unlock (prune_lock);
1451 next_wait = prune_cache (&dbs[my_number], now, -1);
1453 next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1454 /* If clients cannot determine for sure whether nscd is running
1455 we need to wake up occasionally to update the timestamp.
1456 Wait 90% of the update period. */
1457 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1458 if (__builtin_expect (! dont_need_update, 0))
1460 next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1461 dbs[my_number].head->timestamp = now;
1464 pthread_mutex_lock (prune_lock);
1466 /* Make it known when we will wake up again. */
1467 if (now + next_wait < dbs[my_number].wakeup_time)
1468 dbs[my_number].wakeup_time = now + next_wait;
1469 else
1470 next_wait = dbs[my_number].wakeup_time - now;
1472 else
1473 /* The cache was just pruned. Do not do it again now. Just
1474 use the new timeout value. */
1475 next_wait = dbs[my_number].wakeup_time - now;
1477 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1478 /* Should never happen. */
1479 abort ();
1481 /* Compute next timeout time. */
1482 prune_ts.tv_sec += next_wait;
1487 /* This is the main loop. It is replicated in different threads but
1488 the the use of the ready list makes sure only one thread handles an
1489 incoming connection. */
1490 static void *
1491 __attribute__ ((__noreturn__))
1492 nscd_run_worker (void *p)
1494 char buf[256];
1496 /* Initialize the memory-in-flight list. */
1497 for (enum in_flight idx = 0; idx < IDX_last; ++idx)
1498 mem_in_flight.block[idx].dbidx = -1;
1499 /* And queue this threads structure. */
1501 mem_in_flight.next = mem_in_flight_list;
1502 while (atomic_compare_and_exchange_bool_acq (&mem_in_flight_list,
1503 &mem_in_flight,
1504 mem_in_flight.next) != 0);
1506 /* Initial locking. */
1507 pthread_mutex_lock (&readylist_lock);
1509 /* One more thread available. */
1510 ++nready;
1512 while (1)
1514 while (readylist == NULL)
1515 pthread_cond_wait (&readylist_cond, &readylist_lock);
1517 struct fdlist *it = readylist->next;
1518 if (readylist->next == readylist)
1519 /* Just one entry on the list. */
1520 readylist = NULL;
1521 else
1522 readylist->next = it->next;
1524 /* Extract the information and mark the record ready to be used
1525 again. */
1526 int fd = it->fd;
1527 it->next = NULL;
1529 /* One more thread available. */
1530 --nready;
1532 /* We are done with the list. */
1533 pthread_mutex_unlock (&readylist_lock);
1535 /* We do not want to block on a short read or so. */
1536 int fl = fcntl (fd, F_GETFL);
1537 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1538 goto close_and_out;
1540 /* Now read the request. */
1541 request_header req;
1542 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1543 != sizeof (req), 0))
1545 /* We failed to read data. Note that this also might mean we
1546 failed because we would have blocked. */
1547 if (debug_level > 0)
1548 dbg_log (_("short read while reading request: %s"),
1549 strerror_r (errno, buf, sizeof (buf)));
1550 goto close_and_out;
1553 /* Check whether this is a valid request type. */
1554 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1555 goto close_and_out;
1557 /* Some systems have no SO_PEERCRED implementation. They don't
1558 care about security so we don't as well. */
1559 uid_t uid = -1;
1560 #ifdef SO_PEERCRED
1561 pid_t pid = 0;
1563 if (__builtin_expect (debug_level > 0, 0))
1565 struct ucred caller;
1566 socklen_t optlen = sizeof (caller);
1568 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1569 pid = caller.pid;
1571 #else
1572 const pid_t pid = 0;
1573 #endif
1575 /* It should not be possible to crash the nscd with a silly
1576 request (i.e., a terribly large key). We limit the size to 1kb. */
1577 if (__builtin_expect (req.key_len, 1) < 0
1578 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1580 if (debug_level > 0)
1581 dbg_log (_("key length in request too long: %d"), req.key_len);
1583 else
1585 /* Get the key. */
1586 char keybuf[MAXKEYLEN];
1588 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1589 req.key_len))
1590 != req.key_len, 0))
1592 /* Again, this can also mean we would have blocked. */
1593 if (debug_level > 0)
1594 dbg_log (_("short read while reading request key: %s"),
1595 strerror_r (errno, buf, sizeof (buf)));
1596 goto close_and_out;
1599 if (__builtin_expect (debug_level, 0) > 0)
1601 #ifdef SO_PEERCRED
1602 if (pid != 0)
1603 dbg_log (_("\
1604 handle_request: request received (Version = %d) from PID %ld"),
1605 req.version, (long int) pid);
1606 else
1607 #endif
1608 dbg_log (_("\
1609 handle_request: request received (Version = %d)"), req.version);
1612 /* Phew, we got all the data, now process it. */
1613 handle_request (fd, &req, keybuf, uid, pid);
1616 close_and_out:
1617 /* We are done. */
1618 close (fd);
1620 /* Re-locking. */
1621 pthread_mutex_lock (&readylist_lock);
1623 /* One more thread available. */
1624 ++nready;
1629 static unsigned int nconns;
1631 static void
1632 fd_ready (int fd)
1634 pthread_mutex_lock (&readylist_lock);
1636 /* Find an empty entry in FDLIST. */
1637 size_t inner;
1638 for (inner = 0; inner < nconns; ++inner)
1639 if (fdlist[inner].next == NULL)
1640 break;
1641 assert (inner < nconns);
1643 fdlist[inner].fd = fd;
1645 if (readylist == NULL)
1646 readylist = fdlist[inner].next = &fdlist[inner];
1647 else
1649 fdlist[inner].next = readylist->next;
1650 readylist = readylist->next = &fdlist[inner];
1653 bool do_signal = true;
1654 if (__builtin_expect (nready == 0, 0))
1656 ++client_queued;
1657 do_signal = false;
1659 /* Try to start another thread to help out. */
1660 pthread_t th;
1661 if (nthreads < max_nthreads
1662 && pthread_create (&th, &attr, nscd_run_worker,
1663 (void *) (long int) nthreads) == 0)
1665 /* We got another thread. */
1666 ++nthreads;
1667 /* The new thread might need a kick. */
1668 do_signal = true;
1673 pthread_mutex_unlock (&readylist_lock);
1675 /* Tell one of the worker threads there is work to do. */
1676 if (do_signal)
1677 pthread_cond_signal (&readylist_cond);
1681 /* Check whether restarting should happen. */
1682 static inline int
1683 restart_p (time_t now)
1685 return (paranoia && readylist == NULL && nready == nthreads
1686 && now >= restart_time);
1690 /* Array for times a connection was accepted. */
1691 static time_t *starttime;
1694 static void
1695 __attribute__ ((__noreturn__))
1696 main_loop_poll (void)
1698 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1699 * sizeof (conns[0]));
1701 conns[0].fd = sock;
1702 conns[0].events = POLLRDNORM;
1703 size_t nused = 1;
1704 size_t firstfree = 1;
1706 while (1)
1708 /* Wait for any event. We wait at most a couple of seconds so
1709 that we can check whether we should close any of the accepted
1710 connections since we have not received a request. */
1711 #define MAX_ACCEPT_TIMEOUT 30
1712 #define MIN_ACCEPT_TIMEOUT 5
1713 #define MAIN_THREAD_TIMEOUT \
1714 (MAX_ACCEPT_TIMEOUT * 1000 \
1715 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1717 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1719 time_t now = time (NULL);
1721 /* If there is a descriptor ready for reading or there is a new
1722 connection, process this now. */
1723 if (n > 0)
1725 if (conns[0].revents != 0)
1727 /* We have a new incoming connection. Accept the connection. */
1728 int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
1730 /* Use the descriptor if we have not reached the limit. */
1731 if (fd >= 0)
1733 if (firstfree < nconns)
1735 conns[firstfree].fd = fd;
1736 conns[firstfree].events = POLLRDNORM;
1737 starttime[firstfree] = now;
1738 if (firstfree >= nused)
1739 nused = firstfree + 1;
1742 ++firstfree;
1743 while (firstfree < nused && conns[firstfree].fd != -1);
1745 else
1746 /* We cannot use the connection so close it. */
1747 close (fd);
1750 --n;
1753 for (size_t cnt = 1; cnt < nused && n > 0; ++cnt)
1754 if (conns[cnt].revents != 0)
1756 fd_ready (conns[cnt].fd);
1758 /* Clean up the CONNS array. */
1759 conns[cnt].fd = -1;
1760 if (cnt < firstfree)
1761 firstfree = cnt;
1762 if (cnt == nused - 1)
1764 --nused;
1765 while (conns[nused - 1].fd == -1);
1767 --n;
1771 /* Now find entries which have timed out. */
1772 assert (nused > 0);
1774 /* We make the timeout length depend on the number of file
1775 descriptors currently used. */
1776 #define ACCEPT_TIMEOUT \
1777 (MAX_ACCEPT_TIMEOUT \
1778 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
1779 time_t laststart = now - ACCEPT_TIMEOUT;
1781 for (size_t cnt = nused - 1; cnt > 0; --cnt)
1783 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
1785 /* Remove the entry, it timed out. */
1786 (void) close (conns[cnt].fd);
1787 conns[cnt].fd = -1;
1789 if (cnt < firstfree)
1790 firstfree = cnt;
1791 if (cnt == nused - 1)
1793 --nused;
1794 while (conns[nused - 1].fd == -1);
1798 if (restart_p (now))
1799 restart ();
#ifdef HAVE_EPOLL
/* Main loop based on epoll, using the epoll descriptor EFD.  The
   function returns only if the listening socket cannot be added to
   EFD; otherwise it loops forever.  Accepted connections are
   registered with EFD and remembered by their accept time in
   STARTTIME, indexed by descriptor.  A connection with data to read is
   unregistered and passed to fd_ready; a connection which stays silent
   for too long is closed.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  /* Number of descriptors registered with EFD; the timeout macros
     derive the timeouts from it.  */
  int nused = 1;
  /* Upper bound for the descriptors with a nonzero STARTTIME entry.  */
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));

            if (fd >= 0)
              {
                /* Try to add the new descriptor.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
        else
          {
            /* Remove the descriptor from the epoll descriptor.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              {
                do
                  --highest;
                while (highest > 0 && starttime[highest] == 0);
              }

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (cnt != sock && starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;

            /* The descriptor is no longer registered.  Without this
               NUSED would only ever grow when connections time out
               and skew the timeouts computed from it.  */
            --nused;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
1898 /* Start all the threads we want. The initial process is thread no. 1. */
1899 void
1900 start_threads (void)
1902 /* Initialize the conditional variable we will use. The only
1903 non-standard attribute we might use is the clock selection. */
1904 pthread_condattr_t condattr;
1905 pthread_condattr_init (&condattr);
1907 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
1908 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
1909 /* Determine whether the monotonous clock is available. */
1910 struct timespec dummy;
1911 # if _POSIX_MONOTONIC_CLOCK == 0
1912 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
1913 # endif
1914 # if _POSIX_CLOCK_SELECTION == 0
1915 if (sysconf (_SC_CLOCK_SELECTION) > 0)
1916 # endif
1917 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
1918 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
1919 timeout_clock = CLOCK_MONOTONIC;
1920 #endif
1922 /* Create the attribute for the threads. They are all created
1923 detached. */
1924 pthread_attr_init (&attr);
1925 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
1926 /* Use 1MB stacks, twice as much for 64-bit architectures. */
1927 pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
1929 /* We allow less than LASTDB threads only for debugging. */
1930 if (debug_level == 0)
1931 nthreads = MAX (nthreads, lastdb);
1933 /* Create the threads which prune the databases. */
1934 // XXX Ideally this work would be done by some of the worker threads.
1935 // XXX But this is problematic since we would need to be able to wake
1936 // XXX them up explicitly as well as part of the group handling the
1937 // XXX ready-list. This requires an operation where we can wait on
1938 // XXX two conditional variables at the same time. This operation
1939 // XXX does not exist (yet).
1940 for (long int i = 0; i < lastdb; ++i)
1942 /* Initialize the conditional variable. */
1943 if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
1945 dbg_log (_("could not initialize conditional variable"));
1946 exit (1);
1949 pthread_t th;
1950 if (dbs[i].enabled
1951 && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
1953 dbg_log (_("could not start clean-up thread; terminating"));
1954 exit (1);
1958 pthread_condattr_destroy (&condattr);
1960 for (long int i = 0; i < nthreads; ++i)
1962 pthread_t th;
1963 if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
1965 if (i == 0)
1967 dbg_log (_("could not start any worker thread; terminating"));
1968 exit (1);
1971 break;
1975 /* Determine how much room for descriptors we should initially
1976 allocate. This might need to change later if we cap the number
1977 with MAXCONN. */
1978 const long int nfds = sysconf (_SC_OPEN_MAX);
1979 #define MINCONN 32
1980 #define MAXCONN 16384
1981 if (nfds == -1 || nfds > MAXCONN)
1982 nconns = MAXCONN;
1983 else if (nfds < MINCONN)
1984 nconns = MINCONN;
1985 else
1986 nconns = nfds;
1988 /* We need memory to pass descriptors on to the worker threads. */
1989 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
1990 /* Array to keep track when connection was accepted. */
1991 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
1993 /* In the main thread we execute the loop which handles incoming
1994 connections. */
1995 #ifdef HAVE_EPOLL
1996 int efd = epoll_create (100);
1997 if (efd != -1)
1999 main_loop_epoll (efd);
2000 close (efd);
2002 #endif
2004 main_loop_poll ();
2008 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2009 this function is called, we are not listening on the nscd socket yet so
2010 we can just use the ordinary lookup functions without causing a lockup */
2011 static void
2012 begin_drop_privileges (void)
2014 struct passwd *pwd = getpwnam (server_user);
2016 if (pwd == NULL)
2018 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2019 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
2020 server_user);
2023 server_uid = pwd->pw_uid;
2024 server_gid = pwd->pw_gid;
2026 /* Save the old UID/GID if we have to change back. */
2027 if (paranoia)
2029 old_uid = getuid ();
2030 old_gid = getgid ();
2033 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
2035 /* This really must never happen. */
2036 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2037 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
2040 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
2042 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
2043 == -1)
2045 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2046 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
2051 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2052 run nscd as the user specified in the configuration file. */
2053 static void
2054 finish_drop_privileges (void)
2056 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2057 /* We need to preserve the capabilities to connect to the audit daemon. */
2058 cap_t new_caps = preserve_capabilities ();
2059 #endif
2061 if (setgroups (server_ngroups, server_groups) == -1)
2063 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2064 error (EXIT_FAILURE, errno, _("setgroups failed"));
2067 int res;
2068 if (paranoia)
2069 res = setresgid (server_gid, server_gid, old_gid);
2070 else
2071 res = setgid (server_gid);
2072 if (res == -1)
2074 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2075 perror ("setgid");
2076 exit (4);
2079 if (paranoia)
2080 res = setresuid (server_uid, server_uid, old_uid);
2081 else
2082 res = setuid (server_uid);
2083 if (res == -1)
2085 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2086 perror ("setuid");
2087 exit (4);
2090 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2091 /* Remove the temporary capabilities. */
2092 install_real_capabilities (new_caps);
2093 #endif