Updated to fedora-glibc-20080703T1203
[glibc.git] / nscd / connections.c
blob0afc95a22763e38dbb4fc25a18726f2314d6e908
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2007, 2008 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
#include <alloca.h>
#include <assert.h>
#include <atomic.h>
#include <error.h>
#include <errno.h>
#include <fcntl.h>
#include <grp.h>
#include <libintl.h>
#include <pthread.h>
#include <pwd.h>
#include <resolv.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#ifdef HAVE_EPOLL
# include <sys/epoll.h>
#endif
#ifdef HAVE_INOTIFY
# include <sys/inotify.h>
#endif
#include <sys/mman.h>
#include <sys/param.h>
#include <sys/poll.h>
#ifdef HAVE_SENDFILE
# include <sys/sendfile.h>
#endif
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/un.h>

#include "nscd.h"
#include "dbg_log.h"
#include "selinux.h"
#include <resolv/resolv.h>
#ifdef HAVE_SENDFILE
# include <kernel-features.h>
#endif
/* Wrapper functions with error checking for standard functions.  Only
   the prototypes live here; the definitions are in another file.  */
extern void *xmalloc (size_t n);
extern void *xcalloc (size_t n, size_t s);
extern void *xrealloc (void *o, size_t n);

/* Support to run nscd as an unprivileged user.  SERVER_USER is the
   switch tested by nscd_init: when it is non-NULL the privilege drop is
   started before the socket is created and finished afterwards.  */
const char *server_user;
static uid_t server_uid;
static gid_t server_gid;
const char *stat_user;
/* Besides root, this uid is allowed to issue GETSTAT requests (see
   handle_request).  */
uid_t stat_uid;
static gid_t *server_groups;
#ifndef NGROUPS
# define NGROUPS 32
#endif
static int server_ngroups;

static pthread_attr_t attr;

static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
82 /* Map request type to a string. */
83 const char *const serv2str[LASTREQ] =
85 [GETPWBYNAME] = "GETPWBYNAME",
86 [GETPWBYUID] = "GETPWBYUID",
87 [GETGRBYNAME] = "GETGRBYNAME",
88 [GETGRBYGID] = "GETGRBYGID",
89 [GETHOSTBYNAME] = "GETHOSTBYNAME",
90 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
91 [GETHOSTBYADDR] = "GETHOSTBYADDR",
92 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
93 [SHUTDOWN] = "SHUTDOWN",
94 [GETSTAT] = "GETSTAT",
95 [INVALIDATE] = "INVALIDATE",
96 [GETFDPW] = "GETFDPW",
97 [GETFDGR] = "GETFDGR",
98 [GETFDHST] = "GETFDHST",
99 [GETAI] = "GETAI",
100 [INITGROUPS] = "INITGROUPS",
101 [GETSERVBYNAME] = "GETSERVBYNAME",
102 [GETSERVBYPORT] = "GETSERVBYPORT",
103 [GETFDSERV] = "GETFDSERV"
106 /* The control data structures for the services. */
107 struct database_dyn dbs[lastdb] =
109 [pwddb] = {
110 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
111 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
112 .enabled = 0,
113 .check_file = 1,
114 .persistent = 0,
115 .propagate = 1,
116 .shared = 0,
117 .max_db_size = DEFAULT_MAX_DB_SIZE,
118 .suggested_module = DEFAULT_SUGGESTED_MODULE,
119 .reset_res = 0,
120 .filename = "/etc/passwd",
121 .db_filename = _PATH_NSCD_PASSWD_DB,
122 .disabled_iov = &pwd_iov_disabled,
123 .postimeout = 3600,
124 .negtimeout = 20,
125 .wr_fd = -1,
126 .ro_fd = -1,
127 .mmap_used = false
129 [grpdb] = {
130 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
131 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
132 .enabled = 0,
133 .check_file = 1,
134 .persistent = 0,
135 .propagate = 1,
136 .shared = 0,
137 .max_db_size = DEFAULT_MAX_DB_SIZE,
138 .suggested_module = DEFAULT_SUGGESTED_MODULE,
139 .reset_res = 0,
140 .filename = "/etc/group",
141 .db_filename = _PATH_NSCD_GROUP_DB,
142 .disabled_iov = &grp_iov_disabled,
143 .postimeout = 3600,
144 .negtimeout = 60,
145 .wr_fd = -1,
146 .ro_fd = -1,
147 .mmap_used = false
149 [hstdb] = {
150 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
151 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
152 .enabled = 0,
153 .check_file = 1,
154 .persistent = 0,
155 .propagate = 0, /* Not used. */
156 .shared = 0,
157 .max_db_size = DEFAULT_MAX_DB_SIZE,
158 .suggested_module = DEFAULT_SUGGESTED_MODULE,
159 .reset_res = 1,
160 .filename = "/etc/hosts",
161 .db_filename = _PATH_NSCD_HOSTS_DB,
162 .disabled_iov = &hst_iov_disabled,
163 .postimeout = 3600,
164 .negtimeout = 20,
165 .wr_fd = -1,
166 .ro_fd = -1,
167 .mmap_used = false
169 [servdb] = {
170 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
171 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
172 .enabled = 0,
173 .check_file = 1,
174 .persistent = 0,
175 .propagate = 0, /* Not used. */
176 .shared = 0,
177 .max_db_size = DEFAULT_MAX_DB_SIZE,
178 .suggested_module = DEFAULT_SUGGESTED_MODULE,
179 .reset_res = 0,
180 .filename = "/etc/services",
181 .db_filename = _PATH_NSCD_SERVICES_DB,
182 .disabled_iov = &serv_iov_disabled,
183 .postimeout = 28800,
184 .negtimeout = 20,
185 .wr_fd = -1,
186 .ro_fd = -1,
187 .mmap_used = false
192 /* Mapping of request type to database. */
193 static struct
195 bool data_request;
196 struct database_dyn *db;
197 } const reqinfo[LASTREQ] =
199 [GETPWBYNAME] = { true, &dbs[pwddb] },
200 [GETPWBYUID] = { true, &dbs[pwddb] },
201 [GETGRBYNAME] = { true, &dbs[grpdb] },
202 [GETGRBYGID] = { true, &dbs[grpdb] },
203 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
204 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
205 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
206 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
207 [SHUTDOWN] = { false, NULL },
208 [GETSTAT] = { false, NULL },
209 [SHUTDOWN] = { false, NULL },
210 [GETFDPW] = { false, &dbs[pwddb] },
211 [GETFDGR] = { false, &dbs[grpdb] },
212 [GETFDHST] = { false, &dbs[hstdb] },
213 [GETAI] = { true, &dbs[hstdb] },
214 [INITGROUPS] = { true, &dbs[grpdb] },
215 [GETSERVBYNAME] = { true, &dbs[servdb] },
216 [GETSERVBYPORT] = { true, &dbs[servdb] },
217 [GETFDSERV] = { false, &dbs[servdb] }
221 /* Initial number of threads to use. */
222 int nthreads = -1;
223 /* Maximum number of threads to use. */
224 int max_nthreads = 32;
226 /* Socket for incoming connections. */
227 static int sock;
229 #ifdef HAVE_INOTIFY
230 /* Inotify descriptor. */
231 static int inotify_fd = -1;
233 /* Watch descriptor for resolver configuration file. */
234 static int resolv_conf_descr = -1;
235 #endif
237 /* Number of times clients had to wait. */
238 unsigned long int client_queued;
240 /* Data structure for recording in-flight memory allocation. */
241 __thread struct mem_in_flight mem_in_flight attribute_tls_model_ie;
242 /* Global list of the mem_in_flight variables of all the threads. */
243 struct mem_in_flight *mem_in_flight_list;
/* Send all LEN bytes starting at BUF over the socket FD.

   send is retried when interrupted by a signal and repeated after a
   partial transfer until everything is written.  MSG_NOSIGNAL is passed
   so that a vanished peer does not raise SIGPIPE.

   Returns the number of bytes actually sent (less than LEN only if send
   returned 0), or the negative send result if an error occurred.  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  size_t n = len;
  ssize_t ret;

  do
    {
      ret = TEMP_FAILURE_RETRY (send (fd, buf, n, MSG_NOSIGNAL));
      if (ret <= 0)
        break;
      /* Skip over what has been transferred already.  */
      buf = (const char *) buf + ret;
      n -= ret;
    }
  while (n > 0);

  return ret < 0 ? ret : len - n;
}
#ifdef HAVE_SENDFILE
/* Transfer LEN bytes of the file FROMFD, starting at offset OFF, to the
   descriptor TOFD using sendfile.

   The call is retried when interrupted by a signal and repeated after a
   partial transfer; sendfile advances OFF itself.

   Returns the number of bytes transferred (less than LEN only if
   sendfile returned 0, e.g. at end of file), or the negative sendfile
   result if an error occurred.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  /* Bytes still outstanding; same counter type as in writeall.  */
  size_t n = len;
  ssize_t ret;

  do
    {
      ret = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, n));
      if (ret <= 0)
        break;
      n -= ret;
    }
  while (n > 0);

  return ret < 0 ? ret : len - n;
}
#endif
/* Markers stored in the per-byte usage map built by
   verify_persistent_db.  The low bits say what kind of object occupies
   a byte; use_begin and use_end flag the first and last byte of an
   object, and use_first records that the object was referenced by a
   hash entry with the FIRST flag set.  */
enum usekey
  {
    use_not = 0,
    /* The following three are not really used, they are symbolic constants.  */
    use_first = 16,
    use_begin = 32,
    use_end = 64,

    use_he = 1,
    use_he_begin = use_he | use_begin,
    use_he_end = use_he | use_end,
#if SEPARATE_KEY
    use_key = 2,
    use_key_begin = use_key | use_begin,
    use_key_end = use_key | use_end,
    use_key_first = use_key_begin | use_first,
#endif
    use_data = 3,
    use_data_begin = use_data | use_begin,
    use_data_end = use_data | use_end,
    use_data_first = use_data_begin | use_first
  };
308 static int
309 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
310 enum usekey use, ref_t start, size_t len)
312 assert (len >= 2);
314 if (start > first_free || start + len > first_free
315 || (start & BLOCK_ALIGN_M1))
316 return 0;
318 if (usemap[start] == use_not)
320 /* Add the start marker. */
321 usemap[start] = use | use_begin;
322 use &= ~use_first;
324 while (--len > 0)
325 if (usemap[++start] != use_not)
326 return 0;
327 else
328 usemap[start] = use;
330 /* Add the end marker. */
331 usemap[start] = use | use_end;
333 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
335 /* Hash entries can't be shared. */
336 if (use == use_he)
337 return 0;
339 usemap[start] |= (use & use_first);
340 use &= ~use_first;
342 while (--len > 1)
343 if (usemap[++start] != use)
344 return 0;
346 if (usemap[++start] != (use | use_end))
347 return 0;
349 else
350 /* Points to a wrong object or somewhere in the middle. */
351 return 0;
353 return 1;
357 /* Verify data in persistent database. */
358 static int
359 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
361 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb);
363 time_t now = time (NULL);
365 struct database_pers_head *head = mem;
366 struct database_pers_head head_copy = *head;
368 /* Check that the header that was read matches the head in the database. */
369 if (memcmp (head, readhead, sizeof (*head)) != 0)
370 return 0;
372 /* First some easy tests: make sure the database header is sane. */
373 if (head->version != DB_VERSION
374 || head->header_size != sizeof (*head)
375 /* We allow a timestamp to be one hour ahead of the current time.
376 This should cover daylight saving time changes. */
377 || head->timestamp > now + 60 * 60 + 60
378 || (head->gc_cycle & 1)
379 || head->module == 0
380 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
381 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
382 || head->first_free < 0
383 || head->first_free > head->data_size
384 || (head->first_free & BLOCK_ALIGN_M1) != 0
385 || head->maxnentries < 0
386 || head->maxnsearched < 0)
387 return 0;
389 uint8_t *usemap = calloc (head->first_free, 1);
390 if (usemap == NULL)
391 return 0;
393 const char *data = (char *) &head->array[roundup (head->module,
394 ALIGN / sizeof (ref_t))];
396 nscd_ssize_t he_cnt = 0;
397 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
399 ref_t trail = head->array[cnt];
400 ref_t work = trail;
401 int tick = 0;
403 while (work != ENDREF)
405 if (! check_use (data, head->first_free, usemap, use_he, work,
406 sizeof (struct hashentry)))
407 goto fail;
409 /* Now we know we can dereference the record. */
410 struct hashentry *here = (struct hashentry *) (data + work);
412 ++he_cnt;
414 /* Make sure the record is for this type of service. */
415 if (here->type >= LASTREQ
416 || reqinfo[here->type].db != &dbs[dbnr])
417 goto fail;
419 /* Validate boolean field value. */
420 if (here->first != false && here->first != true)
421 goto fail;
423 if (here->len < 0)
424 goto fail;
426 /* Now the data. */
427 if (here->packet < 0
428 || here->packet > head->first_free
429 || here->packet + sizeof (struct datahead) > head->first_free)
430 goto fail;
432 struct datahead *dh = (struct datahead *) (data + here->packet);
434 if (! check_use (data, head->first_free, usemap,
435 use_data | (here->first ? use_first : 0),
436 here->packet, dh->allocsize))
437 goto fail;
439 if (dh->allocsize < sizeof (struct datahead)
440 || dh->recsize > dh->allocsize
441 || (dh->notfound != false && dh->notfound != true)
442 || (dh->usable != false && dh->usable != true))
443 goto fail;
445 if (here->key < here->packet + sizeof (struct datahead)
446 || here->key > here->packet + dh->allocsize
447 || here->key + here->len > here->packet + dh->allocsize)
449 #if SEPARATE_KEY
450 /* If keys can appear outside of data, this should be done
451 instead. But gc doesn't mark the data in that case. */
452 if (! check_use (data, head->first_free, usemap,
453 use_key | (here->first ? use_first : 0),
454 here->key, here->len))
455 #endif
456 goto fail;
459 work = here->next;
461 if (work == trail)
462 /* A circular list, this must not happen. */
463 goto fail;
464 if (tick)
465 trail = ((struct hashentry *) (data + trail))->next;
466 tick = 1 - tick;
470 if (he_cnt != head->nentries)
471 goto fail;
473 /* See if all data and keys had at least one reference from
474 he->first == true hashentry. */
475 for (ref_t idx = 0; idx < head->first_free; ++idx)
477 #if SEPARATE_KEY
478 if (usemap[idx] == use_key_begin)
479 goto fail;
480 #endif
481 if (usemap[idx] == use_data_begin)
482 goto fail;
485 /* Finally, make sure the database hasn't changed since the first test. */
486 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
487 goto fail;
489 free (usemap);
490 return 1;
492 fail:
493 free (usemap);
494 return 0;
/* Additional flags for the open calls on the database files: request
   close-on-exec right at open time where the flag is available.  */
#ifdef O_CLOEXEC
# define EXTRA_O_FLAGS O_CLOEXEC
#else
# define EXTRA_O_FLAGS 0
#endif
505 /* Initialize database information structures. */
506 void
507 nscd_init (void)
509 /* Look up unprivileged uid/gid/groups before we start listening on the
510 socket */
511 if (server_user != NULL)
512 begin_drop_privileges ();
514 if (nthreads == -1)
515 /* No configuration for this value, assume a default. */
516 nthreads = 4;
518 #ifdef HAVE_INOTIFY
519 /* Use inotify to recognize changed files. */
520 inotify_fd = inotify_init ();
521 if (inotify_fd != -1)
522 fcntl (inotify_fd, F_SETFL, O_NONBLOCK);
523 #endif
525 for (size_t cnt = 0; cnt < lastdb; ++cnt)
526 if (dbs[cnt].enabled)
528 pthread_rwlock_init (&dbs[cnt].lock, NULL);
529 pthread_mutex_init (&dbs[cnt].memlock, NULL);
531 if (dbs[cnt].persistent)
533 /* Try to open the appropriate file on disk. */
534 int fd = open (dbs[cnt].db_filename, O_RDWR | EXTRA_O_FLAGS);
535 if (fd != -1)
537 char *msg = NULL;
538 struct stat64 st;
539 void *mem;
540 size_t total;
541 struct database_pers_head head;
542 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
543 sizeof (head)));
544 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
546 fail_db_errno:
547 /* The code is single-threaded at this point so
548 using strerror is just fine. */
549 msg = strerror (errno);
550 fail_db:
551 dbg_log (_("invalid persistent database file \"%s\": %s"),
552 dbs[cnt].db_filename, msg);
553 unlink (dbs[cnt].db_filename);
555 else if (head.module == 0 && head.data_size == 0)
557 /* The file has been created, but the head has not
558 been initialized yet. */
559 msg = _("uninitialized header");
560 goto fail_db;
562 else if (head.header_size != (int) sizeof (head))
564 msg = _("header size does not match");
565 goto fail_db;
567 else if ((total = (sizeof (head)
568 + roundup (head.module * sizeof (ref_t),
569 ALIGN)
570 + head.data_size))
571 > st.st_size
572 || total < sizeof (head))
574 msg = _("file size does not match");
575 goto fail_db;
577 /* Note we map with the maximum size allowed for the
578 database. This is likely much larger than the
579 actual file size. This is OK on most OSes since
580 extensions of the underlying file will
581 automatically translate more pages available for
582 memory access. */
583 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
584 PROT_READ | PROT_WRITE,
585 MAP_SHARED, fd, 0))
586 == MAP_FAILED)
587 goto fail_db_errno;
588 else if (!verify_persistent_db (mem, &head, cnt))
590 munmap (mem, total);
591 msg = _("verification failed");
592 goto fail_db;
594 else
596 /* Success. We have the database. */
597 dbs[cnt].head = mem;
598 dbs[cnt].memsize = total;
599 dbs[cnt].data = (char *)
600 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
601 ALIGN / sizeof (ref_t))];
602 dbs[cnt].mmap_used = true;
604 if (dbs[cnt].suggested_module > head.module)
605 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
606 dbnames[cnt]);
608 dbs[cnt].wr_fd = fd;
609 fd = -1;
610 /* We also need a read-only descriptor. */
611 if (dbs[cnt].shared)
613 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
614 O_RDONLY | EXTRA_O_FLAGS);
615 if (dbs[cnt].ro_fd == -1)
616 dbg_log (_("\
617 cannot create read-only descriptor for \"%s\"; no mmap"),
618 dbs[cnt].db_filename);
621 // XXX Shall we test whether the descriptors actually
622 // XXX point to the same file?
625 /* Close the file descriptors in case something went
626 wrong in which case the variable have not been
627 assigned -1. */
628 if (fd != -1)
629 close (fd);
633 if (dbs[cnt].head == NULL)
635 /* No database loaded. Allocate the data structure,
636 possibly on disk. */
637 struct database_pers_head head;
638 size_t total = (sizeof (head)
639 + roundup (dbs[cnt].suggested_module
640 * sizeof (ref_t), ALIGN)
641 + (dbs[cnt].suggested_module
642 * DEFAULT_DATASIZE_PER_BUCKET));
644 /* Try to create the database. If we do not need a
645 persistent database create a temporary file. */
646 int fd;
647 int ro_fd = -1;
648 if (dbs[cnt].persistent)
650 fd = open (dbs[cnt].db_filename,
651 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | EXTRA_O_FLAGS,
652 S_IRUSR | S_IWUSR);
653 if (fd != -1 && dbs[cnt].shared)
654 ro_fd = open (dbs[cnt].db_filename,
655 O_RDONLY | EXTRA_O_FLAGS);
657 else
659 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
660 fd = mkostemp (fname, EXTRA_O_FLAGS);
662 /* We do not need the file name anymore after we
663 opened another file descriptor in read-only mode. */
664 if (fd != -1)
666 if (dbs[cnt].shared)
667 ro_fd = open (fname, O_RDONLY | EXTRA_O_FLAGS);
669 unlink (fname);
673 if (fd == -1)
675 if (errno == EEXIST)
677 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
678 dbnames[cnt], dbs[cnt].db_filename);
679 // XXX Correct way to terminate?
680 exit (1);
683 if (dbs[cnt].persistent)
684 dbg_log (_("cannot create %s; no persistent database used"),
685 dbs[cnt].db_filename);
686 else
687 dbg_log (_("cannot create %s; no sharing possible"),
688 dbs[cnt].db_filename);
690 dbs[cnt].persistent = 0;
691 // XXX remember: no mmap
693 else
695 /* Tell the user if we could not create the read-only
696 descriptor. */
697 if (ro_fd == -1 && dbs[cnt].shared)
698 dbg_log (_("\
699 cannot create read-only descriptor for \"%s\"; no mmap"),
700 dbs[cnt].db_filename);
702 /* Before we create the header, initialiye the hash
703 table. So that if we get interrupted if writing
704 the header we can recognize a partially initialized
705 database. */
706 size_t ps = sysconf (_SC_PAGESIZE);
707 char tmpbuf[ps];
708 assert (~ENDREF == 0);
709 memset (tmpbuf, '\xff', ps);
711 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
712 off_t offset = sizeof (head);
714 size_t towrite;
715 if (offset % ps != 0)
717 towrite = MIN (remaining, ps - (offset % ps));
718 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
719 goto write_fail;
720 offset += towrite;
721 remaining -= towrite;
724 while (remaining > ps)
726 if (pwrite (fd, tmpbuf, ps, offset) == -1)
727 goto write_fail;
728 offset += ps;
729 remaining -= ps;
732 if (remaining > 0
733 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
734 goto write_fail;
736 /* Create the header of the file. */
737 struct database_pers_head head =
739 .version = DB_VERSION,
740 .header_size = sizeof (head),
741 .module = dbs[cnt].suggested_module,
742 .data_size = (dbs[cnt].suggested_module
743 * DEFAULT_DATASIZE_PER_BUCKET),
744 .first_free = 0
746 void *mem;
748 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
749 != sizeof (head))
750 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
751 != 0)
752 || (mem = mmap (NULL, dbs[cnt].max_db_size,
753 PROT_READ | PROT_WRITE,
754 MAP_SHARED, fd, 0)) == MAP_FAILED)
756 write_fail:
757 unlink (dbs[cnt].db_filename);
758 dbg_log (_("cannot write to database file %s: %s"),
759 dbs[cnt].db_filename, strerror (errno));
760 dbs[cnt].persistent = 0;
762 else
764 /* Success. */
765 dbs[cnt].head = mem;
766 dbs[cnt].data = (char *)
767 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
768 ALIGN / sizeof (ref_t))];
769 dbs[cnt].memsize = total;
770 dbs[cnt].mmap_used = true;
772 /* Remember the descriptors. */
773 dbs[cnt].wr_fd = fd;
774 dbs[cnt].ro_fd = ro_fd;
775 fd = -1;
776 ro_fd = -1;
779 if (fd != -1)
780 close (fd);
781 if (ro_fd != -1)
782 close (ro_fd);
786 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
787 /* We do not check here whether the O_CLOEXEC provided to the
788 open call was successful or not. The two fcntl calls are
789 only performed once each per process start-up and therefore
790 is not noticeable at all. */
791 if (paranoia
792 && ((dbs[cnt].wr_fd != -1
793 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
794 || (dbs[cnt].ro_fd != -1
795 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
797 dbg_log (_("\
798 cannot set socket to close on exec: %s; disabling paranoia mode"),
799 strerror (errno));
800 paranoia = 0;
802 #endif
804 if (dbs[cnt].head == NULL)
806 /* We do not use the persistent database. Just
807 create an in-memory data structure. */
808 assert (! dbs[cnt].persistent);
810 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
811 + (dbs[cnt].suggested_module
812 * sizeof (ref_t)));
813 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
814 assert (~ENDREF == 0);
815 memset (dbs[cnt].head->array, '\xff',
816 dbs[cnt].suggested_module * sizeof (ref_t));
817 dbs[cnt].head->module = dbs[cnt].suggested_module;
818 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
819 * dbs[cnt].head->module);
820 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
821 dbs[cnt].head->first_free = 0;
823 dbs[cnt].shared = 0;
824 assert (dbs[cnt].ro_fd == -1);
827 dbs[cnt].inotify_descr = -1;
828 if (dbs[cnt].check_file)
830 #ifdef HAVE_INOTIFY
831 if (inotify_fd < 0
832 || (dbs[cnt].inotify_descr
833 = inotify_add_watch (inotify_fd, dbs[cnt].filename,
834 IN_DELETE_SELF | IN_MODIFY)) < 0)
835 /* We cannot notice changes in the main thread. */
836 #endif
838 /* We need the modification date of the file. */
839 struct stat64 st;
841 if (stat64 (dbs[cnt].filename, &st) < 0)
843 /* We cannot stat() the file, disable file checking. */
844 dbg_log (_("cannot stat() file `%s': %s"),
845 dbs[cnt].filename, strerror (errno));
846 dbs[cnt].check_file = 0;
848 else
849 dbs[cnt].file_mtime = st.st_mtime;
853 #ifdef HAVE_INOTIFY
854 if (cnt == hstdb && inotify_fd >= -1)
855 /* We also monitor the resolver configuration file. */
856 resolv_conf_descr = inotify_add_watch (inotify_fd,
857 _PATH_RESCONF,
858 IN_DELETE_SELF | IN_MODIFY);
859 #endif
862 /* Create the socket. */
863 sock = socket (AF_UNIX, SOCK_STREAM, 0);
864 if (sock < 0)
866 dbg_log (_("cannot open socket: %s"), strerror (errno));
867 exit (errno == EACCES ? 4 : 1);
869 /* Bind a name to the socket. */
870 struct sockaddr_un sock_addr;
871 sock_addr.sun_family = AF_UNIX;
872 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
873 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
875 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
876 exit (errno == EACCES ? 4 : 1);
879 /* We don't want to get stuck on accept. */
880 int fl = fcntl (sock, F_GETFL);
881 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
883 dbg_log (_("cannot change socket to nonblocking mode: %s"),
884 strerror (errno));
885 exit (1);
888 /* The descriptor needs to be closed on exec. */
889 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
891 dbg_log (_("cannot set socket to close on exec: %s"),
892 strerror (errno));
893 exit (1);
896 /* Set permissions for the socket. */
897 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
899 /* Set the socket up to accept connections. */
900 if (listen (sock, SOMAXCONN) < 0)
902 dbg_log (_("cannot enable socket to accept connections: %s"),
903 strerror (errno));
904 exit (1);
907 /* Change to unprivileged uid/gid/groups if specifed in config file */
908 if (server_user != NULL)
909 finish_drop_privileges ();
913 /* Close the connections. */
914 void
915 close_sockets (void)
917 close (sock);
921 static void
922 invalidate_cache (char *key, int fd)
924 dbtype number;
925 int32_t resp;
927 for (number = pwddb; number < lastdb; ++number)
928 if (strcmp (key, dbnames[number]) == 0)
930 if (dbs[number].reset_res)
931 res_init ();
933 break;
936 if (number == lastdb)
938 resp = EINVAL;
939 writeall (fd, &resp, sizeof (resp));
940 return;
943 if (dbs[number].enabled)
945 pthread_mutex_lock (&dbs[number].prune_lock);
946 prune_cache (&dbs[number], LONG_MAX, fd);
947 pthread_mutex_unlock (&dbs[number].prune_lock);
949 else
951 resp = 0;
952 writeall (fd, &resp, sizeof (resp));
957 #ifdef SCM_RIGHTS
958 static void
959 send_ro_fd (struct database_dyn *db, char *key, int fd)
961 /* If we do not have an read-only file descriptor do nothing. */
962 if (db->ro_fd == -1)
963 return;
965 /* We need to send some data along with the descriptor. */
966 uint64_t mapsize = (db->head->data_size
967 + roundup (db->head->module * sizeof (ref_t), ALIGN)
968 + sizeof (struct database_pers_head));
969 struct iovec iov[2];
970 iov[0].iov_base = key;
971 iov[0].iov_len = strlen (key) + 1;
972 iov[1].iov_base = &mapsize;
973 iov[1].iov_len = sizeof (mapsize);
975 /* Prepare the control message to transfer the descriptor. */
976 union
978 struct cmsghdr hdr;
979 char bytes[CMSG_SPACE (sizeof (int))];
980 } buf;
981 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
982 .msg_control = buf.bytes,
983 .msg_controllen = sizeof (buf) };
984 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
986 cmsg->cmsg_level = SOL_SOCKET;
987 cmsg->cmsg_type = SCM_RIGHTS;
988 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
990 *(int *) CMSG_DATA (cmsg) = db->ro_fd;
992 msg.msg_controllen = cmsg->cmsg_len;
994 /* Send the control message. We repeat when we are interrupted but
995 everything else is ignored. */
996 #ifndef MSG_NOSIGNAL
997 # define MSG_NOSIGNAL 0
998 #endif
999 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
1001 if (__builtin_expect (debug_level > 0, 0))
1002 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
1004 #endif /* SCM_RIGHTS */
1007 /* Handle new request. */
1008 static void
1009 handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
1011 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
1013 if (debug_level > 0)
1014 dbg_log (_("\
1015 cannot handle old request version %d; current version is %d"),
1016 req->version, NSCD_VERSION);
1017 return;
1020 /* Perform the SELinux check before we go on to the standard checks. */
1021 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
1023 if (debug_level > 0)
1025 #ifdef SO_PEERCRED
1026 # ifdef PATH_MAX
1027 char buf[PATH_MAX];
1028 # else
1029 char buf[4096];
1030 # endif
1032 snprintf (buf, sizeof (buf), "/proc/%ld/exe", (long int) pid);
1033 ssize_t n = readlink (buf, buf, sizeof (buf) - 1);
1035 if (n <= 0)
1036 dbg_log (_("\
1037 request from %ld not handled due to missing permission"), (long int) pid);
1038 else
1040 buf[n] = '\0';
1041 dbg_log (_("\
1042 request from '%s' [%ld] not handled due to missing permission"),
1043 buf, (long int) pid);
1045 #else
1046 dbg_log (_("request not handled due to missing permission"));
1047 #endif
1049 return;
1052 struct database_dyn *db = reqinfo[req->type].db;
1054 /* See whether we can service the request from the cache. */
1055 if (__builtin_expect (reqinfo[req->type].data_request, true))
1057 if (__builtin_expect (debug_level, 0) > 0)
1059 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
1061 char buf[INET6_ADDRSTRLEN];
1063 dbg_log ("\t%s (%s)", serv2str[req->type],
1064 inet_ntop (req->type == GETHOSTBYADDR
1065 ? AF_INET : AF_INET6,
1066 key, buf, sizeof (buf)));
1068 else
1069 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1072 /* Is this service enabled? */
1073 if (__builtin_expect (!db->enabled, 0))
1075 /* No, sent the prepared record. */
1076 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1077 db->disabled_iov->iov_len,
1078 MSG_NOSIGNAL))
1079 != (ssize_t) db->disabled_iov->iov_len
1080 && __builtin_expect (debug_level, 0) > 0)
1082 /* We have problems sending the result. */
1083 char buf[256];
1084 dbg_log (_("cannot write result: %s"),
1085 strerror_r (errno, buf, sizeof (buf)));
1088 return;
1091 /* Be sure we can read the data. */
1092 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
1094 ++db->head->rdlockdelayed;
1095 pthread_rwlock_rdlock (&db->lock);
1098 /* See whether we can handle it from the cache. */
1099 struct datahead *cached;
1100 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1101 db, uid);
1102 if (cached != NULL)
1104 /* Hurray it's in the cache. */
1105 ssize_t nwritten;
1107 #ifdef HAVE_SENDFILE
1108 if (__builtin_expect (db->mmap_used, 1))
1110 assert (db->wr_fd != -1);
1111 assert ((char *) cached->data > (char *) db->data);
1112 assert ((char *) cached->data - (char *) db->head
1113 + cached->recsize
1114 <= (sizeof (struct database_pers_head)
1115 + db->head->module * sizeof (ref_t)
1116 + db->head->data_size));
1117 nwritten = sendfileall (fd, db->wr_fd,
1118 (char *) cached->data
1119 - (char *) db->head, cached->recsize);
1120 # ifndef __ASSUME_SENDFILE
1121 if (nwritten == -1 && errno == ENOSYS)
1122 goto use_write;
1123 # endif
1125 else
1126 # ifndef __ASSUME_SENDFILE
1127 use_write:
1128 # endif
1129 #endif
1130 nwritten = writeall (fd, cached->data, cached->recsize);
1132 if (nwritten != cached->recsize
1133 && __builtin_expect (debug_level, 0) > 0)
1135 /* We have problems sending the result. */
1136 char buf[256];
1137 dbg_log (_("cannot write result: %s"),
1138 strerror_r (errno, buf, sizeof (buf)));
1141 pthread_rwlock_unlock (&db->lock);
1143 return;
1146 pthread_rwlock_unlock (&db->lock);
1148 else if (__builtin_expect (debug_level, 0) > 0)
1150 if (req->type == INVALIDATE)
1151 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1152 else
1153 dbg_log ("\t%s", serv2str[req->type]);
1156 /* Handle the request. */
1157 switch (req->type)
1159 case GETPWBYNAME:
1160 addpwbyname (db, fd, req, key, uid);
1161 break;
1163 case GETPWBYUID:
1164 addpwbyuid (db, fd, req, key, uid);
1165 break;
1167 case GETGRBYNAME:
1168 addgrbyname (db, fd, req, key, uid);
1169 break;
1171 case GETGRBYGID:
1172 addgrbygid (db, fd, req, key, uid);
1173 break;
1175 case GETHOSTBYNAME:
1176 addhstbyname (db, fd, req, key, uid);
1177 break;
1179 case GETHOSTBYNAMEv6:
1180 addhstbynamev6 (db, fd, req, key, uid);
1181 break;
1183 case GETHOSTBYADDR:
1184 addhstbyaddr (db, fd, req, key, uid);
1185 break;
1187 case GETHOSTBYADDRv6:
1188 addhstbyaddrv6 (db, fd, req, key, uid);
1189 break;
1191 case GETAI:
1192 addhstai (db, fd, req, key, uid);
1193 break;
1195 case INITGROUPS:
1196 addinitgroups (db, fd, req, key, uid);
1197 break;
1199 case GETSERVBYNAME:
1200 addservbyname (db, fd, req, key, uid);
1201 break;
1203 case GETSERVBYPORT:
1204 addservbyport (db, fd, req, key, uid);
1205 break;
1207 case GETSTAT:
1208 case SHUTDOWN:
1209 case INVALIDATE:
1211 /* Get the callers credentials. */
1212 #ifdef SO_PEERCRED
1213 struct ucred caller;
1214 socklen_t optlen = sizeof (caller);
1216 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1218 char buf[256];
1220 dbg_log (_("error getting caller's id: %s"),
1221 strerror_r (errno, buf, sizeof (buf)));
1222 break;
1225 uid = caller.uid;
1226 #else
1227 /* Some systems have no SO_PEERCRED implementation. They don't
1228 care about security so we don't as well. */
1229 uid = 0;
1230 #endif
1233 /* Accept shutdown, getstat and invalidate only from root. For
1234 the stat call also allow the user specified in the config file. */
1235 if (req->type == GETSTAT)
1237 if (uid == 0 || uid == stat_uid)
1238 send_stats (fd, dbs);
1240 else if (uid == 0)
1242 if (req->type == INVALIDATE)
1243 invalidate_cache (key, fd);
1244 else
1245 termination_handler (0);
1247 break;
1249 case GETFDPW:
1250 case GETFDGR:
1251 case GETFDHST:
1252 case GETFDSERV:
1253 #ifdef SCM_RIGHTS
1254 send_ro_fd (reqinfo[req->type].db, key, fd);
1255 #endif
1256 break;
1258 default:
1259 /* Ignore the command, it's nothing we know. */
1260 break;
1265 /* Restart the process. */
1266 static void
1267 restart (void)
1269 /* First determine the parameters. We do not use the parameters
1270 passed to main() since in case nscd is started by running the
1271 dynamic linker this will not work. Yes, this is not the usual
1272 case but nscd is part of glibc and we occasionally do this. */
1273 size_t buflen = 1024;
1274 char *buf = alloca (buflen);
1275 size_t readlen = 0;
1276 int fd = open ("/proc/self/cmdline", O_RDONLY);
1277 if (fd == -1)
1279 dbg_log (_("\
1280 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1281 strerror (errno));
1283 paranoia = 0;
1284 return;
1287 while (1)
1289 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1290 buflen - readlen));
1291 if (n == -1)
1293 dbg_log (_("\
1294 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1295 strerror (errno));
1297 close (fd);
1298 paranoia = 0;
1299 return;
1302 readlen += n;
1304 if (readlen < buflen)
1305 break;
1307 /* We might have to extend the buffer. */
1308 size_t old_buflen = buflen;
1309 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1310 buf = memmove (newp, buf, old_buflen);
1313 close (fd);
1315 /* Parse the command line. Worst case scenario: every two
1316 characters form one parameter (one character plus NUL). */
1317 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1318 int argc = 0;
1320 char *cp = buf;
1321 while (cp < buf + readlen)
1323 argv[argc++] = cp;
1324 cp = (char *) rawmemchr (cp, '\0') + 1;
1326 argv[argc] = NULL;
1328 /* Second, change back to the old user if we changed it. */
1329 if (server_user != NULL)
1331 if (setresuid (old_uid, old_uid, old_uid) != 0)
1333 dbg_log (_("\
1334 cannot change to old UID: %s; disabling paranoia mode"),
1335 strerror (errno));
1337 paranoia = 0;
1338 return;
1341 if (setresgid (old_gid, old_gid, old_gid) != 0)
1343 dbg_log (_("\
1344 cannot change to old GID: %s; disabling paranoia mode"),
1345 strerror (errno));
1347 setuid (server_uid);
1348 paranoia = 0;
1349 return;
1353 /* Next change back to the old working directory. */
1354 if (chdir (oldcwd) == -1)
1356 dbg_log (_("\
1357 cannot change to old working directory: %s; disabling paranoia mode"),
1358 strerror (errno));
1360 if (server_user != NULL)
1362 setuid (server_uid);
1363 setgid (server_gid);
1365 paranoia = 0;
1366 return;
1369 /* Synchronize memory. */
1370 int32_t certainly[lastdb];
1371 for (int cnt = 0; cnt < lastdb; ++cnt)
1372 if (dbs[cnt].enabled)
1374 /* Make sure nobody keeps using the database. */
1375 dbs[cnt].head->timestamp = 0;
1376 certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
1377 dbs[cnt].head->nscd_certainly_running = 0;
1379 if (dbs[cnt].persistent)
1380 // XXX async OK?
1381 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1384 /* The preparations are done. */
1385 execv ("/proc/self/exe", argv);
1387 /* If we come here, we will never be able to re-exec. */
1388 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1389 strerror (errno));
1391 if (server_user != NULL)
1393 setuid (server_uid);
1394 setgid (server_gid);
1396 if (chdir ("/") != 0)
1397 dbg_log (_("cannot change current working directory to \"/\": %s"),
1398 strerror (errno));
1399 paranoia = 0;
1401 /* Reenable the databases. */
1402 time_t now = time (NULL);
1403 for (int cnt = 0; cnt < lastdb; ++cnt)
1404 if (dbs[cnt].enabled)
1406 dbs[cnt].head->timestamp = now;
1407 dbs[cnt].head->nscd_certainly_running = certainly[cnt];
/* List of file descriptors.  Entries are linked through NEXT into the
   circular READYLIST; an entry whose NEXT is NULL is not queued and
   can be reused for a new descriptor.  */
struct fdlist
{
  int fd;
  struct fdlist *next;
};
/* Memory allocated for the list.  */
static struct fdlist *fdlist;
/* List of currently ready-to-read file descriptors.  Points to the
   most recently queued entry; READYLIST->next is the entry which is
   handed out next.  NULL if nothing is queued.  */
static struct fdlist *readylist;
/* Conditional variable and mutex to signal availability of entries in
   READYLIST.  Both are initialized statically.  The clock selected in
   start_threads is applied to the per-database prune condvars, which
   are initialized dynamically there.  */
static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;

/* The clock to use for the timed waits on the prune condvars.
   start_threads switches it to CLOCK_MONOTONIC if that is usable.  */
static clockid_t timeout_clock = CLOCK_REALTIME;

/* Number of threads ready to handle the READYLIST.  The worker
   threads update it while holding READYLIST_LOCK.  */
static unsigned long int nready;
1436 /* Function for the clean-up threads. */
1437 static void *
1438 __attribute__ ((__noreturn__))
1439 nscd_run_prune (void *p)
1441 const long int my_number = (long int) p;
1442 assert (dbs[my_number].enabled);
1444 int dont_need_update = setup_thread (&dbs[my_number]);
1446 time_t now = time (NULL);
1448 /* We are running. */
1449 dbs[my_number].head->timestamp = now;
1451 struct timespec prune_ts;
1452 if (__builtin_expect (clock_gettime (timeout_clock, &prune_ts) == -1, 0))
1453 /* Should never happen. */
1454 abort ();
1456 /* Compute the initial timeout time. Prevent all the timers to go
1457 off at the same time by adding a db-based value. */
1458 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1459 dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;
1461 pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
1462 pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;
1464 pthread_mutex_lock (prune_lock);
1465 while (1)
1467 /* Wait, but not forever. */
1468 int e = 0;
1469 if (! dbs[my_number].clear_cache)
1470 e = pthread_cond_timedwait (prune_cond, prune_lock, &prune_ts);
1471 assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));
1473 time_t next_wait;
1474 now = time (NULL);
1475 if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time
1476 || dbs[my_number].clear_cache)
1478 /* We will determine the new timout values based on the
1479 cache content. Should there be concurrent additions to
1480 the cache which are not accounted for in the cache
1481 pruning we want to know about it. Therefore set the
1482 timeout to the maximum. It will be descreased when adding
1483 new entries to the cache, if necessary. */
1484 if (sizeof (time_t) == sizeof (long int))
1485 dbs[my_number].wakeup_time = LONG_MAX;
1486 else
1487 dbs[my_number].wakeup_time = INT_MAX;
1489 /* Unconditionally reset the flag. */
1490 time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
1491 dbs[my_number].clear_cache = 0;
1493 pthread_mutex_unlock (prune_lock);
1495 next_wait = prune_cache (&dbs[my_number], prune_now, -1);
1497 next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1498 /* If clients cannot determine for sure whether nscd is running
1499 we need to wake up occasionally to update the timestamp.
1500 Wait 90% of the update period. */
1501 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1502 if (__builtin_expect (! dont_need_update, 0))
1504 next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1505 dbs[my_number].head->timestamp = now;
1508 pthread_mutex_lock (prune_lock);
1510 /* Make it known when we will wake up again. */
1511 if (now + next_wait < dbs[my_number].wakeup_time)
1512 dbs[my_number].wakeup_time = now + next_wait;
1513 else
1514 next_wait = dbs[my_number].wakeup_time - now;
1516 else
1517 /* The cache was just pruned. Do not do it again now. Just
1518 use the new timeout value. */
1519 next_wait = dbs[my_number].wakeup_time - now;
1521 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1522 /* Should never happen. */
1523 abort ();
1525 /* Compute next timeout time. */
1526 prune_ts.tv_sec += next_wait;
1531 /* This is the main loop. It is replicated in different threads but
1532 the the use of the ready list makes sure only one thread handles an
1533 incoming connection. */
1534 static void *
1535 __attribute__ ((__noreturn__))
1536 nscd_run_worker (void *p)
1538 char buf[256];
1540 /* Initialize the memory-in-flight list. */
1541 for (enum in_flight idx = 0; idx < IDX_last; ++idx)
1542 mem_in_flight.block[idx].dbidx = -1;
1543 /* And queue this threads structure. */
1545 mem_in_flight.next = mem_in_flight_list;
1546 while (atomic_compare_and_exchange_bool_acq (&mem_in_flight_list,
1547 &mem_in_flight,
1548 mem_in_flight.next) != 0);
1550 /* Initial locking. */
1551 pthread_mutex_lock (&readylist_lock);
1553 /* One more thread available. */
1554 ++nready;
1556 while (1)
1558 while (readylist == NULL)
1559 pthread_cond_wait (&readylist_cond, &readylist_lock);
1561 struct fdlist *it = readylist->next;
1562 if (readylist->next == readylist)
1563 /* Just one entry on the list. */
1564 readylist = NULL;
1565 else
1566 readylist->next = it->next;
1568 /* Extract the information and mark the record ready to be used
1569 again. */
1570 int fd = it->fd;
1571 it->next = NULL;
1573 /* One more thread available. */
1574 --nready;
1576 /* We are done with the list. */
1577 pthread_mutex_unlock (&readylist_lock);
1579 /* We do not want to block on a short read or so. */
1580 int fl = fcntl (fd, F_GETFL);
1581 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1582 goto close_and_out;
1584 /* Now read the request. */
1585 request_header req;
1586 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1587 != sizeof (req), 0))
1589 /* We failed to read data. Note that this also might mean we
1590 failed because we would have blocked. */
1591 if (debug_level > 0)
1592 dbg_log (_("short read while reading request: %s"),
1593 strerror_r (errno, buf, sizeof (buf)));
1594 goto close_and_out;
1597 /* Check whether this is a valid request type. */
1598 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1599 goto close_and_out;
1601 /* Some systems have no SO_PEERCRED implementation. They don't
1602 care about security so we don't as well. */
1603 uid_t uid = -1;
1604 #ifdef SO_PEERCRED
1605 pid_t pid = 0;
1607 if (__builtin_expect (debug_level > 0, 0))
1609 struct ucred caller;
1610 socklen_t optlen = sizeof (caller);
1612 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1613 pid = caller.pid;
1615 #else
1616 const pid_t pid = 0;
1617 #endif
1619 /* It should not be possible to crash the nscd with a silly
1620 request (i.e., a terribly large key). We limit the size to 1kb. */
1621 if (__builtin_expect (req.key_len, 1) < 0
1622 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1624 if (debug_level > 0)
1625 dbg_log (_("key length in request too long: %d"), req.key_len);
1627 else
1629 /* Get the key. */
1630 char keybuf[MAXKEYLEN];
1632 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1633 req.key_len))
1634 != req.key_len, 0))
1636 /* Again, this can also mean we would have blocked. */
1637 if (debug_level > 0)
1638 dbg_log (_("short read while reading request key: %s"),
1639 strerror_r (errno, buf, sizeof (buf)));
1640 goto close_and_out;
1643 if (__builtin_expect (debug_level, 0) > 0)
1645 #ifdef SO_PEERCRED
1646 if (pid != 0)
1647 dbg_log (_("\
1648 handle_request: request received (Version = %d) from PID %ld"),
1649 req.version, (long int) pid);
1650 else
1651 #endif
1652 dbg_log (_("\
1653 handle_request: request received (Version = %d)"), req.version);
1656 /* Phew, we got all the data, now process it. */
1657 handle_request (fd, &req, keybuf, uid, pid);
1660 close_and_out:
1661 /* We are done. */
1662 close (fd);
1664 /* Re-locking. */
1665 pthread_mutex_lock (&readylist_lock);
1667 /* One more thread available. */
1668 ++nready;
/* Number of connection slots.  start_threads computes the value and
   uses it to size FDLIST and STARTTIME; the poll loop uses it to size
   its descriptor array.  */
static unsigned int nconns;
1675 static void
1676 fd_ready (int fd)
1678 pthread_mutex_lock (&readylist_lock);
1680 /* Find an empty entry in FDLIST. */
1681 size_t inner;
1682 for (inner = 0; inner < nconns; ++inner)
1683 if (fdlist[inner].next == NULL)
1684 break;
1685 assert (inner < nconns);
1687 fdlist[inner].fd = fd;
1689 if (readylist == NULL)
1690 readylist = fdlist[inner].next = &fdlist[inner];
1691 else
1693 fdlist[inner].next = readylist->next;
1694 readylist = readylist->next = &fdlist[inner];
1697 bool do_signal = true;
1698 if (__builtin_expect (nready == 0, 0))
1700 ++client_queued;
1701 do_signal = false;
1703 /* Try to start another thread to help out. */
1704 pthread_t th;
1705 if (nthreads < max_nthreads
1706 && pthread_create (&th, &attr, nscd_run_worker,
1707 (void *) (long int) nthreads) == 0)
1709 /* We got another thread. */
1710 ++nthreads;
1711 /* The new thread might need a kick. */
1712 do_signal = true;
1717 pthread_mutex_unlock (&readylist_lock);
1719 /* Tell one of the worker threads there is work to do. */
1720 if (do_signal)
1721 pthread_cond_signal (&readylist_cond);
1725 /* Check whether restarting should happen. */
1726 static inline int
1727 restart_p (time_t now)
1729 return (paranoia && readylist == NULL && nready == nthreads
1730 && now >= restart_time);
/* Array for times a connection was accepted.  The poll loop indexes it
   by the slot in its descriptor array, the epoll loop by the
   descriptor number.  In the epoll loop a value of zero means that no
   connection is pending for that descriptor.  */
static time_t *starttime;
1738 static void
1739 __attribute__ ((__noreturn__))
1740 main_loop_poll (void)
1742 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1743 * sizeof (conns[0]));
1745 conns[0].fd = sock;
1746 conns[0].events = POLLRDNORM;
1747 size_t nused = 1;
1748 size_t firstfree = 1;
1750 #ifdef HAVE_INOTIFY
1751 if (inotify_fd != -1)
1753 conns[1].fd = inotify_fd;
1754 conns[1].events = POLLRDNORM;
1755 nused = 2;
1756 firstfree = 2;
1758 #endif
1760 while (1)
1762 /* Wait for any event. We wait at most a couple of seconds so
1763 that we can check whether we should close any of the accepted
1764 connections since we have not received a request. */
1765 #define MAX_ACCEPT_TIMEOUT 30
1766 #define MIN_ACCEPT_TIMEOUT 5
1767 #define MAIN_THREAD_TIMEOUT \
1768 (MAX_ACCEPT_TIMEOUT * 1000 \
1769 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1771 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1773 time_t now = time (NULL);
1775 /* If there is a descriptor ready for reading or there is a new
1776 connection, process this now. */
1777 if (n > 0)
1779 if (conns[0].revents != 0)
1781 /* We have a new incoming connection. Accept the connection. */
1782 int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
1784 /* Use the descriptor if we have not reached the limit. */
1785 if (fd >= 0)
1787 if (firstfree < nconns)
1789 conns[firstfree].fd = fd;
1790 conns[firstfree].events = POLLRDNORM;
1791 starttime[firstfree] = now;
1792 if (firstfree >= nused)
1793 nused = firstfree + 1;
1796 ++firstfree;
1797 while (firstfree < nused && conns[firstfree].fd != -1);
1799 else
1800 /* We cannot use the connection so close it. */
1801 close (fd);
1804 --n;
1807 size_t first = 1;
1808 #ifdef HAVE_INOTIFY
1809 if (conns[1].fd == inotify_fd)
1811 if (conns[1].revents != 0)
1813 bool done[lastdb] = { false, };
1814 union
1816 struct inotify_event i;
1817 char buf[100];
1818 } inev;
1820 while (TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
1821 sizeof (inev)))
1822 >= (ssize_t) sizeof (struct inotify_event))
1824 /* Check which of the files changed. */
1825 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1826 if (!done[dbcnt]
1827 && (inev.i.wd == dbs[dbcnt].inotify_descr
1828 || (dbcnt == hstdb
1829 && inev.i.wd == resolv_conf_descr)))
1831 if (dbcnt == hstdb
1832 && inev.i.wd == resolv_conf_descr)
1833 res_init ();
1835 pthread_mutex_lock (&dbs[dbcnt].prune_lock);
1836 dbs[dbcnt].clear_cache = 1;
1837 pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
1838 pthread_cond_signal (&dbs[dbcnt].prune_cond);
1840 done[dbcnt] = true;
1841 break;
1845 --n;
1848 first = 2;
1850 #endif
1852 for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
1853 if (conns[cnt].revents != 0)
1855 fd_ready (conns[cnt].fd);
1857 /* Clean up the CONNS array. */
1858 conns[cnt].fd = -1;
1859 if (cnt < firstfree)
1860 firstfree = cnt;
1861 if (cnt == nused - 1)
1863 --nused;
1864 while (conns[nused - 1].fd == -1);
1866 --n;
1870 /* Now find entries which have timed out. */
1871 assert (nused > 0);
1873 /* We make the timeout length depend on the number of file
1874 descriptors currently used. */
1875 #define ACCEPT_TIMEOUT \
1876 (MAX_ACCEPT_TIMEOUT \
1877 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
1878 time_t laststart = now - ACCEPT_TIMEOUT;
1880 for (size_t cnt = nused - 1; cnt > 0; --cnt)
1882 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
1884 /* Remove the entry, it timed out. */
1885 (void) close (conns[cnt].fd);
1886 conns[cnt].fd = -1;
1888 if (cnt < firstfree)
1889 firstfree = cnt;
1890 if (cnt == nused - 1)
1892 --nused;
1893 while (conns[nused - 1].fd == -1);
1897 if (restart_p (now))
1898 restart ();
#ifdef HAVE_EPOLL
/* Main loop of the main thread, based on epoll.  EFD is the epoll
   descriptor to use.  The function returns only if the listening
   socket or the inotify descriptor cannot be registered with EFD;
   the caller is then expected to fall back to another loop.
   Otherwise it loops forever: new connections are accepted and
   registered, connections which become readable are handed to the
   workers via fd_ready, and connections which stay silent for too long
   are closed.

   NUSED counts the registered descriptors.  The local variable must
   keep this name since the MAIN_THREAD_TIMEOUT and ACCEPT_TIMEOUT
   macros refer to it.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  int nused = 1;
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

#ifdef HAVE_INOTIFY
  if (inotify_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = inotify_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
        /* We cannot use epoll.  */
        return;
      nused = 2;
    }
#endif

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));

            if (fd >= 0)
              {
                /* Try to add the new descriptor.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
#ifdef HAVE_INOTIFY
        else if (revs[cnt].data.fd == inotify_fd)
          {
            /* This mirrors the inotify handling of the poll loop.  */
            bool done[lastdb] = { false, };
            union
            {
              struct inotify_event i;
              char buf[100];
            } inev;

            while (TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
                                             sizeof (inev)))
                   >= (ssize_t) sizeof (struct inotify_event))
              {
                /* Check which of the files changed.  */
                for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
                  if (!done[dbcnt]
                      && (inev.i.wd == dbs[dbcnt].inotify_descr
                          || (dbcnt == hstdb
                              && inev.i.wd == resolv_conf_descr)))
                    {
                      if (dbcnt == hstdb
                          && inev.i.wd == resolv_conf_descr)
                        res_init ();

                      /* Really take the lock.  Unlocking after a
                         failed trylock would release a mutex this
                         thread does not own.  */
                      pthread_mutex_lock (&dbs[dbcnt].prune_lock);
                      dbs[dbcnt].clear_cache = 1;
                      pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
                      pthread_cond_signal (&dbs[dbcnt].prune_cond);

                      done[dbcnt] = true;
                      break;
                    }
              }
          }
#endif
        else
          {
            /* Remove the descriptor from the epoll descriptor.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              /* Find the next lower descriptor which is still pending.  */
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (cnt != sock && starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;

            /* The descriptor was counted when it was registered.
               Without this the count only ever grows for connections
               which time out and distorts the timeout computations.  */
            --nused;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
2035 /* Start all the threads we want. The initial process is thread no. 1. */
2036 void
2037 start_threads (void)
2039 /* Initialize the conditional variable we will use. The only
2040 non-standard attribute we might use is the clock selection. */
2041 pthread_condattr_t condattr;
2042 pthread_condattr_init (&condattr);
2044 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2045 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2046 /* Determine whether the monotonous clock is available. */
2047 struct timespec dummy;
2048 # if _POSIX_MONOTONIC_CLOCK == 0
2049 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
2050 # endif
2051 # if _POSIX_CLOCK_SELECTION == 0
2052 if (sysconf (_SC_CLOCK_SELECTION) > 0)
2053 # endif
2054 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
2055 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
2056 timeout_clock = CLOCK_MONOTONIC;
2057 #endif
2059 /* Create the attribute for the threads. They are all created
2060 detached. */
2061 pthread_attr_init (&attr);
2062 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
2063 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2064 pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
2066 /* We allow less than LASTDB threads only for debugging. */
2067 if (debug_level == 0)
2068 nthreads = MAX (nthreads, lastdb);
2070 /* Create the threads which prune the databases. */
2071 // XXX Ideally this work would be done by some of the worker threads.
2072 // XXX But this is problematic since we would need to be able to wake
2073 // XXX them up explicitly as well as part of the group handling the
2074 // XXX ready-list. This requires an operation where we can wait on
2075 // XXX two conditional variables at the same time. This operation
2076 // XXX does not exist (yet).
2077 for (long int i = 0; i < lastdb; ++i)
2079 /* Initialize the conditional variable. */
2080 if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
2082 dbg_log (_("could not initialize conditional variable"));
2083 exit (1);
2086 pthread_t th;
2087 if (dbs[i].enabled
2088 && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
2090 dbg_log (_("could not start clean-up thread; terminating"));
2091 exit (1);
2095 pthread_condattr_destroy (&condattr);
2097 for (long int i = 0; i < nthreads; ++i)
2099 pthread_t th;
2100 if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
2102 if (i == 0)
2104 dbg_log (_("could not start any worker thread; terminating"));
2105 exit (1);
2108 break;
2112 /* Determine how much room for descriptors we should initially
2113 allocate. This might need to change later if we cap the number
2114 with MAXCONN. */
2115 const long int nfds = sysconf (_SC_OPEN_MAX);
2116 #define MINCONN 32
2117 #define MAXCONN 16384
2118 if (nfds == -1 || nfds > MAXCONN)
2119 nconns = MAXCONN;
2120 else if (nfds < MINCONN)
2121 nconns = MINCONN;
2122 else
2123 nconns = nfds;
2125 /* We need memory to pass descriptors on to the worker threads. */
2126 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
2127 /* Array to keep track when connection was accepted. */
2128 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
2130 /* In the main thread we execute the loop which handles incoming
2131 connections. */
2132 #ifdef HAVE_EPOLL
2133 int efd = epoll_create (100);
2134 if (efd != -1)
2136 main_loop_epoll (efd);
2137 close (efd);
2139 #endif
2141 main_loop_poll ();
2145 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2146 this function is called, we are not listening on the nscd socket yet so
2147 we can just use the ordinary lookup functions without causing a lockup */
2148 static void
2149 begin_drop_privileges (void)
2151 struct passwd *pwd = getpwnam (server_user);
2153 if (pwd == NULL)
2155 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2156 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
2157 server_user);
2160 server_uid = pwd->pw_uid;
2161 server_gid = pwd->pw_gid;
2163 /* Save the old UID/GID if we have to change back. */
2164 if (paranoia)
2166 old_uid = getuid ();
2167 old_gid = getgid ();
2170 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
2172 /* This really must never happen. */
2173 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2174 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
2177 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
2179 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
2180 == -1)
2182 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2183 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
2188 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2189 run nscd as the user specified in the configuration file. */
2190 static void
2191 finish_drop_privileges (void)
2193 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2194 /* We need to preserve the capabilities to connect to the audit daemon. */
2195 cap_t new_caps = preserve_capabilities ();
2196 #endif
2198 if (setgroups (server_ngroups, server_groups) == -1)
2200 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2201 error (EXIT_FAILURE, errno, _("setgroups failed"));
2204 int res;
2205 if (paranoia)
2206 res = setresgid (server_gid, server_gid, old_gid);
2207 else
2208 res = setgid (server_gid);
2209 if (res == -1)
2211 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2212 perror ("setgid");
2213 exit (4);
2216 if (paranoia)
2217 res = setresuid (server_uid, server_uid, old_uid);
2218 else
2219 res = setuid (server_uid);
2220 if (res == -1)
2222 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2223 perror ("setuid");
2224 exit (4);
2227 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2228 /* Remove the temporary capabilities. */
2229 install_real_capabilities (new_caps);
2230 #endif