* nscd/connections.c (sighup_pending): New variable.
[glibc.git] / nscd / connections.c
blobc3f9d0e7df15fed56f6facc8e09b8274f7d918ff
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License version 2 as
8 published by the Free Software Foundation.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
19 #include <alloca.h>
20 #include <assert.h>
21 #include <atomic.h>
22 #include <error.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <grp.h>
26 #include <libintl.h>
27 #include <pthread.h>
28 #include <pwd.h>
29 #include <resolv.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <unistd.h>
33 #include <arpa/inet.h>
34 #ifdef HAVE_EPOLL
35 # include <sys/epoll.h>
36 #endif
37 #include <sys/mman.h>
38 #include <sys/param.h>
39 #include <sys/poll.h>
40 #ifdef HAVE_SENDFILE
41 # include <sys/sendfile.h>
42 #endif
43 #include <sys/socket.h>
44 #include <sys/stat.h>
45 #include <sys/un.h>
47 #include "nscd.h"
48 #include "dbg_log.h"
49 #include "selinux.h"
50 #ifdef HAVE_SENDFILE
51 # include <kernel-features.h>
52 #endif
55 /* Wrapper functions with error checking for standard functions. */
56 extern void *xmalloc (size_t n);
57 extern void *xcalloc (size_t n, size_t s);
58 extern void *xrealloc (void *o, size_t n);
60 /* Support to run nscd as an unprivileged user */
/* When SERVER_USER is non-NULL, nscd_init calls begin_drop_privileges ()
   before setting up the databases and finish_drop_privileges () after the
   socket is listening.  */
61 const char *server_user;
/* restart () hands these to setuid ()/setgid () when it has to fall back
   to the unprivileged identity.  */
62 static uid_t server_uid;
63 static gid_t server_gid;
64 const char *stat_user;
/* Besides uid 0, handle_request also accepts GETSTAT from this uid.  */
65 uid_t stat_uid;
66 static gid_t *server_groups;
/* Fallback value used only when the system headers do not define NGROUPS.  */
67 #ifndef NGROUPS
68 # define NGROUPS 32
69 #endif
70 static int server_ngroups;
/* NOTE(review): presumably records that a SIGHUP arrived; the code that
   sets and reads it is not in this part of the file -- confirm.  */
71 static volatile int sighup_pending;
73 static pthread_attr_t attr;
/* Defined further down in the file; both are called from nscd_init.  */
75 static void begin_drop_privileges (void);
76 static void finish_drop_privileges (void);
78 /* Map request type to a string. */
/* The table is indexed by request type and has LASTREQ slots.
   handle_request prints these names in its debug log lines.  */
79 const char *const serv2str[LASTREQ] =
81 [GETPWBYNAME] = "GETPWBYNAME",
82 [GETPWBYUID] = "GETPWBYUID",
83 [GETGRBYNAME] = "GETGRBYNAME",
84 [GETGRBYGID] = "GETGRBYGID",
85 [GETHOSTBYNAME] = "GETHOSTBYNAME",
86 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
87 [GETHOSTBYADDR] = "GETHOSTBYADDR",
88 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
89 [SHUTDOWN] = "SHUTDOWN",
90 [GETSTAT] = "GETSTAT",
91 [INVALIDATE] = "INVALIDATE",
92 [GETFDPW] = "GETFDPW",
93 [GETFDGR] = "GETFDGR",
94 [GETFDHST] = "GETFDHST",
95 [GETAI] = "GETAI",
96 [INITGROUPS] = "INITGROUPS",
97 [GETSERVBYNAME] = "GETSERVBYNAME",
98 [GETSERVBYPORT] = "GETSERVBYPORT",
99 [GETFDSERV] = "GETFDSERV"
102 /* The control data structures for the services. */
/* One entry per database (passwd, group, hosts, services) holding the
   built-in defaults.  nscd_init only sets up entries whose `enabled' flag
   is nonzero; every entry starts out disabled here.  */
103 struct database_dyn dbs[lastdb] =
105 [pwddb] = {
106 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
107 .prunelock = PTHREAD_MUTEX_INITIALIZER,
108 .enabled = 0,
/* Nonzero: nscd_init stat64()s `filename' and records its mtime.  */
109 .check_file = 1,
/* Nonzero: nscd_init tries to reuse or create the file `db_filename'.  */
110 .persistent = 0,
111 .propagate = 1,
/* Nonzero: nscd_init also opens a read-only descriptor (ro_fd), which
   send_ro_fd can pass to a client.  */
112 .shared = 0,
/* Length used for the mmap () of the database file in nscd_init.  */
113 .max_db_size = DEFAULT_MAX_DB_SIZE,
/* Nonzero: invalidate_cache calls res_init () for this database.  */
114 .reset_res = 0,
115 .filename = "/etc/passwd",
116 .db_filename = _PATH_NSCD_PASSWD_DB,
/* Prepared reply that handle_request sends while the service is disabled.  */
117 .disabled_iov = &pwd_iov_disabled,
/* NOTE(review): the two timeouts are presumably in seconds -- confirm.  */
118 .postimeout = 3600,
119 .negtimeout = 20,
/* -1 means "no descriptor"; nscd_init replaces these on success.  */
120 .wr_fd = -1,
121 .ro_fd = -1,
122 .mmap_used = false
124 [grpdb] = {
125 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
126 .prunelock = PTHREAD_MUTEX_INITIALIZER,
127 .enabled = 0,
128 .check_file = 1,
129 .persistent = 0,
130 .propagate = 1,
131 .shared = 0,
132 .max_db_size = DEFAULT_MAX_DB_SIZE,
133 .reset_res = 0,
134 .filename = "/etc/group",
135 .db_filename = _PATH_NSCD_GROUP_DB,
136 .disabled_iov = &grp_iov_disabled,
137 .postimeout = 3600,
138 .negtimeout = 60,
139 .wr_fd = -1,
140 .ro_fd = -1,
141 .mmap_used = false
143 [hstdb] = {
144 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
145 .prunelock = PTHREAD_MUTEX_INITIALIZER,
146 .enabled = 0,
147 .check_file = 1,
148 .persistent = 0,
149 .propagate = 0, /* Not used. */
150 .shared = 0,
151 .max_db_size = DEFAULT_MAX_DB_SIZE,
/* The hosts database is the only one that asks for res_init ().  */
152 .reset_res = 1,
153 .filename = "/etc/hosts",
154 .db_filename = _PATH_NSCD_HOSTS_DB,
155 .disabled_iov = &hst_iov_disabled,
156 .postimeout = 3600,
157 .negtimeout = 20,
158 .wr_fd = -1,
159 .ro_fd = -1,
160 .mmap_used = false
162 [servdb] = {
163 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
164 .prunelock = PTHREAD_MUTEX_INITIALIZER,
165 .enabled = 0,
166 .check_file = 1,
167 .persistent = 0,
168 .propagate = 0, /* Not used. */
169 .shared = 0,
170 .max_db_size = DEFAULT_MAX_DB_SIZE,
171 .reset_res = 0,
172 .filename = "/etc/services",
173 .db_filename = _PATH_NSCD_SERVICES_DB,
174 .disabled_iov = &serv_iov_disabled,
175 .postimeout = 28800,
176 .negtimeout = 20,
177 .wr_fd = -1,
178 .ro_fd = -1,
179 .mmap_used = false
184 /* Mapping of request type to database. */
185 static struct
187 bool data_request;
188 struct database_dyn *db;
189 } const reqinfo[LASTREQ] =
191 [GETPWBYNAME] = { true, &dbs[pwddb] },
192 [GETPWBYUID] = { true, &dbs[pwddb] },
193 [GETGRBYNAME] = { true, &dbs[grpdb] },
194 [GETGRBYGID] = { true, &dbs[grpdb] },
195 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
196 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
197 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
198 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
199 [SHUTDOWN] = { false, NULL },
200 [GETSTAT] = { false, NULL },
201 [SHUTDOWN] = { false, NULL },
202 [GETFDPW] = { false, &dbs[pwddb] },
203 [GETFDGR] = { false, &dbs[grpdb] },
204 [GETFDHST] = { false, &dbs[hstdb] },
205 [GETAI] = { true, &dbs[hstdb] },
206 [INITGROUPS] = { true, &dbs[grpdb] },
207 [GETSERVBYNAME] = { true, &dbs[servdb] },
208 [GETSERVBYPORT] = { true, &dbs[servdb] },
209 [GETFDSERV] = { false, &dbs[servdb] }
213 /* Number of seconds between two cache pruning runs. */
214 #define CACHE_PRUNE_INTERVAL 15
217 /* Initial number of threads to use. */
/* -1 means "not configured"; nscd_init then substitutes 2 * lastdb.  */
218 int nthreads = -1;
219 /* Maximum number of threads to use. */
220 int max_nthreads = 32;
222 /* Socket for incoming connections. */
/* Created, bound and put into listening state by nscd_init; closed by
   close_sockets.  */
223 static int sock;
225 /* Number of times clients had to wait. */
226 unsigned long int client_queued;
/* Send the LEN bytes at BUF over socket FD, continuing after partial
   sends until everything is out.  SIGPIPE is suppressed (MSG_NOSIGNAL).

   Returns the number of bytes sent; this is less than LEN when send ()
   reported 0 before all data was transmitted.  If send () fails, its
   negative result is returned and errno describes the failure.  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cursor = buf;
  size_t left = len;
  ssize_t sent;

  for (;;)
    {
      /* Restart the call when a signal interrupted it; this is what
	 TEMP_FAILURE_RETRY does.  */
      do
	sent = send (fd, cursor, left, MSG_NOSIGNAL);
      while (sent == -1 && errno == EINTR);

      if (sent <= 0)
	break;

      cursor += sent;
      left -= sent;
      if (left == 0)
	break;
    }

  if (sent < 0)
    return sent;
  return len - left;
}
247 #ifdef HAVE_SENDFILE
/* Copy LEN bytes, starting at offset OFF of descriptor FROMFD, to
   descriptor TOFD with sendfile (), continuing after partial transfers.

   Returns the number of bytes transferred; this is less than LEN when
   sendfile () reported 0 early.  If sendfile () fails, its negative
   result is returned and errno describes the failure.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t left = len;
  ssize_t moved;

  for (;;)
    {
      /* Restart the call when a signal interrupted it; this is what
	 TEMP_FAILURE_RETRY does.  sendfile () advances OFF itself.  */
      do
	moved = sendfile (tofd, fromfd, &off, left);
      while (moved == -1 && errno == EINTR);

      if (moved <= 0)
	break;

      left -= moved;
      if (left <= 0)
	break;
    }

  if (moved < 0)
    return moved;
  return len - left;
}
264 #endif
/* Marks stored in the per-byte usage map that verify_persistent_db hands
   to check_use.  The low bits name the kind of object occupying a byte;
   the begin/end/first flags are OR-ed onto the first and last byte of an
   object.  */
enum usekey
  {
    /* Byte not claimed by any object.  */
    use_not = 0,

    /* Object kinds.  */
    use_he = 1,
#if SEPARATE_KEY
    use_key = 2,
#endif
    use_data = 3,

    /* Flag bits.  They are symbolic constants that only ever appear in
       combination with an object kind.  */
    use_first = 1 << 4,
    use_begin = 1 << 5,
    use_end = 1 << 6,

    /* Kind/flag combinations.  */
    use_he_begin = use_he | use_begin,
    use_he_end = use_he | use_end,
#if SEPARATE_KEY
    use_key_begin = use_key | use_begin,
    use_key_end = use_key | use_end,
    use_key_first = use_key_begin | use_first,
#endif
    use_data_begin = use_data | use_begin,
    use_data_end = use_data | use_end,
    use_data_first = use_data_begin | use_first
  };
291 static int
292 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
293 enum usekey use, ref_t start, size_t len)
295 assert (len >= 2);
297 if (start > first_free || start + len > first_free
298 || (start & BLOCK_ALIGN_M1))
299 return 0;
301 if (usemap[start] == use_not)
303 /* Add the start marker. */
304 usemap[start] = use | use_begin;
305 use &= ~use_first;
307 while (--len > 0)
308 if (usemap[++start] != use_not)
309 return 0;
310 else
311 usemap[start] = use;
313 /* Add the end marker. */
314 usemap[start] = use | use_end;
316 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
318 /* Hash entries can't be shared. */
319 if (use == use_he)
320 return 0;
322 usemap[start] |= (use & use_first);
323 use &= ~use_first;
325 while (--len > 1)
326 if (usemap[++start] != use)
327 return 0;
329 if (usemap[++start] != (use | use_end))
330 return 0;
332 else
333 /* Points to a wrong object or somewhere in the middle. */
334 return 0;
336 return 1;
340 /* Verify data in persistent database. */
341 static int
342 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
344 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb);
346 time_t now = time (NULL);
348 struct database_pers_head *head = mem;
349 struct database_pers_head head_copy = *head;
351 /* Check that the header that was read matches the head in the database. */
352 if (readhead != NULL && memcmp (head, readhead, sizeof (*head)) != 0)
353 return 0;
355 /* First some easy tests: make sure the database header is sane. */
356 if (head->version != DB_VERSION
357 || head->header_size != sizeof (*head)
358 /* We allow a timestamp to be one hour ahead of the current time.
359 This should cover daylight saving time changes. */
360 || head->timestamp > now + 60 * 60 + 60
361 || (head->gc_cycle & 1)
362 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
363 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
364 || head->first_free < 0
365 || head->first_free > head->data_size
366 || (head->first_free & BLOCK_ALIGN_M1) != 0
367 || head->maxnentries < 0
368 || head->maxnsearched < 0)
369 return 0;
371 uint8_t *usemap = calloc (head->first_free, 1);
372 if (usemap == NULL)
373 return 0;
375 const char *data = (char *) &head->array[roundup (head->module,
376 ALIGN / sizeof (ref_t))];
378 nscd_ssize_t he_cnt = 0;
379 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
381 ref_t work = head->array[cnt];
383 while (work != ENDREF)
385 if (! check_use (data, head->first_free, usemap, use_he, work,
386 sizeof (struct hashentry)))
387 goto fail;
389 /* Now we know we can dereference the record. */
390 struct hashentry *here = (struct hashentry *) (data + work);
392 ++he_cnt;
394 /* Make sure the record is for this type of service. */
395 if (here->type >= LASTREQ
396 || reqinfo[here->type].db != &dbs[dbnr])
397 goto fail;
399 /* Validate boolean field value. */
400 if (here->first != false && here->first != true)
401 goto fail;
403 if (here->len < 0)
404 goto fail;
406 /* Now the data. */
407 if (here->packet < 0
408 || here->packet > head->first_free
409 || here->packet + sizeof (struct datahead) > head->first_free)
410 goto fail;
412 struct datahead *dh = (struct datahead *) (data + here->packet);
414 if (! check_use (data, head->first_free, usemap,
415 use_data | (here->first ? use_first : 0),
416 here->packet, dh->allocsize))
417 goto fail;
419 if (dh->allocsize < sizeof (struct datahead)
420 || dh->recsize > dh->allocsize
421 || (dh->notfound != false && dh->notfound != true)
422 || (dh->usable != false && dh->usable != true))
423 goto fail;
425 if (here->key < here->packet + sizeof (struct datahead)
426 || here->key > here->packet + dh->allocsize
427 || here->key + here->len > here->packet + dh->allocsize)
429 #if SEPARATE_KEY
430 /* If keys can appear outside of data, this should be done
431 instead. But gc doesn't mark the data in that case. */
432 if (! check_use (data, head->first_free, usemap,
433 use_key | (here->first ? use_first : 0),
434 here->key, here->len))
435 #endif
436 goto fail;
439 work = here->next;
443 if (he_cnt != head->nentries)
444 goto fail;
446 /* See if all data and keys had at least one reference from
447 he->first == true hashentry. */
448 for (ref_t idx = 0; idx < head->first_free; ++idx)
450 #if SEPARATE_KEY
451 if (usemap[idx] == use_key_begin)
452 goto fail;
453 #endif
454 if (usemap[idx] == use_data_begin)
455 goto fail;
458 /* Finally, make sure the database hasn't changed since the first test. */
459 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
460 goto fail;
462 free (usemap);
463 return 1;
465 fail:
466 free (usemap);
467 return 0;
471 /* Initialize database information structures. */
/* For every enabled database this sets up the locks and the backing
   store: an existing persistent file is reused if it verifies, otherwise
   a new file (persistent or temporary) is created and mapped, and if that
   also fails a plain in-memory table is allocated.  Afterwards the
   listening AF_UNIX socket is created, bound to _PATH_NSCDSOCKET and made
   non-blocking.  Failures of the socket setup terminate the process with
   exit ().  */
472 void
473 nscd_init (void)
475 /* Look up unprivileged uid/gid/groups before we start listening on the
476 socket */
477 if (server_user != NULL)
478 begin_drop_privileges ();
480 if (nthreads == -1)
481 /* No configuration for this value, assume a default. */
482 nthreads = 2 * lastdb;
484 for (size_t cnt = 0; cnt < lastdb; ++cnt)
485 if (dbs[cnt].enabled)
487 pthread_rwlock_init (&dbs[cnt].lock, NULL);
488 pthread_mutex_init (&dbs[cnt].memlock, NULL);
/* Step 1: try to reuse an existing persistent database file.  */
490 if (dbs[cnt].persistent)
492 /* Try to open the appropriate file on disk. */
493 int fd = open (dbs[cnt].db_filename, O_RDWR);
494 if (fd != -1)
496 struct stat64 st;
497 void *mem;
498 size_t total;
499 struct database_pers_head head;
500 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
501 sizeof (head)));
502 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
/* The fail_db label is also the target of the mmap failure further down.  */
504 fail_db:
505 dbg_log (_("invalid persistent database file \"%s\": %s"),
506 dbs[cnt].db_filename, strerror (errno));
507 unlink (dbs[cnt].db_filename);
509 else if (head.module == 0 && head.data_size == 0)
511 /* The file has been created, but the head has not been
512 initialized yet. Remove the old file. */
513 unlink (dbs[cnt].db_filename);
515 else if (head.header_size != (int) sizeof (head))
517 dbg_log (_("invalid persistent database file \"%s\": %s"),
518 dbs[cnt].db_filename,
519 _("header size does not match"));
520 unlink (dbs[cnt].db_filename);
/* TOTAL = header + hash table (rounded up to ALIGN) + data area.  The
   file is rejected when TOTAL exceeds the file size or when the sum
   wrapped around to less than the header size.  */
522 else if ((total = (sizeof (head)
523 + roundup (head.module * sizeof (ref_t),
524 ALIGN)
525 + head.data_size))
526 > st.st_size
527 || total < sizeof (head))
529 dbg_log (_("invalid persistent database file \"%s\": %s"),
530 dbs[cnt].db_filename,
531 _("file size does not match"));
532 unlink (dbs[cnt].db_filename);
534 /* Note we map with the maximum size allowed for the
535 database. This is likely much larger than the
536 actual file size. This is OK on most OSes since
537 extensions of the underlying file will
538 automatically translate more pages available for
539 memory access. */
540 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
541 PROT_READ | PROT_WRITE,
542 MAP_SHARED, fd, 0))
543 == MAP_FAILED)
544 goto fail_db;
545 else if (!verify_persistent_db (mem, &head, cnt))
/* NOTE(review): the mapping above was created with length max_db_size
   but only TOTAL bytes are unmapped here -- confirm this is intended.  */
547 munmap (mem, total);
548 dbg_log (_("invalid persistent database file \"%s\": %s"),
549 dbs[cnt].db_filename,
550 _("verification failed"));
551 unlink (dbs[cnt].db_filename);
553 else
555 /* Success. We have the database. */
556 dbs[cnt].head = mem;
557 dbs[cnt].memsize = total;
558 dbs[cnt].data = (char *)
559 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
560 ALIGN / sizeof (ref_t))];
561 dbs[cnt].mmap_used = true;
563 if (dbs[cnt].suggested_module > head.module)
564 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
565 dbnames[cnt]);
/* Ownership of the descriptor moves to the database entry.  */
567 dbs[cnt].wr_fd = fd;
568 fd = -1;
569 /* We also need a read-only descriptor. */
570 if (dbs[cnt].shared)
572 dbs[cnt].ro_fd = open (dbs[cnt].db_filename, O_RDONLY);
573 if (dbs[cnt].ro_fd == -1)
574 dbg_log (_("\
575 cannot create read-only descriptor for \"%s\"; no mmap"),
576 dbs[cnt].db_filename);
579 // XXX Shall we test whether the descriptors actually
580 // XXX point to the same file?
583 /* Close the file descriptors in case something went
584 wrong in which case the variable have not been
585 assigned -1. */
586 if (fd != -1)
587 close (fd);
/* Step 2: nothing was loaded, so create a new file-backed database.  */
591 if (dbs[cnt].head == NULL)
593 /* No database loaded. Allocate the data structure,
594 possibly on disk. */
595 struct database_pers_head head;
596 size_t total = (sizeof (head)
597 + roundup (dbs[cnt].suggested_module
598 * sizeof (ref_t), ALIGN)
599 + (dbs[cnt].suggested_module
600 * DEFAULT_DATASIZE_PER_BUCKET));
602 /* Try to create the database. If we do not need a
603 persistent database create a temporary file. */
604 int fd;
605 int ro_fd = -1;
606 if (dbs[cnt].persistent)
/* O_EXCL makes the open fail with EEXIST if the file still exists.  */
608 fd = open (dbs[cnt].db_filename,
609 O_RDWR | O_CREAT | O_EXCL | O_TRUNC,
610 S_IRUSR | S_IWUSR);
611 if (fd != -1 && dbs[cnt].shared)
612 ro_fd = open (dbs[cnt].db_filename, O_RDONLY);
614 else
616 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
617 fd = mkstemp (fname);
619 /* We do not need the file name anymore after we
620 opened another file descriptor in read-only mode. */
621 if (fd != -1)
623 if (dbs[cnt].shared)
624 ro_fd = open (fname, O_RDONLY);
626 unlink (fname);
630 if (fd == -1)
632 if (errno == EEXIST)
634 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
635 dbnames[cnt], dbs[cnt].db_filename);
636 // XXX Correct way to terminate?
637 exit (1);
640 if (dbs[cnt].persistent)
641 dbg_log (_("cannot create %s; no persistent database used"),
642 dbs[cnt].db_filename);
643 else
644 dbg_log (_("cannot create %s; no sharing possible"),
645 dbs[cnt].db_filename);
647 dbs[cnt].persistent = 0;
648 // XXX remember: no mmap
650 else
652 /* Tell the user if we could not create the read-only
653 descriptor. */
654 if (ro_fd == -1 && dbs[cnt].shared)
655 dbg_log (_("\
656 cannot create read-only descriptor for \"%s\"; no mmap"),
657 dbs[cnt].db_filename);
659 /* Before we create the header, initialize the hash
660 table. So that if we get interrupted if writing
661 the header we can recognize a partially initialized
662 database. */
663 size_t ps = sysconf (_SC_PAGESIZE);
664 char tmpbuf[ps];
/* An all-ones bit pattern must equal ENDREF for the memset to work.  */
665 assert (~ENDREF == 0);
666 memset (tmpbuf, '\xff', ps);
668 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
669 off_t offset = sizeof (head);
/* Write up to the next page boundary first, then whole pages, then
   the remainder.  */
671 size_t towrite;
672 if (offset % ps != 0)
674 towrite = MIN (remaining, ps - (offset % ps));
675 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
676 goto write_fail;
677 offset += towrite;
678 remaining -= towrite;
681 while (remaining > ps)
683 if (pwrite (fd, tmpbuf, ps, offset) == -1)
684 goto write_fail;
685 offset += ps;
686 remaining -= ps;
689 if (remaining > 0
690 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
691 goto write_fail;
693 /* Create the header of the file. */
694 struct database_pers_head head =
696 .version = DB_VERSION,
697 .header_size = sizeof (head),
698 .module = dbs[cnt].suggested_module,
699 .data_size = (dbs[cnt].suggested_module
700 * DEFAULT_DATASIZE_PER_BUCKET),
701 .first_free = 0
703 void *mem;
/* The header goes to the start of the file: only pwrite () was used
   on FD so far, which leaves the file offset untouched.  */
705 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
706 != sizeof (head))
707 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
708 != 0)
709 || (mem = mmap (NULL, dbs[cnt].max_db_size,
710 PROT_READ | PROT_WRITE,
711 MAP_SHARED, fd, 0)) == MAP_FAILED)
/* NOTE(review): unlink () runs before strerror (errno) is evaluated, so
   errno may no longer describe the failed write -- confirm.  */
713 write_fail:
714 unlink (dbs[cnt].db_filename);
715 dbg_log (_("cannot write to database file %s: %s"),
716 dbs[cnt].db_filename, strerror (errno));
717 dbs[cnt].persistent = 0;
719 else
721 /* Success. */
722 dbs[cnt].head = mem;
723 dbs[cnt].data = (char *)
724 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
725 ALIGN / sizeof (ref_t))];
726 dbs[cnt].memsize = total;
727 dbs[cnt].mmap_used = true;
729 /* Remember the descriptors. */
730 dbs[cnt].wr_fd = fd;
731 dbs[cnt].ro_fd = ro_fd;
732 fd = -1;
733 ro_fd = -1;
/* Descriptors that were not handed over to the database are closed.  */
736 if (fd != -1)
737 close (fd);
738 if (ro_fd != -1)
739 close (ro_fd);
/* Paranoia mode needs the database descriptors to be close-on-exec; if
   that cannot be arranged the mode is switched off.  */
743 if (paranoia
744 && ((dbs[cnt].wr_fd != -1
745 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
746 || (dbs[cnt].ro_fd != -1
747 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
749 dbg_log (_("\
750 cannot set socket to close on exec: %s; disabling paranoia mode"),
751 strerror (errno));
752 paranoia = 0;
/* Step 3: last resort, a table in ordinary heap memory.  */
755 if (dbs[cnt].head == NULL)
757 /* We do not use the persistent database. Just
758 create an in-memory data structure. */
759 assert (! dbs[cnt].persistent);
761 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
762 + (dbs[cnt].suggested_module
763 * sizeof (ref_t)));
764 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
765 assert (~ENDREF == 0);
766 memset (dbs[cnt].head->array, '\xff',
767 dbs[cnt].suggested_module * sizeof (ref_t));
768 dbs[cnt].head->module = dbs[cnt].suggested_module;
769 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
770 * dbs[cnt].head->module);
771 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
772 dbs[cnt].head->first_free = 0;
/* Without a file there is nothing that could be shared with clients.  */
774 dbs[cnt].shared = 0;
775 assert (dbs[cnt].ro_fd == -1);
778 if (dbs[cnt].check_file)
780 /* We need the modification date of the file. */
781 struct stat64 st;
783 if (stat64 (dbs[cnt].filename, &st) < 0)
785 /* We cannot stat() the file, disable file checking. */
786 dbg_log (_("cannot stat() file `%s': %s"),
787 dbs[cnt].filename, strerror (errno));
788 dbs[cnt].check_file = 0;
790 else
791 dbs[cnt].file_mtime = st.st_mtime;
795 /* Create the socket. */
796 sock = socket (AF_UNIX, SOCK_STREAM, 0);
797 if (sock < 0)
799 dbg_log (_("cannot open socket: %s"), strerror (errno));
/* Exit status 4 is used for EACCES, 1 for every other error.  */
800 exit (errno == EACCES ? 4 : 1);
802 /* Bind a name to the socket. */
803 struct sockaddr_un sock_addr;
804 sock_addr.sun_family = AF_UNIX;
805 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
806 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
808 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
809 exit (errno == EACCES ? 4 : 1);
812 /* We don't want to get stuck on accept. */
813 int fl = fcntl (sock, F_GETFL);
814 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
816 dbg_log (_("cannot change socket to nonblocking mode: %s"),
817 strerror (errno));
818 exit (1);
821 /* The descriptor needs to be closed on exec. */
822 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
824 dbg_log (_("cannot set socket to close on exec: %s"),
825 strerror (errno));
826 exit (1);
829 /* Set permissions for the socket. */
830 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
832 /* Set the socket up to accept connections. */
833 if (listen (sock, SOMAXCONN) < 0)
835 dbg_log (_("cannot enable socket to accept connections: %s"),
836 strerror (errno));
837 exit (1);
840 /* Change to unprivileged uid/gid/groups if specified in config file */
841 if (server_user != NULL)
842 finish_drop_privileges ();
846 /* Close the connections. */
/* Closes the listening socket that nscd_init created.  The result of
   close () is not checked.  */
847 void
848 close_sockets (void)
850 close (sock);
854 static void
855 invalidate_cache (char *key, int fd)
857 dbtype number;
858 int32_t resp;
860 for (number = pwddb; number < lastdb; ++number)
861 if (strcmp (key, dbnames[number]) == 0)
863 if (dbs[number].reset_res)
864 res_init ();
866 break;
869 if (number == lastdb)
871 resp = EINVAL;
872 writeall (fd, &resp, sizeof (resp));
873 return;
876 if (dbs[number].enabled)
877 prune_cache (&dbs[number], LONG_MAX, fd);
878 else
880 resp = 0;
881 writeall (fd, &resp, sizeof (resp));
886 #ifdef SCM_RIGHTS
887 static void
888 send_ro_fd (struct database_dyn *db, char *key, int fd)
890 /* If we do not have an read-only file descriptor do nothing. */
891 if (db->ro_fd == -1)
892 return;
894 /* We need to send some data along with the descriptor. */
895 struct iovec iov[1];
896 iov[0].iov_base = key;
897 iov[0].iov_len = strlen (key) + 1;
899 /* Prepare the control message to transfer the descriptor. */
900 union
902 struct cmsghdr hdr;
903 char bytes[CMSG_SPACE (sizeof (int))];
904 } buf;
905 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 1,
906 .msg_control = buf.bytes,
907 .msg_controllen = sizeof (buf) };
908 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
910 cmsg->cmsg_level = SOL_SOCKET;
911 cmsg->cmsg_type = SCM_RIGHTS;
912 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
914 *(int *) CMSG_DATA (cmsg) = db->ro_fd;
916 msg.msg_controllen = cmsg->cmsg_len;
918 /* Send the control message. We repeat when we are interrupted but
919 everything else is ignored. */
920 #ifndef MSG_NOSIGNAL
921 # define MSG_NOSIGNAL 0
922 #endif
923 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
925 if (__builtin_expect (debug_level > 0, 0))
926 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
928 #endif /* SCM_RIGHTS */
931 /* Handle new request. */
/* FD is the client connection, REQ the request header, KEY the key data
   that came with it and UID the uid passed on to the cache functions.

   Requests with a version other than NSCD_VERSION, and requests refused
   by the SELinux check, are dropped without an answer.  Data requests
   are answered from the cache when possible (or with the prepared
   "disabled" record); on a cache miss, and for all other request types,
   the switch at the end dispatches to the matching handler.

   NOTE(review): req->type is used as an index into reqinfo[] and
   serv2str[] before the switch reaches its `default' case -- confirm the
   caller guarantees req->type < LASTREQ.  */
932 static void
933 handle_request (int fd, request_header *req, void *key, uid_t uid)
935 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
937 if (debug_level > 0)
938 dbg_log (_("\
939 cannot handle old request version %d; current version is %d"),
940 req->version, NSCD_VERSION);
941 return;
944 /* Make the SELinux check before we go on to the standard checks. */
945 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
946 return;
948 struct database_dyn *db = reqinfo[req->type].db;
950 /* See whether we can service the request from the cache. */
951 if (__builtin_expect (reqinfo[req->type].data_request, true))
953 if (__builtin_expect (debug_level, 0) > 0)
/* Address lookups carry a binary address as key, so it is converted
   to text for the log line.  */
955 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
957 char buf[INET6_ADDRSTRLEN];
959 dbg_log ("\t%s (%s)", serv2str[req->type],
960 inet_ntop (req->type == GETHOSTBYADDR
961 ? AF_INET : AF_INET6,
962 key, buf, sizeof (buf)));
964 else
965 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
968 /* Is this service enabled? */
969 if (__builtin_expect (!db->enabled, 0))
971 /* No, sent the prepared record. */
972 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
973 db->disabled_iov->iov_len,
974 MSG_NOSIGNAL))
975 != (ssize_t) db->disabled_iov->iov_len
976 && __builtin_expect (debug_level, 0) > 0)
978 /* We have problems sending the result. */
979 char buf[256];
980 dbg_log (_("cannot write result: %s"),
981 strerror_r (errno, buf, sizeof (buf)));
984 return;
987 /* Be sure we can read the data. */
/* If the lock is not immediately available, count the delay and then
   block until it is.  */
988 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
990 ++db->head->rdlockdelayed;
991 pthread_rwlock_rdlock (&db->lock);
994 /* See whether we can handle it from the cache. */
995 struct datahead *cached;
996 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
997 db, uid);
998 if (cached != NULL)
1000 /* Hurray it's in the cache. */
1001 ssize_t nwritten;
/* With sendfile support the record is sent straight from the database
   file.  The offset passed is that of the record relative to the
   database header.
   NOTE(review): the condition is also true for a positive entry when
   mmap_used is false, in which case wr_fd is expected to still be -1 and
   the first assert would fire -- confirm the intended condition.  */
1003 #ifdef HAVE_SENDFILE
1004 if (db->mmap_used || !cached->notfound)
1006 assert (db->wr_fd != -1);
1007 assert ((char *) cached->data > (char *) db->data);
1008 assert ((char *) cached->data - (char *) db->head
1009 + cached->recsize
1010 <= (sizeof (struct database_pers_head)
1011 + db->head->module * sizeof (ref_t)
1012 + db->head->data_size));
1013 nwritten = sendfileall (fd, db->wr_fd,
1014 (char *) cached->data
1015 - (char *) db->head, cached->recsize);
/* Unless sendfile is known to exist, fall back to writeall when the
   call reports ENOSYS.  */
1016 # ifndef __ASSUME_SENDFILE
1017 if (nwritten == -1 && errno == ENOSYS)
1018 goto use_write;
1019 # endif
1021 else
1022 # ifndef __ASSUME_SENDFILE
1023 use_write:
1024 # endif
1025 #endif
1026 nwritten = writeall (fd, cached->data, cached->recsize);
1028 if (nwritten != cached->recsize
1029 && __builtin_expect (debug_level, 0) > 0)
1031 /* We have problems sending the result. */
1032 char buf[256];
1033 dbg_log (_("cannot write result: %s"),
1034 strerror_r (errno, buf, sizeof (buf)));
1037 pthread_rwlock_unlock (&db->lock);
1039 return;
/* Cache miss: release the read lock before dispatching below.  */
1042 pthread_rwlock_unlock (&db->lock);
1044 else if (__builtin_expect (debug_level, 0) > 0)
1046 if (req->type == INVALIDATE)
1047 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1048 else
1049 dbg_log ("\t%s", serv2str[req->type]);
1052 /* Handle the request. */
1053 switch (req->type)
1055 case GETPWBYNAME:
1056 addpwbyname (db, fd, req, key, uid);
1057 break;
1059 case GETPWBYUID:
1060 addpwbyuid (db, fd, req, key, uid);
1061 break;
1063 case GETGRBYNAME:
1064 addgrbyname (db, fd, req, key, uid);
1065 break;
1067 case GETGRBYGID:
1068 addgrbygid (db, fd, req, key, uid);
1069 break;
1071 case GETHOSTBYNAME:
1072 addhstbyname (db, fd, req, key, uid);
1073 break;
1075 case GETHOSTBYNAMEv6:
1076 addhstbynamev6 (db, fd, req, key, uid);
1077 break;
1079 case GETHOSTBYADDR:
1080 addhstbyaddr (db, fd, req, key, uid);
1081 break;
1083 case GETHOSTBYADDRv6:
1084 addhstbyaddrv6 (db, fd, req, key, uid);
1085 break;
1087 case GETAI:
1088 addhstai (db, fd, req, key, uid);
1089 break;
1091 case INITGROUPS:
1092 addinitgroups (db, fd, req, key, uid);
1093 break;
1095 case GETSERVBYNAME:
1096 addservbyname (db, fd, req, key, uid);
1097 break;
1099 case GETSERVBYPORT:
1100 addservbyport (db, fd, req, key, uid);
1101 break;
/* Administrative requests: the UID argument is replaced by the peer
   credentials of the socket before the permission check.  */
1103 case GETSTAT:
1104 case SHUTDOWN:
1105 case INVALIDATE:
1107 /* Get the callers credentials. */
1108 #ifdef SO_PEERCRED
1109 struct ucred caller;
1110 socklen_t optlen = sizeof (caller);
1112 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1114 char buf[256];
1116 dbg_log (_("error getting caller's id: %s"),
1117 strerror_r (errno, buf, sizeof (buf)));
1118 break;
1121 uid = caller.uid;
1122 #else
1123 /* Some systems have no SO_PEERCRED implementation. They don't
1124 care about security so we don't as well. */
1125 uid = 0;
1126 #endif
1129 /* Accept shutdown, getstat and invalidate only from root. For
1130 the stat call also allow the user specified in the config file. */
1131 if (req->type == GETSTAT)
1133 if (uid == 0 || uid == stat_uid)
1134 send_stats (fd, dbs);
1136 else if (uid == 0)
1138 if (req->type == INVALIDATE)
1139 invalidate_cache (key, fd);
1140 else
1141 termination_handler (0);
1143 break;
/* Descriptor requests: hand out the read-only descriptor of the
   database, if descriptor passing is available at all.  */
1145 case GETFDPW:
1146 case GETFDGR:
1147 case GETFDHST:
1148 case GETFDSERV:
1149 #ifdef SCM_RIGHTS
1150 send_ro_fd (reqinfo[req->type].db, key, fd);
1151 #endif
1152 break;
1154 default:
1155 /* Ignore the command, it's nothing we know. */
1156 break;
1161 /* Restart the process. */
1162 static void
1163 restart (void)
1165 /* First determine the parameters. We do not use the parameters
1166 passed to main() since in case nscd is started by running the
1167 dynamic linker this will not work. Yes, this is not the usual
1168 case but nscd is part of glibc and we occasionally do this. */
1169 size_t buflen = 1024;
1170 char *buf = alloca (buflen);
1171 size_t readlen = 0;
1172 int fd = open ("/proc/self/cmdline", O_RDONLY);
1173 if (fd == -1)
1175 dbg_log (_("\
1176 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1177 strerror (errno));
1179 paranoia = 0;
1180 return;
1183 while (1)
1185 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1186 buflen - readlen));
1187 if (n == -1)
1189 dbg_log (_("\
1190 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1191 strerror (errno));
1193 close (fd);
1194 paranoia = 0;
1195 return;
1198 readlen += n;
1200 if (readlen < buflen)
1201 break;
1203 /* We might have to extend the buffer. */
1204 size_t old_buflen = buflen;
1205 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1206 buf = memmove (newp, buf, old_buflen);
1209 close (fd);
1211 /* Parse the command line. Worst case scenario: every two
1212 characters form one parameter (one character plus NUL). */
1213 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1214 int argc = 0;
1216 char *cp = buf;
1217 while (cp < buf + readlen)
1219 argv[argc++] = cp;
1220 cp = (char *) rawmemchr (cp, '\0') + 1;
1222 argv[argc] = NULL;
1224 /* Second, change back to the old user if we changed it. */
1225 if (server_user != NULL)
1227 if (setresuid (old_uid, old_uid, old_uid) != 0)
1229 dbg_log (_("\
1230 cannot change to old UID: %s; disabling paranoia mode"),
1231 strerror (errno));
1233 paranoia = 0;
1234 return;
1237 if (setresgid (old_gid, old_gid, old_gid) != 0)
1239 dbg_log (_("\
1240 cannot change to old GID: %s; disabling paranoia mode"),
1241 strerror (errno));
1243 setuid (server_uid);
1244 paranoia = 0;
1245 return;
1249 /* Next change back to the old working directory. */
1250 if (chdir (oldcwd) == -1)
1252 dbg_log (_("\
1253 cannot change to old working directory: %s; disabling paranoia mode"),
1254 strerror (errno));
1256 if (server_user != NULL)
1258 setuid (server_uid);
1259 setgid (server_gid);
1261 paranoia = 0;
1262 return;
1265 /* Synchronize memory. */
1266 for (int cnt = 0; cnt < lastdb; ++cnt)
1268 /* Make sure nobody keeps using the database. */
1269 dbs[cnt].head->timestamp = 0;
1271 if (dbs[cnt].persistent)
1272 // XXX async OK?
1273 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1276 /* The preparations are done. */
1277 execv ("/proc/self/exe", argv);
1279 /* If we come here, we will never be able to re-exec. */
1280 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1281 strerror (errno));
1283 if (server_user != NULL)
1285 setuid (server_uid);
1286 setgid (server_gid);
1288 if (chdir ("/") != 0)
1289 dbg_log (_("cannot change current working directory to \"/\": %s"),
1290 strerror (errno));
1291 paranoia = 0;
1295 /* List of file descriptors. */
1296 struct fdlist
1298 int fd;
1299 struct fdlist *next;
1301 /* Memory allocated for the list. */
1302 static struct fdlist *fdlist;
1303 /* List of currently ready-to-read file descriptors. */
1304 static struct fdlist *readylist;
1306 /* Conditional variable and mutex to signal availability of entries in
1307 READYLIST. The condvar is initialized dynamically since we might
1308 use a different clock depending on availability. */
1309 static pthread_cond_t readylist_cond;
1310 static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1312 /* The clock to use with the condvar. */
1313 static clockid_t timeout_clock = CLOCK_REALTIME;
1315 /* Number of threads ready to handle the READYLIST. */
1316 static unsigned long int nready;
1319 /* This is the main loop. It is replicated in different threads but the
1320 `poll' call makes sure only one thread handles an incoming connection. */
1321 static void *
1322 __attribute__ ((__noreturn__))
1323 nscd_run (void *p)
1325 const long int my_number = (long int) p;
1326 const int run_prune = my_number < lastdb && dbs[my_number].enabled;
1327 struct timespec prune_ts;
1328 int to = 0;
1329 char buf[256];
1331 if (run_prune)
1333 setup_thread (&dbs[my_number]);
1335 /* We are running. */
1336 dbs[my_number].head->timestamp = time (NULL);
1338 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1339 /* Should never happen. */
1340 abort ();
1342 /* Compute timeout time. */
1343 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1346 /* Initial locking. */
1347 pthread_mutex_lock (&readylist_lock);
1349 /* One more thread available. */
1350 ++nready;
1352 while (1)
1354 while (readylist == NULL)
1356 if (run_prune)
1358 /* Wait, but not forever. */
1359 to = pthread_cond_timedwait (&readylist_cond, &readylist_lock,
1360 &prune_ts);
1362 /* If we were woken and there is no work to be done,
1363 just start pruning. */
1364 if (readylist == NULL && to == ETIMEDOUT)
1366 --nready;
1368 if (sighup_pending)
1369 goto sighup_prune;
1371 pthread_mutex_unlock (&readylist_lock);
1372 goto only_prune;
1375 else
1376 /* No need to timeout. */
1377 pthread_cond_wait (&readylist_cond, &readylist_lock);
1380 if (sighup_pending)
1382 --nready;
1383 pthread_cond_signal (&readylist_cond);
1384 sighup_prune:
1385 sighup_pending = 0;
1386 pthread_mutex_unlock (&readylist_lock);
1388 /* Prune the password database. */
1389 if (dbs[pwddb].enabled)
1390 prune_cache (&dbs[pwddb], LONG_MAX, -1);
1392 /* Prune the group database. */
1393 if (dbs[grpdb].enabled)
1394 prune_cache (&dbs[grpdb], LONG_MAX, -1);
1396 /* Prune the host database. */
1397 if (dbs[hstdb].enabled)
1398 prune_cache (&dbs[hstdb], LONG_MAX, -1);
1400 /* Re-locking. */
1401 pthread_mutex_lock (&readylist_lock);
1403 /* One more thread available. */
1404 ++nready;
1405 continue;
1408 struct fdlist *it = readylist->next;
1409 if (readylist->next == readylist)
1410 /* Just one entry on the list. */
1411 readylist = NULL;
1412 else
1413 readylist->next = it->next;
1415 /* Extract the information and mark the record ready to be used
1416 again. */
1417 int fd = it->fd;
1418 it->next = NULL;
1420 /* One more thread available. */
1421 --nready;
1423 /* We are done with the list. */
1424 pthread_mutex_unlock (&readylist_lock);
1426 /* We do not want to block on a short read or so. */
1427 int fl = fcntl (fd, F_GETFL);
1428 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1429 goto close_and_out;
1431 /* Now read the request. */
1432 request_header req;
1433 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1434 != sizeof (req), 0))
1436 /* We failed to read data. Note that this also might mean we
1437 failed because we would have blocked. */
1438 if (debug_level > 0)
1439 dbg_log (_("short read while reading request: %s"),
1440 strerror_r (errno, buf, sizeof (buf)));
1441 goto close_and_out;
1444 /* Check whether this is a valid request type. */
1445 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1446 goto close_and_out;
1448 /* Some systems have no SO_PEERCRED implementation. They don't
1449 care about security so we don't as well. */
1450 uid_t uid = -1;
1451 #ifdef SO_PEERCRED
1452 pid_t pid = 0;
1454 if (__builtin_expect (debug_level > 0, 0))
1456 struct ucred caller;
1457 socklen_t optlen = sizeof (caller);
1459 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1460 pid = caller.pid;
1462 #endif
1464 /* It should not be possible to crash the nscd with a silly
1465 request (i.e., a terribly large key). We limit the size to 1kb. */
1466 if (__builtin_expect (req.key_len, 1) < 0
1467 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1469 if (debug_level > 0)
1470 dbg_log (_("key length in request too long: %d"), req.key_len);
1472 else
1474 /* Get the key. */
1475 char keybuf[MAXKEYLEN];
1477 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1478 req.key_len))
1479 != req.key_len, 0))
1481 /* Again, this can also mean we would have blocked. */
1482 if (debug_level > 0)
1483 dbg_log (_("short read while reading request key: %s"),
1484 strerror_r (errno, buf, sizeof (buf)));
1485 goto close_and_out;
1488 if (__builtin_expect (debug_level, 0) > 0)
1490 #ifdef SO_PEERCRED
1491 if (pid != 0)
1492 dbg_log (_("\
1493 handle_request: request received (Version = %d) from PID %ld"),
1494 req.version, (long int) pid);
1495 else
1496 #endif
1497 dbg_log (_("\
1498 handle_request: request received (Version = %d)"), req.version);
1501 /* Phew, we got all the data, now process it. */
1502 handle_request (fd, &req, keybuf, uid);
1505 close_and_out:
1506 /* We are done. */
1507 close (fd);
1509 /* Check whether we should be pruning the cache. */
1510 assert (run_prune || to == 0);
1511 if (to == ETIMEDOUT)
1513 only_prune:
1514 /* The pthread_cond_timedwait() call timed out. It is time
1515 to clean up the cache. */
1516 assert (my_number < lastdb);
1517 prune_cache (&dbs[my_number], time (NULL), -1);
1519 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1520 /* Should never happen. */
1521 abort ();
1523 /* Compute next timeout time. */
1524 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1526 /* In case the list is emtpy we do not want to run the prune
1527 code right away again. */
1528 to = 0;
1531 /* Re-locking. */
1532 pthread_mutex_lock (&readylist_lock);
1534 /* One more thread available. */
1535 ++nready;
1540 static unsigned int nconns;
1542 static void
1543 fd_ready (int fd)
1545 pthread_mutex_lock (&readylist_lock);
1547 /* Find an empty entry in FDLIST. */
1548 size_t inner;
1549 for (inner = 0; inner < nconns; ++inner)
1550 if (fdlist[inner].next == NULL)
1551 break;
1552 assert (inner < nconns);
1554 fdlist[inner].fd = fd;
1556 if (readylist == NULL)
1557 readylist = fdlist[inner].next = &fdlist[inner];
1558 else
1560 fdlist[inner].next = readylist->next;
1561 readylist = readylist->next = &fdlist[inner];
1564 bool do_signal = true;
1565 if (__builtin_expect (nready == 0, 0))
1567 ++client_queued;
1568 do_signal = false;
1570 /* Try to start another thread to help out. */
1571 pthread_t th;
1572 if (nthreads < max_nthreads
1573 && pthread_create (&th, &attr, nscd_run,
1574 (void *) (long int) nthreads) == 0)
1576 /* We got another thread. */
1577 ++nthreads;
1578 /* The new thread might need a kick. */
1579 do_signal = true;
1584 pthread_mutex_unlock (&readylist_lock);
1586 /* Tell one of the worker threads there is work to do. */
1587 if (do_signal)
1588 pthread_cond_signal (&readylist_cond);
1592 /* Check whether restarting should happen. */
1593 static inline int
1594 restart_p (time_t now)
1596 return (paranoia && readylist == NULL && nready == nthreads
1597 && now >= restart_time);
1601 /* Array for times a connection was accepted. */
1602 static time_t *starttime;
1605 static void
1606 __attribute__ ((__noreturn__))
1607 main_loop_poll (void)
1609 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1610 * sizeof (conns[0]));
1612 conns[0].fd = sock;
1613 conns[0].events = POLLRDNORM;
1614 size_t nused = 1;
1615 size_t firstfree = 1;
1617 while (1)
1619 /* Wait for any event. We wait at most a couple of seconds so
1620 that we can check whether we should close any of the accepted
1621 connections since we have not received a request. */
1622 #define MAX_ACCEPT_TIMEOUT 30
1623 #define MIN_ACCEPT_TIMEOUT 5
1624 #define MAIN_THREAD_TIMEOUT \
1625 (MAX_ACCEPT_TIMEOUT * 1000 \
1626 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1628 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1630 time_t now = time (NULL);
1632 /* If there is a descriptor ready for reading or there is a new
1633 connection, process this now. */
1634 if (n > 0)
1636 if (conns[0].revents != 0)
1638 /* We have a new incoming connection. Accept the connection. */
1639 int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
1641 /* Use the descriptor if we have not reached the limit. */
1642 if (fd >= 0)
1644 if (firstfree < nconns)
1646 conns[firstfree].fd = fd;
1647 conns[firstfree].events = POLLRDNORM;
1648 starttime[firstfree] = now;
1649 if (firstfree >= nused)
1650 nused = firstfree + 1;
1653 ++firstfree;
1654 while (firstfree < nused && conns[firstfree].fd != -1);
1656 else
1657 /* We cannot use the connection so close it. */
1658 close (fd);
1661 --n;
1664 for (size_t cnt = 1; cnt < nused && n > 0; ++cnt)
1665 if (conns[cnt].revents != 0)
1667 fd_ready (conns[cnt].fd);
1669 /* Clean up the CONNS array. */
1670 conns[cnt].fd = -1;
1671 if (cnt < firstfree)
1672 firstfree = cnt;
1673 if (cnt == nused - 1)
1675 --nused;
1676 while (conns[nused - 1].fd == -1);
1678 --n;
1682 /* Now find entries which have timed out. */
1683 assert (nused > 0);
1685 /* We make the timeout length depend on the number of file
1686 descriptors currently used. */
1687 #define ACCEPT_TIMEOUT \
1688 (MAX_ACCEPT_TIMEOUT \
1689 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
1690 time_t laststart = now - ACCEPT_TIMEOUT;
1692 for (size_t cnt = nused - 1; cnt > 0; --cnt)
1694 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
1696 /* Remove the entry, it timed out. */
1697 (void) close (conns[cnt].fd);
1698 conns[cnt].fd = -1;
1700 if (cnt < firstfree)
1701 firstfree = cnt;
1702 if (cnt == nused - 1)
1704 --nused;
1705 while (conns[nused - 1].fd == -1);
1709 if (restart_p (now))
1710 restart ();
#ifdef HAVE_EPOLL
/* Main thread loop based on epoll, using the epoll descriptor EFD.
   Returns only if the listening socket cannot be registered with EFD;
   otherwise it loops forever.  Accepted descriptors are used directly
   as index into STARTTIME, a zero entry meaning "not waiting".  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  /* Used by the MAIN_THREAD_TIMEOUT and ACCEPT_TIMEOUT macros.  */
  int nused = 1;
  /* Upper bound for the descriptors with a nonzero STARTTIME entry.  */
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));

            if (fd >= 0)
              {
                /* Try to add the new descriptor.  FD is known to be
                   non-negative here so the conversions are safe.  */
                ev.data.fd = fd;
                if ((unsigned int) fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if ((size_t) fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
        else
          {
            /* Remove the descriptor from the epoll descriptor.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if ((size_t) revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      for (int cnt = (int) highest; cnt > STDERR_FILENO; --cnt)
        if (cnt != sock && starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            if ((size_t) cnt == highest)
              --highest;
          }
        else if (cnt != sock && starttime[cnt] == 0
                 && (size_t) cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
1809 /* Start all the threads we want. The initial process is thread no. 1. */
1810 void
1811 start_threads (void)
1813 /* Initialize the conditional variable we will use. The only
1814 non-standard attribute we might use is the clock selection. */
1815 pthread_condattr_t condattr;
1816 pthread_condattr_init (&condattr);
1818 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
1819 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
1820 /* Determine whether the monotonous clock is available. */
1821 struct timespec dummy;
1822 # if _POSIX_MONOTONIC_CLOCK == 0
1823 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
1824 # endif
1825 # if _POSIX_CLOCK_SELECTION == 0
1826 if (sysconf (_SC_CLOCK_SELECTION) > 0)
1827 # endif
1828 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
1829 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
1830 timeout_clock = CLOCK_MONOTONIC;
1831 #endif
1833 pthread_cond_init (&readylist_cond, &condattr);
1834 pthread_condattr_destroy (&condattr);
1837 /* Create the attribute for the threads. They are all created
1838 detached. */
1839 pthread_attr_init (&attr);
1840 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
1841 /* Use 1MB stacks, twice as much for 64-bit architectures. */
1842 pthread_attr_setstacksize (&attr, 1024 * 1024 * (sizeof (void *) / 4));
1844 /* We allow less than LASTDB threads only for debugging. */
1845 if (debug_level == 0)
1846 nthreads = MAX (nthreads, lastdb);
1848 int nfailed = 0;
1849 for (long int i = 0; i < nthreads; ++i)
1851 pthread_t th;
1852 if (pthread_create (&th, &attr, nscd_run, (void *) (i - nfailed)) != 0)
1853 ++nfailed;
1855 if (nthreads - nfailed < lastdb)
1857 /* We could not start enough threads. */
1858 dbg_log (_("could only start %d threads; terminating"),
1859 nthreads - nfailed);
1860 exit (1);
1863 /* Determine how much room for descriptors we should initially
1864 allocate. This might need to change later if we cap the number
1865 with MAXCONN. */
1866 const long int nfds = sysconf (_SC_OPEN_MAX);
1867 #define MINCONN 32
1868 #define MAXCONN 16384
1869 if (nfds == -1 || nfds > MAXCONN)
1870 nconns = MAXCONN;
1871 else if (nfds < MINCONN)
1872 nconns = MINCONN;
1873 else
1874 nconns = nfds;
1876 /* We need memory to pass descriptors on to the worker threads. */
1877 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
1878 /* Array to keep track when connection was accepted. */
1879 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
1881 /* In the main thread we execute the loop which handles incoming
1882 connections. */
1883 #ifdef HAVE_EPOLL
1884 int efd = epoll_create (100);
1885 if (efd != -1)
1887 main_loop_epoll (efd);
1888 close (efd);
1890 #endif
1892 main_loop_poll ();
1896 /* Look up the uid, gid, and supplementary groups to run nscd as. When
1897 this function is called, we are not listening on the nscd socket yet so
1898 we can just use the ordinary lookup functions without causing a lockup */
1899 static void
1900 begin_drop_privileges (void)
1902 struct passwd *pwd = getpwnam (server_user);
1904 if (pwd == NULL)
1906 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1907 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
1908 server_user);
1911 server_uid = pwd->pw_uid;
1912 server_gid = pwd->pw_gid;
1914 /* Save the old UID/GID if we have to change back. */
1915 if (paranoia)
1917 old_uid = getuid ();
1918 old_gid = getgid ();
1921 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
1923 /* This really must never happen. */
1924 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1925 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
1928 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
1930 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
1931 == -1)
1933 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1934 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
1939 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
1940 run nscd as the user specified in the configuration file. */
1941 static void
1942 finish_drop_privileges (void)
1944 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
1945 /* We need to preserve the capabilities to connect to the audit daemon. */
1946 cap_t new_caps = preserve_capabilities ();
1947 #endif
1949 if (setgroups (server_ngroups, server_groups) == -1)
1951 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1952 error (EXIT_FAILURE, errno, _("setgroups failed"));
1955 int res;
1956 if (paranoia)
1957 res = setresgid (server_gid, server_gid, old_gid);
1958 else
1959 res = setgid (server_gid);
1960 if (res == -1)
1962 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1963 perror ("setgid");
1964 exit (4);
1967 if (paranoia)
1968 res = setresuid (server_uid, server_uid, old_uid);
1969 else
1970 res = setuid (server_uid);
1971 if (res == -1)
1973 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1974 perror ("setuid");
1975 exit (4);
1978 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
1979 /* Remove the temporary capabilities. */
1980 install_real_capabilities (new_caps);
1981 #endif
1984 /* Handle the HUP signal which will force a dump of the cache */
1985 void
1986 sighup_handler (int signum)
1988 sighup_pending = 1;