1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2007, 2008, 2009 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
34 #include <arpa/inet.h>
36 # include <sys/epoll.h>
39 # include <sys/inotify.h>
42 #include <sys/param.h>
45 # include <sys/sendfile.h>
47 #include <sys/socket.h>
54 #include <resolv/resolv.h>
56 # include <kernel-features.h>
60 /* Wrapper functions with error checking for standard functions. */
61 extern void *xmalloc (size_t n
);
62 extern void *xcalloc (size_t n
, size_t s
);
63 extern void *xrealloc (void *o
, size_t n
);
65 /* Support to run nscd as an unprivileged user */
66 const char *server_user
;
67 static uid_t server_uid
;
68 static gid_t server_gid
;
69 const char *stat_user
;
71 static gid_t
*server_groups
;
75 static int server_ngroups
;
77 static pthread_attr_t attr
;
79 static void begin_drop_privileges (void);
80 static void finish_drop_privileges (void);
82 /* Map request type to a string. */
83 const char *const serv2str
[LASTREQ
] =
85 [GETPWBYNAME
] = "GETPWBYNAME",
86 [GETPWBYUID
] = "GETPWBYUID",
87 [GETGRBYNAME
] = "GETGRBYNAME",
88 [GETGRBYGID
] = "GETGRBYGID",
89 [GETHOSTBYNAME
] = "GETHOSTBYNAME",
90 [GETHOSTBYNAMEv6
] = "GETHOSTBYNAMEv6",
91 [GETHOSTBYADDR
] = "GETHOSTBYADDR",
92 [GETHOSTBYADDRv6
] = "GETHOSTBYADDRv6",
93 [SHUTDOWN
] = "SHUTDOWN",
94 [GETSTAT
] = "GETSTAT",
95 [INVALIDATE
] = "INVALIDATE",
96 [GETFDPW
] = "GETFDPW",
97 [GETFDGR
] = "GETFDGR",
98 [GETFDHST
] = "GETFDHST",
100 [INITGROUPS
] = "INITGROUPS",
101 [GETSERVBYNAME
] = "GETSERVBYNAME",
102 [GETSERVBYPORT
] = "GETSERVBYPORT",
103 [GETFDSERV
] = "GETFDSERV"
106 /* The control data structures for the services. */
107 struct database_dyn dbs
[lastdb
] =
110 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
111 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
112 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
118 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
119 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
121 .filename
= "/etc/passwd",
122 .db_filename
= _PATH_NSCD_PASSWD_DB
,
123 .disabled_iov
= &pwd_iov_disabled
,
131 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
132 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
133 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
139 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
140 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
142 .filename
= "/etc/group",
143 .db_filename
= _PATH_NSCD_GROUP_DB
,
144 .disabled_iov
= &grp_iov_disabled
,
152 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
153 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
154 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
158 .propagate
= 0, /* Not used. */
160 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
161 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
163 .filename
= "/etc/hosts",
164 .db_filename
= _PATH_NSCD_HOSTS_DB
,
165 .disabled_iov
= &hst_iov_disabled
,
173 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
174 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
175 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
179 .propagate
= 0, /* Not used. */
181 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
182 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
184 .filename
= "/etc/services",
185 .db_filename
= _PATH_NSCD_SERVICES_DB
,
186 .disabled_iov
= &serv_iov_disabled
,
196 /* Mapping of request type to database. */
200 struct database_dyn
*db
;
201 } const reqinfo
[LASTREQ
] =
203 [GETPWBYNAME
] = { true, &dbs
[pwddb
] },
204 [GETPWBYUID
] = { true, &dbs
[pwddb
] },
205 [GETGRBYNAME
] = { true, &dbs
[grpdb
] },
206 [GETGRBYGID
] = { true, &dbs
[grpdb
] },
207 [GETHOSTBYNAME
] = { true, &dbs
[hstdb
] },
208 [GETHOSTBYNAMEv6
] = { true, &dbs
[hstdb
] },
209 [GETHOSTBYADDR
] = { true, &dbs
[hstdb
] },
210 [GETHOSTBYADDRv6
] = { true, &dbs
[hstdb
] },
211 [SHUTDOWN
] = { false, NULL
},
212 [GETSTAT
] = { false, NULL
},
213 [SHUTDOWN
] = { false, NULL
},
214 [GETFDPW
] = { false, &dbs
[pwddb
] },
215 [GETFDGR
] = { false, &dbs
[grpdb
] },
216 [GETFDHST
] = { false, &dbs
[hstdb
] },
217 [GETAI
] = { true, &dbs
[hstdb
] },
218 [INITGROUPS
] = { true, &dbs
[grpdb
] },
219 [GETSERVBYNAME
] = { true, &dbs
[servdb
] },
220 [GETSERVBYPORT
] = { true, &dbs
[servdb
] },
221 [GETFDSERV
] = { false, &dbs
[servdb
] }
225 /* Initial number of threads to use. */
227 /* Maximum number of threads to use. */
228 int max_nthreads
= 32;
230 /* Socket for incoming connections. */
234 /* Inotify descriptor. */
235 static int inotify_fd
= -1;
237 /* Watch descriptor for resolver configuration file. */
238 static int resolv_conf_descr
= -1;
241 #ifndef __ASSUME_SOCK_CLOEXEC
242 /* Negative if SOCK_CLOEXEC is not supported, positive if it is, zero
243 before we know the result. */
244 static int have_sock_cloexec
;
246 #ifndef __ASSUME_ACCEPT4
247 static int have_accept4
;
250 /* Number of times clients had to wait. */
251 unsigned long int client_queued
;
255 writeall (int fd
, const void *buf
, size_t len
)
261 ret
= TEMP_FAILURE_RETRY (send (fd
, buf
, n
, MSG_NOSIGNAL
));
264 buf
= (const char *) buf
+ ret
;
268 return ret
< 0 ? ret
: len
- n
;
274 sendfileall (int tofd
, int fromfd
, off_t off
, size_t len
)
281 ret
= TEMP_FAILURE_RETRY (sendfile (tofd
, fromfd
, &off
, n
));
287 return ret
< 0 ? ret
: len
- n
;
295 /* The following three are not really used, they are symbolic constants. */
301 use_he_begin
= use_he
| use_begin
,
302 use_he_end
= use_he
| use_end
,
305 use_key_begin
= use_key
| use_begin
,
306 use_key_end
= use_key
| use_end
,
307 use_key_first
= use_key_begin
| use_first
,
310 use_data_begin
= use_data
| use_begin
,
311 use_data_end
= use_data
| use_end
,
312 use_data_first
= use_data_begin
| use_first
317 check_use (const char *data
, nscd_ssize_t first_free
, uint8_t *usemap
,
318 enum usekey use
, ref_t start
, size_t len
)
322 if (start
> first_free
|| start
+ len
> first_free
323 || (start
& BLOCK_ALIGN_M1
))
326 if (usemap
[start
] == use_not
)
328 /* Add the start marker. */
329 usemap
[start
] = use
| use_begin
;
333 if (usemap
[++start
] != use_not
)
338 /* Add the end marker. */
339 usemap
[start
] = use
| use_end
;
341 else if ((usemap
[start
] & ~use_first
) == ((use
| use_begin
) & ~use_first
))
343 /* Hash entries can't be shared. */
347 usemap
[start
] |= (use
& use_first
);
351 if (usemap
[++start
] != use
)
354 if (usemap
[++start
] != (use
| use_end
))
358 /* Points to a wrong object or somewhere in the middle. */
365 /* Verify data in persistent database. */
367 verify_persistent_db (void *mem
, struct database_pers_head
*readhead
, int dbnr
)
369 assert (dbnr
== pwddb
|| dbnr
== grpdb
|| dbnr
== hstdb
|| dbnr
== servdb
);
371 time_t now
= time (NULL
);
373 struct database_pers_head
*head
= mem
;
374 struct database_pers_head head_copy
= *head
;
376 /* Check that the header that was read matches the head in the database. */
377 if (memcmp (head
, readhead
, sizeof (*head
)) != 0)
380 /* First some easy tests: make sure the database header is sane. */
381 if (head
->version
!= DB_VERSION
382 || head
->header_size
!= sizeof (*head
)
383 /* We allow a timestamp to be one hour ahead of the current time.
384 This should cover daylight saving time changes. */
385 || head
->timestamp
> now
+ 60 * 60 + 60
386 || (head
->gc_cycle
& 1)
388 || (size_t) head
->module
> INT32_MAX
/ sizeof (ref_t
)
389 || (size_t) head
->data_size
> INT32_MAX
- head
->module
* sizeof (ref_t
)
390 || head
->first_free
< 0
391 || head
->first_free
> head
->data_size
392 || (head
->first_free
& BLOCK_ALIGN_M1
) != 0
393 || head
->maxnentries
< 0
394 || head
->maxnsearched
< 0)
397 uint8_t *usemap
= calloc (head
->first_free
, 1);
401 const char *data
= (char *) &head
->array
[roundup (head
->module
,
402 ALIGN
/ sizeof (ref_t
))];
404 nscd_ssize_t he_cnt
= 0;
405 for (nscd_ssize_t cnt
= 0; cnt
< head
->module
; ++cnt
)
407 ref_t trail
= head
->array
[cnt
];
411 while (work
!= ENDREF
)
413 if (! check_use (data
, head
->first_free
, usemap
, use_he
, work
,
414 sizeof (struct hashentry
)))
417 /* Now we know we can dereference the record. */
418 struct hashentry
*here
= (struct hashentry
*) (data
+ work
);
422 /* Make sure the record is for this type of service. */
423 if (here
->type
>= LASTREQ
424 || reqinfo
[here
->type
].db
!= &dbs
[dbnr
])
427 /* Validate boolean field value. */
428 if (here
->first
!= false && here
->first
!= true)
436 || here
->packet
> head
->first_free
437 || here
->packet
+ sizeof (struct datahead
) > head
->first_free
)
440 struct datahead
*dh
= (struct datahead
*) (data
+ here
->packet
);
442 if (! check_use (data
, head
->first_free
, usemap
,
443 use_data
| (here
->first
? use_first
: 0),
444 here
->packet
, dh
->allocsize
))
447 if (dh
->allocsize
< sizeof (struct datahead
)
448 || dh
->recsize
> dh
->allocsize
449 || (dh
->notfound
!= false && dh
->notfound
!= true)
450 || (dh
->usable
!= false && dh
->usable
!= true))
453 if (here
->key
< here
->packet
+ sizeof (struct datahead
)
454 || here
->key
> here
->packet
+ dh
->allocsize
455 || here
->key
+ here
->len
> here
->packet
+ dh
->allocsize
)
458 /* If keys can appear outside of data, this should be done
459 instead. But gc doesn't mark the data in that case. */
460 if (! check_use (data
, head
->first_free
, usemap
,
461 use_key
| (here
->first
? use_first
: 0),
462 here
->key
, here
->len
))
470 /* A circular list, this must not happen. */
473 trail
= ((struct hashentry
*) (data
+ trail
))->next
;
478 if (he_cnt
!= head
->nentries
)
481 /* See if all data and keys had at least one reference from
482 he->first == true hashentry. */
483 for (ref_t idx
= 0; idx
< head
->first_free
; ++idx
)
486 if (usemap
[idx
] == use_key_begin
)
489 if (usemap
[idx
] == use_data_begin
)
493 /* Finally, make sure the database hasn't changed since the first test. */
494 if (memcmp (mem
, &head_copy
, sizeof (*head
)) != 0)
507 # define EXTRA_O_FLAGS O_CLOEXEC
509 # define EXTRA_O_FLAGS 0
513 /* Initialize database information structures. */
517 /* Look up unprivileged uid/gid/groups before we start listening on the
519 if (server_user
!= NULL
)
520 begin_drop_privileges ();
523 /* No configuration for this value, assume a default. */
527 /* Use inotify to recognize changed files. */
528 inotify_fd
= inotify_init1 (IN_NONBLOCK
);
529 # ifndef __ASSUME_IN_NONBLOCK
530 if (inotify_fd
== -1 && errno
== ENOSYS
)
532 inotify_fd
= inotify_init ();
533 if (inotify_fd
!= -1)
534 fcntl (inotify_fd
, F_SETFL
, O_RDONLY
| O_NONBLOCK
);
539 for (size_t cnt
= 0; cnt
< lastdb
; ++cnt
)
540 if (dbs
[cnt
].enabled
)
542 pthread_rwlock_init (&dbs
[cnt
].lock
, NULL
);
543 pthread_mutex_init (&dbs
[cnt
].memlock
, NULL
);
545 if (dbs
[cnt
].persistent
)
547 /* Try to open the appropriate file on disk. */
548 int fd
= open (dbs
[cnt
].db_filename
, O_RDWR
| EXTRA_O_FLAGS
);
555 struct database_pers_head head
;
556 ssize_t n
= TEMP_FAILURE_RETRY (read (fd
, &head
,
558 if (n
!= sizeof (head
) || fstat64 (fd
, &st
) != 0)
561 /* The code is single-threaded at this point so
562 using strerror is just fine. */
563 msg
= strerror (errno
);
565 dbg_log (_("invalid persistent database file \"%s\": %s"),
566 dbs
[cnt
].db_filename
, msg
);
567 unlink (dbs
[cnt
].db_filename
);
569 else if (head
.module
== 0 && head
.data_size
== 0)
571 /* The file has been created, but the head has not
572 been initialized yet. */
573 msg
= _("uninitialized header");
576 else if (head
.header_size
!= (int) sizeof (head
))
578 msg
= _("header size does not match");
581 else if ((total
= (sizeof (head
)
582 + roundup (head
.module
* sizeof (ref_t
),
586 || total
< sizeof (head
))
588 msg
= _("file size does not match");
591 /* Note we map with the maximum size allowed for the
592 database. This is likely much larger than the
593 actual file size. This is OK on most OSes since
594 extensions of the underlying file will
595 automatically translate more pages available for
597 else if ((mem
= mmap (NULL
, dbs
[cnt
].max_db_size
,
598 PROT_READ
| PROT_WRITE
,
602 else if (!verify_persistent_db (mem
, &head
, cnt
))
605 msg
= _("verification failed");
610 /* Success. We have the database. */
612 dbs
[cnt
].memsize
= total
;
613 dbs
[cnt
].data
= (char *)
614 &dbs
[cnt
].head
->array
[roundup (dbs
[cnt
].head
->module
,
615 ALIGN
/ sizeof (ref_t
))];
616 dbs
[cnt
].mmap_used
= true;
618 if (dbs
[cnt
].suggested_module
> head
.module
)
619 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
624 /* We also need a read-only descriptor. */
627 dbs
[cnt
].ro_fd
= open (dbs
[cnt
].db_filename
,
628 O_RDONLY
| EXTRA_O_FLAGS
);
629 if (dbs
[cnt
].ro_fd
== -1)
631 cannot create read-only descriptor for \"%s\"; no mmap"),
632 dbs
[cnt
].db_filename
);
635 // XXX Shall we test whether the descriptors actually
636 // XXX point to the same file?
639 /* Close the file descriptors in case something went
640 wrong in which case the variables have not been
645 else if (errno
== EACCES
)
646 error (EXIT_FAILURE
, 0, _("cannot access '%s'"),
647 dbs
[cnt
].db_filename
);
650 if (dbs
[cnt
].head
== NULL
)
652 /* No database loaded. Allocate the data structure,
654 struct database_pers_head head
;
655 size_t total
= (sizeof (head
)
656 + roundup (dbs
[cnt
].suggested_module
657 * sizeof (ref_t
), ALIGN
)
658 + (dbs
[cnt
].suggested_module
659 * DEFAULT_DATASIZE_PER_BUCKET
));
661 /* Try to create the database. If we do not need a
662 persistent database create a temporary file. */
665 if (dbs
[cnt
].persistent
)
667 fd
= open (dbs
[cnt
].db_filename
,
668 O_RDWR
| O_CREAT
| O_EXCL
| O_TRUNC
| EXTRA_O_FLAGS
,
670 if (fd
!= -1 && dbs
[cnt
].shared
)
671 ro_fd
= open (dbs
[cnt
].db_filename
,
672 O_RDONLY
| EXTRA_O_FLAGS
);
676 char fname
[] = _PATH_NSCD_XYZ_DB_TMP
;
677 fd
= mkostemp (fname
, EXTRA_O_FLAGS
);
679 /* We do not need the file name anymore after we
680 opened another file descriptor in read-only mode. */
684 ro_fd
= open (fname
, O_RDONLY
| EXTRA_O_FLAGS
);
694 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
695 dbnames
[cnt
], dbs
[cnt
].db_filename
);
696 // XXX Correct way to terminate?
700 if (dbs
[cnt
].persistent
)
701 dbg_log (_("cannot create %s; no persistent database used"),
702 dbs
[cnt
].db_filename
);
704 dbg_log (_("cannot create %s; no sharing possible"),
705 dbs
[cnt
].db_filename
);
707 dbs
[cnt
].persistent
= 0;
708 // XXX remember: no mmap
712 /* Tell the user if we could not create the read-only
714 if (ro_fd
== -1 && dbs
[cnt
].shared
)
716 cannot create read-only descriptor for \"%s\"; no mmap"),
717 dbs
[cnt
].db_filename
);
719 /* Before we create the header, initialize the hash
720 table. So that if we get interrupted while writing
721 the header we can recognize a partially initialized
723 size_t ps
= sysconf (_SC_PAGESIZE
);
725 assert (~ENDREF
== 0);
726 memset (tmpbuf
, '\xff', ps
);
728 size_t remaining
= dbs
[cnt
].suggested_module
* sizeof (ref_t
);
729 off_t offset
= sizeof (head
);
732 if (offset
% ps
!= 0)
734 towrite
= MIN (remaining
, ps
- (offset
% ps
));
735 if (pwrite (fd
, tmpbuf
, towrite
, offset
) != towrite
)
738 remaining
-= towrite
;
741 while (remaining
> ps
)
743 if (pwrite (fd
, tmpbuf
, ps
, offset
) == -1)
750 && pwrite (fd
, tmpbuf
, remaining
, offset
) != remaining
)
753 /* Create the header of the file. */
754 struct database_pers_head head
=
756 .version
= DB_VERSION
,
757 .header_size
= sizeof (head
),
758 .module
= dbs
[cnt
].suggested_module
,
759 .data_size
= (dbs
[cnt
].suggested_module
760 * DEFAULT_DATASIZE_PER_BUCKET
),
765 if ((TEMP_FAILURE_RETRY (write (fd
, &head
, sizeof (head
)))
767 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd
, 0, total
))
769 || (mem
= mmap (NULL
, dbs
[cnt
].max_db_size
,
770 PROT_READ
| PROT_WRITE
,
771 MAP_SHARED
, fd
, 0)) == MAP_FAILED
)
774 unlink (dbs
[cnt
].db_filename
);
775 dbg_log (_("cannot write to database file %s: %s"),
776 dbs
[cnt
].db_filename
, strerror (errno
));
777 dbs
[cnt
].persistent
= 0;
783 dbs
[cnt
].data
= (char *)
784 &dbs
[cnt
].head
->array
[roundup (dbs
[cnt
].head
->module
,
785 ALIGN
/ sizeof (ref_t
))];
786 dbs
[cnt
].memsize
= total
;
787 dbs
[cnt
].mmap_used
= true;
789 /* Remember the descriptors. */
791 dbs
[cnt
].ro_fd
= ro_fd
;
803 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
804 /* We do not check here whether the O_CLOEXEC provided to the
805 open call was successful or not. The two fcntl calls are
806 only performed once each per process start-up and therefore
807 are not noticeable at all. */
809 && ((dbs
[cnt
].wr_fd
!= -1
810 && fcntl (dbs
[cnt
].wr_fd
, F_SETFD
, FD_CLOEXEC
) == -1)
811 || (dbs
[cnt
].ro_fd
!= -1
812 && fcntl (dbs
[cnt
].ro_fd
, F_SETFD
, FD_CLOEXEC
) == -1)))
815 cannot set socket to close on exec: %s; disabling paranoia mode"),
821 if (dbs
[cnt
].head
== NULL
)
823 /* We do not use the persistent database. Just
824 create an in-memory data structure. */
825 assert (! dbs
[cnt
].persistent
);
827 dbs
[cnt
].head
= xmalloc (sizeof (struct database_pers_head
)
828 + (dbs
[cnt
].suggested_module
830 memset (dbs
[cnt
].head
, '\0', sizeof (struct database_pers_head
));
831 assert (~ENDREF
== 0);
832 memset (dbs
[cnt
].head
->array
, '\xff',
833 dbs
[cnt
].suggested_module
* sizeof (ref_t
));
834 dbs
[cnt
].head
->module
= dbs
[cnt
].suggested_module
;
835 dbs
[cnt
].head
->data_size
= (DEFAULT_DATASIZE_PER_BUCKET
836 * dbs
[cnt
].head
->module
);
837 dbs
[cnt
].data
= xmalloc (dbs
[cnt
].head
->data_size
);
838 dbs
[cnt
].head
->first_free
= 0;
841 assert (dbs
[cnt
].ro_fd
== -1);
844 dbs
[cnt
].inotify_descr
= -1;
845 if (dbs
[cnt
].check_file
)
849 || (dbs
[cnt
].inotify_descr
850 = inotify_add_watch (inotify_fd
, dbs
[cnt
].filename
,
851 IN_DELETE_SELF
| IN_MODIFY
)) < 0)
852 /* We cannot notice changes in the main thread. */
855 /* We need the modification date of the file. */
858 if (stat64 (dbs
[cnt
].filename
, &st
) < 0)
860 /* We cannot stat() the file, disable file checking. */
861 dbg_log (_("cannot stat() file `%s': %s"),
862 dbs
[cnt
].filename
, strerror (errno
));
863 dbs
[cnt
].check_file
= 0;
866 dbs
[cnt
].file_mtime
= st
.st_mtime
;
871 if (cnt
== hstdb
&& inotify_fd
>= -1)
872 /* We also monitor the resolver configuration file. */
873 resolv_conf_descr
= inotify_add_watch (inotify_fd
,
875 IN_DELETE_SELF
| IN_MODIFY
);
879 /* Create the socket. */
880 #ifndef __ASSUME_SOCK_CLOEXEC
882 if (have_sock_cloexec
>= 0)
885 sock
= socket (AF_UNIX
, SOCK_STREAM
| SOCK_CLOEXEC
| SOCK_NONBLOCK
, 0);
886 #ifndef __ASSUME_SOCK_CLOEXEC
887 if (have_sock_cloexec
== 0)
888 have_sock_cloexec
= sock
!= -1 || errno
!= EINVAL
? 1 : -1;
891 #ifndef __ASSUME_SOCK_CLOEXEC
892 if (have_sock_cloexec
< 0)
893 sock
= socket (AF_UNIX
, SOCK_STREAM
, 0);
897 dbg_log (_("cannot open socket: %s"), strerror (errno
));
898 exit (errno
== EACCES
? 4 : 1);
900 /* Bind a name to the socket. */
901 struct sockaddr_un sock_addr
;
902 sock_addr
.sun_family
= AF_UNIX
;
903 strcpy (sock_addr
.sun_path
, _PATH_NSCDSOCKET
);
904 if (bind (sock
, (struct sockaddr
*) &sock_addr
, sizeof (sock_addr
)) < 0)
906 dbg_log ("%s: %s", _PATH_NSCDSOCKET
, strerror (errno
));
907 exit (errno
== EACCES
? 4 : 1);
910 #ifndef __ASSUME_SOCK_CLOEXEC
911 if (have_sock_cloexec
< 0)
913 /* We don't want to get stuck on accept. */
914 int fl
= fcntl (sock
, F_GETFL
);
915 if (fl
== -1 || fcntl (sock
, F_SETFL
, fl
| O_NONBLOCK
) == -1)
917 dbg_log (_("cannot change socket to nonblocking mode: %s"),
922 /* The descriptor needs to be closed on exec. */
923 if (paranoia
&& fcntl (sock
, F_SETFD
, FD_CLOEXEC
) == -1)
925 dbg_log (_("cannot set socket to close on exec: %s"),
932 /* Set permissions for the socket. */
933 chmod (_PATH_NSCDSOCKET
, DEFFILEMODE
);
935 /* Set the socket up to accept connections. */
936 if (listen (sock
, SOMAXCONN
) < 0)
938 dbg_log (_("cannot enable socket to accept connections: %s"),
943 /* Change to unprivileged uid/gid/groups if specified in config file */
944 if (server_user
!= NULL
)
945 finish_drop_privileges ();
949 /* Close the connections. */
958 invalidate_cache (char *key
, int fd
)
963 for (number
= pwddb
; number
< lastdb
; ++number
)
964 if (strcmp (key
, dbnames
[number
]) == 0)
966 if (dbs
[number
].reset_res
)
972 if (number
== lastdb
)
975 writeall (fd
, &resp
, sizeof (resp
));
979 if (dbs
[number
].enabled
)
981 pthread_mutex_lock (&dbs
[number
].prune_run_lock
);
982 prune_cache (&dbs
[number
], LONG_MAX
, fd
);
983 pthread_mutex_unlock (&dbs
[number
].prune_run_lock
);
988 writeall (fd
, &resp
, sizeof (resp
));
995 send_ro_fd (struct database_dyn
*db
, char *key
, int fd
)
997 /* If we do not have a read-only file descriptor do nothing. */
1001 /* We need to send some data along with the descriptor. */
1002 uint64_t mapsize
= (db
->head
->data_size
1003 + roundup (db
->head
->module
* sizeof (ref_t
), ALIGN
)
1004 + sizeof (struct database_pers_head
));
1005 struct iovec iov
[2];
1006 iov
[0].iov_base
= key
;
1007 iov
[0].iov_len
= strlen (key
) + 1;
1008 iov
[1].iov_base
= &mapsize
;
1009 iov
[1].iov_len
= sizeof (mapsize
);
1011 /* Prepare the control message to transfer the descriptor. */
1015 char bytes
[CMSG_SPACE (sizeof (int))];
1017 struct msghdr msg
= { .msg_iov
= iov
, .msg_iovlen
= 2,
1018 .msg_control
= buf
.bytes
,
1019 .msg_controllen
= sizeof (buf
) };
1020 struct cmsghdr
*cmsg
= CMSG_FIRSTHDR (&msg
);
1022 cmsg
->cmsg_level
= SOL_SOCKET
;
1023 cmsg
->cmsg_type
= SCM_RIGHTS
;
1024 cmsg
->cmsg_len
= CMSG_LEN (sizeof (int));
1026 int *ip
= (int *) CMSG_DATA (cmsg
);
1029 msg
.msg_controllen
= cmsg
->cmsg_len
;
1031 /* Send the control message. We repeat when we are interrupted but
1032 everything else is ignored. */
1033 #ifndef MSG_NOSIGNAL
1034 # define MSG_NOSIGNAL 0
1036 (void) TEMP_FAILURE_RETRY (sendmsg (fd
, &msg
, MSG_NOSIGNAL
));
1038 if (__builtin_expect (debug_level
> 0, 0))
1039 dbg_log (_("provide access to FD %d, for %s"), db
->ro_fd
, key
);
1041 #endif /* SCM_RIGHTS */
1044 /* Handle new request. */
1046 handle_request (int fd
, request_header
*req
, void *key
, uid_t uid
, pid_t pid
)
1048 if (__builtin_expect (req
->version
, NSCD_VERSION
) != NSCD_VERSION
)
1050 if (debug_level
> 0)
1052 cannot handle old request version %d; current version is %d"),
1053 req
->version
, NSCD_VERSION
);
1057 /* Perform the SELinux check before we go on to the standard checks. */
1058 if (selinux_enabled
&& nscd_request_avc_has_perm (fd
, req
->type
) != 0)
1060 if (debug_level
> 0)
1069 snprintf (buf
, sizeof (buf
), "/proc/%ld/exe", (long int) pid
);
1070 ssize_t n
= readlink (buf
, buf
, sizeof (buf
) - 1);
1074 request from %ld not handled due to missing permission"), (long int) pid
);
1079 request from '%s' [%ld] not handled due to missing permission"),
1080 buf
, (long int) pid
);
1083 dbg_log (_("request not handled due to missing permission"));
1089 struct database_dyn
*db
= reqinfo
[req
->type
].db
;
1091 /* See whether we can service the request from the cache. */
1092 if (__builtin_expect (reqinfo
[req
->type
].data_request
, true))
1094 if (__builtin_expect (debug_level
, 0) > 0)
1096 if (req
->type
== GETHOSTBYADDR
|| req
->type
== GETHOSTBYADDRv6
)
1098 char buf
[INET6_ADDRSTRLEN
];
1100 dbg_log ("\t%s (%s)", serv2str
[req
->type
],
1101 inet_ntop (req
->type
== GETHOSTBYADDR
1102 ? AF_INET
: AF_INET6
,
1103 key
, buf
, sizeof (buf
)));
1106 dbg_log ("\t%s (%s)", serv2str
[req
->type
], (char *) key
);
1109 /* Is this service enabled? */
1110 if (__builtin_expect (!db
->enabled
, 0))
1112 /* No, send the prepared record. */
1113 if (TEMP_FAILURE_RETRY (send (fd
, db
->disabled_iov
->iov_base
,
1114 db
->disabled_iov
->iov_len
,
1116 != (ssize_t
) db
->disabled_iov
->iov_len
1117 && __builtin_expect (debug_level
, 0) > 0)
1119 /* We have problems sending the result. */
1121 dbg_log (_("cannot write result: %s"),
1122 strerror_r (errno
, buf
, sizeof (buf
)));
1128 /* Be sure we can read the data. */
1129 if (__builtin_expect (pthread_rwlock_tryrdlock (&db
->lock
) != 0, 0))
1131 ++db
->head
->rdlockdelayed
;
1132 pthread_rwlock_rdlock (&db
->lock
);
1135 /* See whether we can handle it from the cache. */
1136 struct datahead
*cached
;
1137 cached
= (struct datahead
*) cache_search (req
->type
, key
, req
->key_len
,
1141 /* Hurray it's in the cache. */
1144 #ifdef HAVE_SENDFILE
1145 if (__builtin_expect (db
->mmap_used
, 1))
1147 assert (db
->wr_fd
!= -1);
1148 assert ((char *) cached
->data
> (char *) db
->data
);
1149 assert ((char *) cached
->data
- (char *) db
->head
1151 <= (sizeof (struct database_pers_head
)
1152 + db
->head
->module
* sizeof (ref_t
)
1153 + db
->head
->data_size
));
1154 nwritten
= sendfileall (fd
, db
->wr_fd
,
1155 (char *) cached
->data
1156 - (char *) db
->head
, cached
->recsize
);
1157 # ifndef __ASSUME_SENDFILE
1158 if (nwritten
== -1 && errno
== ENOSYS
)
1163 # ifndef __ASSUME_SENDFILE
1167 nwritten
= writeall (fd
, cached
->data
, cached
->recsize
);
1169 if (nwritten
!= cached
->recsize
1170 && __builtin_expect (debug_level
, 0) > 0)
1172 /* We have problems sending the result. */
1174 dbg_log (_("cannot write result: %s"),
1175 strerror_r (errno
, buf
, sizeof (buf
)));
1178 pthread_rwlock_unlock (&db
->lock
);
1183 pthread_rwlock_unlock (&db
->lock
);
1185 else if (__builtin_expect (debug_level
, 0) > 0)
1187 if (req
->type
== INVALIDATE
)
1188 dbg_log ("\t%s (%s)", serv2str
[req
->type
], (char *) key
);
1190 dbg_log ("\t%s", serv2str
[req
->type
]);
1193 /* Handle the request. */
1197 addpwbyname (db
, fd
, req
, key
, uid
);
1201 addpwbyuid (db
, fd
, req
, key
, uid
);
1205 addgrbyname (db
, fd
, req
, key
, uid
);
1209 addgrbygid (db
, fd
, req
, key
, uid
);
1213 addhstbyname (db
, fd
, req
, key
, uid
);
1216 case GETHOSTBYNAMEv6
:
1217 addhstbynamev6 (db
, fd
, req
, key
, uid
);
1221 addhstbyaddr (db
, fd
, req
, key
, uid
);
1224 case GETHOSTBYADDRv6
:
1225 addhstbyaddrv6 (db
, fd
, req
, key
, uid
);
1229 addhstai (db
, fd
, req
, key
, uid
);
1233 addinitgroups (db
, fd
, req
, key
, uid
);
1237 addservbyname (db
, fd
, req
, key
, uid
);
1241 addservbyport (db
, fd
, req
, key
, uid
);
1248 /* Get the caller's credentials. */
1250 struct ucred caller
;
1251 socklen_t optlen
= sizeof (caller
);
1253 if (getsockopt (fd
, SOL_SOCKET
, SO_PEERCRED
, &caller
, &optlen
) < 0)
1257 dbg_log (_("error getting caller's id: %s"),
1258 strerror_r (errno
, buf
, sizeof (buf
)));
1264 /* Some systems have no SO_PEERCRED implementation. They don't
1265 care about security so we don't as well. */
1270 /* Accept shutdown, getstat and invalidate only from root. For
1271 the stat call also allow the user specified in the config file. */
1272 if (req
->type
== GETSTAT
)
1274 if (uid
== 0 || uid
== stat_uid
)
1275 send_stats (fd
, dbs
);
1279 if (req
->type
== INVALIDATE
)
1280 invalidate_cache (key
, fd
);
1282 termination_handler (0);
1291 send_ro_fd (reqinfo
[req
->type
].db
, key
, fd
);
1296 /* Ignore the command, it's nothing we know. */
1302 /* Restart the process. */
1306 /* First determine the parameters. We do not use the parameters
1307 passed to main() since in case nscd is started by running the
1308 dynamic linker this will not work. Yes, this is not the usual
1309 case but nscd is part of glibc and we occasionally do this. */
1310 size_t buflen
= 1024;
1311 char *buf
= alloca (buflen
);
1313 int fd
= open ("/proc/self/cmdline", O_RDONLY
);
1317 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1326 ssize_t n
= TEMP_FAILURE_RETRY (read (fd
, buf
+ readlen
,
1331 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1341 if (readlen
< buflen
)
1344 /* We might have to extend the buffer. */
1345 size_t old_buflen
= buflen
;
1346 char *newp
= extend_alloca (buf
, buflen
, 2 * buflen
);
1347 buf
= memmove (newp
, buf
, old_buflen
);
1352 /* Parse the command line. Worst case scenario: every two
1353 characters form one parameter (one character plus NUL). */
1354 char **argv
= alloca ((readlen
/ 2 + 1) * sizeof (argv
[0]));
1358 while (cp
< buf
+ readlen
)
1361 cp
= (char *) rawmemchr (cp
, '\0') + 1;
1365 /* Second, change back to the old user if we changed it. */
1366 if (server_user
!= NULL
)
1368 if (setresuid (old_uid
, old_uid
, old_uid
) != 0)
1371 cannot change to old UID: %s; disabling paranoia mode"),
1378 if (setresgid (old_gid
, old_gid
, old_gid
) != 0)
1381 cannot change to old GID: %s; disabling paranoia mode"),
1384 setuid (server_uid
);
1390 /* Next change back to the old working directory. */
1391 if (chdir (oldcwd
) == -1)
1394 cannot change to old working directory: %s; disabling paranoia mode"),
1397 if (server_user
!= NULL
)
1399 setuid (server_uid
);
1400 setgid (server_gid
);
1406 /* Synchronize memory. */
1407 int32_t certainly
[lastdb
];
1408 for (int cnt
= 0; cnt
< lastdb
; ++cnt
)
1409 if (dbs
[cnt
].enabled
)
1411 /* Make sure nobody keeps using the database. */
1412 dbs
[cnt
].head
->timestamp
= 0;
1413 certainly
[cnt
] = dbs
[cnt
].head
->nscd_certainly_running
;
1414 dbs
[cnt
].head
->nscd_certainly_running
= 0;
1416 if (dbs
[cnt
].persistent
)
1418 msync (dbs
[cnt
].head
, dbs
[cnt
].memsize
, MS_ASYNC
);
1421 /* The preparations are done. */
1423 char pathbuf
[PATH_MAX
];
1427 /* Try to exec the real nscd program so the process name (as reported
1428 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1429 if readlink fails */
1430 ssize_t n
= readlink ("/proc/self/exe", pathbuf
, sizeof (pathbuf
) - 1);
1432 execv ("/proc/self/exe", argv
);
1436 execv (pathbuf
, argv
);
1439 /* If we come here, we will never be able to re-exec. */
1440 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1443 if (server_user
!= NULL
)
1445 setuid (server_uid
);
1446 setgid (server_gid
);
1448 if (chdir ("/") != 0)
1449 dbg_log (_("cannot change current working directory to \"/\": %s"),
1453 /* Reenable the databases. */
1454 time_t now
= time (NULL
);
1455 for (int cnt
= 0; cnt
< lastdb
; ++cnt
)
1456 if (dbs
[cnt
].enabled
)
1458 dbs
[cnt
].head
->timestamp
= now
;
1459 dbs
[cnt
].head
->nscd_certainly_running
= certainly
[cnt
];
1464 /* List of file descriptors. */
1468 struct fdlist
*next
;
1470 /* Memory allocated for the list. */
1471 static struct fdlist
*fdlist
;
1472 /* List of currently ready-to-read file descriptors. */
1473 static struct fdlist
*readylist
;
1475 /* Condition variable and mutex to signal availability of entries in
1476 READYLIST. The condvar is initialized dynamically since we might
1477 use a different clock depending on availability. */
1478 static pthread_cond_t readylist_cond
= PTHREAD_COND_INITIALIZER
;
1479 static pthread_mutex_t readylist_lock
= PTHREAD_MUTEX_INITIALIZER
;
1481 /* The clock to use with the condvar. */
1482 static clockid_t timeout_clock
= CLOCK_REALTIME
;
1484 /* Number of threads ready to handle the READYLIST. */
1485 static unsigned long int nready
;
1488 /* Function for the clean-up threads. */
1490 __attribute__ ((__noreturn__
))
1491 nscd_run_prune (void *p
)
1493 const long int my_number
= (long int) p
;
1494 assert (dbs
[my_number
].enabled
);
1496 int dont_need_update
= setup_thread (&dbs
[my_number
]);
1498 time_t now
= time (NULL
);
1500 /* We are running. */
1501 dbs
[my_number
].head
->timestamp
= now
;
1503 struct timespec prune_ts
;
1504 if (__builtin_expect (clock_gettime (timeout_clock
, &prune_ts
) == -1, 0))
1505 /* Should never happen. */
1508 /* Compute the initial timeout time. Prevent all the timers from going
1509 off at the same time by adding a db-based value. */
1510 prune_ts
.tv_sec
+= CACHE_PRUNE_INTERVAL
+ my_number
;
1511 dbs
[my_number
].wakeup_time
= now
+ CACHE_PRUNE_INTERVAL
+ my_number
;
1513 pthread_mutex_t
*prune_lock
= &dbs
[my_number
].prune_lock
;
1514 pthread_mutex_t
*prune_run_lock
= &dbs
[my_number
].prune_run_lock
;
1515 pthread_cond_t
*prune_cond
= &dbs
[my_number
].prune_cond
;
1517 pthread_mutex_lock (prune_lock
);
1520 /* Wait, but not forever. */
1522 if (! dbs
[my_number
].clear_cache
)
1523 e
= pthread_cond_timedwait (prune_cond
, prune_lock
, &prune_ts
);
1524 assert (__builtin_expect (e
== 0 || e
== ETIMEDOUT
, 1));
1528 if (e
== ETIMEDOUT
|| now
>= dbs
[my_number
].wakeup_time
1529 || dbs
[my_number
].clear_cache
)
1531 /* We will determine the new timeout values based on the
1532 cache content. Should there be concurrent additions to
1533 the cache which are not accounted for in the cache
1534 pruning we want to know about it. Therefore set the
1535 timeout to the maximum. It will be decreased when adding
1536 new entries to the cache, if necessary. */
1537 if (sizeof (time_t) == sizeof (long int))
1538 dbs
[my_number
].wakeup_time
= LONG_MAX
;
1540 dbs
[my_number
].wakeup_time
= INT_MAX
;
1542 /* Unconditionally reset the flag. */
1543 time_t prune_now
= dbs
[my_number
].clear_cache
? LONG_MAX
: now
;
1544 dbs
[my_number
].clear_cache
= 0;
1546 pthread_mutex_unlock (prune_lock
);
1548 /* We use a separate lock for running the prune function (instead
1549 of keeping prune_lock locked) because this enables concurrent
1550 invocations of cache_add which might modify the timeout value. */
1551 pthread_mutex_lock (prune_run_lock
);
1552 next_wait
= prune_cache (&dbs
[my_number
], prune_now
, -1);
1553 pthread_mutex_unlock (prune_run_lock
);
1555 next_wait
= MAX (next_wait
, CACHE_PRUNE_INTERVAL
);
1556 /* If clients cannot determine for sure whether nscd is running
1557 we need to wake up occasionally to update the timestamp.
1558 Wait 90% of the update period. */
1559 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1560 if (__builtin_expect (! dont_need_update
, 0))
1562 next_wait
= MIN (UPDATE_MAPPING_TIMEOUT
, next_wait
);
1563 dbs
[my_number
].head
->timestamp
= now
;
1566 pthread_mutex_lock (prune_lock
);
1568 /* Make it known when we will wake up again. */
1569 if (now
+ next_wait
< dbs
[my_number
].wakeup_time
)
1570 dbs
[my_number
].wakeup_time
= now
+ next_wait
;
1572 next_wait
= dbs
[my_number
].wakeup_time
- now
;
1575 /* The cache was just pruned. Do not do it again now. Just
1576 use the new timeout value. */
1577 next_wait
= dbs
[my_number
].wakeup_time
- now
;
1579 if (clock_gettime (timeout_clock
, &prune_ts
) == -1)
1580 /* Should never happen. */
1583 /* Compute next timeout time. */
1584 prune_ts
.tv_sec
+= next_wait
;
1589 /* This is the main loop. It is replicated in different threads but
1590 the use of the ready list makes sure only one thread handles an
1591 incoming connection. */
1593 __attribute__ ((__noreturn__
))
1594 nscd_run_worker (void *p
)
1598 /* Initial locking. */
1599 pthread_mutex_lock (&readylist_lock
);
1601 /* One more thread available. */
1606 while (readylist
== NULL
)
1607 pthread_cond_wait (&readylist_cond
, &readylist_lock
);
1609 struct fdlist
*it
= readylist
->next
;
1610 if (readylist
->next
== readylist
)
1611 /* Just one entry on the list. */
1614 readylist
->next
= it
->next
;
1616 /* Extract the information and mark the record ready to be used again. */
1621 /* One more thread available. */
1624 /* We are done with the list. */
1625 pthread_mutex_unlock (&readylist_lock
);
1627 #ifndef __ASSUME_ACCEPT4
1628 if (have_accept4
< 0)
1630 /* We do not want to block on a short read or so. */
1631 int fl
= fcntl (fd
, F_GETFL
);
1632 if (fl
== -1 || fcntl (fd
, F_SETFL
, fl
| O_NONBLOCK
) == -1)
1637 /* Now read the request. */
1639 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd
, &req
, sizeof (req
)))
1640 != sizeof (req
), 0))
1642 /* We failed to read data. Note that this also might mean we
1643 failed because we would have blocked. */
1644 if (debug_level
> 0)
1645 dbg_log (_("short read while reading request: %s"),
1646 strerror_r (errno
, buf
, sizeof (buf
)));
1650 /* Check whether this is a valid request type. */
1651 if (req
.type
< GETPWBYNAME
|| req
.type
>= LASTREQ
)
1654 /* Some systems have no SO_PEERCRED implementation. They don't
1655 care about security so we don't either. */
1660 if (__builtin_expect (debug_level
> 0, 0))
1662 struct ucred caller
;
1663 socklen_t optlen
= sizeof (caller
);
1665 if (getsockopt (fd
, SOL_SOCKET
, SO_PEERCRED
, &caller
, &optlen
) == 0)
1669 const pid_t pid
= 0;
1672 /* It should not be possible to crash the nscd with a silly
1673 request (i.e., a terribly large key). We limit the size to 1kb. */
1674 if (__builtin_expect (req
.key_len
, 1) < 0
1675 || __builtin_expect (req
.key_len
, 1) > MAXKEYLEN
)
1677 if (debug_level
> 0)
1678 dbg_log (_("key length in request too long: %d"), req
.key_len
);
1683 char keybuf
[MAXKEYLEN
];
1685 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd
, keybuf
,
1689 /* Again, this can also mean we would have blocked. */
1690 if (debug_level
> 0)
1691 dbg_log (_("short read while reading request key: %s"),
1692 strerror_r (errno
, buf
, sizeof (buf
)));
1696 if (__builtin_expect (debug_level
, 0) > 0)
1701 handle_request: request received (Version = %d) from PID %ld"),
1702 req
.version
, (long int) pid
);
1706 handle_request: request received (Version = %d)"), req
.version
);
1709 /* Phew, we got all the data, now process it. */
1710 handle_request (fd
, &req
, keybuf
, uid
, pid
);
1718 pthread_mutex_lock (&readylist_lock
);
1720 /* One more thread available. */
1727 static unsigned int nconns
;
1732 pthread_mutex_lock (&readylist_lock
);
1734 /* Find an empty entry in FDLIST. */
1736 for (inner
= 0; inner
< nconns
; ++inner
)
1737 if (fdlist
[inner
].next
== NULL
)
1739 assert (inner
< nconns
);
1741 fdlist
[inner
].fd
= fd
;
1743 if (readylist
== NULL
)
1744 readylist
= fdlist
[inner
].next
= &fdlist
[inner
];
1747 fdlist
[inner
].next
= readylist
->next
;
1748 readylist
= readylist
->next
= &fdlist
[inner
];
1751 bool do_signal
= true;
1752 if (__builtin_expect (nready
== 0, 0))
1757 /* Try to start another thread to help out. */
1759 if (nthreads
< max_nthreads
1760 && pthread_create (&th
, &attr
, nscd_run_worker
,
1761 (void *) (long int) nthreads
) == 0)
1763 /* We got another thread. */
1765 /* The new thread might need a kick. */
1771 pthread_mutex_unlock (&readylist_lock
);
1773 /* Tell one of the worker threads there is work to do. */
1775 pthread_cond_signal (&readylist_cond
);
1779 /* Check whether restarting should happen. */
1781 restart_p (time_t now
)
1783 return (paranoia
&& readylist
== NULL
&& nready
== nthreads
1784 && now
>= restart_time
);
1788 /* Array for times a connection was accepted. */
1789 static time_t *starttime
;
1793 __attribute__ ((__noreturn__
))
1794 main_loop_poll (void)
1796 struct pollfd
*conns
= (struct pollfd
*) xmalloc (nconns
1797 * sizeof (conns
[0]));
1800 conns
[0].events
= POLLRDNORM
;
1802 size_t firstfree
= 1;
1805 if (inotify_fd
!= -1)
1807 conns
[1].fd
= inotify_fd
;
1808 conns
[1].events
= POLLRDNORM
;
1816 /* Wait for any event. We wait at most a couple of seconds so
1817 that we can check whether we should close any of the accepted
1818 connections since we have not received a request. */
1819 #define MAX_ACCEPT_TIMEOUT 30
1820 #define MIN_ACCEPT_TIMEOUT 5
1821 #define MAIN_THREAD_TIMEOUT \
1822 (MAX_ACCEPT_TIMEOUT * 1000 \
1823 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1825 int n
= poll (conns
, nused
, MAIN_THREAD_TIMEOUT
);
1827 time_t now
= time (NULL
);
1829 /* If there is a descriptor ready for reading or there is a new
1830 connection, process this now. */
1833 if (conns
[0].revents
!= 0)
1835 /* We have a new incoming connection. Accept the connection. */
1838 #ifndef __ASSUME_ACCEPT4
1840 if (have_accept4
>= 0)
1843 fd
= TEMP_FAILURE_RETRY (accept4 (sock
, NULL
, NULL
,
1845 #ifndef __ASSUME_ACCEPT4
1846 if (have_accept4
== 0)
1847 have_accept4
= fd
!= -1 || errno
!= ENOSYS
? 1 : -1;
1850 #ifndef __ASSUME_ACCEPT4
1851 if (have_accept4
< 0)
1852 fd
= TEMP_FAILURE_RETRY (accept (sock
, NULL
, NULL
));
1855 /* Use the descriptor if we have not reached the limit. */
1858 if (firstfree
< nconns
)
1860 conns
[firstfree
].fd
= fd
;
1861 conns
[firstfree
].events
= POLLRDNORM
;
1862 starttime
[firstfree
] = now
;
1863 if (firstfree
>= nused
)
1864 nused
= firstfree
+ 1;
1868 while (firstfree
< nused
&& conns
[firstfree
].fd
!= -1);
1871 /* We cannot use the connection so close it. */
1880 if (inotify_fd
!= -1 && conns
[1].fd
== inotify_fd
)
1882 if (conns
[1].revents
!= 0)
1884 bool to_clear
[lastdb
] = { false, };
1888 # define PATH_MAX 1024
1890 struct inotify_event i
;
1891 char buf
[sizeof (struct inotify_event
) + PATH_MAX
];
1896 ssize_t nb
= TEMP_FAILURE_RETRY (read (inotify_fd
, &inev
,
1898 if (nb
< (ssize_t
) sizeof (struct inotify_event
))
1900 if (__builtin_expect (nb
== -1 && errno
!= EAGAIN
,
1903 /* Something went wrong when reading the inotify
1904 data. Better disable inotify. */
1906 disabled inotify after read error %d"),
1918 /* Check which of the files changed. */
1919 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
1920 if (inev
.i
.wd
== dbs
[dbcnt
].inotify_descr
)
1922 to_clear
[dbcnt
] = true;
1926 if (inev
.i
.wd
== resolv_conf_descr
)
1929 to_clear
[hstdb
] = true;
1934 /* Actually perform the cache clearing. */
1935 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
1936 if (to_clear
[dbcnt
])
1938 pthread_mutex_lock (&dbs
[dbcnt
].prune_lock
);
1939 dbs
[dbcnt
].clear_cache
= 1;
1940 pthread_mutex_unlock (&dbs
[dbcnt
].prune_lock
);
1941 pthread_cond_signal (&dbs
[dbcnt
].prune_cond
);
1951 for (size_t cnt
= first
; cnt
< nused
&& n
> 0; ++cnt
)
1952 if (conns
[cnt
].revents
!= 0)
1954 fd_ready (conns
[cnt
].fd
);
1956 /* Clean up the CONNS array. */
1958 if (cnt
< firstfree
)
1960 if (cnt
== nused
- 1)
1963 while (conns
[nused
- 1].fd
== -1);
1969 /* Now find entries which have timed out. */
1972 /* We make the timeout length depend on the number of file
1973 descriptors currently used. */
1974 #define ACCEPT_TIMEOUT \
1975 (MAX_ACCEPT_TIMEOUT \
1976 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
1977 time_t laststart
= now
- ACCEPT_TIMEOUT
;
1979 for (size_t cnt
= nused
- 1; cnt
> 0; --cnt
)
1981 if (conns
[cnt
].fd
!= -1 && starttime
[cnt
] < laststart
)
1983 /* Remove the entry, it timed out. */
1984 (void) close (conns
[cnt
].fd
);
1987 if (cnt
< firstfree
)
1989 if (cnt
== nused
- 1)
1992 while (conns
[nused
- 1].fd
== -1);
1996 if (restart_p (now
))
2004 main_loop_epoll (int efd
)
2006 struct epoll_event ev
= { 0, };
2010 /* Add the socket. */
2011 ev
.events
= EPOLLRDNORM
;
2013 if (epoll_ctl (efd
, EPOLL_CTL_ADD
, sock
, &ev
) == -1)
2014 /* We cannot use epoll. */
2017 # ifdef HAVE_INOTIFY
2018 if (inotify_fd
!= -1)
2020 ev
.events
= EPOLLRDNORM
;
2021 ev
.data
.fd
= inotify_fd
;
2022 if (epoll_ctl (efd
, EPOLL_CTL_ADD
, inotify_fd
, &ev
) == -1)
2023 /* We cannot use epoll. */
2031 struct epoll_event revs
[100];
2032 # define nrevs (sizeof (revs) / sizeof (revs[0]))
2034 int n
= epoll_wait (efd
, revs
, nrevs
, MAIN_THREAD_TIMEOUT
);
2036 time_t now
= time (NULL
);
2038 for (int cnt
= 0; cnt
< n
; ++cnt
)
2039 if (revs
[cnt
].data
.fd
== sock
)
2041 /* A new connection. */
2044 # ifndef __ASSUME_ACCEPT4
2046 if (have_accept4
>= 0)
2049 fd
= TEMP_FAILURE_RETRY (accept4 (sock
, NULL
, NULL
,
2051 # ifndef __ASSUME_ACCEPT4
2052 if (have_accept4
== 0)
2053 have_accept4
= fd
!= -1 || errno
!= ENOSYS
? 1 : -1;
2056 # ifndef __ASSUME_ACCEPT4
2057 if (have_accept4
< 0)
2058 fd
= TEMP_FAILURE_RETRY (accept (sock
, NULL
, NULL
));
2061 /* Use the descriptor if we have not reached the limit. */
2064 /* Try to add the new descriptor. */
2067 || epoll_ctl (efd
, EPOLL_CTL_ADD
, fd
, &ev
) == -1)
2068 /* The descriptor is too large or something went
2069 wrong. Close the descriptor. */
2073 /* Remember when we accepted the connection. */
2074 starttime
[fd
] = now
;
2083 # ifdef HAVE_INOTIFY
2084 else if (revs
[cnt
].data
.fd
== inotify_fd
)
2086 bool to_clear
[lastdb
] = { false, };
2089 struct inotify_event i
;
2090 char buf
[sizeof (struct inotify_event
) + PATH_MAX
];
2095 ssize_t nb
= TEMP_FAILURE_RETRY (read (inotify_fd
, &inev
,
2097 if (nb
< (ssize_t
) sizeof (struct inotify_event
))
2099 if (__builtin_expect (nb
== -1 && errno
!= EAGAIN
, 0))
2101 /* Something went wrong when reading the inotify
2102 data. Better disable inotify. */
2103 dbg_log (_("disabled inotify after read error %d"),
2105 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, inotify_fd
,
2113 /* Check which of the files changed. */
2114 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
2115 if (inev
.i
.wd
== dbs
[dbcnt
].inotify_descr
)
2117 to_clear
[dbcnt
] = true;
2121 if (inev
.i
.wd
== resolv_conf_descr
)
2124 to_clear
[hstdb
] = true;
2129 /* Actually perform the cache clearing. */
2130 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
2131 if (to_clear
[dbcnt
])
2133 pthread_mutex_lock (&dbs
[dbcnt
].prune_lock
);
2134 dbs
[dbcnt
].clear_cache
= 1;
2135 pthread_mutex_unlock (&dbs
[dbcnt
].prune_lock
);
2136 pthread_cond_signal (&dbs
[dbcnt
].prune_cond
);
2142 /* Remove the descriptor from the epoll descriptor. */
2143 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, revs
[cnt
].data
.fd
, NULL
);
2145 /* Get a worker to handle the request. */
2146 fd_ready (revs
[cnt
].data
.fd
);
2148 /* Reset the time. */
2149 starttime
[revs
[cnt
].data
.fd
] = 0;
2150 if (revs
[cnt
].data
.fd
== highest
)
2153 while (highest
> 0 && starttime
[highest
] == 0);
2158 /* Now look for descriptors for accepted connections which have
2159 no reply in too long of a time. */
2160 time_t laststart
= now
- ACCEPT_TIMEOUT
;
2161 assert (starttime
[sock
] == 0);
2162 assert (inotify_fd
== -1 || starttime
[inotify_fd
] == 0);
2163 for (int cnt
= highest
; cnt
> STDERR_FILENO
; --cnt
)
2164 if (starttime
[cnt
] != 0 && starttime
[cnt
] < laststart
)
2166 /* We are waiting for this one for too long. Close it. */
2167 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, cnt
, NULL
);
2175 else if (cnt
!= sock
&& starttime
[cnt
] == 0 && cnt
== highest
)
2178 if (restart_p (now
))
2185 /* Start all the threads we want. The initial process is thread no. 1. */
2187 start_threads (void)
2189 /* Initialize the condition variable attributes we will use. The only
2190 non-standard attribute we might use is the clock selection. */
2191 pthread_condattr_t condattr
;
2192 pthread_condattr_init (&condattr
);
2194 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2195 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2196 /* Determine whether the monotonic clock is available. */
2197 struct timespec dummy
;
2198 # if _POSIX_MONOTONIC_CLOCK == 0
2199 if (sysconf (_SC_MONOTONIC_CLOCK
) > 0)
2201 # if _POSIX_CLOCK_SELECTION == 0
2202 if (sysconf (_SC_CLOCK_SELECTION
) > 0)
2204 if (clock_getres (CLOCK_MONOTONIC
, &dummy
) == 0
2205 && pthread_condattr_setclock (&condattr
, CLOCK_MONOTONIC
) == 0)
2206 timeout_clock
= CLOCK_MONOTONIC
;
2209 /* Create the attribute for the threads. They are all created detached. */
2211 pthread_attr_init (&attr
);
2212 pthread_attr_setdetachstate (&attr
, PTHREAD_CREATE_DETACHED
);
2213 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2214 pthread_attr_setstacksize (&attr
, NSCD_THREAD_STACKSIZE
);
2216 /* We allow less than LASTDB threads only for debugging. */
2217 if (debug_level
== 0)
2218 nthreads
= MAX (nthreads
, lastdb
);
2220 /* Create the threads which prune the databases. */
2221 // XXX Ideally this work would be done by some of the worker threads.
2222 // XXX But this is problematic since we would need to be able to wake
2223 // XXX them up explicitly as well as part of the group handling the
2224 // XXX ready-list. This requires an operation where we can wait on
2225 // XXX two conditional variables at the same time. This operation
2226 // XXX does not exist (yet).
2227 for (long int i
= 0; i
< lastdb
; ++i
)
2229 /* Initialize the conditional variable. */
2230 if (pthread_cond_init (&dbs
[i
].prune_cond
, &condattr
) != 0)
2232 dbg_log (_("could not initialize conditional variable"));
2238 && pthread_create (&th
, &attr
, nscd_run_prune
, (void *) i
) != 0)
2240 dbg_log (_("could not start clean-up thread; terminating"));
2245 pthread_condattr_destroy (&condattr
);
2247 for (long int i
= 0; i
< nthreads
; ++i
)
2250 if (pthread_create (&th
, &attr
, nscd_run_worker
, NULL
) != 0)
2254 dbg_log (_("could not start any worker thread; terminating"));
2262 /* Determine how much room for descriptors we should initially
2263 allocate. This might need to change later if we cap the number with MAXCONN. */
2265 const long int nfds
= sysconf (_SC_OPEN_MAX
);
2267 #define MAXCONN 16384
2268 if (nfds
== -1 || nfds
> MAXCONN
)
2270 else if (nfds
< MINCONN
)
2275 /* We need memory to pass descriptors on to the worker threads. */
2276 fdlist
= (struct fdlist
*) xcalloc (nconns
, sizeof (fdlist
[0]));
2277 /* Array to keep track when connection was accepted. */
2278 starttime
= (time_t *) xcalloc (nconns
, sizeof (starttime
[0]));
2280 /* In the main thread we execute the loop which handles incoming connections. */
2283 int efd
= epoll_create (100);
2286 main_loop_epoll (efd
);
2295 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2296 this function is called, we are not listening on the nscd socket yet so
2297 we can just use the ordinary lookup functions without causing a lockup */
2299 begin_drop_privileges (void)
2301 struct passwd
*pwd
= getpwnam (server_user
);
2305 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2306 error (EXIT_FAILURE
, 0, _("Failed to run nscd as user '%s'"),
2310 server_uid
= pwd
->pw_uid
;
2311 server_gid
= pwd
->pw_gid
;
2313 /* Save the old UID/GID if we have to change back. */
2316 old_uid
= getuid ();
2317 old_gid
= getgid ();
2320 if (getgrouplist (server_user
, server_gid
, NULL
, &server_ngroups
) == 0)
2322 /* This really must never happen. */
2323 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2324 error (EXIT_FAILURE
, errno
, _("initial getgrouplist failed"));
2327 server_groups
= (gid_t
*) xmalloc (server_ngroups
* sizeof (gid_t
));
2329 if (getgrouplist (server_user
, server_gid
, server_groups
, &server_ngroups
)
2332 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2333 error (EXIT_FAILURE
, errno
, _("getgrouplist failed"));
2338 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2339 run nscd as the user specified in the configuration file. */
2341 finish_drop_privileges (void)
2343 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2344 /* We need to preserve the capabilities to connect to the audit daemon. */
2345 cap_t new_caps
= preserve_capabilities ();
2348 if (setgroups (server_ngroups
, server_groups
) == -1)
2350 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2351 error (EXIT_FAILURE
, errno
, _("setgroups failed"));
2356 res
= setresgid (server_gid
, server_gid
, old_gid
);
2358 res
= setgid (server_gid
);
2361 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2367 res
= setresuid (server_uid
, server_uid
, old_uid
);
2369 res
= setuid (server_uid
);
2372 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2377 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2378 /* Remove the temporary capabilities. */
2379 install_real_capabilities (new_caps
);