1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2007, 2008, 2009, 2011 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
34 #include <arpa/inet.h>
36 # include <sys/epoll.h>
39 # include <sys/inotify.h>
42 #include <sys/param.h>
45 # include <sys/sendfile.h>
47 #include <sys/socket.h>
54 #include <resolv/resolv.h>
56 # include <kernel-features.h>
60 /* Wrapper functions with error checking for standard functions. */
61 extern void *xmalloc (size_t n
);
62 extern void *xcalloc (size_t n
, size_t s
);
63 extern void *xrealloc (void *o
, size_t n
);
65 /* Support to run nscd as an unprivileged user */
66 const char *server_user
;
67 static uid_t server_uid
;
68 static gid_t server_gid
;
69 const char *stat_user
;
71 static gid_t
*server_groups
;
75 static int server_ngroups
;
77 static pthread_attr_t attr
;
79 static void begin_drop_privileges (void);
80 static void finish_drop_privileges (void);
82 /* Map request type to a string. */
83 const char *const serv2str
[LASTREQ
] =
85 [GETPWBYNAME
] = "GETPWBYNAME",
86 [GETPWBYUID
] = "GETPWBYUID",
87 [GETGRBYNAME
] = "GETGRBYNAME",
88 [GETGRBYGID
] = "GETGRBYGID",
89 [GETHOSTBYNAME
] = "GETHOSTBYNAME",
90 [GETHOSTBYNAMEv6
] = "GETHOSTBYNAMEv6",
91 [GETHOSTBYADDR
] = "GETHOSTBYADDR",
92 [GETHOSTBYADDRv6
] = "GETHOSTBYADDRv6",
93 [SHUTDOWN
] = "SHUTDOWN",
94 [GETSTAT
] = "GETSTAT",
95 [INVALIDATE
] = "INVALIDATE",
96 [GETFDPW
] = "GETFDPW",
97 [GETFDGR
] = "GETFDGR",
98 [GETFDHST
] = "GETFDHST",
100 [INITGROUPS
] = "INITGROUPS",
101 [GETSERVBYNAME
] = "GETSERVBYNAME",
102 [GETSERVBYPORT
] = "GETSERVBYPORT",
103 [GETFDSERV
] = "GETFDSERV"
106 /* The control data structures for the services. */
107 struct database_dyn dbs
[lastdb
] =
110 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
111 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
112 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
118 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
119 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
120 .db_filename
= _PATH_NSCD_PASSWD_DB
,
121 .disabled_iov
= &pwd_iov_disabled
,
129 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
130 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
131 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
137 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
138 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
139 .db_filename
= _PATH_NSCD_GROUP_DB
,
140 .disabled_iov
= &grp_iov_disabled
,
148 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
149 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
150 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
154 .propagate
= 0, /* Not used. */
156 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
157 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
158 .db_filename
= _PATH_NSCD_HOSTS_DB
,
159 .disabled_iov
= &hst_iov_disabled
,
167 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
168 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
169 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
173 .propagate
= 0, /* Not used. */
175 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
176 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
177 .db_filename
= _PATH_NSCD_SERVICES_DB
,
178 .disabled_iov
= &serv_iov_disabled
,
188 /* Mapping of request type to database. */
192 struct database_dyn
*db
;
193 } const reqinfo
[LASTREQ
] =
195 [GETPWBYNAME
] = { true, &dbs
[pwddb
] },
196 [GETPWBYUID
] = { true, &dbs
[pwddb
] },
197 [GETGRBYNAME
] = { true, &dbs
[grpdb
] },
198 [GETGRBYGID
] = { true, &dbs
[grpdb
] },
199 [GETHOSTBYNAME
] = { true, &dbs
[hstdb
] },
200 [GETHOSTBYNAMEv6
] = { true, &dbs
[hstdb
] },
201 [GETHOSTBYADDR
] = { true, &dbs
[hstdb
] },
202 [GETHOSTBYADDRv6
] = { true, &dbs
[hstdb
] },
203 [SHUTDOWN
] = { false, NULL
},
204 [GETSTAT
] = { false, NULL
},
205 [SHUTDOWN
] = { false, NULL
},
206 [GETFDPW
] = { false, &dbs
[pwddb
] },
207 [GETFDGR
] = { false, &dbs
[grpdb
] },
208 [GETFDHST
] = { false, &dbs
[hstdb
] },
209 [GETAI
] = { true, &dbs
[hstdb
] },
210 [INITGROUPS
] = { true, &dbs
[grpdb
] },
211 [GETSERVBYNAME
] = { true, &dbs
[servdb
] },
212 [GETSERVBYPORT
] = { true, &dbs
[servdb
] },
213 [GETFDSERV
] = { false, &dbs
[servdb
] }
217 /* Initial number of threads to use. */
219 /* Maximum number of threads to use. */
220 int max_nthreads
= 32;
222 /* Socket for incoming connections. */
226 /* Inotify descriptor. */
230 #ifndef __ASSUME_SOCK_CLOEXEC
231 /* Negative if SOCK_CLOEXEC is not supported, positive if it is, zero
232 before we know the result. */
233 static int have_sock_cloexec
;
235 #ifndef __ASSUME_ACCEPT4
236 static int have_accept4
;
239 /* Number of times clients had to wait. */
240 unsigned long int client_queued
;
244 writeall (int fd
, const void *buf
, size_t len
)
250 ret
= TEMP_FAILURE_RETRY (send (fd
, buf
, n
, MSG_NOSIGNAL
));
253 buf
= (const char *) buf
+ ret
;
257 return ret
< 0 ? ret
: len
- n
;
263 sendfileall (int tofd
, int fromfd
, off_t off
, size_t len
)
270 ret
= TEMP_FAILURE_RETRY (sendfile (tofd
, fromfd
, &off
, n
));
276 return ret
< 0 ? ret
: len
- n
;
284 /* The following three are not really used, they are symbolic constants. */
290 use_he_begin
= use_he
| use_begin
,
291 use_he_end
= use_he
| use_end
,
294 use_key_begin
= use_key
| use_begin
,
295 use_key_end
= use_key
| use_end
,
296 use_key_first
= use_key_begin
| use_first
,
299 use_data_begin
= use_data
| use_begin
,
300 use_data_end
= use_data
| use_end
,
301 use_data_first
= use_data_begin
| use_first
306 check_use (const char *data
, nscd_ssize_t first_free
, uint8_t *usemap
,
307 enum usekey use
, ref_t start
, size_t len
)
311 if (start
> first_free
|| start
+ len
> first_free
312 || (start
& BLOCK_ALIGN_M1
))
315 if (usemap
[start
] == use_not
)
317 /* Add the start marker. */
318 usemap
[start
] = use
| use_begin
;
322 if (usemap
[++start
] != use_not
)
327 /* Add the end marker. */
328 usemap
[start
] = use
| use_end
;
330 else if ((usemap
[start
] & ~use_first
) == ((use
| use_begin
) & ~use_first
))
332 /* Hash entries can't be shared. */
336 usemap
[start
] |= (use
& use_first
);
340 if (usemap
[++start
] != use
)
343 if (usemap
[++start
] != (use
| use_end
))
347 /* Points to a wrong object or somewhere in the middle. */
354 /* Verify data in persistent database. */
356 verify_persistent_db (void *mem
, struct database_pers_head
*readhead
, int dbnr
)
358 assert (dbnr
== pwddb
|| dbnr
== grpdb
|| dbnr
== hstdb
|| dbnr
== servdb
);
360 time_t now
= time (NULL
);
362 struct database_pers_head
*head
= mem
;
363 struct database_pers_head head_copy
= *head
;
365 /* Check that the header that was read matches the head in the database. */
366 if (memcmp (head
, readhead
, sizeof (*head
)) != 0)
369 /* First some easy tests: make sure the database header is sane. */
370 if (head
->version
!= DB_VERSION
371 || head
->header_size
!= sizeof (*head
)
372 /* We allow a timestamp to be one hour ahead of the current time.
373 This should cover daylight saving time changes. */
374 || head
->timestamp
> now
+ 60 * 60 + 60
375 || (head
->gc_cycle
& 1)
377 || (size_t) head
->module
> INT32_MAX
/ sizeof (ref_t
)
378 || (size_t) head
->data_size
> INT32_MAX
- head
->module
* sizeof (ref_t
)
379 || head
->first_free
< 0
380 || head
->first_free
> head
->data_size
381 || (head
->first_free
& BLOCK_ALIGN_M1
) != 0
382 || head
->maxnentries
< 0
383 || head
->maxnsearched
< 0)
386 uint8_t *usemap
= calloc (head
->first_free
, 1);
390 const char *data
= (char *) &head
->array
[roundup (head
->module
,
391 ALIGN
/ sizeof (ref_t
))];
393 nscd_ssize_t he_cnt
= 0;
394 for (nscd_ssize_t cnt
= 0; cnt
< head
->module
; ++cnt
)
396 ref_t trail
= head
->array
[cnt
];
400 while (work
!= ENDREF
)
402 if (! check_use (data
, head
->first_free
, usemap
, use_he
, work
,
403 sizeof (struct hashentry
)))
406 /* Now we know we can dereference the record. */
407 struct hashentry
*here
= (struct hashentry
*) (data
+ work
);
411 /* Make sure the record is for this type of service. */
412 if (here
->type
>= LASTREQ
413 || reqinfo
[here
->type
].db
!= &dbs
[dbnr
])
416 /* Validate boolean field value. */
417 if (here
->first
!= false && here
->first
!= true)
425 || here
->packet
> head
->first_free
426 || here
->packet
+ sizeof (struct datahead
) > head
->first_free
)
429 struct datahead
*dh
= (struct datahead
*) (data
+ here
->packet
);
431 if (! check_use (data
, head
->first_free
, usemap
,
432 use_data
| (here
->first
? use_first
: 0),
433 here
->packet
, dh
->allocsize
))
436 if (dh
->allocsize
< sizeof (struct datahead
)
437 || dh
->recsize
> dh
->allocsize
438 || (dh
->notfound
!= false && dh
->notfound
!= true)
439 || (dh
->usable
!= false && dh
->usable
!= true))
442 if (here
->key
< here
->packet
+ sizeof (struct datahead
)
443 || here
->key
> here
->packet
+ dh
->allocsize
444 || here
->key
+ here
->len
> here
->packet
+ dh
->allocsize
)
447 /* If keys can appear outside of data, this should be done
448 instead. But gc doesn't mark the data in that case. */
449 if (! check_use (data
, head
->first_free
, usemap
,
450 use_key
| (here
->first
? use_first
: 0),
451 here
->key
, here
->len
))
459 /* A circular list, this must not happen. */
462 trail
= ((struct hashentry
*) (data
+ trail
))->next
;
467 if (he_cnt
!= head
->nentries
)
470 /* See if all data and keys had at least one reference from
471 he->first == true hashentry. */
472 for (ref_t idx
= 0; idx
< head
->first_free
; ++idx
)
475 if (usemap
[idx
] == use_key_begin
)
478 if (usemap
[idx
] == use_data_begin
)
482 /* Finally, make sure the database hasn't changed since the first test. */
483 if (memcmp (mem
, &head_copy
, sizeof (*head
)) != 0)
496 # define EXTRA_O_FLAGS O_CLOEXEC
498 # define EXTRA_O_FLAGS 0
502 /* Initialize database information structures. */
506 /* Look up unprivileged uid/gid/groups before we start listening on the
508 if (server_user
!= NULL
)
509 begin_drop_privileges ();
512 /* No configuration for this value, assume a default. */
515 for (size_t cnt
= 0; cnt
< lastdb
; ++cnt
)
516 if (dbs
[cnt
].enabled
)
518 pthread_rwlock_init (&dbs
[cnt
].lock
, NULL
);
519 pthread_mutex_init (&dbs
[cnt
].memlock
, NULL
);
521 if (dbs
[cnt
].persistent
)
523 /* Try to open the appropriate file on disk. */
524 int fd
= open (dbs
[cnt
].db_filename
, O_RDWR
| EXTRA_O_FLAGS
);
531 struct database_pers_head head
;
532 ssize_t n
= TEMP_FAILURE_RETRY (read (fd
, &head
,
534 if (n
!= sizeof (head
) || fstat64 (fd
, &st
) != 0)
537 /* The code is single-threaded at this point so
538 using strerror is just fine. */
539 msg
= strerror (errno
);
541 dbg_log (_("invalid persistent database file \"%s\": %s"),
542 dbs
[cnt
].db_filename
, msg
);
543 unlink (dbs
[cnt
].db_filename
);
545 else if (head
.module
== 0 && head
.data_size
== 0)
547 /* The file has been created, but the head has not
548 been initialized yet. */
549 msg
= _("uninitialized header");
552 else if (head
.header_size
!= (int) sizeof (head
))
554 msg
= _("header size does not match");
557 else if ((total
= (sizeof (head
)
558 + roundup (head
.module
* sizeof (ref_t
),
562 || total
< sizeof (head
))
564 msg
= _("file size does not match");
567 /* Note we map with the maximum size allowed for the
568 database. This is likely much larger than the
569 actual file size. This is OK on most OSes since
570 extensions of the underlying file will
571 automatically translate more pages available for
573 else if ((mem
= mmap (NULL
, dbs
[cnt
].max_db_size
,
574 PROT_READ
| PROT_WRITE
,
578 else if (!verify_persistent_db (mem
, &head
, cnt
))
581 msg
= _("verification failed");
586 /* Success. We have the database. */
588 dbs
[cnt
].memsize
= total
;
589 dbs
[cnt
].data
= (char *)
590 &dbs
[cnt
].head
->array
[roundup (dbs
[cnt
].head
->module
,
591 ALIGN
/ sizeof (ref_t
))];
592 dbs
[cnt
].mmap_used
= true;
594 if (dbs
[cnt
].suggested_module
> head
.module
)
595 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
600 /* We also need a read-only descriptor. */
603 dbs
[cnt
].ro_fd
= open (dbs
[cnt
].db_filename
,
604 O_RDONLY
| EXTRA_O_FLAGS
);
605 if (dbs
[cnt
].ro_fd
== -1)
607 cannot create read-only descriptor for \"%s\"; no mmap"),
608 dbs
[cnt
].db_filename
);
611 // XXX Shall we test whether the descriptors actually
612 // XXX point to the same file?
615 /* Close the file descriptors in case something went
616 wrong in which case the variables have not been
621 else if (errno
== EACCES
)
622 error (EXIT_FAILURE
, 0, _("cannot access '%s'"),
623 dbs
[cnt
].db_filename
);
626 if (dbs
[cnt
].head
== NULL
)
628 /* No database loaded. Allocate the data structure,
630 struct database_pers_head head
;
631 size_t total
= (sizeof (head
)
632 + roundup (dbs
[cnt
].suggested_module
633 * sizeof (ref_t
), ALIGN
)
634 + (dbs
[cnt
].suggested_module
635 * DEFAULT_DATASIZE_PER_BUCKET
));
637 /* Try to create the database. If we do not need a
638 persistent database create a temporary file. */
641 if (dbs
[cnt
].persistent
)
643 fd
= open (dbs
[cnt
].db_filename
,
644 O_RDWR
| O_CREAT
| O_EXCL
| O_TRUNC
| EXTRA_O_FLAGS
,
646 if (fd
!= -1 && dbs
[cnt
].shared
)
647 ro_fd
= open (dbs
[cnt
].db_filename
,
648 O_RDONLY
| EXTRA_O_FLAGS
);
652 char fname
[] = _PATH_NSCD_XYZ_DB_TMP
;
653 fd
= mkostemp (fname
, EXTRA_O_FLAGS
);
655 /* We do not need the file name anymore after we
656 opened another file descriptor in read-only mode. */
660 ro_fd
= open (fname
, O_RDONLY
| EXTRA_O_FLAGS
);
670 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
671 dbnames
[cnt
], dbs
[cnt
].db_filename
);
672 // XXX Correct way to terminate?
676 if (dbs
[cnt
].persistent
)
677 dbg_log (_("cannot create %s; no persistent database used"),
678 dbs
[cnt
].db_filename
);
680 dbg_log (_("cannot create %s; no sharing possible"),
681 dbs
[cnt
].db_filename
);
683 dbs
[cnt
].persistent
= 0;
684 // XXX remember: no mmap
688 /* Tell the user if we could not create the read-only
690 if (ro_fd
== -1 && dbs
[cnt
].shared
)
692 cannot create read-only descriptor for \"%s\"; no mmap"),
693 dbs
[cnt
].db_filename
);
695 /* Before we create the header, initialize the hash
696 table, so that if we get interrupted while writing
697 the header we can recognize a partially initialized
699 size_t ps
= sysconf (_SC_PAGESIZE
);
701 assert (~ENDREF
== 0);
702 memset (tmpbuf
, '\xff', ps
);
704 size_t remaining
= dbs
[cnt
].suggested_module
* sizeof (ref_t
);
705 off_t offset
= sizeof (head
);
708 if (offset
% ps
!= 0)
710 towrite
= MIN (remaining
, ps
- (offset
% ps
));
711 if (pwrite (fd
, tmpbuf
, towrite
, offset
) != towrite
)
714 remaining
-= towrite
;
717 while (remaining
> ps
)
719 if (pwrite (fd
, tmpbuf
, ps
, offset
) == -1)
726 && pwrite (fd
, tmpbuf
, remaining
, offset
) != remaining
)
729 /* Create the header of the file. */
730 struct database_pers_head head
=
732 .version
= DB_VERSION
,
733 .header_size
= sizeof (head
),
734 .module
= dbs
[cnt
].suggested_module
,
735 .data_size
= (dbs
[cnt
].suggested_module
736 * DEFAULT_DATASIZE_PER_BUCKET
),
741 if ((TEMP_FAILURE_RETRY (write (fd
, &head
, sizeof (head
)))
743 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd
, 0, total
))
745 || (mem
= mmap (NULL
, dbs
[cnt
].max_db_size
,
746 PROT_READ
| PROT_WRITE
,
747 MAP_SHARED
, fd
, 0)) == MAP_FAILED
)
750 unlink (dbs
[cnt
].db_filename
);
751 dbg_log (_("cannot write to database file %s: %s"),
752 dbs
[cnt
].db_filename
, strerror (errno
));
753 dbs
[cnt
].persistent
= 0;
759 dbs
[cnt
].data
= (char *)
760 &dbs
[cnt
].head
->array
[roundup (dbs
[cnt
].head
->module
,
761 ALIGN
/ sizeof (ref_t
))];
762 dbs
[cnt
].memsize
= total
;
763 dbs
[cnt
].mmap_used
= true;
765 /* Remember the descriptors. */
767 dbs
[cnt
].ro_fd
= ro_fd
;
779 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
780 /* We do not check here whether the O_CLOEXEC provided to the
781 open call was successful or not. The two fcntl calls are
782 only performed once each per process start-up and therefore
783 are not noticeable at all. */
785 && ((dbs
[cnt
].wr_fd
!= -1
786 && fcntl (dbs
[cnt
].wr_fd
, F_SETFD
, FD_CLOEXEC
) == -1)
787 || (dbs
[cnt
].ro_fd
!= -1
788 && fcntl (dbs
[cnt
].ro_fd
, F_SETFD
, FD_CLOEXEC
) == -1)))
791 cannot set socket to close on exec: %s; disabling paranoia mode"),
797 if (dbs
[cnt
].head
== NULL
)
799 /* We do not use the persistent database. Just
800 create an in-memory data structure. */
801 assert (! dbs
[cnt
].persistent
);
803 dbs
[cnt
].head
= xmalloc (sizeof (struct database_pers_head
)
804 + (dbs
[cnt
].suggested_module
806 memset (dbs
[cnt
].head
, '\0', sizeof (struct database_pers_head
));
807 assert (~ENDREF
== 0);
808 memset (dbs
[cnt
].head
->array
, '\xff',
809 dbs
[cnt
].suggested_module
* sizeof (ref_t
));
810 dbs
[cnt
].head
->module
= dbs
[cnt
].suggested_module
;
811 dbs
[cnt
].head
->data_size
= (DEFAULT_DATASIZE_PER_BUCKET
812 * dbs
[cnt
].head
->module
);
813 dbs
[cnt
].data
= xmalloc (dbs
[cnt
].head
->data_size
);
814 dbs
[cnt
].head
->first_free
= 0;
817 assert (dbs
[cnt
].ro_fd
== -1);
821 /* Create the socket. */
822 #ifndef __ASSUME_SOCK_CLOEXEC
824 if (have_sock_cloexec
>= 0)
827 sock
= socket (AF_UNIX
, SOCK_STREAM
| SOCK_CLOEXEC
| SOCK_NONBLOCK
, 0);
828 #ifndef __ASSUME_SOCK_CLOEXEC
829 if (have_sock_cloexec
== 0)
830 have_sock_cloexec
= sock
!= -1 || errno
!= EINVAL
? 1 : -1;
833 #ifndef __ASSUME_SOCK_CLOEXEC
834 if (have_sock_cloexec
< 0)
835 sock
= socket (AF_UNIX
, SOCK_STREAM
, 0);
839 dbg_log (_("cannot open socket: %s"), strerror (errno
));
840 exit (errno
== EACCES
? 4 : 1);
842 /* Bind a name to the socket. */
843 struct sockaddr_un sock_addr
;
844 sock_addr
.sun_family
= AF_UNIX
;
845 strcpy (sock_addr
.sun_path
, _PATH_NSCDSOCKET
);
846 if (bind (sock
, (struct sockaddr
*) &sock_addr
, sizeof (sock_addr
)) < 0)
848 dbg_log ("%s: %s", _PATH_NSCDSOCKET
, strerror (errno
));
849 exit (errno
== EACCES
? 4 : 1);
852 #ifndef __ASSUME_SOCK_CLOEXEC
853 if (have_sock_cloexec
< 0)
855 /* We don't want to get stuck on accept. */
856 int fl
= fcntl (sock
, F_GETFL
);
857 if (fl
== -1 || fcntl (sock
, F_SETFL
, fl
| O_NONBLOCK
) == -1)
859 dbg_log (_("cannot change socket to nonblocking mode: %s"),
864 /* The descriptor needs to be closed on exec. */
865 if (paranoia
&& fcntl (sock
, F_SETFD
, FD_CLOEXEC
) == -1)
867 dbg_log (_("cannot set socket to close on exec: %s"),
874 /* Set permissions for the socket. */
875 chmod (_PATH_NSCDSOCKET
, DEFFILEMODE
);
877 /* Set the socket up to accept connections. */
878 if (listen (sock
, SOMAXCONN
) < 0)
880 dbg_log (_("cannot enable socket to accept connections: %s"),
885 /* Change to unprivileged uid/gid/groups if specified in config file */
886 if (server_user
!= NULL
)
887 finish_drop_privileges ();
892 register_traced_file (size_t dbidx
, struct traced_file
*finfo
)
894 if (! dbs
[dbidx
].check_file
)
897 if (__builtin_expect (debug_level
> 0, 0))
898 dbg_log (_("register trace file %s for database %s"),
899 finfo
->fname
, dbnames
[dbidx
]);
903 || (finfo
->inotify_descr
= inotify_add_watch (inotify_fd
, finfo
->fname
,
908 /* We need the modification date of the file. */
911 if (stat64 (finfo
->fname
, &st
) < 0)
913 /* We cannot stat() the file, disable file checking. */
914 dbg_log (_("cannot stat() file `%s': %s"),
915 finfo
->fname
, strerror (errno
));
919 finfo
->inotify_descr
= -1;
920 finfo
->mtime
= st
.st_mtime
;
923 /* Queue up the file name. */
924 finfo
->next
= dbs
[dbidx
].traced_files
;
925 dbs
[dbidx
].traced_files
= finfo
;
929 /* Close the connections. */
938 invalidate_cache (char *key
, int fd
)
943 for (number
= pwddb
; number
< lastdb
; ++number
)
944 if (strcmp (key
, dbnames
[number
]) == 0)
948 struct traced_file
*runp
= dbs
[hstdb
].traced_files
;
950 if (runp
->call_res_init
)
961 if (number
== lastdb
)
964 writeall (fd
, &resp
, sizeof (resp
));
968 if (dbs
[number
].enabled
)
970 pthread_mutex_lock (&dbs
[number
].prune_run_lock
);
971 prune_cache (&dbs
[number
], LONG_MAX
, fd
);
972 pthread_mutex_unlock (&dbs
[number
].prune_run_lock
);
977 writeall (fd
, &resp
, sizeof (resp
));
984 send_ro_fd (struct database_dyn
*db
, char *key
, int fd
)
986 /* If we do not have a read-only file descriptor, do nothing. */
990 /* We need to send some data along with the descriptor. */
991 uint64_t mapsize
= (db
->head
->data_size
992 + roundup (db
->head
->module
* sizeof (ref_t
), ALIGN
)
993 + sizeof (struct database_pers_head
));
995 iov
[0].iov_base
= key
;
996 iov
[0].iov_len
= strlen (key
) + 1;
997 iov
[1].iov_base
= &mapsize
;
998 iov
[1].iov_len
= sizeof (mapsize
);
1000 /* Prepare the control message to transfer the descriptor. */
1004 char bytes
[CMSG_SPACE (sizeof (int))];
1006 struct msghdr msg
= { .msg_iov
= iov
, .msg_iovlen
= 2,
1007 .msg_control
= buf
.bytes
,
1008 .msg_controllen
= sizeof (buf
) };
1009 struct cmsghdr
*cmsg
= CMSG_FIRSTHDR (&msg
);
1011 cmsg
->cmsg_level
= SOL_SOCKET
;
1012 cmsg
->cmsg_type
= SCM_RIGHTS
;
1013 cmsg
->cmsg_len
= CMSG_LEN (sizeof (int));
1015 int *ip
= (int *) CMSG_DATA (cmsg
);
1018 msg
.msg_controllen
= cmsg
->cmsg_len
;
1020 /* Send the control message. We repeat when we are interrupted but
1021 everything else is ignored. */
1022 #ifndef MSG_NOSIGNAL
1023 # define MSG_NOSIGNAL 0
1025 (void) TEMP_FAILURE_RETRY (sendmsg (fd
, &msg
, MSG_NOSIGNAL
));
1027 if (__builtin_expect (debug_level
> 0, 0))
1028 dbg_log (_("provide access to FD %d, for %s"), db
->ro_fd
, key
);
1030 #endif /* SCM_RIGHTS */
1033 /* Handle new request. */
1035 handle_request (int fd
, request_header
*req
, void *key
, uid_t uid
, pid_t pid
)
1037 if (__builtin_expect (req
->version
, NSCD_VERSION
) != NSCD_VERSION
)
1039 if (debug_level
> 0)
1041 cannot handle old request version %d; current version is %d"),
1042 req
->version
, NSCD_VERSION
);
1046 /* Perform the SELinux check before we go on to the standard checks. */
1047 if (selinux_enabled
&& nscd_request_avc_has_perm (fd
, req
->type
) != 0)
1049 if (debug_level
> 0)
1058 snprintf (buf
, sizeof (buf
), "/proc/%ld/exe", (long int) pid
);
1059 ssize_t n
= readlink (buf
, buf
, sizeof (buf
) - 1);
1063 request from %ld not handled due to missing permission"), (long int) pid
);
1068 request from '%s' [%ld] not handled due to missing permission"),
1069 buf
, (long int) pid
);
1072 dbg_log (_("request not handled due to missing permission"));
1078 struct database_dyn
*db
= reqinfo
[req
->type
].db
;
1080 /* See whether we can service the request from the cache. */
1081 if (__builtin_expect (reqinfo
[req
->type
].data_request
, true))
1083 if (__builtin_expect (debug_level
, 0) > 0)
1085 if (req
->type
== GETHOSTBYADDR
|| req
->type
== GETHOSTBYADDRv6
)
1087 char buf
[INET6_ADDRSTRLEN
];
1089 dbg_log ("\t%s (%s)", serv2str
[req
->type
],
1090 inet_ntop (req
->type
== GETHOSTBYADDR
1091 ? AF_INET
: AF_INET6
,
1092 key
, buf
, sizeof (buf
)));
1095 dbg_log ("\t%s (%s)", serv2str
[req
->type
], (char *) key
);
1098 /* Is this service enabled? */
1099 if (__builtin_expect (!db
->enabled
, 0))
1101 /* No, send the prepared record. */
1102 if (TEMP_FAILURE_RETRY (send (fd
, db
->disabled_iov
->iov_base
,
1103 db
->disabled_iov
->iov_len
,
1105 != (ssize_t
) db
->disabled_iov
->iov_len
1106 && __builtin_expect (debug_level
, 0) > 0)
1108 /* We have problems sending the result. */
1110 dbg_log (_("cannot write result: %s"),
1111 strerror_r (errno
, buf
, sizeof (buf
)));
1117 /* Be sure we can read the data. */
1118 if (__builtin_expect (pthread_rwlock_tryrdlock (&db
->lock
) != 0, 0))
1120 ++db
->head
->rdlockdelayed
;
1121 pthread_rwlock_rdlock (&db
->lock
);
1124 /* See whether we can handle it from the cache. */
1125 struct datahead
*cached
;
1126 cached
= (struct datahead
*) cache_search (req
->type
, key
, req
->key_len
,
1130 /* Hurray it's in the cache. */
1133 #ifdef HAVE_SENDFILE
1134 if (__builtin_expect (db
->mmap_used
, 1))
1136 assert (db
->wr_fd
!= -1);
1137 assert ((char *) cached
->data
> (char *) db
->data
);
1138 assert ((char *) cached
->data
- (char *) db
->head
1140 <= (sizeof (struct database_pers_head
)
1141 + db
->head
->module
* sizeof (ref_t
)
1142 + db
->head
->data_size
));
1143 nwritten
= sendfileall (fd
, db
->wr_fd
,
1144 (char *) cached
->data
1145 - (char *) db
->head
, cached
->recsize
);
1146 # ifndef __ASSUME_SENDFILE
1147 if (nwritten
== -1 && errno
== ENOSYS
)
1152 # ifndef __ASSUME_SENDFILE
1156 nwritten
= writeall (fd
, cached
->data
, cached
->recsize
);
1158 if (nwritten
!= cached
->recsize
1159 && __builtin_expect (debug_level
, 0) > 0)
1161 /* We have problems sending the result. */
1163 dbg_log (_("cannot write result: %s"),
1164 strerror_r (errno
, buf
, sizeof (buf
)));
1167 pthread_rwlock_unlock (&db
->lock
);
1172 pthread_rwlock_unlock (&db
->lock
);
1174 else if (__builtin_expect (debug_level
, 0) > 0)
1176 if (req
->type
== INVALIDATE
)
1177 dbg_log ("\t%s (%s)", serv2str
[req
->type
], (char *) key
);
1179 dbg_log ("\t%s", serv2str
[req
->type
]);
1182 /* Handle the request. */
1186 addpwbyname (db
, fd
, req
, key
, uid
);
1190 addpwbyuid (db
, fd
, req
, key
, uid
);
1194 addgrbyname (db
, fd
, req
, key
, uid
);
1198 addgrbygid (db
, fd
, req
, key
, uid
);
1202 addhstbyname (db
, fd
, req
, key
, uid
);
1205 case GETHOSTBYNAMEv6
:
1206 addhstbynamev6 (db
, fd
, req
, key
, uid
);
1210 addhstbyaddr (db
, fd
, req
, key
, uid
);
1213 case GETHOSTBYADDRv6
:
1214 addhstbyaddrv6 (db
, fd
, req
, key
, uid
);
1218 addhstai (db
, fd
, req
, key
, uid
);
1222 addinitgroups (db
, fd
, req
, key
, uid
);
1226 addservbyname (db
, fd
, req
, key
, uid
);
1230 addservbyport (db
, fd
, req
, key
, uid
);
1237 /* Get the caller's credentials. */
1239 struct ucred caller
;
1240 socklen_t optlen
= sizeof (caller
);
1242 if (getsockopt (fd
, SOL_SOCKET
, SO_PEERCRED
, &caller
, &optlen
) < 0)
1246 dbg_log (_("error getting caller's id: %s"),
1247 strerror_r (errno
, buf
, sizeof (buf
)));
1253 /* Some systems have no SO_PEERCRED implementation. They don't
1254 care about security so we don't either. */
1259 /* Accept shutdown, getstat and invalidate only from root. For
1260 the stat call also allow the user specified in the config file. */
1261 if (req
->type
== GETSTAT
)
1263 if (uid
== 0 || uid
== stat_uid
)
1264 send_stats (fd
, dbs
);
1268 if (req
->type
== INVALIDATE
)
1269 invalidate_cache (key
, fd
);
1271 termination_handler (0);
1280 send_ro_fd (reqinfo
[req
->type
].db
, key
, fd
);
1285 /* Ignore the command, it's nothing we know. */
1291 /* Restart the process. */
1295 /* First determine the parameters. We do not use the parameters
1296 passed to main() since in case nscd is started by running the
1297 dynamic linker this will not work. Yes, this is not the usual
1298 case but nscd is part of glibc and we occasionally do this. */
1299 size_t buflen
= 1024;
1300 char *buf
= alloca (buflen
);
1302 int fd
= open ("/proc/self/cmdline", O_RDONLY
);
1306 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1315 ssize_t n
= TEMP_FAILURE_RETRY (read (fd
, buf
+ readlen
,
1320 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1330 if (readlen
< buflen
)
1333 /* We might have to extend the buffer. */
1334 size_t old_buflen
= buflen
;
1335 char *newp
= extend_alloca (buf
, buflen
, 2 * buflen
);
1336 buf
= memmove (newp
, buf
, old_buflen
);
1341 /* Parse the command line. Worst case scenario: every two
1342 characters form one parameter (one character plus NUL). */
1343 char **argv
= alloca ((readlen
/ 2 + 1) * sizeof (argv
[0]));
1347 while (cp
< buf
+ readlen
)
1350 cp
= (char *) rawmemchr (cp
, '\0') + 1;
1354 /* Second, change back to the old user if we changed it. */
1355 if (server_user
!= NULL
)
1357 if (setresuid (old_uid
, old_uid
, old_uid
) != 0)
1360 cannot change to old UID: %s; disabling paranoia mode"),
1367 if (setresgid (old_gid
, old_gid
, old_gid
) != 0)
1370 cannot change to old GID: %s; disabling paranoia mode"),
1373 setuid (server_uid
);
1379 /* Next change back to the old working directory. */
1380 if (chdir (oldcwd
) == -1)
1383 cannot change to old working directory: %s; disabling paranoia mode"),
1386 if (server_user
!= NULL
)
1388 setuid (server_uid
);
1389 setgid (server_gid
);
1395 /* Synchronize memory. */
1396 int32_t certainly
[lastdb
];
1397 for (int cnt
= 0; cnt
< lastdb
; ++cnt
)
1398 if (dbs
[cnt
].enabled
)
1400 /* Make sure nobody keeps using the database. */
1401 dbs
[cnt
].head
->timestamp
= 0;
1402 certainly
[cnt
] = dbs
[cnt
].head
->nscd_certainly_running
;
1403 dbs
[cnt
].head
->nscd_certainly_running
= 0;
1405 if (dbs
[cnt
].persistent
)
1407 msync (dbs
[cnt
].head
, dbs
[cnt
].memsize
, MS_ASYNC
);
1410 /* The preparations are done. */
1412 char pathbuf
[PATH_MAX
];
1416 /* Try to exec the real nscd program so the process name (as reported
1417 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1418 if readlink or the exec with the result of the readlink call fails. */
1419 ssize_t n
= readlink ("/proc/self/exe", pathbuf
, sizeof (pathbuf
) - 1);
1423 execv (pathbuf
, argv
);
1425 execv ("/proc/self/exe", argv
);
1427 /* If we come here, we will never be able to re-exec. */
1428 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1431 if (server_user
!= NULL
)
1433 setuid (server_uid
);
1434 setgid (server_gid
);
1436 if (chdir ("/") != 0)
1437 dbg_log (_("cannot change current working directory to \"/\": %s"),
1441 /* Reenable the databases. */
1442 time_t now
= time (NULL
);
1443 for (int cnt
= 0; cnt
< lastdb
; ++cnt
)
1444 if (dbs
[cnt
].enabled
)
1446 dbs
[cnt
].head
->timestamp
= now
;
1447 dbs
[cnt
].head
->nscd_certainly_running
= certainly
[cnt
];
1452 /* List of file descriptors. */
1456 struct fdlist
*next
;
1458 /* Memory allocated for the list. */
1459 static struct fdlist
*fdlist
;
1460 /* List of currently ready-to-read file descriptors. */
1461 static struct fdlist
*readylist
;
1463 /* Condition variable and mutex to signal availability of entries in
1464 READYLIST. The condvar is initialized dynamically since we might
1465 use a different clock depending on availability. */
1466 static pthread_cond_t readylist_cond
= PTHREAD_COND_INITIALIZER
;
1467 static pthread_mutex_t readylist_lock
= PTHREAD_MUTEX_INITIALIZER
;
1469 /* The clock to use with the condvar. */
1470 static clockid_t timeout_clock
= CLOCK_REALTIME
;
1472 /* Number of threads ready to handle the READYLIST. */
1473 static unsigned long int nready
;
1476 /* Function for the clean-up threads. */
1478 __attribute__ ((__noreturn__
))
1479 nscd_run_prune (void *p
)
1481 const long int my_number
= (long int) p
;
1482 assert (dbs
[my_number
].enabled
);
1484 int dont_need_update
= setup_thread (&dbs
[my_number
]);
1486 time_t now
= time (NULL
);
1488 /* We are running. */
1489 dbs
[my_number
].head
->timestamp
= now
;
1491 struct timespec prune_ts
;
1492 if (__builtin_expect (clock_gettime (timeout_clock
, &prune_ts
) == -1, 0))
1493 /* Should never happen. */
1496 /* Compute the initial timeout time. Prevent all the timers from going
1497 off at the same time by adding a db-based value. */
1498 prune_ts
.tv_sec
+= CACHE_PRUNE_INTERVAL
+ my_number
;
1499 dbs
[my_number
].wakeup_time
= now
+ CACHE_PRUNE_INTERVAL
+ my_number
;
1501 pthread_mutex_t
*prune_lock
= &dbs
[my_number
].prune_lock
;
1502 pthread_mutex_t
*prune_run_lock
= &dbs
[my_number
].prune_run_lock
;
1503 pthread_cond_t
*prune_cond
= &dbs
[my_number
].prune_cond
;
1505 pthread_mutex_lock (prune_lock
);
1508 /* Wait, but not forever. */
1510 if (! dbs
[my_number
].clear_cache
)
1511 e
= pthread_cond_timedwait (prune_cond
, prune_lock
, &prune_ts
);
1512 assert (__builtin_expect (e
== 0 || e
== ETIMEDOUT
, 1));
1516 if (e
== ETIMEDOUT
|| now
>= dbs
[my_number
].wakeup_time
1517 || dbs
[my_number
].clear_cache
)
1519 /* We will determine the new timout values based on the
1520 cache content. Should there be concurrent additions to
1521 the cache which are not accounted for in the cache
1522 pruning we want to know about it. Therefore set the
1523 timeout to the maximum. It will be descreased when adding
1524 new entries to the cache, if necessary. */
1525 dbs
[my_number
].wakeup_time
= MAX_TIMEOUT_VALUE
;
1527 /* Unconditionally reset the flag. */
1528 time_t prune_now
= dbs
[my_number
].clear_cache
? LONG_MAX
: now
;
1529 dbs
[my_number
].clear_cache
= 0;
1531 pthread_mutex_unlock (prune_lock
);
1533 /* We use a separate lock for running the prune function (instead
1534 of keeping prune_lock locked) because this enables concurrent
1535 invocations of cache_add which might modify the timeout value. */
1536 pthread_mutex_lock (prune_run_lock
);
1537 next_wait
= prune_cache (&dbs
[my_number
], prune_now
, -1);
1538 pthread_mutex_unlock (prune_run_lock
);
1540 next_wait
= MAX (next_wait
, CACHE_PRUNE_INTERVAL
);
1541 /* If clients cannot determine for sure whether nscd is running
1542 we need to wake up occasionally to update the timestamp.
1543 Wait 90% of the update period. */
1544 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1545 if (__builtin_expect (! dont_need_update
, 0))
1547 next_wait
= MIN (UPDATE_MAPPING_TIMEOUT
, next_wait
);
1548 dbs
[my_number
].head
->timestamp
= now
;
1551 pthread_mutex_lock (prune_lock
);
1553 /* Make it known when we will wake up again. */
1554 if (now
+ next_wait
< dbs
[my_number
].wakeup_time
)
1555 dbs
[my_number
].wakeup_time
= now
+ next_wait
;
1557 next_wait
= dbs
[my_number
].wakeup_time
- now
;
1560 /* The cache was just pruned. Do not do it again now. Just
1561 use the new timeout value. */
1562 next_wait
= dbs
[my_number
].wakeup_time
- now
;
1564 if (clock_gettime (timeout_clock
, &prune_ts
) == -1)
1565 /* Should never happen. */
1568 /* Compute next timeout time. */
1569 prune_ts
.tv_sec
+= next_wait
;
1574 /* This is the main loop. It is replicated in different threads but
1575 the use of the ready list makes sure only one thread handles an
1576 incoming connection.  Each round waits on readylist_cond until
   READYLIST is non-empty, unlinks the oldest entry (READYLIST->next),
   drops readylist_lock, reads the request header and the key from the
   descriptor and hands both to handle_request.  Marked noreturn.  */
1578 __attribute__ ((__noreturn__
))
1579 nscd_run_worker (void *p
)
1583 /* Initial locking. */
1584 pthread_mutex_lock (&readylist_lock
);
1586 /* One more thread available. */
1591 while (readylist
== NULL
)
1592 pthread_cond_wait (&readylist_cond
, &readylist_lock
);
/* READYLIST points at the newest entry of the circular list, so its
   successor is the one queued first.  */
1594 struct fdlist
*it
= readylist
->next
;
1595 if (readylist
->next
== readylist
)
1596 /* Just one entry on the list. */
1599 readylist
->next
= it
->next
;
1601 /* Extract the information and mark the record ready to be used
1606 /* One more thread available. */
1609 /* We are done with the list. */
1610 pthread_mutex_unlock (&readylist_lock
);
1612 #ifndef __ASSUME_ACCEPT4
1613 if (have_accept4
< 0)
1615 /* We do not want to block on a short read or so. */
/* Only done when accept4 is unavailable; presumably accept4 already
   returns a non-blocking descriptor -- its flag argument is not visible
   in this excerpt.  */
1616 int fl
= fcntl (fd
, F_GETFL
);
1617 if (fl
== -1 || fcntl (fd
, F_SETFL
, fl
| O_NONBLOCK
) == -1)
1622 /* Now read the request. */
1624 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd
, &req
, sizeof (req
)))
1625 != sizeof (req
), 0))
1627 /* We failed to read data. Note that this also might mean we
1628 failed because we would have blocked. */
1629 if (debug_level
> 0)
1630 dbg_log (_("short read while reading request: %s"),
1631 strerror_r (errno
, buf
, sizeof (buf
)));
1635 /* Check whether this is a valid request type. */
1636 if (req
.type
< GETPWBYNAME
|| req
.type
>= LASTREQ
)
1639 /* Some systems have no SO_PEERCRED implementation. They don't
1640 care about security so we don't either. */
1645 if (__builtin_expect (debug_level
> 0, 0))
1647 struct ucred caller
;
1648 socklen_t optlen
= sizeof (caller
);
1650 if (getsockopt (fd
, SOL_SOCKET
, SO_PEERCRED
, &caller
, &optlen
) == 0)
1654 const pid_t pid
= 0;
1657 /* It should not be possible to crash the nscd with a silly
1658 request (i.e., a terribly large key). We limit the size to
   MAXKEYLEN bytes; negative lengths are rejected as well. */
1659 if (__builtin_expect (req
.key_len
, 1) < 0
1660 || __builtin_expect (req
.key_len
, 1) > MAXKEYLEN
)
1662 if (debug_level
> 0)
1663 dbg_log (_("key length in request too long: %d"), req
.key_len
);
1668 char keybuf
[MAXKEYLEN
];
1670 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd
, keybuf
,
1674 /* Again, this can also mean we would have blocked. */
1675 if (debug_level
> 0)
1676 dbg_log (_("short read while reading request key: %s"),
1677 strerror_r (errno
, buf
, sizeof (buf
)));
1681 if (__builtin_expect (debug_level
, 0) > 0)
1686 handle_request: request received (Version = %d) from PID %ld"),
1687 req
.version
, (long int) pid
);
1691 handle_request: request received (Version = %d)"), req
.version
);
1694 /* Phew, we got all the data, now process it. */
1695 handle_request (fd
, &req
, keybuf
, uid
, pid
);
/* Take the lock again before going back to wait on the ready list.  */
1703 pthread_mutex_lock (&readylist_lock
);
1705 /* One more thread available. */
/* Number of entries allocated for FDLIST and STARTTIME (see the xcalloc
   calls in start_threads) and for the CONNS array in main_loop_poll.  */
1712 static unsigned int nconns
;
/* Queue descriptor FD on the ready list and make sure a worker picks it
   up.  NOTE(review): the function header is not visible in this excerpt;
   presumably this is the body of fd_ready, which both main loops call
   with a descriptor that became readable.  */
1717 pthread_mutex_lock (&readylist_lock
);
1719 /* Find an empty entry in FDLIST. */
1721 for (inner
= 0; inner
< nconns
; ++inner
)
1722 if (fdlist
[inner
].next
== NULL
)
/* A free entry is expected to exist.  */
1724 assert (inner
< nconns
);
1726 fdlist
[inner
].fd
= fd
;
/* Append to the circular list.  An empty list becomes a one-element ring;
   otherwise the new entry is linked after the current READYLIST entry and
   becomes the new READYLIST, keeping READYLIST->next the oldest entry.  */
1728 if (readylist
== NULL
)
1729 readylist
= fdlist
[inner
].next
= &fdlist
[inner
];
1732 fdlist
[inner
].next
= readylist
->next
;
1733 readylist
= readylist
->next
= &fdlist
[inner
];
1736 bool do_signal
= true;
/* No idle worker: try to grow the pool, up to MAX_NTHREADS.  */
1737 if (__builtin_expect (nready
== 0, 0))
1742 /* Try to start another thread to help out. */
1744 if (nthreads
< max_nthreads
1745 && pthread_create (&th
, &attr
, nscd_run_worker
,
1746 (void *) (long int) nthreads
) == 0)
1748 /* We got another thread. */
1750 /* The new thread might need a kick. */
1756 pthread_mutex_unlock (&readylist_lock
);
1758 /* Tell one of the worker threads there is work to do. */
1760 pthread_cond_signal (&readylist_cond
);
1764 /* Check whether restarting should happen.  The result is true only when
   PARANOIA is set, the ready list is empty, every worker thread is idle
   (NREADY equals NTHREADS) and NOW has reached RESTART_TIME.  */
1766 restart_p (time_t now
)
1768 return (paranoia
&& readylist
== NULL
&& nready
== nthreads
1769 && now
>= restart_time
);
1773 /* Array for times a connection was accepted.  main_loop_poll indexes
   it by slot number in its CONNS array, main_loop_epoll by descriptor
   number; the epoll variant stores zero for descriptors without a
   pending connection.  */
1774 static time_t *starttime
;
/* Main loop based on poll.  Slot 0 of CONNS is checked for new incoming
   connections and, when INOTIFY_FD is valid, slot 1 watches the inotify
   descriptor; the remaining slots hold accepted connections which have
   not sent their request yet.  Marked noreturn.  */
1778 __attribute__ ((__noreturn__
))
1779 main_loop_poll (void)
1781 struct pollfd
*conns
= (struct pollfd
*) xmalloc (nconns
1782 * sizeof (conns
[0]));
1785 conns
[0].events
= POLLRDNORM
;
1787 size_t firstfree
= 1;
1790 if (inotify_fd
!= -1)
1792 conns
[1].fd
= inotify_fd
;
1793 conns
[1].events
= POLLRDNORM
;
1801 /* Wait for any event. We wait at most a couple of seconds so
1802 that we can check whether we should close any of the accepted
1803 connections since we have not received a request.
   MAIN_THREAD_TIMEOUT is in milliseconds: it starts at
   MAX_ACCEPT_TIMEOUT seconds and shrinks linearly with NUSED, reaching
   the midpoint between MAX_ACCEPT_TIMEOUT and MIN_ACCEPT_TIMEOUT when
   NUSED equals NCONNS. */
1804 #define MAX_ACCEPT_TIMEOUT 30
1805 #define MIN_ACCEPT_TIMEOUT 5
1806 #define MAIN_THREAD_TIMEOUT \
1807 (MAX_ACCEPT_TIMEOUT * 1000 \
1808 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1810 int n
= poll (conns
, nused
, MAIN_THREAD_TIMEOUT
);
1812 time_t now
= time (NULL
);
1814 /* If there is a descriptor ready for reading or there is a new
1815 connection, process this now. */
1818 if (conns
[0].revents
!= 0)
1820 /* We have a new incoming connection. Accept the connection. */
1823 #ifndef __ASSUME_ACCEPT4
1825 if (have_accept4
>= 0)
1828 fd
= TEMP_FAILURE_RETRY (accept4 (sock
, NULL
, NULL
,
1830 #ifndef __ASSUME_ACCEPT4
/* First use of accept4: remember whether the kernel supports it.  Only
   a failure with ENOSYS marks it as unavailable.  */
1831 if (have_accept4
== 0)
1832 have_accept4
= fd
!= -1 || errno
!= ENOSYS
? 1 : -1;
1835 #ifndef __ASSUME_ACCEPT4
1836 if (have_accept4
< 0)
1837 fd
= TEMP_FAILURE_RETRY (accept (sock
, NULL
, NULL
));
1840 /* Use the descriptor if we have not reached the limit. */
1843 if (firstfree
< nconns
)
1845 conns
[firstfree
].fd
= fd
;
1846 conns
[firstfree
].events
= POLLRDNORM
;
1847 starttime
[firstfree
] = now
;
1848 if (firstfree
>= nused
)
1849 nused
= firstfree
+ 1;
1853 while (firstfree
< nused
&& conns
[firstfree
].fd
!= -1);
1856 /* We cannot use the connection so close it. */
1865 if (inotify_fd
!= -1 && conns
[1].fd
== inotify_fd
)
1867 if (conns
[1].revents
!= 0)
1869 bool to_clear
[lastdb
] = { false, };
1873 # define PATH_MAX 1024
1875 struct inotify_event i
;
1876 char buf
[sizeof (struct inotify_event
) + PATH_MAX
];
1881 ssize_t nb
= TEMP_FAILURE_RETRY (read (inotify_fd
, &inev
,
1883 if (nb
< (ssize_t
) sizeof (struct inotify_event
))
1885 if (__builtin_expect (nb
== -1 && errno
!= EAGAIN
,
1888 /* Something went wrong when reading the inotify
1889 data. Better disable inotify. */
1891 disabled inotify after read error %d"),
1903 /* Check which of the files changed. */
1904 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
1906 struct traced_file
*finfo
= dbs
[dbcnt
].traced_files
;
1908 while (finfo
!= NULL
)
1910 if (finfo
->inotify_descr
== inev
.i
.wd
)
1912 to_clear
[dbcnt
] = true;
1913 if (finfo
->call_res_init
)
1918 finfo
= finfo
->next
;
1924 /* Actually perform the cache clearing. */
1925 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
1926 if (to_clear
[dbcnt
])
1928 pthread_mutex_lock (&dbs
[dbcnt
].prune_lock
);
1929 dbs
[dbcnt
].clear_cache
= 1;
1930 pthread_mutex_unlock (&dbs
[dbcnt
].prune_lock
);
1931 pthread_cond_signal (&dbs
[dbcnt
].prune_cond
);
1941 for (size_t cnt
= first
; cnt
< nused
&& n
> 0; ++cnt
)
1942 if (conns
[cnt
].revents
!= 0)
1944 fd_ready (conns
[cnt
].fd
);
1946 /* Clean up the CONNS array. */
1948 if (cnt
< firstfree
)
1950 if (cnt
== nused
- 1)
1953 while (conns
[nused
- 1].fd
== -1);
1959 /* Now find entries which have timed out. */
1962 /* We make the timeout length depend on the number of file
1963 descriptors currently used. */
1964 #define ACCEPT_TIMEOUT \
1965 (MAX_ACCEPT_TIMEOUT \
1966 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
1967 time_t laststart
= now
- ACCEPT_TIMEOUT
;
1969 for (size_t cnt
= nused
- 1; cnt
> 0; --cnt
)
1971 if (conns
[cnt
].fd
!= -1 && starttime
[cnt
] < laststart
)
1973 /* Remove the entry, it timed out. */
1974 (void) close (conns
[cnt
].fd
);
1977 if (cnt
< firstfree
)
1979 if (cnt
== nused
- 1)
1982 while (conns
[nused
- 1].fd
== -1);
1986 if (restart_p (now
))
/* Main loop based on epoll.  EFD is the epoll descriptor; the listening
   socket and, when available, the inotify descriptor are registered with
   it.  Accepted connections are tracked in STARTTIME, indexed by
   descriptor number.  NOTE(review): judging from the "We cannot use
   epoll" comments this function presumably returns to its caller when
   registration fails -- the return statements are not visible in this
   excerpt.  */
1994 main_loop_epoll (int efd
)
1996 struct epoll_event ev
= { 0, };
2000 /* Add the socket. */
2001 ev
.events
= EPOLLRDNORM
;
2003 if (epoll_ctl (efd
, EPOLL_CTL_ADD
, sock
, &ev
) == -1)
2004 /* We cannot use epoll. */
2007 # ifdef HAVE_INOTIFY
2008 if (inotify_fd
!= -1)
2010 ev
.events
= EPOLLRDNORM
;
2011 ev
.data
.fd
= inotify_fd
;
2012 if (epoll_ctl (efd
, EPOLL_CTL_ADD
, inotify_fd
, &ev
) == -1)
2013 /* We cannot use epoll. */
2021 struct epoll_event revs
[100];
2022 # define nrevs (sizeof (revs) / sizeof (revs[0]))
2024 int n
= epoll_wait (efd
, revs
, nrevs
, MAIN_THREAD_TIMEOUT
);
2026 time_t now
= time (NULL
);
2028 for (int cnt
= 0; cnt
< n
; ++cnt
)
2029 if (revs
[cnt
].data
.fd
== sock
)
2031 /* A new connection. */
2034 # ifndef __ASSUME_ACCEPT4
2036 if (have_accept4
>= 0)
2039 fd
= TEMP_FAILURE_RETRY (accept4 (sock
, NULL
, NULL
,
2041 # ifndef __ASSUME_ACCEPT4
/* First use of accept4: remember whether the kernel supports it.  Only
   a failure with ENOSYS marks it as unavailable.  */
2042 if (have_accept4
== 0)
2043 have_accept4
= fd
!= -1 || errno
!= ENOSYS
? 1 : -1;
2046 # ifndef __ASSUME_ACCEPT4
2047 if (have_accept4
< 0)
2048 fd
= TEMP_FAILURE_RETRY (accept (sock
, NULL
, NULL
));
2051 /* Use the descriptor if we have not reached the limit. */
2054 /* Try to add the new descriptor. */
2057 || epoll_ctl (efd
, EPOLL_CTL_ADD
, fd
, &ev
) == -1)
2058 /* The descriptor is too large or something went
2059 wrong. Close the descriptor. */
2063 /* Remember when we accepted the connection. */
2064 starttime
[fd
] = now
;
2073 # ifdef HAVE_INOTIFY
2074 else if (revs
[cnt
].data
.fd
== inotify_fd
)
2076 bool to_clear
[lastdb
] = { false, };
2079 struct inotify_event i
;
2080 char buf
[sizeof (struct inotify_event
) + PATH_MAX
];
2085 ssize_t nb
= TEMP_FAILURE_RETRY (read (inotify_fd
, &inev
,
2087 if (nb
< (ssize_t
) sizeof (struct inotify_event
))
2089 if (__builtin_expect (nb
== -1 && errno
!= EAGAIN
, 0))
2091 /* Something went wrong when reading the inotify
2092 data. Better disable inotify. */
2093 dbg_log (_("disabled inotify after read error %d"),
2095 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, inotify_fd
,
2103 /* Check which of the files changed. */
2104 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
2106 struct traced_file
*finfo
= dbs
[dbcnt
].traced_files
;
2108 while (finfo
!= NULL
)
2110 if (finfo
->inotify_descr
== inev
.i
.wd
)
2112 to_clear
[dbcnt
] = true;
2113 if (finfo
->call_res_init
)
2118 finfo
= finfo
->next
;
2124 /* Actually perform the cache clearing. */
2125 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
2126 if (to_clear
[dbcnt
])
2128 pthread_mutex_lock (&dbs
[dbcnt
].prune_lock
);
2129 dbs
[dbcnt
].clear_cache
= 1;
2130 pthread_mutex_unlock (&dbs
[dbcnt
].prune_lock
);
2131 pthread_cond_signal (&dbs
[dbcnt
].prune_cond
);
2137 /* Remove the descriptor from the epoll descriptor. */
2138 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, revs
[cnt
].data
.fd
, NULL
);
2140 /* Get a worker to handle the request. */
2141 fd_ready (revs
[cnt
].data
.fd
);
2143 /* Reset the time. */
2144 starttime
[revs
[cnt
].data
.fd
] = 0;
2145 if (revs
[cnt
].data
.fd
== highest
)
2148 while (highest
> 0 && starttime
[highest
] == 0);
2153 /* Now look for descriptors for accepted connections which have
2154 not sent a request for too long. */
2155 time_t laststart
= now
- ACCEPT_TIMEOUT
;
2156 assert (starttime
[sock
] == 0);
2157 assert (inotify_fd
== -1 || starttime
[inotify_fd
] == 0);
2158 for (int cnt
= highest
; cnt
> STDERR_FILENO
; --cnt
)
2159 if (starttime
[cnt
] != 0 && starttime
[cnt
] < laststart
)
2161 /* We are waiting for this one for too long. Close it. */
2162 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, cnt
, NULL
);
2170 else if (cnt
!= sock
&& starttime
[cnt
] == 0 && cnt
== highest
)
2173 if (restart_p (now
))
2180 /* Start all the threads we want. The initial process is thread no. 1.
   The visible steps are: choose the clock for the prune condvars,
   initialize the shared thread attributes, create the prune threads
   (one per database, subject to a condition not visible in this excerpt)
   and NTHREADS worker threads, allocate FDLIST and STARTTIME, and finally
   run the main loop in the calling thread. */
2182 start_threads (void)
2184 /* Initialize the conditional variable we will use. The only
2185 non-standard attribute we might use is the clock selection. */
2186 pthread_condattr_t condattr
;
2187 pthread_condattr_init (&condattr
);
2189 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2190 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2191 /* Determine whether the monotonic clock is available. */
2192 struct timespec dummy
;
2193 # if _POSIX_MONOTONIC_CLOCK == 0
2194 if (sysconf (_SC_MONOTONIC_CLOCK
) > 0)
2196 # if _POSIX_CLOCK_SELECTION == 0
2197 if (sysconf (_SC_CLOCK_SELECTION
) > 0)
2199 if (clock_getres (CLOCK_MONOTONIC
, &dummy
) == 0
2200 && pthread_condattr_setclock (&condattr
, CLOCK_MONOTONIC
) == 0)
2201 timeout_clock
= CLOCK_MONOTONIC
;
2204 /* Create the attribute for the threads. They are all created
2206 pthread_attr_init (&attr
);
2207 pthread_attr_setdetachstate (&attr
, PTHREAD_CREATE_DETACHED
);
2208 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2209 pthread_attr_setstacksize (&attr
, NSCD_THREAD_STACKSIZE
);
2211 /* We allow fewer than LASTDB threads only for debugging. */
2212 if (debug_level
== 0)
2213 nthreads
= MAX (nthreads
, lastdb
);
2215 /* Create the threads which prune the databases. */
2216 // XXX Ideally this work would be done by some of the worker threads.
2217 // XXX But this is problematic since we would need to be able to wake
2218 // XXX them up explicitly as well as part of the group handling the
2219 // XXX ready-list. This requires an operation where we can wait on
2220 // XXX two conditional variables at the same time. This operation
2221 // XXX does not exist (yet).
2222 for (long int i
= 0; i
< lastdb
; ++i
)
2224 /* Initialize the conditional variable. */
2225 if (pthread_cond_init (&dbs
[i
].prune_cond
, &condattr
) != 0)
2227 dbg_log (_("could not initialize conditional variable"));
2233 && pthread_create (&th
, &attr
, nscd_run_prune
, (void *) i
) != 0)
2235 dbg_log (_("could not start clean-up thread; terminating"));
2240 pthread_condattr_destroy (&condattr
);
2242 for (long int i
= 0; i
< nthreads
; ++i
)
2245 if (pthread_create (&th
, &attr
, nscd_run_worker
, NULL
) != 0)
2249 dbg_log (_("could not start any worker thread; terminating"));
2257 /* Determine how much room for descriptors we should initially
2258 allocate. This might need to change later if we cap the number
2260 const long int nfds
= sysconf (_SC_OPEN_MAX
);
2262 #define MAXCONN 16384
2263 if (nfds
== -1 || nfds
> MAXCONN
)
2265 else if (nfds
< MINCONN
)
2270 /* We need memory to pass descriptors on to the worker threads. */
2271 fdlist
= (struct fdlist
*) xcalloc (nconns
, sizeof (fdlist
[0]));
2272 /* Array to keep track when connection was accepted. */
2273 starttime
= (time_t *) xcalloc (nconns
, sizeof (starttime
[0]));
2275 /* In the main thread we execute the loop which handles incoming
2278 int efd
= epoll_create (100);
2281 main_loop_epoll (efd
);
2290 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2291 this function is called, we are not listening on the nscd socket yet so
2292 we can just use the ordinary lookup functions without causing a lockup.
   The results are stored in SERVER_UID, SERVER_GID, SERVER_GROUPS and
   SERVER_NGROUPS; every visible failure path logs a message and calls
   error with EXIT_FAILURE. */
2294 begin_drop_privileges (void)
2296 struct passwd
*pwd
= getpwnam (server_user
);
2300 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2301 error (EXIT_FAILURE
, 0, _("Failed to run nscd as user '%s'"),
2305 server_uid
= pwd
->pw_uid
;
2306 server_gid
= pwd
->pw_gid
;
2308 /* Save the old UID/GID if we have to change back. */
2311 old_uid
= getuid ();
2312 old_gid
= getgid ();
/* The first call passes no buffer and only asks for the number of
   groups: with SERVER_NGROUPS too small getgrouplist returns -1 and
   stores the required count, so a return value of 0 is treated as a
   fatal error.  */
2315 if (getgrouplist (server_user
, server_gid
, NULL
, &server_ngroups
) == 0)
2317 /* This really must never happen. */
2318 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2319 error (EXIT_FAILURE
, errno
, _("initial getgrouplist failed"));
/* Allocate room for the reported number of groups and fetch them.  */
2322 server_groups
= (gid_t
*) xmalloc (server_ngroups
* sizeof (gid_t
));
2324 if (getgrouplist (server_user
, server_gid
, server_groups
, &server_ngroups
)
2327 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2328 error (EXIT_FAILURE
, errno
, _("getgrouplist failed"));
2333 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2334 run nscd as the user specified in the configuration file. */
2336 finish_drop_privileges (void)
2338 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2339 /* We need to preserve the capabilities to connect to the audit daemon. */
2340 cap_t new_caps
= preserve_capabilities ();
2343 if (setgroups (server_ngroups
, server_groups
) == -1)
2345 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2346 error (EXIT_FAILURE
, errno
, _("setgroups failed"));
2351 res
= setresgid (server_gid
, server_gid
, old_gid
);
2353 res
= setgid (server_gid
);
2356 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2362 res
= setresuid (server_uid
, server_uid
, old_uid
);
2364 res
= setuid (server_uid
);
2367 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2372 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2373 /* Remove the temporary capabilities. */
2374 install_real_capabilities (new_caps
);