1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2012 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>. */
34 #include <arpa/inet.h>
36 # include <linux/netlink.h>
37 # include <linux/rtnetlink.h>
40 # include <sys/epoll.h>
43 # include <sys/inotify.h>
46 #include <sys/param.h>
49 # include <sys/sendfile.h>
51 #include <sys/socket.h>
58 #include <resolv/resolv.h>
60 #include <kernel-features.h>
63 /* Support to run nscd as an unprivileged user */
64 const char *server_user
;
65 static uid_t server_uid
;
66 static gid_t server_gid
;
67 const char *stat_user
;
69 static gid_t
*server_groups
;
73 static int server_ngroups
;
75 static pthread_attr_t attr
;
77 static void begin_drop_privileges (void);
78 static void finish_drop_privileges (void);
80 /* Map request type to a string. */
81 const char *const serv2str
[LASTREQ
] =
83 [GETPWBYNAME
] = "GETPWBYNAME",
84 [GETPWBYUID
] = "GETPWBYUID",
85 [GETGRBYNAME
] = "GETGRBYNAME",
86 [GETGRBYGID
] = "GETGRBYGID",
87 [GETHOSTBYNAME
] = "GETHOSTBYNAME",
88 [GETHOSTBYNAMEv6
] = "GETHOSTBYNAMEv6",
89 [GETHOSTBYADDR
] = "GETHOSTBYADDR",
90 [GETHOSTBYADDRv6
] = "GETHOSTBYADDRv6",
91 [SHUTDOWN
] = "SHUTDOWN",
92 [GETSTAT
] = "GETSTAT",
93 [INVALIDATE
] = "INVALIDATE",
94 [GETFDPW
] = "GETFDPW",
95 [GETFDGR
] = "GETFDGR",
96 [GETFDHST
] = "GETFDHST",
98 [INITGROUPS
] = "INITGROUPS",
99 [GETSERVBYNAME
] = "GETSERVBYNAME",
100 [GETSERVBYPORT
] = "GETSERVBYPORT",
101 [GETFDSERV
] = "GETFDSERV",
102 [GETNETGRENT
] = "GETNETGRENT",
103 [INNETGR
] = "INNETGR",
104 [GETFDNETGR
] = "GETFDNETGR"
107 /* The control data structures for the services. */
108 struct database_dyn dbs
[lastdb
] =
111 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
112 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
113 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
119 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
120 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
121 .db_filename
= _PATH_NSCD_PASSWD_DB
,
122 .disabled_iov
= &pwd_iov_disabled
,
130 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
131 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
132 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
138 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
139 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
140 .db_filename
= _PATH_NSCD_GROUP_DB
,
141 .disabled_iov
= &grp_iov_disabled
,
149 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
150 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
151 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
155 .propagate
= 0, /* Not used. */
157 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
158 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
159 .db_filename
= _PATH_NSCD_HOSTS_DB
,
160 .disabled_iov
= &hst_iov_disabled
,
168 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
169 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
170 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
174 .propagate
= 0, /* Not used. */
176 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
177 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
178 .db_filename
= _PATH_NSCD_SERVICES_DB
,
179 .disabled_iov
= &serv_iov_disabled
,
187 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
188 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
189 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
193 .propagate
= 0, /* Not used. */
195 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
196 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
197 .db_filename
= _PATH_NSCD_NETGROUP_DB
,
198 .disabled_iov
= &netgroup_iov_disabled
,
208 /* Mapping of request type to database. Each entry holds a flag (true for the lookup requests, false for the descriptor-passing and control requests) followed by the database the request refers to, NULL where none applies. */
212 struct database_dyn
*db
;
213 } const reqinfo
[LASTREQ
] =
215 [GETPWBYNAME
] = { true, &dbs
[pwddb
] },
216 [GETPWBYUID
] = { true, &dbs
[pwddb
] },
217 [GETGRBYNAME
] = { true, &dbs
[grpdb
] },
218 [GETGRBYGID
] = { true, &dbs
[grpdb
] },
219 [GETHOSTBYNAME
] = { true, &dbs
[hstdb
] },
220 [GETHOSTBYNAMEv6
] = { true, &dbs
[hstdb
] },
221 [GETHOSTBYADDR
] = { true, &dbs
[hstdb
] },
222 [GETHOSTBYADDRv6
] = { true, &dbs
[hstdb
] },
223 [SHUTDOWN
] = { false, NULL
},
224 [GETSTAT
] = { false, NULL
},
225 [INVALIDATE
] = { false, NULL
},
226 [GETFDPW
] = { false, &dbs
[pwddb
] },
227 [GETFDGR
] = { false, &dbs
[grpdb
] },
228 [GETFDHST
] = { false, &dbs
[hstdb
] },
229 [GETAI
] = { true, &dbs
[hstdb
] },
230 [INITGROUPS
] = { true, &dbs
[grpdb
] },
231 [GETSERVBYNAME
] = { true, &dbs
[servdb
] },
232 [GETSERVBYPORT
] = { true, &dbs
[servdb
] },
233 [GETFDSERV
] = { false, &dbs
[servdb
] },
234 [GETNETGRENT
] = { true, &dbs
[netgrdb
] },
235 [INNETGR
] = { true, &dbs
[netgrdb
] },
236 [GETFDNETGR
] = { false, &dbs
[netgrdb
] }
240 /* Initial number of threads to use. */
242 /* Maximum number of threads to use. */
243 int max_nthreads
= 32;
245 /* Socket for incoming connections. */
249 /* Inotify descriptor. */
254 /* Descriptor for netlink status updates. */
255 static int nl_status_fd
= -1;
258 #ifndef __ASSUME_SOCK_CLOEXEC
259 /* Negative if SOCK_CLOEXEC is not supported, positive if it is, zero
260 before we know the result. */
261 static int have_sock_cloexec
;
263 #ifndef __ASSUME_ACCEPT4
264 static int have_accept4
;
267 /* Number of times clients had to wait. */
268 unsigned long int client_queued
;
272 writeall (int fd
, const void *buf
, size_t len
)
278 ret
= TEMP_FAILURE_RETRY (send (fd
, buf
, n
, MSG_NOSIGNAL
));
281 buf
= (const char *) buf
+ ret
;
285 return ret
< 0 ? ret
: len
- n
;
291 sendfileall (int tofd
, int fromfd
, off_t off
, size_t len
)
298 ret
= TEMP_FAILURE_RETRY (sendfile (tofd
, fromfd
, &off
, n
));
304 return ret
< 0 ? ret
: len
- n
;
312 /* The following three are not really used, they are symbolic constants. */
318 use_he_begin
= use_he
| use_begin
,
319 use_he_end
= use_he
| use_end
,
322 use_key_begin
= use_key
| use_begin
,
323 use_key_end
= use_key
| use_end
,
324 use_key_first
= use_key_begin
| use_first
,
327 use_data_begin
= use_data
| use_begin
,
328 use_data_end
= use_data
| use_end
,
329 use_data_first
= use_data_begin
| use_first
334 check_use (const char *data
, nscd_ssize_t first_free
, uint8_t *usemap
,
335 enum usekey use
, ref_t start
, size_t len
)
339 if (start
> first_free
|| start
+ len
> first_free
340 || (start
& BLOCK_ALIGN_M1
))
343 if (usemap
[start
] == use_not
)
345 /* Add the start marker. */
346 usemap
[start
] = use
| use_begin
;
350 if (usemap
[++start
] != use_not
)
355 /* Add the end marker. */
356 usemap
[start
] = use
| use_end
;
358 else if ((usemap
[start
] & ~use_first
) == ((use
| use_begin
) & ~use_first
))
360 /* Hash entries can't be shared. */
364 usemap
[start
] |= (use
& use_first
);
368 if (usemap
[++start
] != use
)
371 if (usemap
[++start
] != (use
| use_end
))
375 /* Points to a wrong object or somewhere in the middle. */
382 /* Verify data in persistent database. */
384 verify_persistent_db (void *mem
, struct database_pers_head
*readhead
, int dbnr
)
386 assert (dbnr
== pwddb
|| dbnr
== grpdb
|| dbnr
== hstdb
|| dbnr
== servdb
389 time_t now
= time (NULL
);
391 struct database_pers_head
*head
= mem
;
392 struct database_pers_head head_copy
= *head
;
394 /* Check that the header that was read matches the head in the database. */
395 if (memcmp (head
, readhead
, sizeof (*head
)) != 0)
398 /* First some easy tests: make sure the database header is sane. */
399 if (head
->version
!= DB_VERSION
400 || head
->header_size
!= sizeof (*head
)
401 /* We allow a timestamp to be one hour ahead of the current time.
402 This should cover daylight saving time changes. */
403 || head
->timestamp
> now
+ 60 * 60 + 60
404 || (head
->gc_cycle
& 1)
406 || (size_t) head
->module
> INT32_MAX
/ sizeof (ref_t
)
407 || (size_t) head
->data_size
> INT32_MAX
- head
->module
* sizeof (ref_t
)
408 || head
->first_free
< 0
409 || head
->first_free
> head
->data_size
410 || (head
->first_free
& BLOCK_ALIGN_M1
) != 0
411 || head
->maxnentries
< 0
412 || head
->maxnsearched
< 0)
415 uint8_t *usemap
= calloc (head
->first_free
, 1);
419 const char *data
= (char *) &head
->array
[roundup (head
->module
,
420 ALIGN
/ sizeof (ref_t
))];
422 nscd_ssize_t he_cnt
= 0;
423 for (nscd_ssize_t cnt
= 0; cnt
< head
->module
; ++cnt
)
425 ref_t trail
= head
->array
[cnt
];
429 while (work
!= ENDREF
)
431 if (! check_use (data
, head
->first_free
, usemap
, use_he
, work
,
432 sizeof (struct hashentry
)))
435 /* Now we know we can dereference the record. */
436 struct hashentry
*here
= (struct hashentry
*) (data
+ work
);
440 /* Make sure the record is for this type of service. */
441 if (here
->type
>= LASTREQ
442 || reqinfo
[here
->type
].db
!= &dbs
[dbnr
])
445 /* Validate boolean field value. */
446 if (here
->first
!= false && here
->first
!= true)
454 || here
->packet
> head
->first_free
455 || here
->packet
+ sizeof (struct datahead
) > head
->first_free
)
458 struct datahead
*dh
= (struct datahead
*) (data
+ here
->packet
);
460 if (! check_use (data
, head
->first_free
, usemap
,
461 use_data
| (here
->first
? use_first
: 0),
462 here
->packet
, dh
->allocsize
))
465 if (dh
->allocsize
< sizeof (struct datahead
)
466 || dh
->recsize
> dh
->allocsize
467 || (dh
->notfound
!= false && dh
->notfound
!= true)
468 || (dh
->usable
!= false && dh
->usable
!= true))
471 if (here
->key
< here
->packet
+ sizeof (struct datahead
)
472 || here
->key
> here
->packet
+ dh
->allocsize
473 || here
->key
+ here
->len
> here
->packet
+ dh
->allocsize
)
476 /* If keys can appear outside of data, this should be done
477 instead. But gc doesn't mark the data in that case. */
478 if (! check_use (data
, head
->first_free
, usemap
,
479 use_key
| (here
->first
? use_first
: 0),
480 here
->key
, here
->len
))
488 /* A circular list, this must not happen. */
491 trail
= ((struct hashentry
*) (data
+ trail
))->next
;
496 if (he_cnt
!= head
->nentries
)
499 /* See if all data and keys had at least one reference from
500 he->first == true hashentry. */
501 for (ref_t idx
= 0; idx
< head
->first_free
; ++idx
)
504 if (usemap
[idx
] == use_key_begin
)
507 if (usemap
[idx
] == use_data_begin
)
511 /* Finally, make sure the database hasn't changed since the first test. */
512 if (memcmp (mem
, &head_copy
, sizeof (*head
)) != 0)
525 # define EXTRA_O_FLAGS O_CLOEXEC
527 # define EXTRA_O_FLAGS 0
531 /* Initialize database information structures. */
535 /* Look up unprivileged uid/gid/groups before we start listening on the
537 if (server_user
!= NULL
)
538 begin_drop_privileges ();
541 /* No configuration for this value, assume a default. */
544 for (size_t cnt
= 0; cnt
< lastdb
; ++cnt
)
545 if (dbs
[cnt
].enabled
)
547 pthread_rwlock_init (&dbs
[cnt
].lock
, NULL
);
548 pthread_mutex_init (&dbs
[cnt
].memlock
, NULL
);
550 if (dbs
[cnt
].persistent
)
552 /* Try to open the appropriate file on disk. */
553 int fd
= open (dbs
[cnt
].db_filename
, O_RDWR
| EXTRA_O_FLAGS
);
560 struct database_pers_head head
;
561 ssize_t n
= TEMP_FAILURE_RETRY (read (fd
, &head
,
563 if (n
!= sizeof (head
) || fstat64 (fd
, &st
) != 0)
566 /* The code is single-threaded at this point so
567 using strerror is just fine. */
568 msg
= strerror (errno
);
570 dbg_log (_("invalid persistent database file \"%s\": %s"),
571 dbs
[cnt
].db_filename
, msg
);
572 unlink (dbs
[cnt
].db_filename
);
574 else if (head
.module
== 0 && head
.data_size
== 0)
576 /* The file has been created, but the head has not
577 been initialized yet. */
578 msg
= _("uninitialized header");
581 else if (head
.header_size
!= (int) sizeof (head
))
583 msg
= _("header size does not match");
586 else if ((total
= (sizeof (head
)
587 + roundup (head
.module
* sizeof (ref_t
),
591 || total
< sizeof (head
))
593 msg
= _("file size does not match");
596 /* Note we map with the maximum size allowed for the
597 database. This is likely much larger than the
598 actual file size. This is OK on most OSes since
599 extensions of the underlying file will
600 automatically translate more pages available for
602 else if ((mem
= mmap (NULL
, dbs
[cnt
].max_db_size
,
603 PROT_READ
| PROT_WRITE
,
607 else if (!verify_persistent_db (mem
, &head
, cnt
))
610 msg
= _("verification failed");
615 /* Success. We have the database. */
617 dbs
[cnt
].memsize
= total
;
618 dbs
[cnt
].data
= (char *)
619 &dbs
[cnt
].head
->array
[roundup (dbs
[cnt
].head
->module
,
620 ALIGN
/ sizeof (ref_t
))];
621 dbs
[cnt
].mmap_used
= true;
623 if (dbs
[cnt
].suggested_module
> head
.module
)
624 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
629 /* We also need a read-only descriptor. */
632 dbs
[cnt
].ro_fd
= open (dbs
[cnt
].db_filename
,
633 O_RDONLY
| EXTRA_O_FLAGS
);
634 if (dbs
[cnt
].ro_fd
== -1)
636 cannot create read-only descriptor for \"%s\"; no mmap"),
637 dbs
[cnt
].db_filename
);
640 // XXX Shall we test whether the descriptors actually
641 // XXX point to the same file?
644 /* Close the file descriptors in case something went
645 wrong in which case the variables have not been
650 else if (errno
== EACCES
)
651 error (EXIT_FAILURE
, 0, _("cannot access '%s'"),
652 dbs
[cnt
].db_filename
);
655 if (dbs
[cnt
].head
== NULL
)
657 /* No database loaded. Allocate the data structure,
659 struct database_pers_head head
;
660 size_t total
= (sizeof (head
)
661 + roundup (dbs
[cnt
].suggested_module
662 * sizeof (ref_t
), ALIGN
)
663 + (dbs
[cnt
].suggested_module
664 * DEFAULT_DATASIZE_PER_BUCKET
));
666 /* Try to create the database. If we do not need a
667 persistent database create a temporary file. */
670 if (dbs
[cnt
].persistent
)
672 fd
= open (dbs
[cnt
].db_filename
,
673 O_RDWR
| O_CREAT
| O_EXCL
| O_TRUNC
| EXTRA_O_FLAGS
,
675 if (fd
!= -1 && dbs
[cnt
].shared
)
676 ro_fd
= open (dbs
[cnt
].db_filename
,
677 O_RDONLY
| EXTRA_O_FLAGS
);
681 char fname
[] = _PATH_NSCD_XYZ_DB_TMP
;
682 fd
= mkostemp (fname
, EXTRA_O_FLAGS
);
684 /* We do not need the file name anymore after we
685 opened another file descriptor in read-only mode. */
689 ro_fd
= open (fname
, O_RDONLY
| EXTRA_O_FLAGS
);
699 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
700 dbnames
[cnt
], dbs
[cnt
].db_filename
);
701 // XXX Correct way to terminate?
705 if (dbs
[cnt
].persistent
)
706 dbg_log (_("cannot create %s; no persistent database used"),
707 dbs
[cnt
].db_filename
);
709 dbg_log (_("cannot create %s; no sharing possible"),
710 dbs
[cnt
].db_filename
);
712 dbs
[cnt
].persistent
= 0;
713 // XXX remember: no mmap
717 /* Tell the user if we could not create the read-only
719 if (ro_fd
== -1 && dbs
[cnt
].shared
)
721 cannot create read-only descriptor for \"%s\"; no mmap"),
722 dbs
[cnt
].db_filename
);
724 /* Before we create the header, initialize the hash
725 table. So that if we get interrupted while writing
726 the header we can recognize a partially initialized
728 size_t ps
= sysconf (_SC_PAGESIZE
);
730 assert (~ENDREF
== 0);
731 memset (tmpbuf
, '\xff', ps
);
733 size_t remaining
= dbs
[cnt
].suggested_module
* sizeof (ref_t
);
734 off_t offset
= sizeof (head
);
737 if (offset
% ps
!= 0)
739 towrite
= MIN (remaining
, ps
- (offset
% ps
));
740 if (pwrite (fd
, tmpbuf
, towrite
, offset
) != towrite
)
743 remaining
-= towrite
;
746 while (remaining
> ps
)
748 if (pwrite (fd
, tmpbuf
, ps
, offset
) == -1)
755 && pwrite (fd
, tmpbuf
, remaining
, offset
) != remaining
)
758 /* Create the header of the file. */
759 struct database_pers_head head
=
761 .version
= DB_VERSION
,
762 .header_size
= sizeof (head
),
763 .module
= dbs
[cnt
].suggested_module
,
764 .data_size
= (dbs
[cnt
].suggested_module
765 * DEFAULT_DATASIZE_PER_BUCKET
),
770 if ((TEMP_FAILURE_RETRY (write (fd
, &head
, sizeof (head
)))
772 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd
, 0, total
))
774 || (mem
= mmap (NULL
, dbs
[cnt
].max_db_size
,
775 PROT_READ
| PROT_WRITE
,
776 MAP_SHARED
, fd
, 0)) == MAP_FAILED
)
779 unlink (dbs
[cnt
].db_filename
);
780 dbg_log (_("cannot write to database file %s: %s"),
781 dbs
[cnt
].db_filename
, strerror (errno
));
782 dbs
[cnt
].persistent
= 0;
788 dbs
[cnt
].data
= (char *)
789 &dbs
[cnt
].head
->array
[roundup (dbs
[cnt
].head
->module
,
790 ALIGN
/ sizeof (ref_t
))];
791 dbs
[cnt
].memsize
= total
;
792 dbs
[cnt
].mmap_used
= true;
794 /* Remember the descriptors. */
796 dbs
[cnt
].ro_fd
= ro_fd
;
808 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
809 /* We do not check here whether the O_CLOEXEC provided to the
810 open call was successful or not. The two fcntl calls are
811 only performed once each per process start-up and therefore
812 is not noticeable at all. */
814 && ((dbs
[cnt
].wr_fd
!= -1
815 && fcntl (dbs
[cnt
].wr_fd
, F_SETFD
, FD_CLOEXEC
) == -1)
816 || (dbs
[cnt
].ro_fd
!= -1
817 && fcntl (dbs
[cnt
].ro_fd
, F_SETFD
, FD_CLOEXEC
) == -1)))
820 cannot set socket to close on exec: %s; disabling paranoia mode"),
826 if (dbs
[cnt
].head
== NULL
)
828 /* We do not use the persistent database. Just
829 create an in-memory data structure. */
830 assert (! dbs
[cnt
].persistent
);
832 dbs
[cnt
].head
= xmalloc (sizeof (struct database_pers_head
)
833 + (dbs
[cnt
].suggested_module
835 memset (dbs
[cnt
].head
, '\0', sizeof (struct database_pers_head
));
836 assert (~ENDREF
== 0);
837 memset (dbs
[cnt
].head
->array
, '\xff',
838 dbs
[cnt
].suggested_module
* sizeof (ref_t
));
839 dbs
[cnt
].head
->module
= dbs
[cnt
].suggested_module
;
840 dbs
[cnt
].head
->data_size
= (DEFAULT_DATASIZE_PER_BUCKET
841 * dbs
[cnt
].head
->module
);
842 dbs
[cnt
].data
= xmalloc (dbs
[cnt
].head
->data_size
);
843 dbs
[cnt
].head
->first_free
= 0;
846 assert (dbs
[cnt
].ro_fd
== -1);
850 /* Create the socket. */
851 #ifndef __ASSUME_SOCK_CLOEXEC
853 if (have_sock_cloexec
>= 0)
856 sock
= socket (AF_UNIX
, SOCK_STREAM
| SOCK_CLOEXEC
| SOCK_NONBLOCK
, 0);
857 #ifndef __ASSUME_SOCK_CLOEXEC
858 if (have_sock_cloexec
== 0)
859 have_sock_cloexec
= sock
!= -1 || errno
!= EINVAL
? 1 : -1;
862 #ifndef __ASSUME_SOCK_CLOEXEC
863 if (have_sock_cloexec
< 0)
864 sock
= socket (AF_UNIX
, SOCK_STREAM
, 0);
868 dbg_log (_("cannot open socket: %s"), strerror (errno
));
869 exit (errno
== EACCES
? 4 : 1);
871 /* Bind a name to the socket. */
872 struct sockaddr_un sock_addr
;
873 sock_addr
.sun_family
= AF_UNIX
;
874 strcpy (sock_addr
.sun_path
, _PATH_NSCDSOCKET
);
875 if (bind (sock
, (struct sockaddr
*) &sock_addr
, sizeof (sock_addr
)) < 0)
877 dbg_log ("%s: %s", _PATH_NSCDSOCKET
, strerror (errno
));
878 exit (errno
== EACCES
? 4 : 1);
881 #ifndef __ASSUME_SOCK_CLOEXEC
882 if (have_sock_cloexec
< 0)
884 /* We don't want to get stuck on accept. */
885 int fl
= fcntl (sock
, F_GETFL
);
886 if (fl
== -1 || fcntl (sock
, F_SETFL
, fl
| O_NONBLOCK
) == -1)
888 dbg_log (_("cannot change socket to nonblocking mode: %s"),
893 /* The descriptor needs to be closed on exec. */
894 if (paranoia
&& fcntl (sock
, F_SETFD
, FD_CLOEXEC
) == -1)
896 dbg_log (_("cannot set socket to close on exec: %s"),
903 /* Set permissions for the socket. */
904 chmod (_PATH_NSCDSOCKET
, DEFFILEMODE
);
906 /* Set the socket up to accept connections. */
907 if (listen (sock
, SOMAXCONN
) < 0)
909 dbg_log (_("cannot enable socket to accept connections: %s"),
915 if (dbs
[hstdb
].enabled
)
917 /* Try to open netlink socket to monitor network setting changes. */
918 nl_status_fd
= socket (AF_NETLINK
,
919 SOCK_RAW
| SOCK_CLOEXEC
| SOCK_NONBLOCK
,
921 if (nl_status_fd
!= -1)
923 struct sockaddr_nl snl
;
924 memset (&snl
, '\0', sizeof (snl
));
925 snl
.nl_family
= AF_NETLINK
;
926 /* XXX Is this the best set to use? */
927 snl
.nl_groups
= (RTMGRP_IPV4_IFADDR
| RTMGRP_TC
| RTMGRP_IPV4_MROUTE
928 | RTMGRP_IPV4_ROUTE
| RTMGRP_IPV4_RULE
929 | RTMGRP_IPV6_IFADDR
| RTMGRP_IPV6_MROUTE
930 | RTMGRP_IPV6_ROUTE
| RTMGRP_IPV6_IFINFO
931 | RTMGRP_IPV6_PREFIX
);
933 if (bind (nl_status_fd
, (struct sockaddr
*) &snl
, sizeof (snl
)) != 0)
935 close (nl_status_fd
);
940 /* Start the timestamp process. */
941 dbs
[hstdb
].head
->extra_data
[NSCD_HST_IDX_CONF_TIMESTAMP
]
942 = __bump_nl_timestamp ();
944 # ifndef __ASSUME_SOCK_CLOEXEC
945 if (have_sock_cloexec
< 0)
947 /* We don't want to get stuck on accept. */
948 int fl
= fcntl (nl_status_fd
, F_GETFL
);
950 || fcntl (nl_status_fd
, F_SETFL
, fl
| O_NONBLOCK
) == -1)
953 cannot change socket to nonblocking mode: %s"),
958 /* The descriptor needs to be closed on exec. */
960 && fcntl (nl_status_fd
, F_SETFD
, FD_CLOEXEC
) == -1)
962 dbg_log (_("cannot set socket to close on exec: %s"),
973 /* Change to unprivileged uid/gid/groups if specified in config file */
974 if (server_user
!= NULL
)
975 finish_drop_privileges ();
980 register_traced_file (size_t dbidx
, struct traced_file
*finfo
)
982 if (! dbs
[dbidx
].enabled
|| ! dbs
[dbidx
].check_file
)
985 if (__builtin_expect (debug_level
> 0, 0))
986 dbg_log (_("register trace file %s for database %s"),
987 finfo
->fname
, dbnames
[dbidx
]);
991 || (finfo
->inotify_descr
= inotify_add_watch (inotify_fd
, finfo
->fname
,
996 /* We need the modification date of the file. */
999 if (stat64 (finfo
->fname
, &st
) < 0)
1001 /* We cannot stat() the file, disable file checking. */
1002 dbg_log (_("cannot stat() file `%s': %s"),
1003 finfo
->fname
, strerror (errno
));
1007 finfo
->inotify_descr
= -1;
1008 finfo
->mtime
= st
.st_mtime
;
1011 /* Queue up the file name. */
1012 finfo
->next
= dbs
[dbidx
].traced_files
;
1013 dbs
[dbidx
].traced_files
= finfo
;
1017 /* Close the connections. */
1019 close_sockets (void)
1026 invalidate_cache (char *key
, int fd
)
1031 for (number
= pwddb
; number
< lastdb
; ++number
)
1032 if (strcmp (key
, dbnames
[number
]) == 0)
1034 if (number
== hstdb
)
1036 struct traced_file
*runp
= dbs
[hstdb
].traced_files
;
1037 while (runp
!= NULL
)
1038 if (runp
->call_res_init
)
1049 if (number
== lastdb
)
1052 writeall (fd
, &resp
, sizeof (resp
));
1056 if (dbs
[number
].enabled
)
1058 pthread_mutex_lock (&dbs
[number
].prune_run_lock
);
1059 prune_cache (&dbs
[number
], LONG_MAX
, fd
);
1060 pthread_mutex_unlock (&dbs
[number
].prune_run_lock
);
1065 writeall (fd
, &resp
, sizeof (resp
));
1072 send_ro_fd (struct database_dyn
*db
, char *key
, int fd
)
1074 /* If we do not have a read-only file descriptor do nothing. */
1075 if (db
->ro_fd
== -1)
1078 /* We need to send some data along with the descriptor. */
1079 uint64_t mapsize
= (db
->head
->data_size
1080 + roundup (db
->head
->module
* sizeof (ref_t
), ALIGN
)
1081 + sizeof (struct database_pers_head
));
1082 struct iovec iov
[2];
1083 iov
[0].iov_base
= key
;
1084 iov
[0].iov_len
= strlen (key
) + 1;
1085 iov
[1].iov_base
= &mapsize
;
1086 iov
[1].iov_len
= sizeof (mapsize
);
1088 /* Prepare the control message to transfer the descriptor. */
1092 char bytes
[CMSG_SPACE (sizeof (int))];
1094 struct msghdr msg
= { .msg_iov
= iov
, .msg_iovlen
= 2,
1095 .msg_control
= buf
.bytes
,
1096 .msg_controllen
= sizeof (buf
) };
1097 struct cmsghdr
*cmsg
= CMSG_FIRSTHDR (&msg
);
1099 cmsg
->cmsg_level
= SOL_SOCKET
;
1100 cmsg
->cmsg_type
= SCM_RIGHTS
;
1101 cmsg
->cmsg_len
= CMSG_LEN (sizeof (int));
1103 int *ip
= (int *) CMSG_DATA (cmsg
);
1106 msg
.msg_controllen
= cmsg
->cmsg_len
;
1108 /* Send the control message. We repeat when we are interrupted but
1109 everything else is ignored. */
1110 #ifndef MSG_NOSIGNAL
1111 # define MSG_NOSIGNAL 0
1113 (void) TEMP_FAILURE_RETRY (sendmsg (fd
, &msg
, MSG_NOSIGNAL
));
1115 if (__builtin_expect (debug_level
> 0, 0))
1116 dbg_log (_("provide access to FD %d, for %s"), db
->ro_fd
, key
);
1118 #endif /* SCM_RIGHTS */
1121 /* Handle new request. */
1123 handle_request (int fd
, request_header
*req
, void *key
, uid_t uid
, pid_t pid
)
1125 if (__builtin_expect (req
->version
, NSCD_VERSION
) != NSCD_VERSION
)
1127 if (debug_level
> 0)
1129 cannot handle old request version %d; current version is %d"),
1130 req
->version
, NSCD_VERSION
);
1134 /* Perform the SELinux check before we go on to the standard checks. */
1135 if (selinux_enabled
&& nscd_request_avc_has_perm (fd
, req
->type
) != 0)
1137 if (debug_level
> 0)
1146 snprintf (buf
, sizeof (buf
), "/proc/%ld/exe", (long int) pid
);
1147 ssize_t n
= readlink (buf
, buf
, sizeof (buf
) - 1);
1151 request from %ld not handled due to missing permission"), (long int) pid
);
1156 request from '%s' [%ld] not handled due to missing permission"),
1157 buf
, (long int) pid
);
1160 dbg_log (_("request not handled due to missing permission"));
1166 struct database_dyn
*db
= reqinfo
[req
->type
].db
;
1168 /* See whether we can service the request from the cache. */
1169 if (__builtin_expect (reqinfo
[req
->type
].data_request
, true))
1171 if (__builtin_expect (debug_level
, 0) > 0)
1173 if (req
->type
== GETHOSTBYADDR
|| req
->type
== GETHOSTBYADDRv6
)
1175 char buf
[INET6_ADDRSTRLEN
];
1177 dbg_log ("\t%s (%s)", serv2str
[req
->type
],
1178 inet_ntop (req
->type
== GETHOSTBYADDR
1179 ? AF_INET
: AF_INET6
,
1180 key
, buf
, sizeof (buf
)));
1183 dbg_log ("\t%s (%s)", serv2str
[req
->type
], (char *) key
);
1186 /* Is this service enabled? */
1187 if (__builtin_expect (!db
->enabled
, 0))
1189 /* No, send the prepared record. */
1190 if (TEMP_FAILURE_RETRY (send (fd
, db
->disabled_iov
->iov_base
,
1191 db
->disabled_iov
->iov_len
,
1193 != (ssize_t
) db
->disabled_iov
->iov_len
1194 && __builtin_expect (debug_level
, 0) > 0)
1196 /* We have problems sending the result. */
1198 dbg_log (_("cannot write result: %s"),
1199 strerror_r (errno
, buf
, sizeof (buf
)));
1205 /* Be sure we can read the data. */
1206 if (__builtin_expect (pthread_rwlock_tryrdlock (&db
->lock
) != 0, 0))
1208 ++db
->head
->rdlockdelayed
;
1209 pthread_rwlock_rdlock (&db
->lock
);
1212 /* See whether we can handle it from the cache. */
1213 struct datahead
*cached
;
1214 cached
= (struct datahead
*) cache_search (req
->type
, key
, req
->key_len
,
1218 /* Hurray it's in the cache. */
1221 #ifdef HAVE_SENDFILE
1222 if (__builtin_expect (db
->mmap_used
, 1))
1224 assert (db
->wr_fd
!= -1);
1225 assert ((char *) cached
->data
> (char *) db
->data
);
1226 assert ((char *) cached
->data
- (char *) db
->head
1228 <= (sizeof (struct database_pers_head
)
1229 + db
->head
->module
* sizeof (ref_t
)
1230 + db
->head
->data_size
));
1231 nwritten
= sendfileall (fd
, db
->wr_fd
,
1232 (char *) cached
->data
1233 - (char *) db
->head
, cached
->recsize
);
1234 # ifndef __ASSUME_SENDFILE
1235 if (nwritten
== -1 && errno
== ENOSYS
)
1240 # ifndef __ASSUME_SENDFILE
1244 nwritten
= writeall (fd
, cached
->data
, cached
->recsize
);
1246 if (nwritten
!= cached
->recsize
1247 && __builtin_expect (debug_level
, 0) > 0)
1249 /* We have problems sending the result. */
1251 dbg_log (_("cannot write result: %s"),
1252 strerror_r (errno
, buf
, sizeof (buf
)));
1255 pthread_rwlock_unlock (&db
->lock
);
1260 pthread_rwlock_unlock (&db
->lock
);
1262 else if (__builtin_expect (debug_level
, 0) > 0)
1264 if (req
->type
== INVALIDATE
)
1265 dbg_log ("\t%s (%s)", serv2str
[req
->type
], (char *) key
);
1267 dbg_log ("\t%s", serv2str
[req
->type
]);
1270 /* Handle the request. */
1274 addpwbyname (db
, fd
, req
, key
, uid
);
1278 addpwbyuid (db
, fd
, req
, key
, uid
);
1282 addgrbyname (db
, fd
, req
, key
, uid
);
1286 addgrbygid (db
, fd
, req
, key
, uid
);
1290 addhstbyname (db
, fd
, req
, key
, uid
);
1293 case GETHOSTBYNAMEv6
:
1294 addhstbynamev6 (db
, fd
, req
, key
, uid
);
1298 addhstbyaddr (db
, fd
, req
, key
, uid
);
1301 case GETHOSTBYADDRv6
:
1302 addhstbyaddrv6 (db
, fd
, req
, key
, uid
);
1306 addhstai (db
, fd
, req
, key
, uid
);
1310 addinitgroups (db
, fd
, req
, key
, uid
);
1314 addservbyname (db
, fd
, req
, key
, uid
);
1318 addservbyport (db
, fd
, req
, key
, uid
);
1322 addgetnetgrent (db
, fd
, req
, key
, uid
);
1326 addinnetgr (db
, fd
, req
, key
, uid
);
1333 /* Get the caller's credentials. */
1335 struct ucred caller
;
1336 socklen_t optlen
= sizeof (caller
);
1338 if (getsockopt (fd
, SOL_SOCKET
, SO_PEERCRED
, &caller
, &optlen
) < 0)
1342 dbg_log (_("error getting caller's id: %s"),
1343 strerror_r (errno
, buf
, sizeof (buf
)));
1349 /* Some systems have no SO_PEERCRED implementation. They don't
1350 care about security so we don't either. */
1355 /* Accept shutdown, getstat and invalidate only from root. For
1356 the stat call also allow the user specified in the config file. */
1357 if (req
->type
== GETSTAT
)
1359 if (uid
== 0 || uid
== stat_uid
)
1360 send_stats (fd
, dbs
);
1364 if (req
->type
== INVALIDATE
)
1365 invalidate_cache (key
, fd
);
1367 termination_handler (0);
1377 send_ro_fd (reqinfo
[req
->type
].db
, key
, fd
);
1382 /* Ignore the command, it's nothing we know. */
1388 /* Restart the process. */
1392 /* First determine the parameters. We do not use the parameters
1393 passed to main() since in case nscd is started by running the
1394 dynamic linker this will not work. Yes, this is not the usual
1395 case but nscd is part of glibc and we occasionally do this. */
1396 size_t buflen
= 1024;
1397 char *buf
= alloca (buflen
);
1399 int fd
= open ("/proc/self/cmdline", O_RDONLY
);
1403 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1412 ssize_t n
= TEMP_FAILURE_RETRY (read (fd
, buf
+ readlen
,
1417 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1427 if (readlen
< buflen
)
1430 /* We might have to extend the buffer. */
1431 size_t old_buflen
= buflen
;
1432 char *newp
= extend_alloca (buf
, buflen
, 2 * buflen
);
1433 buf
= memmove (newp
, buf
, old_buflen
);
1438 /* Parse the command line. Worst case scenario: every two
1439 characters form one parameter (one character plus NUL). */
1440 char **argv
= alloca ((readlen
/ 2 + 1) * sizeof (argv
[0]));
1444 while (cp
< buf
+ readlen
)
1447 cp
= (char *) rawmemchr (cp
, '\0') + 1;
1451 /* Second, change back to the old user if we changed it. */
1452 if (server_user
!= NULL
)
1454 if (setresuid (old_uid
, old_uid
, old_uid
) != 0)
1457 cannot change to old UID: %s; disabling paranoia mode"),
1464 if (setresgid (old_gid
, old_gid
, old_gid
) != 0)
1467 cannot change to old GID: %s; disabling paranoia mode"),
1470 setuid (server_uid
);
1476 /* Next change back to the old working directory. */
1477 if (chdir (oldcwd
) == -1)
1480 cannot change to old working directory: %s; disabling paranoia mode"),
1483 if (server_user
!= NULL
)
1485 setuid (server_uid
);
1486 setgid (server_gid
);
1492 /* Synchronize memory. */
1493 int32_t certainly
[lastdb
];
1494 for (int cnt
= 0; cnt
< lastdb
; ++cnt
)
1495 if (dbs
[cnt
].enabled
)
1497 /* Make sure nobody keeps using the database. */
1498 dbs
[cnt
].head
->timestamp
= 0;
1499 certainly
[cnt
] = dbs
[cnt
].head
->nscd_certainly_running
;
1500 dbs
[cnt
].head
->nscd_certainly_running
= 0;
1502 if (dbs
[cnt
].persistent
)
1504 msync (dbs
[cnt
].head
, dbs
[cnt
].memsize
, MS_ASYNC
);
1507 /* The preparations are done. */
1509 char pathbuf
[PATH_MAX
];
1513 /* Try to exec the real nscd program so the process name (as reported
1514 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1515 if readlink or the exec with the result of the readlink call fails. */
1516 ssize_t n
= readlink ("/proc/self/exe", pathbuf
, sizeof (pathbuf
) - 1);
1520 execv (pathbuf
, argv
);
1522 execv ("/proc/self/exe", argv
);
1524 /* If we come here, we will never be able to re-exec. */
1525 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1528 if (server_user
!= NULL
)
1530 setuid (server_uid
);
1531 setgid (server_gid
);
1533 if (chdir ("/") != 0)
1534 dbg_log (_("cannot change current working directory to \"/\": %s"),
1538 /* Reenable the databases. */
1539 time_t now
= time (NULL
);
1540 for (int cnt
= 0; cnt
< lastdb
; ++cnt
)
1541 if (dbs
[cnt
].enabled
)
1543 dbs
[cnt
].head
->timestamp
= now
;
1544 dbs
[cnt
].head
->nscd_certainly_running
= certainly
[cnt
];
1549 /* List of file descriptors. */
1553 struct fdlist
*next
;
1555 /* Memory allocated for the list. */
1556 static struct fdlist
*fdlist
;
1557 /* List of currently ready-to-read file descriptors. */
1558 static struct fdlist
*readylist
;
1560 /* Condition variable and mutex to signal availability of entries in
1561 READYLIST. The condvar is initialized dynamically since we might
1562 use a different clock depending on availability.
NOTE(review): the definition right below uses PTHREAD_COND_INITIALIZER,
i.e. a static initializer, so the remark about dynamic initialization
looks stale or refers to a different condvar -- confirm. */
1563 static pthread_cond_t readylist_cond
= PTHREAD_COND_INITIALIZER
;
1564 static pthread_mutex_t readylist_lock
= PTHREAD_MUTEX_INITIALIZER
;
1566 /* The clock to use with the condvar. Starts out as CLOCK_REALTIME;
start_threads switches it to CLOCK_MONOTONIC when that clock can be
selected on the condvar attribute. */
1567 static clockid_t timeout_clock
= CLOCK_REALTIME
;
1569 /* Number of threads ready to handle the READYLIST. */
1570 static unsigned long int nready
;
1573 /* Function for the clean-up threads.

P carries the index of the database this thread looks after, passed
as an integer cast to a pointer. The thread repeatedly waits on the
prune condvar of that database with an absolute deadline and, when
the deadline passes, the advertised wakeup time is reached, or a
cache clear was requested, runs prune_cache and computes the next
deadline. The function is declared noreturn.
NOTE(review): braces, the loop header and a few declarations are not
visible in this excerpt; confirm the exact nesting in the full file. */
1575 __attribute__ ((__noreturn__
))
1576 nscd_run_prune (void *p
)
1578 const long int my_number
= (long int) p
;
1579 assert (dbs
[my_number
].enabled
);
/* A nonzero result means the periodic timestamp refresh further down
is skipped. */
1581 int dont_need_update
= setup_thread (&dbs
[my_number
]);
1583 time_t now
= time (NULL
);
1585 /* We are running. */
1586 dbs
[my_number
].head
->timestamp
= now
;
/* PRUNE_TS is the absolute deadline handed to pthread_cond_timedwait;
it is read from TIMEOUT_CLOCK. */
1588 struct timespec prune_ts
;
1589 if (__builtin_expect (clock_gettime (timeout_clock
, &prune_ts
) == -1, 0))
1590 /* Should never happen. */
1593 /* Compute the initial timeout time. Prevent all the timers from going
1594 off at the same time by adding a db-based value. */
1595 prune_ts
.tv_sec
+= CACHE_PRUNE_INTERVAL
+ my_number
;
1596 dbs
[my_number
].wakeup_time
= now
+ CACHE_PRUNE_INTERVAL
+ my_number
;
1598 pthread_mutex_t
*prune_lock
= &dbs
[my_number
].prune_lock
;
1599 pthread_mutex_t
*prune_run_lock
= &dbs
[my_number
].prune_run_lock
;
1600 pthread_cond_t
*prune_cond
= &dbs
[my_number
].prune_cond
;
1602 pthread_mutex_lock (prune_lock
);
1605 /* Wait, but not forever. The wait is skipped entirely when a cache
clear has already been requested. */
1607 if (! dbs
[my_number
].clear_cache
)
1608 e
= pthread_cond_timedwait (prune_cond
, prune_lock
, &prune_ts
);
1609 assert (__builtin_expect (e
== 0 || e
== ETIMEDOUT
, 1));
/* Prune when the wait timed out, the advertised wakeup time has been
reached, or a clear was requested. */
1613 if (e
== ETIMEDOUT
|| now
>= dbs
[my_number
].wakeup_time
1614 || dbs
[my_number
].clear_cache
)
1616 /* We will determine the new timeout values based on the
1617 cache content. Should there be concurrent additions to
1618 the cache which are not accounted for in the cache
1619 pruning we want to know about it. Therefore set the
1620 timeout to the maximum. It will be decreased when adding
1621 new entries to the cache, if necessary. */
1622 dbs
[my_number
].wakeup_time
= MAX_TIMEOUT_VALUE
;
1624 /* Unconditionally reset the flag. When a clear was requested,
LONG_MAX is passed to prune_cache as the current time; presumably
this makes every entry count as expired -- confirm against
prune_cache. */
1625 time_t prune_now
= dbs
[my_number
].clear_cache
? LONG_MAX
: now
;
1626 dbs
[my_number
].clear_cache
= 0;
1628 pthread_mutex_unlock (prune_lock
);
1630 /* We use a separate lock for running the prune function (instead
1631 of keeping prune_lock locked) because this enables concurrent
1632 invocations of cache_add which might modify the timeout value. */
1633 pthread_mutex_lock (prune_run_lock
);
1634 next_wait
= prune_cache (&dbs
[my_number
], prune_now
, -1);
1635 pthread_mutex_unlock (prune_run_lock
);
/* Never wait less than CACHE_PRUNE_INTERVAL. */
1637 next_wait
= MAX (next_wait
, CACHE_PRUNE_INTERVAL
);
1638 /* If clients cannot determine for sure whether nscd is running
1639 we need to wake up occasionally to update the timestamp.
1640 Wait 90% of the update period. */
1641 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1642 if (__builtin_expect (! dont_need_update
, 0))
1644 next_wait
= MIN (UPDATE_MAPPING_TIMEOUT
, next_wait
);
1645 dbs
[my_number
].head
->timestamp
= now
;
1648 pthread_mutex_lock (prune_lock
);
1650 /* Make it known when we will wake up again. An earlier wakeup time
stored in the meantime is kept. */
1651 if (now
+ next_wait
< dbs
[my_number
].wakeup_time
)
1652 dbs
[my_number
].wakeup_time
= now
+ next_wait
;
1654 next_wait
= dbs
[my_number
].wakeup_time
- now
;
1657 /* The cache was just pruned. Do not do it again now. Just
1658 use the new timeout value. */
1659 next_wait
= dbs
[my_number
].wakeup_time
- now
;
1661 if (clock_gettime (timeout_clock
, &prune_ts
) == -1)
1662 /* Should never happen. */
1665 /* Compute next timeout time. */
1666 prune_ts
.tv_sec
+= next_wait
;
1671 /* This is the main loop. It is replicated in different threads but
1672 the use of the ready list makes sure only one thread handles an
1673 incoming connection. */
1675 __attribute__ ((__noreturn__
))
1676 nscd_run_worker (void *p
)
1680 /* Initial locking. */
1681 pthread_mutex_lock (&readylist_lock
);
1683 /* One more thread available. */
1688 while (readylist
== NULL
)
1689 pthread_cond_wait (&readylist_cond
, &readylist_lock
);
1691 struct fdlist
*it
= readylist
->next
;
1692 if (readylist
->next
== readylist
)
1693 /* Just one entry on the list. */
1696 readylist
->next
= it
->next
;
1698 /* Extract the information and mark the record ready to be used again. */
1703 /* One more thread available. */
1706 /* We are done with the list. */
1707 pthread_mutex_unlock (&readylist_lock
);
1709 #ifndef __ASSUME_ACCEPT4
1710 if (have_accept4
< 0)
1712 /* We do not want to block on a short read or so. */
1713 int fl
= fcntl (fd
, F_GETFL
);
1714 if (fl
== -1 || fcntl (fd
, F_SETFL
, fl
| O_NONBLOCK
) == -1)
1719 /* Now read the request. */
1721 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd
, &req
, sizeof (req
)))
1722 != sizeof (req
), 0))
1724 /* We failed to read data. Note that this also might mean we
1725 failed because we would have blocked. */
1726 if (debug_level
> 0)
1727 dbg_log (_("short read while reading request: %s"),
1728 strerror_r (errno
, buf
, sizeof (buf
)));
1732 /* Check whether this is a valid request type. */
1733 if (req
.type
< GETPWBYNAME
|| req
.type
>= LASTREQ
)
1736 /* Some systems have no SO_PEERCRED implementation. They don't
1737 care about security, so we don't either. */
1742 if (__builtin_expect (debug_level
> 0, 0))
1744 struct ucred caller
;
1745 socklen_t optlen
= sizeof (caller
);
1747 if (getsockopt (fd
, SOL_SOCKET
, SO_PEERCRED
, &caller
, &optlen
) == 0)
1751 const pid_t pid
= 0;
1754 /* It should not be possible to crash the nscd with a silly
1755 request (i.e., a terribly large key). We limit the size to 1kb. */
1756 if (__builtin_expect (req
.key_len
, 1) < 0
1757 || __builtin_expect (req
.key_len
, 1) > MAXKEYLEN
)
1759 if (debug_level
> 0)
1760 dbg_log (_("key length in request too long: %d"), req
.key_len
);
1765 char keybuf
[MAXKEYLEN
];
1767 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd
, keybuf
,
1771 /* Again, this can also mean we would have blocked. */
1772 if (debug_level
> 0)
1773 dbg_log (_("short read while reading request key: %s"),
1774 strerror_r (errno
, buf
, sizeof (buf
)));
1778 if (__builtin_expect (debug_level
, 0) > 0)
1783 handle_request: request received (Version = %d) from PID %ld"),
1784 req
.version
, (long int) pid
);
1788 handle_request: request received (Version = %d)"), req
.version
);
1791 /* Phew, we got all the data, now process it. */
1792 handle_request (fd
, &req
, keybuf
, uid
, pid
);
1800 pthread_mutex_lock (&readylist_lock
);
1802 /* One more thread available. */
1809 static unsigned int nconns
;
/* NOTE(review): the function header is not visible in this excerpt.
The statements below use a variable FD and queue it on READYLIST,
which matches the fd_ready (fd) calls made by the main loops --
confirm.
Under READYLIST_LOCK: pick an FDLIST slot whose next pointer is NULL
(unused), store FD in it and link it into the circular ready list.
READYLIST always points at the newest element, so READYLIST->next is
the oldest one. Afterwards the lock is dropped and a worker is
signalled. */
1814 pthread_mutex_lock (&readylist_lock
);
1816 /* Find an empty entry in FDLIST. */
1818 for (inner
= 0; inner
< nconns
; ++inner
)
1819 if (fdlist
[inner
].next
== NULL
)
1821 assert (inner
< nconns
);
1823 fdlist
[inner
].fd
= fd
;
/* Empty list: the new element is linked to itself. */
1825 if (readylist
== NULL
)
1826 readylist
= fdlist
[inner
].next
= &fdlist
[inner
];
/* Otherwise insert after the current newest element and make the
new element the newest. */
1829 fdlist
[inner
].next
= readylist
->next
;
1830 readylist
= readylist
->next
= &fdlist
[inner
];
1833 bool do_signal
= true;
/* No worker is currently counted as ready. */
1834 if (__builtin_expect (nready
== 0, 0))
1839 /* Try to start another thread to help out. */
1841 if (nthreads
< max_nthreads
1842 && pthread_create (&th
, &attr
, nscd_run_worker
,
1843 (void *) (long int) nthreads
) == 0)
1845 /* We got another thread. */
1847 /* The new thread might need a kick. */
1853 pthread_mutex_unlock (&readylist_lock
);
1855 /* Tell one of the worker threads there is work to do. */
1857 pthread_cond_signal (&readylist_cond
);
1861 /* Check whether restarting should happen.
The result is true only when all of the following hold: PARANOIA is
set, no descriptor is queued on READYLIST, every worker thread is
counted as ready (NREADY equals NTHREADS), and NOW has reached
RESTART_TIME. */
1863 restart_p (time_t now
)
1865 return (paranoia
&& readylist
== NULL
&& nready
== nthreads
1866 && now
>= restart_time
);
1870 /* Array for times a connection was accepted. */
1871 static time_t *starttime
;
/* Main-thread event loop built on poll.

The visible code sets up a pollfd array of NCONNS entries. Slot 0 is
polled for POLLRDNORM and its revents announce a new incoming
connection; slot 1 holds INOTIFY_FD when inotify is in use; the slot
recorded in IDX_NL_STATUS_FD holds NL_STATUS_FD when that is open.
Each pass then
- calls poll with MAIN_THREAD_TIMEOUT (milliseconds, shrinking as
NUSED grows),
- accepts a pending connection (accept4 when usable, otherwise accept)
and stores it at FIRSTFREE together with its accept time in STARTTIME,
- on inotify activity marks the databases whose traced files changed,
sets their clear_cache flag and signals their prune condvar,
- drains NL_STATUS_FD and stores a fresh timestamp in the header of
the hosts database,
- hands every other ready descriptor to fd_ready,
- closes connections whose accept time is older than ACCEPT_TIMEOUT
seconds,
- and finally evaluates restart_p.
In this loop STARTTIME is indexed by the slot in CONNS.
NOTE(review): braces and several declarations and statements are not
visible in this excerpt, so the exact nesting of the steps above
should be confirmed against the full file. */
1875 __attribute__ ((__noreturn__
))
1876 main_loop_poll (void)
1878 struct pollfd
*conns
= (struct pollfd
*) xmalloc (nconns
1879 * sizeof (conns
[0]));
1882 conns
[0].events
= POLLRDNORM
;
/* Index at which the next accepted connection is stored. */
1884 size_t firstfree
= 1;
1887 if (inotify_fd
!= -1)
1889 conns
[1].fd
= inotify_fd
;
1890 conns
[1].events
= POLLRDNORM
;
/* Zero means no netlink status descriptor is being polled. */
1897 size_t idx_nl_status_fd
= 0;
1898 if (nl_status_fd
!= -1)
1900 idx_nl_status_fd
= nused
;
1901 conns
[nused
].fd
= nl_status_fd
;
1902 conns
[nused
].events
= POLLRDNORM
;
1910 /* Wait for any event. We wait at most a couple of seconds so
1911 that we can check whether we should close any of the accepted
1912 connections since we have not received a request. */
1913 #define MAX_ACCEPT_TIMEOUT 30
1914 #define MIN_ACCEPT_TIMEOUT 5
1915 #define MAIN_THREAD_TIMEOUT \
1916 (MAX_ACCEPT_TIMEOUT * 1000 \
1917 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1919 int n
= poll (conns
, nused
, MAIN_THREAD_TIMEOUT
);
1921 time_t now
= time (NULL
);
1923 /* If there is a descriptor ready for reading or there is a new
1924 connection, process this now. */
1927 if (conns
[0].revents
!= 0)
1929 /* We have a new incoming connection. Accept the connection. */
1932 #ifndef __ASSUME_ACCEPT4
1934 if (have_accept4
>= 0)
1937 fd
= TEMP_FAILURE_RETRY (accept4 (sock
, NULL
, NULL
,
1939 #ifndef __ASSUME_ACCEPT4
1940 if (have_accept4
== 0)
/* First attempt: remember whether accept4 works. Only a failure
with ENOSYS marks it as unavailable (-1). */
1941 have_accept4
= fd
!= -1 || errno
!= ENOSYS
? 1 : -1;
1944 #ifndef __ASSUME_ACCEPT4
1945 if (have_accept4
< 0)
1946 fd
= TEMP_FAILURE_RETRY (accept (sock
, NULL
, NULL
));
1949 /* Use the descriptor if we have not reached the limit. */
1952 if (firstfree
< nconns
)
1954 conns
[firstfree
].fd
= fd
;
1955 conns
[firstfree
].events
= POLLRDNORM
;
1956 starttime
[firstfree
] = now
;
1957 if (firstfree
>= nused
)
1958 nused
= firstfree
+ 1;
1962 while (firstfree
< nused
&& conns
[firstfree
].fd
!= -1);
1965 /* We cannot use the connection so close it. */
1974 if (inotify_fd
!= -1 && conns
[1].fd
== inotify_fd
)
1976 if (conns
[1].revents
!= 0)
1978 bool to_clear
[lastdb
] = { false, };
1982 # define PATH_MAX 1024
1984 struct inotify_event i
;
1985 char buf
[sizeof (struct inotify_event
) + PATH_MAX
];
1990 ssize_t nb
= TEMP_FAILURE_RETRY (read (inotify_fd
, &inev
,
1992 if (nb
< (ssize_t
) sizeof (struct inotify_event
))
1994 if (__builtin_expect (nb
== -1 && errno
!= EAGAIN
,
1997 /* Something went wrong when reading the inotify
1998 data. Better disable inotify. */
2000 disabled inotify after read error %d"),
2012 /* Check which of the files changed. */
2013 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
2015 struct traced_file
*finfo
= dbs
[dbcnt
].traced_files
;
2017 while (finfo
!= NULL
)
2019 if (finfo
->inotify_descr
== inev
.i
.wd
)
2021 to_clear
[dbcnt
] = true;
2022 if (finfo
->call_res_init
)
2027 finfo
= finfo
->next
;
2033 /* Actually perform the cache clearing. */
2034 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
2035 if (to_clear
[dbcnt
])
2037 pthread_mutex_lock (&dbs
[dbcnt
].prune_lock
);
2038 dbs
[dbcnt
].clear_cache
= 1;
2039 pthread_mutex_unlock (&dbs
[dbcnt
].prune_lock
);
2040 pthread_cond_signal (&dbs
[dbcnt
].prune_cond
);
2051 if (idx_nl_status_fd
!= 0 && conns
[idx_nl_status_fd
].revents
!= 0)
2054 /* Read all the data. We do not interpret it here. */
2055 while (TEMP_FAILURE_RETRY (read (nl_status_fd
, buf
,
2056 sizeof (buf
))) != -1)
2059 dbs
[hstdb
].head
->extra_data
[NSCD_HST_IDX_CONF_TIMESTAMP
]
2060 = __bump_nl_timestamp ();
2064 for (size_t cnt
= first
; cnt
< nused
&& n
> 0; ++cnt
)
2065 if (conns
[cnt
].revents
!= 0)
2067 fd_ready (conns
[cnt
].fd
);
2069 /* Clean up the CONNS array. */
2071 if (cnt
< firstfree
)
2073 if (cnt
== nused
- 1)
2076 while (conns
[nused
- 1].fd
== -1);
2082 /* Now find entries which have timed out. */
2085 /* We make the timeout length depend on the number of file
2086 descriptors currently used. */
2087 #define ACCEPT_TIMEOUT \
2088 (MAX_ACCEPT_TIMEOUT \
2089 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
2090 time_t laststart
= now
- ACCEPT_TIMEOUT
;
2092 for (size_t cnt
= nused
- 1; cnt
> 0; --cnt
)
2094 if (conns
[cnt
].fd
!= -1 && starttime
[cnt
] < laststart
)
2096 /* Remove the entry, it timed out. */
2097 (void) close (conns
[cnt
].fd
);
2100 if (cnt
< firstfree
)
2102 if (cnt
== nused
- 1)
2105 while (conns
[nused
- 1].fd
== -1);
2109 if (restart_p (now
))
/* Main-thread event loop built on epoll; EFD is the epoll descriptor.

SOCK, and INOTIFY_FD and NL_STATUS_FD when they are open, are
registered with EPOLL_CTL_ADD for EPOLLRDNORM. A failed registration
means epoll cannot be used; the statement executed on that path is
not visible in this excerpt. Each pass waits in epoll_wait for up to
MAIN_THREAD_TIMEOUT milliseconds and then walks the returned events:
- SOCK: accept the connection (accept4 when usable, otherwise accept),
add it to EFD and remember the accept time in STARTTIME,
- INOTIFY_FD: mark the databases whose traced files changed, set their
clear_cache flag and signal their prune condvar,
- NL_STATUS_FD: drain the descriptor and bump the netlink timestamp,
- anything else: remove the descriptor from EFD, pass it to fd_ready
and reset its STARTTIME entry.
Afterwards descriptors whose accept time is older than ACCEPT_TIMEOUT
seconds are removed from EFD, and restart_p is evaluated.
In this loop STARTTIME is indexed by the descriptor number itself; a
zero entry means no accepted connection is waiting on that number.
NOTE(review): braces and several declarations and statements are not
visible in this excerpt, so the exact nesting of the steps above
should be confirmed against the full file. */
2117 main_loop_epoll (int efd
)
2119 struct epoll_event ev
= { 0, };
2123 /* Add the socket. */
2124 ev
.events
= EPOLLRDNORM
;
2126 if (epoll_ctl (efd
, EPOLL_CTL_ADD
, sock
, &ev
) == -1)
2127 /* We cannot use epoll. */
2130 # ifdef HAVE_INOTIFY
2131 if (inotify_fd
!= -1)
2133 ev
.events
= EPOLLRDNORM
;
2134 ev
.data
.fd
= inotify_fd
;
2135 if (epoll_ctl (efd
, EPOLL_CTL_ADD
, inotify_fd
, &ev
) == -1)
2136 /* We cannot use epoll. */
2142 # ifdef HAVE_NETLINK
2143 if (nl_status_fd
!= -1)
2145 ev
.events
= EPOLLRDNORM
;
2146 ev
.data
.fd
= nl_status_fd
;
2147 if (epoll_ctl (efd
, EPOLL_CTL_ADD
, nl_status_fd
, &ev
) == -1)
2148 /* We cannot use epoll. */
/* At most 100 events are fetched per epoll_wait call. */
2155 struct epoll_event revs
[100];
2156 # define nrevs (sizeof (revs) / sizeof (revs[0]))
2158 int n
= epoll_wait (efd
, revs
, nrevs
, MAIN_THREAD_TIMEOUT
);
2160 time_t now
= time (NULL
);
2162 for (int cnt
= 0; cnt
< n
; ++cnt
)
2163 if (revs
[cnt
].data
.fd
== sock
)
2165 /* A new connection. */
2168 # ifndef __ASSUME_ACCEPT4
2170 if (have_accept4
>= 0)
2173 fd
= TEMP_FAILURE_RETRY (accept4 (sock
, NULL
, NULL
,
2175 # ifndef __ASSUME_ACCEPT4
2176 if (have_accept4
== 0)
/* First attempt: remember whether accept4 works. Only a failure
with ENOSYS marks it as unavailable (-1). */
2177 have_accept4
= fd
!= -1 || errno
!= ENOSYS
? 1 : -1;
2180 # ifndef __ASSUME_ACCEPT4
2181 if (have_accept4
< 0)
2182 fd
= TEMP_FAILURE_RETRY (accept (sock
, NULL
, NULL
));
2185 /* Use the descriptor if we have not reached the limit. */
2188 /* Try to add the new descriptor. */
2191 || epoll_ctl (efd
, EPOLL_CTL_ADD
, fd
, &ev
) == -1)
2192 /* The descriptor is too large or something went
2193 wrong. Close the descriptor. */
2197 /* Remember when we accepted the connection. */
2198 starttime
[fd
] = now
;
2207 # ifdef HAVE_INOTIFY
2208 else if (revs
[cnt
].data
.fd
== inotify_fd
)
2210 bool to_clear
[lastdb
] = { false, };
2213 struct inotify_event i
;
2214 char buf
[sizeof (struct inotify_event
) + PATH_MAX
];
2219 ssize_t nb
= TEMP_FAILURE_RETRY (read (inotify_fd
, &inev
,
2221 if (nb
< (ssize_t
) sizeof (struct inotify_event
))
2223 if (__builtin_expect (nb
== -1 && errno
!= EAGAIN
, 0))
2225 /* Something went wrong when reading the inotify
2226 data. Better disable inotify. */
2227 dbg_log (_("disabled inotify after read error %d"),
2229 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, inotify_fd
,
2237 /* Check which of the files changed. */
2238 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
2240 struct traced_file
*finfo
= dbs
[dbcnt
].traced_files
;
2242 while (finfo
!= NULL
)
2244 if (finfo
->inotify_descr
== inev
.i
.wd
)
2246 to_clear
[dbcnt
] = true;
2247 if (finfo
->call_res_init
)
2252 finfo
= finfo
->next
;
2258 /* Actually perform the cache clearing. */
2259 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
2260 if (to_clear
[dbcnt
])
2262 pthread_mutex_lock (&dbs
[dbcnt
].prune_lock
);
2263 dbs
[dbcnt
].clear_cache
= 1;
2264 pthread_mutex_unlock (&dbs
[dbcnt
].prune_lock
);
2265 pthread_cond_signal (&dbs
[dbcnt
].prune_cond
);
2269 # ifdef HAVE_NETLINK
2270 else if (revs
[cnt
].data
.fd
== nl_status_fd
)
2273 /* Read all the data. We do not interpret it here. */
2274 while (TEMP_FAILURE_RETRY (read (nl_status_fd
, buf
,
2275 sizeof (buf
))) != -1)
/* NOTE(review): the value returned here is discarded, whereas
main_loop_poll stores it in the NSCD_HST_IDX_CONF_TIMESTAMP slot of
the hosts database header. This looks like an inconsistency between
the two loops -- confirm whether the assignment is missing. */
2278 __bump_nl_timestamp ();
2283 /* Remove the descriptor from the epoll descriptor. */
2284 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, revs
[cnt
].data
.fd
, NULL
);
2286 /* Get a worker to handle the request. */
2287 fd_ready (revs
[cnt
].data
.fd
);
2289 /* Reset the time. */
2290 starttime
[revs
[cnt
].data
.fd
] = 0;
2291 if (revs
[cnt
].data
.fd
== highest
)
2294 while (highest
> 0 && starttime
[highest
] == 0);
2299 /* Now look for accepted connections which have been waiting
2300 without a request for too long. */
2301 time_t laststart
= now
- ACCEPT_TIMEOUT
;
/* The listening socket and the inotify and netlink descriptors must
never carry an accept time. */
2302 assert (starttime
[sock
] == 0);
2303 assert (inotify_fd
== -1 || starttime
[inotify_fd
] == 0);
2304 assert (nl_status_fd
== -1 || starttime
[nl_status_fd
] == 0);
/* Scan downwards from HIGHEST and stop above the standard streams. */
2305 for (int cnt
= highest
; cnt
> STDERR_FILENO
; --cnt
)
2306 if (starttime
[cnt
] != 0 && starttime
[cnt
] < laststart
)
2308 /* We are waiting for this one for too long. Close it. */
2309 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, cnt
, NULL
);
2317 else if (cnt
!= sock
&& starttime
[cnt
] == 0 && cnt
== highest
)
2320 if (restart_p (now
))
2327 /* Start all the threads we want. The initial process is thread no. 1. */
2329 start_threads (void)
2331 /* Initialize the attribute object for the condition variables we
2332 will use. The only non-standard attribute we might set is the clock selection. */
2333 pthread_condattr_t condattr
;
2334 pthread_condattr_init (&condattr
);
2336 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2337 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2338 /* Determine whether the monotonic clock is available. */
2339 struct timespec dummy
;
2340 # if _POSIX_MONOTONIC_CLOCK == 0
2341 if (sysconf (_SC_MONOTONIC_CLOCK
) > 0)
2343 # if _POSIX_CLOCK_SELECTION == 0
2344 if (sysconf (_SC_CLOCK_SELECTION
) > 0)
2346 if (clock_getres (CLOCK_MONOTONIC
, &dummy
) == 0
2347 && pthread_condattr_setclock (&condattr
, CLOCK_MONOTONIC
) == 0)
2348 timeout_clock
= CLOCK_MONOTONIC
;
2351 /* Create the attribute for the threads. They are all created detached. */
2353 pthread_attr_init (&attr
);
2354 pthread_attr_setdetachstate (&attr
, PTHREAD_CREATE_DETACHED
);
2355 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2356 pthread_attr_setstacksize (&attr
, NSCD_THREAD_STACKSIZE
);
2358 /* We allow less than LASTDB threads only for debugging. */
2359 if (debug_level
== 0)
2360 nthreads
= MAX (nthreads
, lastdb
);
2362 /* Create the threads which prune the databases. */
2363 // XXX Ideally this work would be done by some of the worker threads.
2364 // XXX But this is problematic since we would need to be able to wake
2365 // XXX them up explicitly as well as part of the group handling the
2366 // XXX ready-list. This requires an operation where we can wait on
2367 // XXX two conditional variables at the same time. This operation
2368 // XXX does not exist (yet).
2369 for (long int i
= 0; i
< lastdb
; ++i
)
2371 /* Initialize the conditional variable. */
2372 if (pthread_cond_init (&dbs
[i
].prune_cond
, &condattr
) != 0)
2374 dbg_log (_("could not initialize conditional variable"));
2380 && pthread_create (&th
, &attr
, nscd_run_prune
, (void *) i
) != 0)
2382 dbg_log (_("could not start clean-up thread; terminating"));
2387 pthread_condattr_destroy (&condattr
);
2389 for (long int i
= 0; i
< nthreads
; ++i
)
2392 if (pthread_create (&th
, &attr
, nscd_run_worker
, NULL
) != 0)
2396 dbg_log (_("could not start any worker thread; terminating"));
2404 /* Determine how much room for descriptors we should initially
2405 allocate. This might need to change later if we cap the number at MAXCONN. */
2407 const long int nfds
= sysconf (_SC_OPEN_MAX
);
2409 #define MAXCONN 16384
2410 if (nfds
== -1 || nfds
> MAXCONN
)
2412 else if (nfds
< MINCONN
)
2417 /* We need memory to pass descriptors on to the worker threads. */
2418 fdlist
= (struct fdlist
*) xcalloc (nconns
, sizeof (fdlist
[0]));
2419 /* Array to keep track when connection was accepted. */
2420 starttime
= (time_t *) xcalloc (nconns
, sizeof (starttime
[0]));
2422 /* In the main thread we execute the loop which handles incoming connections. */
2425 int efd
= epoll_create (100);
2428 main_loop_epoll (efd
);
2437 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2438 this function is called, we are not listening on the nscd socket yet so
2439 we can just use the ordinary lookup functions without causing a lockup.

The results are stored in the file-scope variables SERVER_UID,
SERVER_GID, SERVER_GROUPS and SERVER_NGROUPS; the current real uid
and gid are saved in OLD_UID and OLD_GID. Every failure path logs a
message and terminates through error with EXIT_FAILURE.
NOTE(review): the conditions guarding some of the failure paths are
not visible in this excerpt. */
2441 begin_drop_privileges (void)
2443 struct passwd
*pwd
= getpwnam (server_user
);
2447 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2448 error (EXIT_FAILURE
, 0, _("Failed to run nscd as user '%s'"),
2452 server_uid
= pwd
->pw_uid
;
2453 server_gid
= pwd
->pw_gid
;
2455 /* Save the old UID/GID if we have to change back. */
2458 old_uid
= getuid ();
2459 old_gid
= getgid ();
/* Size probe: no buffer is passed, so the call is expected to fail and
report the required number of groups in SERVER_NGROUPS. A return
value of zero is treated as fatal. */
2462 if (getgrouplist (server_user
, server_gid
, NULL
, &server_ngroups
) == 0)
2464 /* This really must never happen. */
2465 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2466 error (EXIT_FAILURE
, errno
, _("initial getgrouplist failed"));
2469 server_groups
= (gid_t
*) xmalloc (server_ngroups
* sizeof (gid_t
));
/* Second call fills the freshly allocated array. */
2471 if (getgrouplist (server_user
, server_gid
, server_groups
, &server_ngroups
)
2474 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2475 error (EXIT_FAILURE
, errno
, _("getgrouplist failed"));
2480 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2481 run nscd as the user specified in the configuration file. */
2483 finish_drop_privileges (void)
2485 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2486 /* We need to preserve the capabilities to connect to the audit daemon. */
2487 cap_t new_caps
= preserve_capabilities ();
2490 if (setgroups (server_ngroups
, server_groups
) == -1)
2492 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2493 error (EXIT_FAILURE
, errno
, _("setgroups failed"));
2498 res
= setresgid (server_gid
, server_gid
, old_gid
);
2500 res
= setgid (server_gid
);
2503 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2509 res
= setresuid (server_uid
, server_uid
, old_uid
);
2511 res
= setuid (server_uid
);
2514 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2519 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2520 /* Remove the temporary capabilities. */
2521 install_real_capabilities (new_caps
);