1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2013 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>. */
34 #include <arpa/inet.h>
36 # include <linux/netlink.h>
37 # include <linux/rtnetlink.h>
40 # include <sys/epoll.h>
43 # include <sys/inotify.h>
46 #include <sys/param.h>
49 # include <sys/sendfile.h>
51 #include <sys/socket.h>
58 #include <resolv/resolv.h>
60 #include <kernel-features.h>
63 /* Support to run nscd as an unprivileged user */
64 const char *server_user
;
65 static uid_t server_uid
;
66 static gid_t server_gid
;
67 const char *stat_user
;
69 static gid_t
*server_groups
;
73 static int server_ngroups
;
75 static pthread_attr_t attr
;
77 static void begin_drop_privileges (void);
78 static void finish_drop_privileges (void);
80 /* Map request type to a string. */
81 const char *const serv2str
[LASTREQ
] =
83 [GETPWBYNAME
] = "GETPWBYNAME",
84 [GETPWBYUID
] = "GETPWBYUID",
85 [GETGRBYNAME
] = "GETGRBYNAME",
86 [GETGRBYGID
] = "GETGRBYGID",
87 [GETHOSTBYNAME
] = "GETHOSTBYNAME",
88 [GETHOSTBYNAMEv6
] = "GETHOSTBYNAMEv6",
89 [GETHOSTBYADDR
] = "GETHOSTBYADDR",
90 [GETHOSTBYADDRv6
] = "GETHOSTBYADDRv6",
91 [SHUTDOWN
] = "SHUTDOWN",
92 [GETSTAT
] = "GETSTAT",
93 [INVALIDATE
] = "INVALIDATE",
94 [GETFDPW
] = "GETFDPW",
95 [GETFDGR
] = "GETFDGR",
96 [GETFDHST
] = "GETFDHST",
98 [INITGROUPS
] = "INITGROUPS",
99 [GETSERVBYNAME
] = "GETSERVBYNAME",
100 [GETSERVBYPORT
] = "GETSERVBYPORT",
101 [GETFDSERV
] = "GETFDSERV",
102 [GETNETGRENT
] = "GETNETGRENT",
103 [INNETGR
] = "INNETGR",
104 [GETFDNETGR
] = "GETFDNETGR"
107 /* The control data structures for the services. */
108 struct database_dyn dbs
[lastdb
] =
111 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
112 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
113 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
119 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
120 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
121 .db_filename
= _PATH_NSCD_PASSWD_DB
,
122 .disabled_iov
= &pwd_iov_disabled
,
130 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
131 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
132 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
138 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
139 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
140 .db_filename
= _PATH_NSCD_GROUP_DB
,
141 .disabled_iov
= &grp_iov_disabled
,
149 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
150 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
151 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
155 .propagate
= 0, /* Not used. */
157 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
158 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
159 .db_filename
= _PATH_NSCD_HOSTS_DB
,
160 .disabled_iov
= &hst_iov_disabled
,
168 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
169 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
170 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
174 .propagate
= 0, /* Not used. */
176 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
177 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
178 .db_filename
= _PATH_NSCD_SERVICES_DB
,
179 .disabled_iov
= &serv_iov_disabled
,
187 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
188 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
189 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
193 .propagate
= 0, /* Not used. */
195 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
196 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
197 .db_filename
= _PATH_NSCD_NETGROUP_DB
,
198 .disabled_iov
= &netgroup_iov_disabled
,
208 /* Mapping of request type to database. */
212 struct database_dyn
*db
;
213 } const reqinfo
[LASTREQ
] =
215 [GETPWBYNAME
] = { true, &dbs
[pwddb
] },
216 [GETPWBYUID
] = { true, &dbs
[pwddb
] },
217 [GETGRBYNAME
] = { true, &dbs
[grpdb
] },
218 [GETGRBYGID
] = { true, &dbs
[grpdb
] },
219 [GETHOSTBYNAME
] = { true, &dbs
[hstdb
] },
220 [GETHOSTBYNAMEv6
] = { true, &dbs
[hstdb
] },
221 [GETHOSTBYADDR
] = { true, &dbs
[hstdb
] },
222 [GETHOSTBYADDRv6
] = { true, &dbs
[hstdb
] },
223 [SHUTDOWN
] = { false, NULL
},
224 [GETSTAT
] = { false, NULL
},
225 [SHUTDOWN
] = { false, NULL
},
226 [GETFDPW
] = { false, &dbs
[pwddb
] },
227 [GETFDGR
] = { false, &dbs
[grpdb
] },
228 [GETFDHST
] = { false, &dbs
[hstdb
] },
229 [GETAI
] = { true, &dbs
[hstdb
] },
230 [INITGROUPS
] = { true, &dbs
[grpdb
] },
231 [GETSERVBYNAME
] = { true, &dbs
[servdb
] },
232 [GETSERVBYPORT
] = { true, &dbs
[servdb
] },
233 [GETFDSERV
] = { false, &dbs
[servdb
] },
234 [GETNETGRENT
] = { true, &dbs
[netgrdb
] },
235 [INNETGR
] = { true, &dbs
[netgrdb
] },
236 [GETFDNETGR
] = { false, &dbs
[netgrdb
] }
240 /* Initial number of threads to use. */
242 /* Maximum number of threads to use. */
243 int max_nthreads
= 32;
245 /* Socket for incoming connections. */
249 /* Inotify descriptor. */
254 /* Descriptor for netlink status updates. */
255 static int nl_status_fd
= -1;
258 #ifndef __ASSUME_SOCK_CLOEXEC
259 /* Negative if SOCK_CLOEXEC is not supported, positive if it is, zero
260 before we know the result. */
261 static int have_sock_cloexec
;
263 #ifndef __ASSUME_ACCEPT4
264 static int have_accept4
;
267 /* Number of times clients had to wait. */
268 unsigned long int client_queued
;
272 writeall (int fd
, const void *buf
, size_t len
)
278 ret
= TEMP_FAILURE_RETRY (send (fd
, buf
, n
, MSG_NOSIGNAL
));
281 buf
= (const char *) buf
+ ret
;
285 return ret
< 0 ? ret
: len
- n
;
291 sendfileall (int tofd
, int fromfd
, off_t off
, size_t len
)
298 ret
= TEMP_FAILURE_RETRY (sendfile (tofd
, fromfd
, &off
, n
));
304 return ret
< 0 ? ret
: len
- n
;
312 /* The following three are not really used, they are symbolic constants. */
318 use_he_begin
= use_he
| use_begin
,
319 use_he_end
= use_he
| use_end
,
322 use_key_begin
= use_key
| use_begin
,
323 use_key_end
= use_key
| use_end
,
324 use_key_first
= use_key_begin
| use_first
,
327 use_data_begin
= use_data
| use_begin
,
328 use_data_end
= use_data
| use_end
,
329 use_data_first
= use_data_begin
| use_first
334 check_use (const char *data
, nscd_ssize_t first_free
, uint8_t *usemap
,
335 enum usekey use
, ref_t start
, size_t len
)
339 if (start
> first_free
|| start
+ len
> first_free
340 || (start
& BLOCK_ALIGN_M1
))
343 if (usemap
[start
] == use_not
)
345 /* Add the start marker. */
346 usemap
[start
] = use
| use_begin
;
350 if (usemap
[++start
] != use_not
)
355 /* Add the end marker. */
356 usemap
[start
] = use
| use_end
;
358 else if ((usemap
[start
] & ~use_first
) == ((use
| use_begin
) & ~use_first
))
360 /* Hash entries can't be shared. */
364 usemap
[start
] |= (use
& use_first
);
368 if (usemap
[++start
] != use
)
371 if (usemap
[++start
] != (use
| use_end
))
375 /* Points to a wrong object or somewhere in the middle. */
382 /* Verify data in persistent database. */
384 verify_persistent_db (void *mem
, struct database_pers_head
*readhead
, int dbnr
)
386 assert (dbnr
== pwddb
|| dbnr
== grpdb
|| dbnr
== hstdb
|| dbnr
== servdb
389 time_t now
= time (NULL
);
391 struct database_pers_head
*head
= mem
;
392 struct database_pers_head head_copy
= *head
;
394 /* Check that the header that was read matches the head in the database. */
395 if (memcmp (head
, readhead
, sizeof (*head
)) != 0)
398 /* First some easy tests: make sure the database header is sane. */
399 if (head
->version
!= DB_VERSION
400 || head
->header_size
!= sizeof (*head
)
401 /* We allow a timestamp to be one hour ahead of the current time.
402 This should cover daylight saving time changes. */
403 || head
->timestamp
> now
+ 60 * 60 + 60
404 || (head
->gc_cycle
& 1)
406 || (size_t) head
->module
> INT32_MAX
/ sizeof (ref_t
)
407 || (size_t) head
->data_size
> INT32_MAX
- head
->module
* sizeof (ref_t
)
408 || head
->first_free
< 0
409 || head
->first_free
> head
->data_size
410 || (head
->first_free
& BLOCK_ALIGN_M1
) != 0
411 || head
->maxnentries
< 0
412 || head
->maxnsearched
< 0)
415 uint8_t *usemap
= calloc (head
->first_free
, 1);
419 const char *data
= (char *) &head
->array
[roundup (head
->module
,
420 ALIGN
/ sizeof (ref_t
))];
422 nscd_ssize_t he_cnt
= 0;
423 for (nscd_ssize_t cnt
= 0; cnt
< head
->module
; ++cnt
)
425 ref_t trail
= head
->array
[cnt
];
429 while (work
!= ENDREF
)
431 if (! check_use (data
, head
->first_free
, usemap
, use_he
, work
,
432 sizeof (struct hashentry
)))
435 /* Now we know we can dereference the record. */
436 struct hashentry
*here
= (struct hashentry
*) (data
+ work
);
440 /* Make sure the record is for this type of service. */
441 if (here
->type
>= LASTREQ
442 || reqinfo
[here
->type
].db
!= &dbs
[dbnr
])
445 /* Validate boolean field value. */
446 if (here
->first
!= false && here
->first
!= true)
454 || here
->packet
> head
->first_free
455 || here
->packet
+ sizeof (struct datahead
) > head
->first_free
)
458 struct datahead
*dh
= (struct datahead
*) (data
+ here
->packet
);
460 if (! check_use (data
, head
->first_free
, usemap
,
461 use_data
| (here
->first
? use_first
: 0),
462 here
->packet
, dh
->allocsize
))
465 if (dh
->allocsize
< sizeof (struct datahead
)
466 || dh
->recsize
> dh
->allocsize
467 || (dh
->notfound
!= false && dh
->notfound
!= true)
468 || (dh
->usable
!= false && dh
->usable
!= true))
471 if (here
->key
< here
->packet
+ sizeof (struct datahead
)
472 || here
->key
> here
->packet
+ dh
->allocsize
473 || here
->key
+ here
->len
> here
->packet
+ dh
->allocsize
)
476 /* If keys can appear outside of data, this should be done
477 instead. But gc doesn't mark the data in that case. */
478 if (! check_use (data
, head
->first_free
, usemap
,
479 use_key
| (here
->first
? use_first
: 0),
480 here
->key
, here
->len
))
488 /* A circular list, this must not happen. */
491 trail
= ((struct hashentry
*) (data
+ trail
))->next
;
496 if (he_cnt
!= head
->nentries
)
499 /* See if all data and keys had at least one reference from
500 he->first == true hashentry. */
501 for (ref_t idx
= 0; idx
< head
->first_free
; ++idx
)
504 if (usemap
[idx
] == use_key_begin
)
507 if (usemap
[idx
] == use_data_begin
)
511 /* Finally, make sure the database hasn't changed since the first test. */
512 if (memcmp (mem
, &head_copy
, sizeof (*head
)) != 0)
525 # define EXTRA_O_FLAGS O_CLOEXEC
527 # define EXTRA_O_FLAGS 0
531 /* Initialize database information structures. */
535 /* Look up unprivileged uid/gid/groups before we start listening on the
537 if (server_user
!= NULL
)
538 begin_drop_privileges ();
541 /* No configuration for this value, assume a default. */
544 for (size_t cnt
= 0; cnt
< lastdb
; ++cnt
)
545 if (dbs
[cnt
].enabled
)
547 pthread_rwlock_init (&dbs
[cnt
].lock
, NULL
);
548 pthread_mutex_init (&dbs
[cnt
].memlock
, NULL
);
550 if (dbs
[cnt
].persistent
)
552 /* Try to open the appropriate file on disk. */
553 int fd
= open (dbs
[cnt
].db_filename
, O_RDWR
| EXTRA_O_FLAGS
);
560 struct database_pers_head head
;
561 ssize_t n
= TEMP_FAILURE_RETRY (read (fd
, &head
,
563 if (n
!= sizeof (head
) || fstat64 (fd
, &st
) != 0)
566 /* The code is single-threaded at this point so
567 using strerror is just fine. */
568 msg
= strerror (errno
);
570 dbg_log (_("invalid persistent database file \"%s\": %s"),
571 dbs
[cnt
].db_filename
, msg
);
572 unlink (dbs
[cnt
].db_filename
);
574 else if (head
.module
== 0 && head
.data_size
== 0)
576 /* The file has been created, but the head has not
577 been initialized yet. */
578 msg
= _("uninitialized header");
581 else if (head
.header_size
!= (int) sizeof (head
))
583 msg
= _("header size does not match");
586 else if ((total
= (sizeof (head
)
587 + roundup (head
.module
* sizeof (ref_t
),
591 || total
< sizeof (head
))
593 msg
= _("file size does not match");
596 /* Note we map with the maximum size allowed for the
597 database. This is likely much larger than the
598 actual file size. This is OK on most OSes since
599 extensions of the underlying file will
600 automatically translate more pages available for
602 else if ((mem
= mmap (NULL
, dbs
[cnt
].max_db_size
,
603 PROT_READ
| PROT_WRITE
,
607 else if (!verify_persistent_db (mem
, &head
, cnt
))
610 msg
= _("verification failed");
615 /* Success. We have the database. */
617 dbs
[cnt
].memsize
= total
;
618 dbs
[cnt
].data
= (char *)
619 &dbs
[cnt
].head
->array
[roundup (dbs
[cnt
].head
->module
,
620 ALIGN
/ sizeof (ref_t
))];
621 dbs
[cnt
].mmap_used
= true;
623 if (dbs
[cnt
].suggested_module
> head
.module
)
624 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
629 /* We also need a read-only descriptor. */
632 dbs
[cnt
].ro_fd
= open (dbs
[cnt
].db_filename
,
633 O_RDONLY
| EXTRA_O_FLAGS
);
634 if (dbs
[cnt
].ro_fd
== -1)
636 cannot create read-only descriptor for \"%s\"; no mmap"),
637 dbs
[cnt
].db_filename
);
640 // XXX Shall we test whether the descriptors actually
641 // XXX point to the same file?
644 /* Close the file descriptors in case something went
645 wrong, in which case the variables have not been
650 else if (errno
== EACCES
)
651 error (EXIT_FAILURE
, 0, _("cannot access '%s'"),
652 dbs
[cnt
].db_filename
);
655 if (dbs
[cnt
].head
== NULL
)
657 /* No database loaded. Allocate the data structure,
659 struct database_pers_head head
;
660 size_t total
= (sizeof (head
)
661 + roundup (dbs
[cnt
].suggested_module
662 * sizeof (ref_t
), ALIGN
)
663 + (dbs
[cnt
].suggested_module
664 * DEFAULT_DATASIZE_PER_BUCKET
));
666 /* Try to create the database. If we do not need a
667 persistent database create a temporary file. */
670 if (dbs
[cnt
].persistent
)
672 fd
= open (dbs
[cnt
].db_filename
,
673 O_RDWR
| O_CREAT
| O_EXCL
| O_TRUNC
| EXTRA_O_FLAGS
,
675 if (fd
!= -1 && dbs
[cnt
].shared
)
676 ro_fd
= open (dbs
[cnt
].db_filename
,
677 O_RDONLY
| EXTRA_O_FLAGS
);
681 char fname
[] = _PATH_NSCD_XYZ_DB_TMP
;
682 fd
= mkostemp (fname
, EXTRA_O_FLAGS
);
684 /* We do not need the file name anymore after we
685 opened another file descriptor in read-only mode. */
689 ro_fd
= open (fname
, O_RDONLY
| EXTRA_O_FLAGS
);
699 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
700 dbnames
[cnt
], dbs
[cnt
].db_filename
);
701 // XXX Correct way to terminate?
705 if (dbs
[cnt
].persistent
)
706 dbg_log (_("cannot create %s; no persistent database used"),
707 dbs
[cnt
].db_filename
);
709 dbg_log (_("cannot create %s; no sharing possible"),
710 dbs
[cnt
].db_filename
);
712 dbs
[cnt
].persistent
= 0;
713 // XXX remember: no mmap
717 /* Tell the user if we could not create the read-only
719 if (ro_fd
== -1 && dbs
[cnt
].shared
)
721 cannot create read-only descriptor for \"%s\"; no mmap"),
722 dbs
[cnt
].db_filename
);
724 /* Before we create the header, initialize the hash
725 table, so that if we get interrupted while writing
726 the header we can recognize a partially initialized
728 size_t ps
= sysconf (_SC_PAGESIZE
);
730 assert (~ENDREF
== 0);
731 memset (tmpbuf
, '\xff', ps
);
733 size_t remaining
= dbs
[cnt
].suggested_module
* sizeof (ref_t
);
734 off_t offset
= sizeof (head
);
737 if (offset
% ps
!= 0)
739 towrite
= MIN (remaining
, ps
- (offset
% ps
));
740 if (pwrite (fd
, tmpbuf
, towrite
, offset
) != towrite
)
743 remaining
-= towrite
;
746 while (remaining
> ps
)
748 if (pwrite (fd
, tmpbuf
, ps
, offset
) == -1)
755 && pwrite (fd
, tmpbuf
, remaining
, offset
) != remaining
)
758 /* Create the header of the file. */
759 struct database_pers_head head
=
761 .version
= DB_VERSION
,
762 .header_size
= sizeof (head
),
763 .module
= dbs
[cnt
].suggested_module
,
764 .data_size
= (dbs
[cnt
].suggested_module
765 * DEFAULT_DATASIZE_PER_BUCKET
),
770 if ((TEMP_FAILURE_RETRY (write (fd
, &head
, sizeof (head
)))
772 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd
, 0, total
))
774 || (mem
= mmap (NULL
, dbs
[cnt
].max_db_size
,
775 PROT_READ
| PROT_WRITE
,
776 MAP_SHARED
, fd
, 0)) == MAP_FAILED
)
779 unlink (dbs
[cnt
].db_filename
);
780 dbg_log (_("cannot write to database file %s: %s"),
781 dbs
[cnt
].db_filename
, strerror (errno
));
782 dbs
[cnt
].persistent
= 0;
788 dbs
[cnt
].data
= (char *)
789 &dbs
[cnt
].head
->array
[roundup (dbs
[cnt
].head
->module
,
790 ALIGN
/ sizeof (ref_t
))];
791 dbs
[cnt
].memsize
= total
;
792 dbs
[cnt
].mmap_used
= true;
794 /* Remember the descriptors. */
796 dbs
[cnt
].ro_fd
= ro_fd
;
808 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
809 /* We do not check here whether the O_CLOEXEC provided to the
810 open call was successful or not. The two fcntl calls are
811 only performed once each per process start-up and therefore
812 are not noticeable at all. */
814 && ((dbs
[cnt
].wr_fd
!= -1
815 && fcntl (dbs
[cnt
].wr_fd
, F_SETFD
, FD_CLOEXEC
) == -1)
816 || (dbs
[cnt
].ro_fd
!= -1
817 && fcntl (dbs
[cnt
].ro_fd
, F_SETFD
, FD_CLOEXEC
) == -1)))
820 cannot set socket to close on exec: %s; disabling paranoia mode"),
826 if (dbs
[cnt
].head
== NULL
)
828 /* We do not use the persistent database. Just
829 create an in-memory data structure. */
830 assert (! dbs
[cnt
].persistent
);
832 dbs
[cnt
].head
= xmalloc (sizeof (struct database_pers_head
)
833 + (dbs
[cnt
].suggested_module
835 memset (dbs
[cnt
].head
, '\0', sizeof (struct database_pers_head
));
836 assert (~ENDREF
== 0);
837 memset (dbs
[cnt
].head
->array
, '\xff',
838 dbs
[cnt
].suggested_module
* sizeof (ref_t
));
839 dbs
[cnt
].head
->module
= dbs
[cnt
].suggested_module
;
840 dbs
[cnt
].head
->data_size
= (DEFAULT_DATASIZE_PER_BUCKET
841 * dbs
[cnt
].head
->module
);
842 dbs
[cnt
].data
= xmalloc (dbs
[cnt
].head
->data_size
);
843 dbs
[cnt
].head
->first_free
= 0;
846 assert (dbs
[cnt
].ro_fd
== -1);
850 /* Create the socket. */
851 #ifndef __ASSUME_SOCK_CLOEXEC
853 if (have_sock_cloexec
>= 0)
856 sock
= socket (AF_UNIX
, SOCK_STREAM
| SOCK_CLOEXEC
| SOCK_NONBLOCK
, 0);
857 #ifndef __ASSUME_SOCK_CLOEXEC
858 if (have_sock_cloexec
== 0)
859 have_sock_cloexec
= sock
!= -1 || errno
!= EINVAL
? 1 : -1;
862 #ifndef __ASSUME_SOCK_CLOEXEC
863 if (have_sock_cloexec
< 0)
864 sock
= socket (AF_UNIX
, SOCK_STREAM
, 0);
868 dbg_log (_("cannot open socket: %s"), strerror (errno
));
869 exit (errno
== EACCES
? 4 : 1);
871 /* Bind a name to the socket. */
872 struct sockaddr_un sock_addr
;
873 sock_addr
.sun_family
= AF_UNIX
;
874 strcpy (sock_addr
.sun_path
, _PATH_NSCDSOCKET
);
875 if (bind (sock
, (struct sockaddr
*) &sock_addr
, sizeof (sock_addr
)) < 0)
877 dbg_log ("%s: %s", _PATH_NSCDSOCKET
, strerror (errno
));
878 exit (errno
== EACCES
? 4 : 1);
881 #ifndef __ASSUME_SOCK_CLOEXEC
882 if (have_sock_cloexec
< 0)
884 /* We don't want to get stuck on accept. */
885 int fl
= fcntl (sock
, F_GETFL
);
886 if (fl
== -1 || fcntl (sock
, F_SETFL
, fl
| O_NONBLOCK
) == -1)
888 dbg_log (_("cannot change socket to nonblocking mode: %s"),
893 /* The descriptor needs to be closed on exec. */
894 if (paranoia
&& fcntl (sock
, F_SETFD
, FD_CLOEXEC
) == -1)
896 dbg_log (_("cannot set socket to close on exec: %s"),
903 /* Set permissions for the socket. */
904 chmod (_PATH_NSCDSOCKET
, DEFFILEMODE
);
906 /* Set the socket up to accept connections. */
907 if (listen (sock
, SOMAXCONN
) < 0)
909 dbg_log (_("cannot enable socket to accept connections: %s"),
915 if (dbs
[hstdb
].enabled
)
917 /* Try to open netlink socket to monitor network setting changes. */
918 nl_status_fd
= socket (AF_NETLINK
,
919 SOCK_RAW
| SOCK_CLOEXEC
| SOCK_NONBLOCK
,
921 if (nl_status_fd
!= -1)
923 struct sockaddr_nl snl
;
924 memset (&snl
, '\0', sizeof (snl
));
925 snl
.nl_family
= AF_NETLINK
;
926 /* XXX Is this the best set to use? */
927 snl
.nl_groups
= (RTMGRP_IPV4_IFADDR
| RTMGRP_TC
| RTMGRP_IPV4_MROUTE
928 | RTMGRP_IPV4_ROUTE
| RTMGRP_IPV4_RULE
929 | RTMGRP_IPV6_IFADDR
| RTMGRP_IPV6_MROUTE
930 | RTMGRP_IPV6_ROUTE
| RTMGRP_IPV6_IFINFO
931 | RTMGRP_IPV6_PREFIX
);
933 if (bind (nl_status_fd
, (struct sockaddr
*) &snl
, sizeof (snl
)) != 0)
935 close (nl_status_fd
);
940 /* Start the timestamp process. */
941 dbs
[hstdb
].head
->extra_data
[NSCD_HST_IDX_CONF_TIMESTAMP
]
942 = __bump_nl_timestamp ();
944 # ifndef __ASSUME_SOCK_CLOEXEC
945 if (have_sock_cloexec
< 0)
947 /* We don't want to get stuck on accept. */
948 int fl
= fcntl (nl_status_fd
, F_GETFL
);
950 || fcntl (nl_status_fd
, F_SETFL
, fl
| O_NONBLOCK
) == -1)
953 cannot change socket to nonblocking mode: %s"),
958 /* The descriptor needs to be closed on exec. */
960 && fcntl (nl_status_fd
, F_SETFD
, FD_CLOEXEC
) == -1)
962 dbg_log (_("cannot set socket to close on exec: %s"),
973 /* Change to unprivileged uid/gid/groups if specified in config file */
974 if (server_user
!= NULL
)
975 finish_drop_privileges ();
979 /* Register the file in FINFO as a traced file for the database DBS[DBIX].
981 We support registering multiple files per database. Each call to
982 register_traced_file adds to the list of registered files.
984 When we prune the database, either through timeout or a request to
985 invalidate, we will check to see if any of the registered files has changed.
986 When we accept new connections to handle a cache request we will also
987 check to see if any of the registered files has changed.
989 If we have inotify support then we install an inotify fd to notify us of
990 file deletion or modification, both of which will require we invalidate
991 the cache for the database. Without inotify support we stat the file and
992 store st_mtime to determine if the file has been modified. */
994 register_traced_file (size_t dbidx
, struct traced_file
*finfo
)
996 /* If the database is disabled or file checking is disabled
997 then ignore the registration. */
998 if (! dbs
[dbidx
].enabled
|| ! dbs
[dbidx
].check_file
)
1001 if (__builtin_expect (debug_level
> 0, 0))
1002 dbg_log (_("register trace file %s for database %s"),
1003 finfo
->fname
, dbnames
[dbidx
]);
1007 || (finfo
->inotify_descr
= inotify_add_watch (inotify_fd
, finfo
->fname
,
1012 /* We need the modification date of the file. */
1015 if (stat64 (finfo
->fname
, &st
) < 0)
1017 /* We cannot stat() the file, disable file checking. */
1018 dbg_log (_("cannot stat() file `%s': %s"),
1019 finfo
->fname
, strerror (errno
));
1023 finfo
->inotify_descr
= -1;
1024 finfo
->mtime
= st
.st_mtime
;
1027 /* Queue up the file name. */
1028 finfo
->next
= dbs
[dbidx
].traced_files
;
1029 dbs
[dbidx
].traced_files
= finfo
;
1033 /* Close the connections. */
1035 close_sockets (void)
1042 invalidate_cache (char *key
, int fd
)
1047 for (number
= pwddb
; number
< lastdb
; ++number
)
1048 if (strcmp (key
, dbnames
[number
]) == 0)
1050 if (number
== hstdb
)
1052 struct traced_file
*runp
= dbs
[hstdb
].traced_files
;
1053 while (runp
!= NULL
)
1054 if (runp
->call_res_init
)
1065 if (number
== lastdb
)
1068 writeall (fd
, &resp
, sizeof (resp
));
1072 if (dbs
[number
].enabled
)
1074 pthread_mutex_lock (&dbs
[number
].prune_run_lock
);
1075 prune_cache (&dbs
[number
], LONG_MAX
, fd
);
1076 pthread_mutex_unlock (&dbs
[number
].prune_run_lock
);
1081 writeall (fd
, &resp
, sizeof (resp
));
1088 send_ro_fd (struct database_dyn
*db
, char *key
, int fd
)
1090 /* If we do not have a read-only file descriptor, do nothing. */
1091 if (db
->ro_fd
== -1)
1094 /* We need to send some data along with the descriptor. */
1095 uint64_t mapsize
= (db
->head
->data_size
1096 + roundup (db
->head
->module
* sizeof (ref_t
), ALIGN
)
1097 + sizeof (struct database_pers_head
));
1098 struct iovec iov
[2];
1099 iov
[0].iov_base
= key
;
1100 iov
[0].iov_len
= strlen (key
) + 1;
1101 iov
[1].iov_base
= &mapsize
;
1102 iov
[1].iov_len
= sizeof (mapsize
);
1104 /* Prepare the control message to transfer the descriptor. */
1108 char bytes
[CMSG_SPACE (sizeof (int))];
1110 struct msghdr msg
= { .msg_iov
= iov
, .msg_iovlen
= 2,
1111 .msg_control
= buf
.bytes
,
1112 .msg_controllen
= sizeof (buf
) };
1113 struct cmsghdr
*cmsg
= CMSG_FIRSTHDR (&msg
);
1115 cmsg
->cmsg_level
= SOL_SOCKET
;
1116 cmsg
->cmsg_type
= SCM_RIGHTS
;
1117 cmsg
->cmsg_len
= CMSG_LEN (sizeof (int));
1119 int *ip
= (int *) CMSG_DATA (cmsg
);
1122 msg
.msg_controllen
= cmsg
->cmsg_len
;
1124 /* Send the control message. We repeat when we are interrupted but
1125 everything else is ignored. */
1126 #ifndef MSG_NOSIGNAL
1127 # define MSG_NOSIGNAL 0
1129 (void) TEMP_FAILURE_RETRY (sendmsg (fd
, &msg
, MSG_NOSIGNAL
));
1131 if (__builtin_expect (debug_level
> 0, 0))
1132 dbg_log (_("provide access to FD %d, for %s"), db
->ro_fd
, key
);
1134 #endif /* SCM_RIGHTS */
1137 /* Handle new request. */
1139 handle_request (int fd
, request_header
*req
, void *key
, uid_t uid
, pid_t pid
)
1141 if (__builtin_expect (req
->version
, NSCD_VERSION
) != NSCD_VERSION
)
1143 if (debug_level
> 0)
1145 cannot handle old request version %d; current version is %d"),
1146 req
->version
, NSCD_VERSION
);
1150 /* Perform the SELinux check before we go on to the standard checks. */
1151 if (selinux_enabled
&& nscd_request_avc_has_perm (fd
, req
->type
) != 0)
1153 if (debug_level
> 0)
1162 snprintf (buf
, sizeof (buf
), "/proc/%ld/exe", (long int) pid
);
1163 ssize_t n
= readlink (buf
, buf
, sizeof (buf
) - 1);
1167 request from %ld not handled due to missing permission"), (long int) pid
);
1172 request from '%s' [%ld] not handled due to missing permission"),
1173 buf
, (long int) pid
);
1176 dbg_log (_("request not handled due to missing permission"));
1182 struct database_dyn
*db
= reqinfo
[req
->type
].db
;
1184 /* See whether we can service the request from the cache. */
1185 if (__builtin_expect (reqinfo
[req
->type
].data_request
, true))
1187 if (__builtin_expect (debug_level
, 0) > 0)
1189 if (req
->type
== GETHOSTBYADDR
|| req
->type
== GETHOSTBYADDRv6
)
1191 char buf
[INET6_ADDRSTRLEN
];
1193 dbg_log ("\t%s (%s)", serv2str
[req
->type
],
1194 inet_ntop (req
->type
== GETHOSTBYADDR
1195 ? AF_INET
: AF_INET6
,
1196 key
, buf
, sizeof (buf
)));
1199 dbg_log ("\t%s (%s)", serv2str
[req
->type
], (char *) key
);
1202 /* Is this service enabled? */
1203 if (__builtin_expect (!db
->enabled
, 0))
1205 /* No, send the prepared record. */
1206 if (TEMP_FAILURE_RETRY (send (fd
, db
->disabled_iov
->iov_base
,
1207 db
->disabled_iov
->iov_len
,
1209 != (ssize_t
) db
->disabled_iov
->iov_len
1210 && __builtin_expect (debug_level
, 0) > 0)
1212 /* We have problems sending the result. */
1214 dbg_log (_("cannot write result: %s"),
1215 strerror_r (errno
, buf
, sizeof (buf
)));
1221 /* Be sure we can read the data. */
1222 if (__builtin_expect (pthread_rwlock_tryrdlock (&db
->lock
) != 0, 0))
1224 ++db
->head
->rdlockdelayed
;
1225 pthread_rwlock_rdlock (&db
->lock
);
1228 /* See whether we can handle it from the cache. */
1229 struct datahead
*cached
;
1230 cached
= (struct datahead
*) cache_search (req
->type
, key
, req
->key_len
,
1234 /* Hurray it's in the cache. */
1237 #ifdef HAVE_SENDFILE
1238 if (__builtin_expect (db
->mmap_used
, 1))
1240 assert (db
->wr_fd
!= -1);
1241 assert ((char *) cached
->data
> (char *) db
->data
);
1242 assert ((char *) cached
->data
- (char *) db
->head
1244 <= (sizeof (struct database_pers_head
)
1245 + db
->head
->module
* sizeof (ref_t
)
1246 + db
->head
->data_size
));
1247 nwritten
= sendfileall (fd
, db
->wr_fd
,
1248 (char *) cached
->data
1249 - (char *) db
->head
, cached
->recsize
);
1250 # ifndef __ASSUME_SENDFILE
1251 if (nwritten
== -1 && errno
== ENOSYS
)
1256 # ifndef __ASSUME_SENDFILE
1260 nwritten
= writeall (fd
, cached
->data
, cached
->recsize
);
1262 if (nwritten
!= cached
->recsize
1263 && __builtin_expect (debug_level
, 0) > 0)
1265 /* We have problems sending the result. */
1267 dbg_log (_("cannot write result: %s"),
1268 strerror_r (errno
, buf
, sizeof (buf
)));
1271 pthread_rwlock_unlock (&db
->lock
);
1276 pthread_rwlock_unlock (&db
->lock
);
1278 else if (__builtin_expect (debug_level
, 0) > 0)
1280 if (req
->type
== INVALIDATE
)
1281 dbg_log ("\t%s (%s)", serv2str
[req
->type
], (char *) key
);
1283 dbg_log ("\t%s", serv2str
[req
->type
]);
1286 /* Handle the request. */
1290 addpwbyname (db
, fd
, req
, key
, uid
);
1294 addpwbyuid (db
, fd
, req
, key
, uid
);
1298 addgrbyname (db
, fd
, req
, key
, uid
);
1302 addgrbygid (db
, fd
, req
, key
, uid
);
1306 addhstbyname (db
, fd
, req
, key
, uid
);
1309 case GETHOSTBYNAMEv6
:
1310 addhstbynamev6 (db
, fd
, req
, key
, uid
);
1314 addhstbyaddr (db
, fd
, req
, key
, uid
);
1317 case GETHOSTBYADDRv6
:
1318 addhstbyaddrv6 (db
, fd
, req
, key
, uid
);
1322 addhstai (db
, fd
, req
, key
, uid
);
1326 addinitgroups (db
, fd
, req
, key
, uid
);
1330 addservbyname (db
, fd
, req
, key
, uid
);
1334 addservbyport (db
, fd
, req
, key
, uid
);
1338 addgetnetgrent (db
, fd
, req
, key
, uid
);
1342 addinnetgr (db
, fd
, req
, key
, uid
);
1349 /* Get the caller's credentials. */
1351 struct ucred caller
;
1352 socklen_t optlen
= sizeof (caller
);
1354 if (getsockopt (fd
, SOL_SOCKET
, SO_PEERCRED
, &caller
, &optlen
) < 0)
1358 dbg_log (_("error getting caller's id: %s"),
1359 strerror_r (errno
, buf
, sizeof (buf
)));
1365 /* Some systems have no SO_PEERCRED implementation. They don't
1366 care about security, so we don't either. */
1371 /* Accept shutdown, getstat and invalidate only from root. For
1372 the stat call also allow the user specified in the config file. */
1373 if (req
->type
== GETSTAT
)
1375 if (uid
== 0 || uid
== stat_uid
)
1376 send_stats (fd
, dbs
);
1380 if (req
->type
== INVALIDATE
)
1381 invalidate_cache (key
, fd
);
1383 termination_handler (0);
1393 send_ro_fd (reqinfo
[req
->type
].db
, key
, fd
);
1398 /* Ignore the command, it's nothing we know. */
1404 /* Restart the process. */
1408 /* First determine the parameters. We do not use the parameters
1409 passed to main() since in case nscd is started by running the
1410 dynamic linker this will not work. Yes, this is not the usual
1411 case but nscd is part of glibc and we occasionally do this. */
1412 size_t buflen
= 1024;
1413 char *buf
= alloca (buflen
);
1415 int fd
= open ("/proc/self/cmdline", O_RDONLY
);
1419 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1428 ssize_t n
= TEMP_FAILURE_RETRY (read (fd
, buf
+ readlen
,
1433 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1443 if (readlen
< buflen
)
1446 /* We might have to extend the buffer. */
1447 size_t old_buflen
= buflen
;
1448 char *newp
= extend_alloca (buf
, buflen
, 2 * buflen
);
1449 buf
= memmove (newp
, buf
, old_buflen
);
1454 /* Parse the command line. Worst case scenario: every two
1455 characters form one parameter (one character plus NUL). */
1456 char **argv
= alloca ((readlen
/ 2 + 1) * sizeof (argv
[0]));
1460 while (cp
< buf
+ readlen
)
1463 cp
= (char *) rawmemchr (cp
, '\0') + 1;
1467 /* Second, change back to the old user if we changed it. */
1468 if (server_user
!= NULL
)
1470 if (setresuid (old_uid
, old_uid
, old_uid
) != 0)
1473 cannot change to old UID: %s; disabling paranoia mode"),
1480 if (setresgid (old_gid
, old_gid
, old_gid
) != 0)
1483 cannot change to old GID: %s; disabling paranoia mode"),
1486 setuid (server_uid
);
1492 /* Next change back to the old working directory. */
1493 if (chdir (oldcwd
) == -1)
1496 cannot change to old working directory: %s; disabling paranoia mode"),
1499 if (server_user
!= NULL
)
1501 setuid (server_uid
);
1502 setgid (server_gid
);
1508 /* Synchronize memory. */
1509 int32_t certainly
[lastdb
];
1510 for (int cnt
= 0; cnt
< lastdb
; ++cnt
)
1511 if (dbs
[cnt
].enabled
)
1513 /* Make sure nobody keeps using the database. */
1514 dbs
[cnt
].head
->timestamp
= 0;
1515 certainly
[cnt
] = dbs
[cnt
].head
->nscd_certainly_running
;
1516 dbs
[cnt
].head
->nscd_certainly_running
= 0;
1518 if (dbs
[cnt
].persistent
)
1520 msync (dbs
[cnt
].head
, dbs
[cnt
].memsize
, MS_ASYNC
);
1523 /* The preparations are done. */
1525 char pathbuf
[PATH_MAX
];
1529 /* Try to exec the real nscd program so the process name (as reported
1530 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1531 if readlink or the exec with the result of the readlink call fails. */
1532 ssize_t n
= readlink ("/proc/self/exe", pathbuf
, sizeof (pathbuf
) - 1);
1536 execv (pathbuf
, argv
);
1538 execv ("/proc/self/exe", argv
);
1540 /* If we come here, we will never be able to re-exec. */
1541 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1544 if (server_user
!= NULL
)
1546 setuid (server_uid
);
1547 setgid (server_gid
);
1549 if (chdir ("/") != 0)
1550 dbg_log (_("cannot change current working directory to \"/\": %s"),
1554 /* Reenable the databases. */
1555 time_t now
= time (NULL
);
1556 for (int cnt
= 0; cnt
< lastdb
; ++cnt
)
1557 if (dbs
[cnt
].enabled
)
1559 dbs
[cnt
].head
->timestamp
= now
;
1560 dbs
[cnt
].head
->nscd_certainly_running
= certainly
[cnt
];
1565 /* List of file descriptors. */
1569 struct fdlist
*next
;
1571 /* Memory allocated for the list. */
1572 static struct fdlist
*fdlist
;
1573 /* List of currently ready-to-read file descriptors. */
1574 static struct fdlist
*readylist
;
1576 /* Conditional variable and mutex to signal availability of entries in
1577 READYLIST. The condvar is initialized dynamically since we might
1578 use a different clock depending on availability. */
1579 static pthread_cond_t readylist_cond
= PTHREAD_COND_INITIALIZER
;
1580 static pthread_mutex_t readylist_lock
= PTHREAD_MUTEX_INITIALIZER
;
1582 /* The clock to use with the condvar. */
1583 static clockid_t timeout_clock
= CLOCK_REALTIME
;
1585 /* Number of threads ready to handle the READYLIST. */
1586 static unsigned long int nready
;
1589 /* Function for the clean-up threads. */
1591 __attribute__ ((__noreturn__
))
1592 nscd_run_prune (void *p
)
1594 const long int my_number
= (long int) p
;
1595 assert (dbs
[my_number
].enabled
);
1597 int dont_need_update
= setup_thread (&dbs
[my_number
]);
1599 time_t now
= time (NULL
);
1601 /* We are running. */
1602 dbs
[my_number
].head
->timestamp
= now
;
1604 struct timespec prune_ts
;
1605 if (__builtin_expect (clock_gettime (timeout_clock
, &prune_ts
) == -1, 0))
1606 /* Should never happen. */
1609 /* Compute the initial timeout time. Prevent all the timers from going
1610 off at the same time by adding a db-based value. */
1611 prune_ts
.tv_sec
+= CACHE_PRUNE_INTERVAL
+ my_number
;
1612 dbs
[my_number
].wakeup_time
= now
+ CACHE_PRUNE_INTERVAL
+ my_number
;
1614 pthread_mutex_t
*prune_lock
= &dbs
[my_number
].prune_lock
;
1615 pthread_mutex_t
*prune_run_lock
= &dbs
[my_number
].prune_run_lock
;
1616 pthread_cond_t
*prune_cond
= &dbs
[my_number
].prune_cond
;
1618 pthread_mutex_lock (prune_lock
);
1621 /* Wait, but not forever. */
1623 if (! dbs
[my_number
].clear_cache
)
1624 e
= pthread_cond_timedwait (prune_cond
, prune_lock
, &prune_ts
);
1625 assert (__builtin_expect (e
== 0 || e
== ETIMEDOUT
, 1));
1629 if (e
== ETIMEDOUT
|| now
>= dbs
[my_number
].wakeup_time
1630 || dbs
[my_number
].clear_cache
)
1632 /* We will determine the new timeout values based on the
1633 cache content. Should there be concurrent additions to
1634 the cache which are not accounted for in the cache
1635 pruning we want to know about it. Therefore set the
1636 timeout to the maximum. It will be decreased when adding
1637 new entries to the cache, if necessary. */
1638 dbs
[my_number
].wakeup_time
= MAX_TIMEOUT_VALUE
;
1640 /* Unconditionally reset the flag. */
1641 time_t prune_now
= dbs
[my_number
].clear_cache
? LONG_MAX
: now
;
1642 dbs
[my_number
].clear_cache
= 0;
1644 pthread_mutex_unlock (prune_lock
);
1646 /* We use a separate lock for running the prune function (instead
1647 of keeping prune_lock locked) because this enables concurrent
1648 invocations of cache_add which might modify the timeout value. */
1649 pthread_mutex_lock (prune_run_lock
);
1650 next_wait
= prune_cache (&dbs
[my_number
], prune_now
, -1);
1651 pthread_mutex_unlock (prune_run_lock
);
1653 next_wait
= MAX (next_wait
, CACHE_PRUNE_INTERVAL
);
1654 /* If clients cannot determine for sure whether nscd is running
1655 we need to wake up occasionally to update the timestamp.
1656 Wait 90% of the update period. */
1657 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1658 if (__builtin_expect (! dont_need_update
, 0))
1660 next_wait
= MIN (UPDATE_MAPPING_TIMEOUT
, next_wait
);
1661 dbs
[my_number
].head
->timestamp
= now
;
1664 pthread_mutex_lock (prune_lock
);
1666 /* Make it known when we will wake up again. */
1667 if (now
+ next_wait
< dbs
[my_number
].wakeup_time
)
1668 dbs
[my_number
].wakeup_time
= now
+ next_wait
;
1670 next_wait
= dbs
[my_number
].wakeup_time
- now
;
1673 /* The cache was just pruned. Do not do it again now. Just
1674 use the new timeout value. */
1675 next_wait
= dbs
[my_number
].wakeup_time
- now
;
1677 if (clock_gettime (timeout_clock
, &prune_ts
) == -1)
1678 /* Should never happen. */
1681 /* Compute next timeout time. */
1682 prune_ts
.tv_sec
+= next_wait
;
1687 /* This is the main loop. It is replicated in different threads but
1688 the use of the ready list makes sure only one thread handles an
1689 incoming connection. */
1691 __attribute__ ((__noreturn__
))
1692 nscd_run_worker (void *p
)
1696 /* Initial locking. */
1697 pthread_mutex_lock (&readylist_lock
);
1699 /* One more thread available. */
1704 while (readylist
== NULL
)
1705 pthread_cond_wait (&readylist_cond
, &readylist_lock
);
1707 struct fdlist
*it
= readylist
->next
;
1708 if (readylist
->next
== readylist
)
1709 /* Just one entry on the list. */
1712 readylist
->next
= it
->next
;
1714 /* Extract the information and mark the record ready to be used
1719 /* One more thread available. */
1722 /* We are done with the list. */
1723 pthread_mutex_unlock (&readylist_lock
);
1725 #ifndef __ASSUME_ACCEPT4
1726 if (have_accept4
< 0)
1728 /* We do not want to block on a short read or so. */
1729 int fl
= fcntl (fd
, F_GETFL
);
1730 if (fl
== -1 || fcntl (fd
, F_SETFL
, fl
| O_NONBLOCK
) == -1)
1735 /* Now read the request. */
1737 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd
, &req
, sizeof (req
)))
1738 != sizeof (req
), 0))
1740 /* We failed to read data. Note that this also might mean we
1741 failed because we would have blocked. */
1742 if (debug_level
> 0)
1743 dbg_log (_("short read while reading request: %s"),
1744 strerror_r (errno
, buf
, sizeof (buf
)));
1748 /* Check whether this is a valid request type. */
1749 if (req
.type
< GETPWBYNAME
|| req
.type
>= LASTREQ
)
1752 /* Some systems have no SO_PEERCRED implementation. They don't
1753 care about security so we don't either. */
1758 if (__builtin_expect (debug_level
> 0, 0))
1760 struct ucred caller
;
1761 socklen_t optlen
= sizeof (caller
);
1763 if (getsockopt (fd
, SOL_SOCKET
, SO_PEERCRED
, &caller
, &optlen
) == 0)
1767 const pid_t pid
= 0;
1770 /* It should not be possible to crash the nscd with a silly
1771 request (i.e., a terribly large key). We limit the size to 1kb. */
1772 if (__builtin_expect (req
.key_len
, 1) < 0
1773 || __builtin_expect (req
.key_len
, 1) > MAXKEYLEN
)
1775 if (debug_level
> 0)
1776 dbg_log (_("key length in request too long: %d"), req
.key_len
);
1781 char keybuf
[MAXKEYLEN
];
1783 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd
, keybuf
,
1787 /* Again, this can also mean we would have blocked. */
1788 if (debug_level
> 0)
1789 dbg_log (_("short read while reading request key: %s"),
1790 strerror_r (errno
, buf
, sizeof (buf
)));
1794 if (__builtin_expect (debug_level
, 0) > 0)
1799 handle_request: request received (Version = %d) from PID %ld"),
1800 req
.version
, (long int) pid
);
1804 handle_request: request received (Version = %d)"), req
.version
);
1807 /* Phew, we got all the data, now process it. */
1808 handle_request (fd
, &req
, keybuf
, uid
, pid
);
1816 pthread_mutex_lock (&readylist_lock
);
1818 /* One more thread available. */
1825 static unsigned int nconns
;
1830 pthread_mutex_lock (&readylist_lock
);
1832 /* Find an empty entry in FDLIST. */
1834 for (inner
= 0; inner
< nconns
; ++inner
)
1835 if (fdlist
[inner
].next
== NULL
)
1837 assert (inner
< nconns
);
1839 fdlist
[inner
].fd
= fd
;
1841 if (readylist
== NULL
)
1842 readylist
= fdlist
[inner
].next
= &fdlist
[inner
];
1845 fdlist
[inner
].next
= readylist
->next
;
1846 readylist
= readylist
->next
= &fdlist
[inner
];
1849 bool do_signal
= true;
1850 if (__builtin_expect (nready
== 0, 0))
1855 /* Try to start another thread to help out. */
1857 if (nthreads
< max_nthreads
1858 && pthread_create (&th
, &attr
, nscd_run_worker
,
1859 (void *) (long int) nthreads
) == 0)
1861 /* We got another thread. */
1863 /* The new thread might need a kick. */
1869 pthread_mutex_unlock (&readylist_lock
);
1871 /* Tell one of the worker threads there is work to do. */
1873 pthread_cond_signal (&readylist_cond
);
1877 /* Check whether restarting should happen. NOW is the current time as
   supplied by the caller. The result is true only if all of the
   following hold: PARANOIA is set, READYLIST is empty (NULL), NREADY
   equals NTHREADS, and NOW has reached RESTART_TIME. */
1879 restart_p (time_t now
)
/* NREADY == NTHREADS presumably means that every worker thread is idle
   (NREADY counts the threads ready to handle the ready list) -- confirm
   against the places where NREADY is updated. */
1881 return (paranoia
&& readylist
== NULL
&& nready
== nthreads
1882 && now
>= restart_time
);
1886 /* Array for times a connection was accepted. */
1887 static time_t *starttime
;
1890 /* Inotify event for changed file. */
1893 struct inotify_event i
;
1895 # define PATH_MAX 1024
1897 char buf
[sizeof (struct inotify_event
) + PATH_MAX
];
1900 /* Process the inotify event in INEV. If the event matches any of the files
1901 registered with a database then mark that database as requiring its cache
1902 to be cleared. We indicate the cache needs clearing by setting
1903 TO_CLEAR[DBCNT] to true for the matching database. */
1905 inotify_check_files (bool *to_clear
, union __inev
*inev
)
1907 /* Check which of the files changed. */
1908 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
1910 struct traced_file
*finfo
= dbs
[dbcnt
].traced_files
;
1912 while (finfo
!= NULL
)
1914 /* Inotify event watch descriptor matches. */
1915 if (finfo
->inotify_descr
== inev
->i
.wd
)
1917 /* Mark cache as needing to be cleared and reinitialize. */
1918 to_clear
[dbcnt
] = true;
1919 if (finfo
->call_res_init
)
1924 finfo
= finfo
->next
;
1929 /* If an entry in the array of booleans TO_CLEAR is TRUE then clear the cache
1930 for the associated database, otherwise do nothing. The TO_CLEAR array must
1931 have LASTDB entries. The cache is not emptied by this function itself:
   it only sets the database's clear_cache flag and signals the database's
   prune_cond. nscd_run_prune tests that flag and calls prune_cache. */
1933 clear_db_cache (bool *to_clear
)
1935 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
1936 if (to_clear
[dbcnt
])
/* The flag is written while holding prune_lock; the condition variable
   is signalled only after the lock has been released again. */
1938 pthread_mutex_lock (&dbs
[dbcnt
].prune_lock
);
1939 dbs
[dbcnt
].clear_cache
= 1;
1940 pthread_mutex_unlock (&dbs
[dbcnt
].prune_lock
);
1941 pthread_cond_signal (&dbs
[dbcnt
].prune_cond
);
1948 __attribute__ ((__noreturn__
))
1949 main_loop_poll (void)
1951 struct pollfd
*conns
= (struct pollfd
*) xmalloc (nconns
1952 * sizeof (conns
[0]));
1955 conns
[0].events
= POLLRDNORM
;
1957 size_t firstfree
= 1;
1960 if (inotify_fd
!= -1)
1962 conns
[1].fd
= inotify_fd
;
1963 conns
[1].events
= POLLRDNORM
;
1970 size_t idx_nl_status_fd
= 0;
1971 if (nl_status_fd
!= -1)
1973 idx_nl_status_fd
= nused
;
1974 conns
[nused
].fd
= nl_status_fd
;
1975 conns
[nused
].events
= POLLRDNORM
;
1983 /* Wait for any event. We wait at most a couple of seconds so
1984 that we can check whether we should close any of the accepted
1985 connections since we have not received a request. */
1986 #define MAX_ACCEPT_TIMEOUT 30
1987 #define MIN_ACCEPT_TIMEOUT 5
1988 #define MAIN_THREAD_TIMEOUT \
1989 (MAX_ACCEPT_TIMEOUT * 1000 \
1990 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1992 int n
= poll (conns
, nused
, MAIN_THREAD_TIMEOUT
);
1994 time_t now
= time (NULL
);
1996 /* If there is a descriptor ready for reading or there is a new
1997 connection, process this now. */
2000 if (conns
[0].revents
!= 0)
2002 /* We have a new incoming connection. Accept the connection. */
2005 #ifndef __ASSUME_ACCEPT4
2007 if (have_accept4
>= 0)
2010 fd
= TEMP_FAILURE_RETRY (accept4 (sock
, NULL
, NULL
,
2012 #ifndef __ASSUME_ACCEPT4
2013 if (have_accept4
== 0)
2014 have_accept4
= fd
!= -1 || errno
!= ENOSYS
? 1 : -1;
2017 #ifndef __ASSUME_ACCEPT4
2018 if (have_accept4
< 0)
2019 fd
= TEMP_FAILURE_RETRY (accept (sock
, NULL
, NULL
));
2022 /* Use the descriptor if we have not reached the limit. */
2025 if (firstfree
< nconns
)
2027 conns
[firstfree
].fd
= fd
;
2028 conns
[firstfree
].events
= POLLRDNORM
;
2029 starttime
[firstfree
] = now
;
2030 if (firstfree
>= nused
)
2031 nused
= firstfree
+ 1;
2035 while (firstfree
< nused
&& conns
[firstfree
].fd
!= -1);
2038 /* We cannot use the connection so close it. */
2047 if (inotify_fd
!= -1 && conns
[1].fd
== inotify_fd
)
2049 if (conns
[1].revents
!= 0)
2051 bool to_clear
[lastdb
] = { false, };
2054 /* Read all inotify events for files registered via
2055 register_traced_file(). */
2058 ssize_t nb
= TEMP_FAILURE_RETRY (read (inotify_fd
, &inev
,
2060 if (nb
< (ssize_t
) sizeof (struct inotify_event
))
2062 if (__builtin_expect (nb
== -1 && errno
!= EAGAIN
,
2065 /* Something went wrong when reading the inotify
2066 data. Better disable inotify. */
2068 disabled inotify after read error %d"),
2080 /* Check which of the files changed. */
2081 inotify_check_files (to_clear
, &inev
);
2084 /* Actually perform the cache clearing. */
2085 clear_db_cache (to_clear
);
2095 if (idx_nl_status_fd
!= 0 && conns
[idx_nl_status_fd
].revents
!= 0)
2098 /* Read all the data. We do not interpret it here. */
2099 while (TEMP_FAILURE_RETRY (read (nl_status_fd
, buf
,
2100 sizeof (buf
))) != -1)
2103 dbs
[hstdb
].head
->extra_data
[NSCD_HST_IDX_CONF_TIMESTAMP
]
2104 = __bump_nl_timestamp ();
2108 for (size_t cnt
= first
; cnt
< nused
&& n
> 0; ++cnt
)
2109 if (conns
[cnt
].revents
!= 0)
2111 fd_ready (conns
[cnt
].fd
);
2113 /* Clean up the CONNS array. */
2115 if (cnt
< firstfree
)
2117 if (cnt
== nused
- 1)
2120 while (conns
[nused
- 1].fd
== -1);
2126 /* Now find entries which have timed out. */
2129 /* We make the timeout length depend on the number of file
2130 descriptors currently used. */
2131 #define ACCEPT_TIMEOUT \
2132 (MAX_ACCEPT_TIMEOUT \
2133 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
2134 time_t laststart
= now
- ACCEPT_TIMEOUT
;
2136 for (size_t cnt
= nused
- 1; cnt
> 0; --cnt
)
2138 if (conns
[cnt
].fd
!= -1 && starttime
[cnt
] < laststart
)
2140 /* Remove the entry, it timed out. */
2141 (void) close (conns
[cnt
].fd
);
2144 if (cnt
< firstfree
)
2146 if (cnt
== nused
- 1)
2149 while (conns
[nused
- 1].fd
== -1);
2153 if (restart_p (now
))
2161 main_loop_epoll (int efd
)
2163 struct epoll_event ev
= { 0, };
2167 /* Add the socket. */
2168 ev
.events
= EPOLLRDNORM
;
2170 if (epoll_ctl (efd
, EPOLL_CTL_ADD
, sock
, &ev
) == -1)
2171 /* We cannot use epoll. */
2174 # ifdef HAVE_INOTIFY
2175 if (inotify_fd
!= -1)
2177 ev
.events
= EPOLLRDNORM
;
2178 ev
.data
.fd
= inotify_fd
;
2179 if (epoll_ctl (efd
, EPOLL_CTL_ADD
, inotify_fd
, &ev
) == -1)
2180 /* We cannot use epoll. */
2186 # ifdef HAVE_NETLINK
2187 if (nl_status_fd
!= -1)
2189 ev
.events
= EPOLLRDNORM
;
2190 ev
.data
.fd
= nl_status_fd
;
2191 if (epoll_ctl (efd
, EPOLL_CTL_ADD
, nl_status_fd
, &ev
) == -1)
2192 /* We cannot use epoll. */
2199 struct epoll_event revs
[100];
2200 # define nrevs (sizeof (revs) / sizeof (revs[0]))
2202 int n
= epoll_wait (efd
, revs
, nrevs
, MAIN_THREAD_TIMEOUT
);
2204 time_t now
= time (NULL
);
2206 for (int cnt
= 0; cnt
< n
; ++cnt
)
2207 if (revs
[cnt
].data
.fd
== sock
)
2209 /* A new connection. */
2212 # ifndef __ASSUME_ACCEPT4
2214 if (have_accept4
>= 0)
2217 fd
= TEMP_FAILURE_RETRY (accept4 (sock
, NULL
, NULL
,
2219 # ifndef __ASSUME_ACCEPT4
2220 if (have_accept4
== 0)
2221 have_accept4
= fd
!= -1 || errno
!= ENOSYS
? 1 : -1;
2224 # ifndef __ASSUME_ACCEPT4
2225 if (have_accept4
< 0)
2226 fd
= TEMP_FAILURE_RETRY (accept (sock
, NULL
, NULL
));
2229 /* Use the descriptor if we have not reached the limit. */
2232 /* Try to add the new descriptor. */
2235 || epoll_ctl (efd
, EPOLL_CTL_ADD
, fd
, &ev
) == -1)
2236 /* The descriptor is too large or something went
2237 wrong. Close the descriptor. */
2241 /* Remember when we accepted the connection. */
2242 starttime
[fd
] = now
;
2251 # ifdef HAVE_INOTIFY
2252 else if (revs
[cnt
].data
.fd
== inotify_fd
)
2254 bool to_clear
[lastdb
] = { false, };
2257 /* Read all inotify events for files registered via
2258 register_traced_file(). */
2261 ssize_t nb
= TEMP_FAILURE_RETRY (read (inotify_fd
, &inev
,
2263 if (nb
< (ssize_t
) sizeof (struct inotify_event
))
2265 if (__builtin_expect (nb
== -1 && errno
!= EAGAIN
, 0))
2267 /* Something went wrong when reading the inotify
2268 data. Better disable inotify. */
2269 dbg_log (_("disabled inotify after read error %d"),
2271 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, inotify_fd
,
2279 /* Check which of the files changed. */
2280 inotify_check_files(to_clear
, &inev
);
2283 /* Actually perform the cache clearing. */
2284 clear_db_cache (to_clear
);
2287 # ifdef HAVE_NETLINK
2288 else if (revs
[cnt
].data
.fd
== nl_status_fd
)
2291 /* Read all the data. We do not interpret it here. */
2292 while (TEMP_FAILURE_RETRY (read (nl_status_fd
, buf
,
2293 sizeof (buf
))) != -1)
2296 __bump_nl_timestamp ();
2301 /* Remove the descriptor from the epoll descriptor. */
2302 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, revs
[cnt
].data
.fd
, NULL
);
2304 /* Get a worker to handle the request. */
2305 fd_ready (revs
[cnt
].data
.fd
);
2307 /* Reset the time. */
2308 starttime
[revs
[cnt
].data
.fd
] = 0;
2309 if (revs
[cnt
].data
.fd
== highest
)
2312 while (highest
> 0 && starttime
[highest
] == 0);
2317 /* Now look for descriptors for accepted connections which have
2318 no reply in too long of a time. */
2319 time_t laststart
= now
- ACCEPT_TIMEOUT
;
2320 assert (starttime
[sock
] == 0);
2321 assert (inotify_fd
== -1 || starttime
[inotify_fd
] == 0);
2322 assert (nl_status_fd
== -1 || starttime
[nl_status_fd
] == 0);
2323 for (int cnt
= highest
; cnt
> STDERR_FILENO
; --cnt
)
2324 if (starttime
[cnt
] != 0 && starttime
[cnt
] < laststart
)
2326 /* We are waiting for this one for too long. Close it. */
2327 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, cnt
, NULL
);
2335 else if (cnt
!= sock
&& starttime
[cnt
] == 0 && cnt
== highest
)
2338 if (restart_p (now
))
2345 /* Start all the threads we want. The initial process is thread no. 1. */
2347 start_threads (void)
2349 /* Initialize the conditional variable we will use. The only
2350 non-standard attribute we might use is the clock selection. */
2351 pthread_condattr_t condattr
;
2352 pthread_condattr_init (&condattr
);
2354 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2355 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2356 /* Determine whether the monotonic clock is available. */
2357 struct timespec dummy
;
2358 # if _POSIX_MONOTONIC_CLOCK == 0
2359 if (sysconf (_SC_MONOTONIC_CLOCK
) > 0)
2361 # if _POSIX_CLOCK_SELECTION == 0
2362 if (sysconf (_SC_CLOCK_SELECTION
) > 0)
2364 if (clock_getres (CLOCK_MONOTONIC
, &dummy
) == 0
2365 && pthread_condattr_setclock (&condattr
, CLOCK_MONOTONIC
) == 0)
2366 timeout_clock
= CLOCK_MONOTONIC
;
2369 /* Create the attribute for the threads. They are all created
2371 pthread_attr_init (&attr
);
2372 pthread_attr_setdetachstate (&attr
, PTHREAD_CREATE_DETACHED
);
2373 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2374 pthread_attr_setstacksize (&attr
, NSCD_THREAD_STACKSIZE
);
2376 /* We allow less than LASTDB threads only for debugging. */
2377 if (debug_level
== 0)
2378 nthreads
= MAX (nthreads
, lastdb
);
2380 /* Create the threads which prune the databases. */
2381 // XXX Ideally this work would be done by some of the worker threads.
2382 // XXX But this is problematic since we would need to be able to wake
2383 // XXX them up explicitly as well as part of the group handling the
2384 // XXX ready-list. This requires an operation where we can wait on
2385 // XXX two conditional variables at the same time. This operation
2386 // XXX does not exist (yet).
2387 for (long int i
= 0; i
< lastdb
; ++i
)
2389 /* Initialize the conditional variable. */
2390 if (pthread_cond_init (&dbs
[i
].prune_cond
, &condattr
) != 0)
2392 dbg_log (_("could not initialize conditional variable"));
2398 && pthread_create (&th
, &attr
, nscd_run_prune
, (void *) i
) != 0)
2400 dbg_log (_("could not start clean-up thread; terminating"));
2405 pthread_condattr_destroy (&condattr
);
2407 for (long int i
= 0; i
< nthreads
; ++i
)
2410 if (pthread_create (&th
, &attr
, nscd_run_worker
, NULL
) != 0)
2414 dbg_log (_("could not start any worker thread; terminating"));
2422 /* Determine how much room for descriptors we should initially
2423 allocate. This might need to change later if we cap the number
2425 const long int nfds
= sysconf (_SC_OPEN_MAX
);
2427 #define MAXCONN 16384
2428 if (nfds
== -1 || nfds
> MAXCONN
)
2430 else if (nfds
< MINCONN
)
2435 /* We need memory to pass descriptors on to the worker threads. */
2436 fdlist
= (struct fdlist
*) xcalloc (nconns
, sizeof (fdlist
[0]));
2437 /* Array to keep track when connection was accepted. */
2438 starttime
= (time_t *) xcalloc (nconns
, sizeof (starttime
[0]));
2440 /* In the main thread we execute the loop which handles incoming
2443 int efd
= epoll_create (100);
2446 main_loop_epoll (efd
);
2455 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2456 this function is called, we are not listening on the nscd socket yet so
2457 we can just use the ordinary lookup functions without causing a lockup */
2459 begin_drop_privileges (void)
2461 struct passwd
*pwd
= getpwnam (server_user
);
2465 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2466 error (EXIT_FAILURE
, 0, _("Failed to run nscd as user '%s'"),
2470 server_uid
= pwd
->pw_uid
;
2471 server_gid
= pwd
->pw_gid
;
2473 /* Save the old UID/GID if we have to change back. */
2476 old_uid
= getuid ();
2477 old_gid
= getgid ();
2480 if (getgrouplist (server_user
, server_gid
, NULL
, &server_ngroups
) == 0)
2482 /* This really must never happen. */
2483 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2484 error (EXIT_FAILURE
, errno
, _("initial getgrouplist failed"));
2487 server_groups
= (gid_t
*) xmalloc (server_ngroups
* sizeof (gid_t
));
2489 if (getgrouplist (server_user
, server_gid
, server_groups
, &server_ngroups
)
2492 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2493 error (EXIT_FAILURE
, errno
, _("getgrouplist failed"));
2498 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2499 run nscd as the user specified in the configuration file. */
2501 finish_drop_privileges (void)
2503 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2504 /* We need to preserve the capabilities to connect to the audit daemon. */
2505 cap_t new_caps
= preserve_capabilities ();
2508 if (setgroups (server_ngroups
, server_groups
) == -1)
2510 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2511 error (EXIT_FAILURE
, errno
, _("setgroups failed"));
2516 res
= setresgid (server_gid
, server_gid
, old_gid
);
2518 res
= setgid (server_gid
);
2521 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2527 res
= setresuid (server_uid
, server_uid
, old_uid
);
2529 res
= setuid (server_uid
);
2532 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2537 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2538 /* Remove the temporary capabilities. */
2539 install_real_capabilities (new_caps
);