1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2014 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>. */
35 #include <arpa/inet.h>
37 # include <linux/netlink.h>
38 # include <linux/rtnetlink.h>
41 # include <sys/epoll.h>
44 # include <sys/inotify.h>
47 #include <sys/param.h>
50 # include <sys/sendfile.h>
52 #include <sys/socket.h>
59 #include <resolv/resolv.h>
61 #include <kernel-features.h>
64 /* Support to run nscd as an unprivileged user */
65 const char *server_user
;
66 static uid_t server_uid
;
67 static gid_t server_gid
;
68 const char *stat_user
;
70 static gid_t
*server_groups
;
74 static int server_ngroups
;
76 static pthread_attr_t attr
;
78 static void begin_drop_privileges (void);
79 static void finish_drop_privileges (void);
81 /* Map request type to a string. */
82 const char *const serv2str
[LASTREQ
] =
84 [GETPWBYNAME
] = "GETPWBYNAME",
85 [GETPWBYUID
] = "GETPWBYUID",
86 [GETGRBYNAME
] = "GETGRBYNAME",
87 [GETGRBYGID
] = "GETGRBYGID",
88 [GETHOSTBYNAME
] = "GETHOSTBYNAME",
89 [GETHOSTBYNAMEv6
] = "GETHOSTBYNAMEv6",
90 [GETHOSTBYADDR
] = "GETHOSTBYADDR",
91 [GETHOSTBYADDRv6
] = "GETHOSTBYADDRv6",
92 [SHUTDOWN
] = "SHUTDOWN",
93 [GETSTAT
] = "GETSTAT",
94 [INVALIDATE
] = "INVALIDATE",
95 [GETFDPW
] = "GETFDPW",
96 [GETFDGR
] = "GETFDGR",
97 [GETFDHST
] = "GETFDHST",
99 [INITGROUPS
] = "INITGROUPS",
100 [GETSERVBYNAME
] = "GETSERVBYNAME",
101 [GETSERVBYPORT
] = "GETSERVBYPORT",
102 [GETFDSERV
] = "GETFDSERV",
103 [GETNETGRENT
] = "GETNETGRENT",
104 [INNETGR
] = "INNETGR",
105 [GETFDNETGR
] = "GETFDNETGR"
108 /* The control data structures for the services. */
109 struct database_dyn dbs
[lastdb
] =
112 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
113 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
114 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
120 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
121 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
122 .db_filename
= _PATH_NSCD_PASSWD_DB
,
123 .disabled_iov
= &pwd_iov_disabled
,
131 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
132 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
133 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
139 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
140 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
141 .db_filename
= _PATH_NSCD_GROUP_DB
,
142 .disabled_iov
= &grp_iov_disabled
,
150 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
151 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
152 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
156 .propagate
= 0, /* Not used. */
158 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
159 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
160 .db_filename
= _PATH_NSCD_HOSTS_DB
,
161 .disabled_iov
= &hst_iov_disabled
,
169 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
170 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
171 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
175 .propagate
= 0, /* Not used. */
177 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
178 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
179 .db_filename
= _PATH_NSCD_SERVICES_DB
,
180 .disabled_iov
= &serv_iov_disabled
,
188 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
189 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
190 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
194 .propagate
= 0, /* Not used. */
196 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
197 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
198 .db_filename
= _PATH_NSCD_NETGROUP_DB
,
199 .disabled_iov
= &netgroup_iov_disabled
,
209 /* Mapping of request type to database. */
213 struct database_dyn
*db
;
214 } const reqinfo
[LASTREQ
] =
216 [GETPWBYNAME
] = { true, &dbs
[pwddb
] },
217 [GETPWBYUID
] = { true, &dbs
[pwddb
] },
218 [GETGRBYNAME
] = { true, &dbs
[grpdb
] },
219 [GETGRBYGID
] = { true, &dbs
[grpdb
] },
220 [GETHOSTBYNAME
] = { true, &dbs
[hstdb
] },
221 [GETHOSTBYNAMEv6
] = { true, &dbs
[hstdb
] },
222 [GETHOSTBYADDR
] = { true, &dbs
[hstdb
] },
223 [GETHOSTBYADDRv6
] = { true, &dbs
[hstdb
] },
224 [SHUTDOWN
] = { false, NULL
},
225 [GETSTAT
] = { false, NULL
},
226 [SHUTDOWN
] = { false, NULL
},
227 [GETFDPW
] = { false, &dbs
[pwddb
] },
228 [GETFDGR
] = { false, &dbs
[grpdb
] },
229 [GETFDHST
] = { false, &dbs
[hstdb
] },
230 [GETAI
] = { true, &dbs
[hstdb
] },
231 [INITGROUPS
] = { true, &dbs
[grpdb
] },
232 [GETSERVBYNAME
] = { true, &dbs
[servdb
] },
233 [GETSERVBYPORT
] = { true, &dbs
[servdb
] },
234 [GETFDSERV
] = { false, &dbs
[servdb
] },
235 [GETNETGRENT
] = { true, &dbs
[netgrdb
] },
236 [INNETGR
] = { true, &dbs
[netgrdb
] },
237 [GETFDNETGR
] = { false, &dbs
[netgrdb
] }
241 /* Initial number of threads to use. */
243 /* Maximum number of threads to use. */
244 int max_nthreads
= 32;
246 /* Socket for incoming connections. */
250 /* Inotify descriptor. */
255 /* Descriptor for netlink status updates. */
256 static int nl_status_fd
= -1;
259 #ifndef __ASSUME_SOCK_CLOEXEC
260 /* Negative if SOCK_CLOEXEC is not supported, positive if it is, zero
261 before we know the result. */
262 static int have_sock_cloexec
;
264 #ifndef __ASSUME_ACCEPT4
265 static int have_accept4
;
268 /* Number of times clients had to wait. */
269 unsigned long int client_queued
;
273 writeall (int fd
, const void *buf
, size_t len
)
279 ret
= TEMP_FAILURE_RETRY (send (fd
, buf
, n
, MSG_NOSIGNAL
));
282 buf
= (const char *) buf
+ ret
;
286 return ret
< 0 ? ret
: len
- n
;
292 sendfileall (int tofd
, int fromfd
, off_t off
, size_t len
)
299 ret
= TEMP_FAILURE_RETRY (sendfile (tofd
, fromfd
, &off
, n
));
305 return ret
< 0 ? ret
: len
- n
;
313 /* The following three are not really used, they are symbolic constants. */
319 use_he_begin
= use_he
| use_begin
,
320 use_he_end
= use_he
| use_end
,
323 use_key_begin
= use_key
| use_begin
,
324 use_key_end
= use_key
| use_end
,
325 use_key_first
= use_key_begin
| use_first
,
328 use_data_begin
= use_data
| use_begin
,
329 use_data_end
= use_data
| use_end
,
330 use_data_first
= use_data_begin
| use_first
335 check_use (const char *data
, nscd_ssize_t first_free
, uint8_t *usemap
,
336 enum usekey use
, ref_t start
, size_t len
)
340 if (start
> first_free
|| start
+ len
> first_free
341 || (start
& BLOCK_ALIGN_M1
))
344 if (usemap
[start
] == use_not
)
346 /* Add the start marker. */
347 usemap
[start
] = use
| use_begin
;
351 if (usemap
[++start
] != use_not
)
356 /* Add the end marker. */
357 usemap
[start
] = use
| use_end
;
359 else if ((usemap
[start
] & ~use_first
) == ((use
| use_begin
) & ~use_first
))
361 /* Hash entries can't be shared. */
365 usemap
[start
] |= (use
& use_first
);
369 if (usemap
[++start
] != use
)
372 if (usemap
[++start
] != (use
| use_end
))
376 /* Points to a wrong object or somewhere in the middle. */
383 /* Verify data in persistent database. */
385 verify_persistent_db (void *mem
, struct database_pers_head
*readhead
, int dbnr
)
387 assert (dbnr
== pwddb
|| dbnr
== grpdb
|| dbnr
== hstdb
|| dbnr
== servdb
390 time_t now
= time (NULL
);
392 struct database_pers_head
*head
= mem
;
393 struct database_pers_head head_copy
= *head
;
395 /* Check that the header that was read matches the head in the database. */
396 if (memcmp (head
, readhead
, sizeof (*head
)) != 0)
399 /* First some easy tests: make sure the database header is sane. */
400 if (head
->version
!= DB_VERSION
401 || head
->header_size
!= sizeof (*head
)
402 /* We allow a timestamp to be one hour ahead of the current time.
403 This should cover daylight saving time changes. */
404 || head
->timestamp
> now
+ 60 * 60 + 60
405 || (head
->gc_cycle
& 1)
407 || (size_t) head
->module
> INT32_MAX
/ sizeof (ref_t
)
408 || (size_t) head
->data_size
> INT32_MAX
- head
->module
* sizeof (ref_t
)
409 || head
->first_free
< 0
410 || head
->first_free
> head
->data_size
411 || (head
->first_free
& BLOCK_ALIGN_M1
) != 0
412 || head
->maxnentries
< 0
413 || head
->maxnsearched
< 0)
416 uint8_t *usemap
= calloc (head
->first_free
, 1);
420 const char *data
= (char *) &head
->array
[roundup (head
->module
,
421 ALIGN
/ sizeof (ref_t
))];
423 nscd_ssize_t he_cnt
= 0;
424 for (nscd_ssize_t cnt
= 0; cnt
< head
->module
; ++cnt
)
426 ref_t trail
= head
->array
[cnt
];
430 while (work
!= ENDREF
)
432 if (! check_use (data
, head
->first_free
, usemap
, use_he
, work
,
433 sizeof (struct hashentry
)))
436 /* Now we know we can dereference the record. */
437 struct hashentry
*here
= (struct hashentry
*) (data
+ work
);
441 /* Make sure the record is for this type of service. */
442 if (here
->type
>= LASTREQ
443 || reqinfo
[here
->type
].db
!= &dbs
[dbnr
])
446 /* Validate boolean field value. */
447 if (here
->first
!= false && here
->first
!= true)
455 || here
->packet
> head
->first_free
456 || here
->packet
+ sizeof (struct datahead
) > head
->first_free
)
459 struct datahead
*dh
= (struct datahead
*) (data
+ here
->packet
);
461 if (! check_use (data
, head
->first_free
, usemap
,
462 use_data
| (here
->first
? use_first
: 0),
463 here
->packet
, dh
->allocsize
))
466 if (dh
->allocsize
< sizeof (struct datahead
)
467 || dh
->recsize
> dh
->allocsize
468 || (dh
->notfound
!= false && dh
->notfound
!= true)
469 || (dh
->usable
!= false && dh
->usable
!= true))
472 if (here
->key
< here
->packet
+ sizeof (struct datahead
)
473 || here
->key
> here
->packet
+ dh
->allocsize
474 || here
->key
+ here
->len
> here
->packet
+ dh
->allocsize
)
477 /* If keys can appear outside of data, this should be done
478 instead. But gc doesn't mark the data in that case. */
479 if (! check_use (data
, head
->first_free
, usemap
,
480 use_key
| (here
->first
? use_first
: 0),
481 here
->key
, here
->len
))
489 /* A circular list, this must not happen. */
492 trail
= ((struct hashentry
*) (data
+ trail
))->next
;
497 if (he_cnt
!= head
->nentries
)
500 /* See if all data and keys had at least one reference from
501 he->first == true hashentry. */
502 for (ref_t idx
= 0; idx
< head
->first_free
; ++idx
)
505 if (usemap
[idx
] == use_key_begin
)
508 if (usemap
[idx
] == use_data_begin
)
512 /* Finally, make sure the database hasn't changed since the first test. */
513 if (memcmp (mem
, &head_copy
, sizeof (*head
)) != 0)
526 # define EXTRA_O_FLAGS O_CLOEXEC
528 # define EXTRA_O_FLAGS 0
532 /* Initialize database information structures. */
536 /* Look up unprivileged uid/gid/groups before we start listening on the
538 if (server_user
!= NULL
)
539 begin_drop_privileges ();
542 /* No configuration for this value, assume a default. */
545 for (size_t cnt
= 0; cnt
< lastdb
; ++cnt
)
546 if (dbs
[cnt
].enabled
)
548 pthread_rwlock_init (&dbs
[cnt
].lock
, NULL
);
549 pthread_mutex_init (&dbs
[cnt
].memlock
, NULL
);
551 if (dbs
[cnt
].persistent
)
553 /* Try to open the appropriate file on disk. */
554 int fd
= open (dbs
[cnt
].db_filename
, O_RDWR
| EXTRA_O_FLAGS
);
561 struct database_pers_head head
;
562 ssize_t n
= TEMP_FAILURE_RETRY (read (fd
, &head
,
564 if (n
!= sizeof (head
) || fstat64 (fd
, &st
) != 0)
567 /* The code is single-threaded at this point so
568 using strerror is just fine. */
569 msg
= strerror (errno
);
571 dbg_log (_("invalid persistent database file \"%s\": %s"),
572 dbs
[cnt
].db_filename
, msg
);
573 unlink (dbs
[cnt
].db_filename
);
575 else if (head
.module
== 0 && head
.data_size
== 0)
577 /* The file has been created, but the head has not
578 been initialized yet. */
579 msg
= _("uninitialized header");
582 else if (head
.header_size
!= (int) sizeof (head
))
584 msg
= _("header size does not match");
587 else if ((total
= (sizeof (head
)
588 + roundup (head
.module
* sizeof (ref_t
),
592 || total
< sizeof (head
))
594 msg
= _("file size does not match");
597 /* Note we map with the maximum size allowed for the
598 database. This is likely much larger than the
599 actual file size. This is OK on most OSes since
600 extensions of the underlying file will
601 automatically translate more pages available for
603 else if ((mem
= mmap (NULL
, dbs
[cnt
].max_db_size
,
604 PROT_READ
| PROT_WRITE
,
608 else if (!verify_persistent_db (mem
, &head
, cnt
))
611 msg
= _("verification failed");
616 /* Success. We have the database. */
618 dbs
[cnt
].memsize
= total
;
619 dbs
[cnt
].data
= (char *)
620 &dbs
[cnt
].head
->array
[roundup (dbs
[cnt
].head
->module
,
621 ALIGN
/ sizeof (ref_t
))];
622 dbs
[cnt
].mmap_used
= true;
624 if (dbs
[cnt
].suggested_module
> head
.module
)
625 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
630 /* We also need a read-only descriptor. */
633 dbs
[cnt
].ro_fd
= open (dbs
[cnt
].db_filename
,
634 O_RDONLY
| EXTRA_O_FLAGS
);
635 if (dbs
[cnt
].ro_fd
== -1)
637 cannot create read-only descriptor for \"%s\"; no mmap"),
638 dbs
[cnt
].db_filename
);
641 // XXX Shall we test whether the descriptors actually
642 // XXX point to the same file?
645 /* Close the file descriptors in case something went
646 wrong in which case the variables have not been
651 else if (errno
== EACCES
)
652 do_exit (EXIT_FAILURE
, 0, _("cannot access '%s'"),
653 dbs
[cnt
].db_filename
);
656 if (dbs
[cnt
].head
== NULL
)
658 /* No database loaded. Allocate the data structure,
660 struct database_pers_head head
;
661 size_t total
= (sizeof (head
)
662 + roundup (dbs
[cnt
].suggested_module
663 * sizeof (ref_t
), ALIGN
)
664 + (dbs
[cnt
].suggested_module
665 * DEFAULT_DATASIZE_PER_BUCKET
));
667 /* Try to create the database. If we do not need a
668 persistent database create a temporary file. */
671 if (dbs
[cnt
].persistent
)
673 fd
= open (dbs
[cnt
].db_filename
,
674 O_RDWR
| O_CREAT
| O_EXCL
| O_TRUNC
| EXTRA_O_FLAGS
,
676 if (fd
!= -1 && dbs
[cnt
].shared
)
677 ro_fd
= open (dbs
[cnt
].db_filename
,
678 O_RDONLY
| EXTRA_O_FLAGS
);
682 char fname
[] = _PATH_NSCD_XYZ_DB_TMP
;
683 fd
= mkostemp (fname
, EXTRA_O_FLAGS
);
685 /* We do not need the file name anymore after we
686 opened another file descriptor in read-only mode. */
690 ro_fd
= open (fname
, O_RDONLY
| EXTRA_O_FLAGS
);
700 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
701 dbnames
[cnt
], dbs
[cnt
].db_filename
);
702 do_exit (1, 0, NULL
);
705 if (dbs
[cnt
].persistent
)
706 dbg_log (_("cannot create %s; no persistent database used"),
707 dbs
[cnt
].db_filename
);
709 dbg_log (_("cannot create %s; no sharing possible"),
710 dbs
[cnt
].db_filename
);
712 dbs
[cnt
].persistent
= 0;
713 // XXX remember: no mmap
717 /* Tell the user if we could not create the read-only
719 if (ro_fd
== -1 && dbs
[cnt
].shared
)
721 cannot create read-only descriptor for \"%s\"; no mmap"),
722 dbs
[cnt
].db_filename
);
724 /* Before we create the header, initialize the hash
725 table. That way if we get interrupted while writing
726 the header we can recognize a partially initialized
728 size_t ps
= sysconf (_SC_PAGESIZE
);
730 assert (~ENDREF
== 0);
731 memset (tmpbuf
, '\xff', ps
);
733 size_t remaining
= dbs
[cnt
].suggested_module
* sizeof (ref_t
);
734 off_t offset
= sizeof (head
);
737 if (offset
% ps
!= 0)
739 towrite
= MIN (remaining
, ps
- (offset
% ps
));
740 if (pwrite (fd
, tmpbuf
, towrite
, offset
) != towrite
)
743 remaining
-= towrite
;
746 while (remaining
> ps
)
748 if (pwrite (fd
, tmpbuf
, ps
, offset
) == -1)
755 && pwrite (fd
, tmpbuf
, remaining
, offset
) != remaining
)
758 /* Create the header of the file. */
759 struct database_pers_head head
=
761 .version
= DB_VERSION
,
762 .header_size
= sizeof (head
),
763 .module
= dbs
[cnt
].suggested_module
,
764 .data_size
= (dbs
[cnt
].suggested_module
765 * DEFAULT_DATASIZE_PER_BUCKET
),
770 if ((TEMP_FAILURE_RETRY (write (fd
, &head
, sizeof (head
)))
772 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd
, 0, total
))
774 || (mem
= mmap (NULL
, dbs
[cnt
].max_db_size
,
775 PROT_READ
| PROT_WRITE
,
776 MAP_SHARED
, fd
, 0)) == MAP_FAILED
)
779 unlink (dbs
[cnt
].db_filename
);
780 dbg_log (_("cannot write to database file %s: %s"),
781 dbs
[cnt
].db_filename
, strerror (errno
));
782 dbs
[cnt
].persistent
= 0;
788 dbs
[cnt
].data
= (char *)
789 &dbs
[cnt
].head
->array
[roundup (dbs
[cnt
].head
->module
,
790 ALIGN
/ sizeof (ref_t
))];
791 dbs
[cnt
].memsize
= total
;
792 dbs
[cnt
].mmap_used
= true;
794 /* Remember the descriptors. */
796 dbs
[cnt
].ro_fd
= ro_fd
;
808 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
809 /* We do not check here whether the O_CLOEXEC provided to the
810 open call was successful or not. The two fcntl calls are
811 only performed once each per process start-up and therefore
812 is not noticeable at all. */
814 && ((dbs
[cnt
].wr_fd
!= -1
815 && fcntl (dbs
[cnt
].wr_fd
, F_SETFD
, FD_CLOEXEC
) == -1)
816 || (dbs
[cnt
].ro_fd
!= -1
817 && fcntl (dbs
[cnt
].ro_fd
, F_SETFD
, FD_CLOEXEC
) == -1)))
820 cannot set socket to close on exec: %s; disabling paranoia mode"),
826 if (dbs
[cnt
].head
== NULL
)
828 /* We do not use the persistent database. Just
829 create an in-memory data structure. */
830 assert (! dbs
[cnt
].persistent
);
832 dbs
[cnt
].head
= xmalloc (sizeof (struct database_pers_head
)
833 + (dbs
[cnt
].suggested_module
835 memset (dbs
[cnt
].head
, '\0', sizeof (struct database_pers_head
));
836 assert (~ENDREF
== 0);
837 memset (dbs
[cnt
].head
->array
, '\xff',
838 dbs
[cnt
].suggested_module
* sizeof (ref_t
));
839 dbs
[cnt
].head
->module
= dbs
[cnt
].suggested_module
;
840 dbs
[cnt
].head
->data_size
= (DEFAULT_DATASIZE_PER_BUCKET
841 * dbs
[cnt
].head
->module
);
842 dbs
[cnt
].data
= xmalloc (dbs
[cnt
].head
->data_size
);
843 dbs
[cnt
].head
->first_free
= 0;
846 assert (dbs
[cnt
].ro_fd
== -1);
850 /* Create the socket. */
851 #ifndef __ASSUME_SOCK_CLOEXEC
853 if (have_sock_cloexec
>= 0)
856 sock
= socket (AF_UNIX
, SOCK_STREAM
| SOCK_CLOEXEC
| SOCK_NONBLOCK
, 0);
857 #ifndef __ASSUME_SOCK_CLOEXEC
858 if (have_sock_cloexec
== 0)
859 have_sock_cloexec
= sock
!= -1 || errno
!= EINVAL
? 1 : -1;
862 #ifndef __ASSUME_SOCK_CLOEXEC
863 if (have_sock_cloexec
< 0)
864 sock
= socket (AF_UNIX
, SOCK_STREAM
, 0);
868 dbg_log (_("cannot open socket: %s"), strerror (errno
));
869 do_exit (errno
== EACCES
? 4 : 1, 0, NULL
);
871 /* Bind a name to the socket. */
872 struct sockaddr_un sock_addr
;
873 sock_addr
.sun_family
= AF_UNIX
;
874 strcpy (sock_addr
.sun_path
, _PATH_NSCDSOCKET
);
875 if (bind (sock
, (struct sockaddr
*) &sock_addr
, sizeof (sock_addr
)) < 0)
877 dbg_log ("%s: %s", _PATH_NSCDSOCKET
, strerror (errno
));
878 do_exit (errno
== EACCES
? 4 : 1, 0, NULL
);
881 #ifndef __ASSUME_SOCK_CLOEXEC
882 if (have_sock_cloexec
< 0)
884 /* We don't want to get stuck on accept. */
885 int fl
= fcntl (sock
, F_GETFL
);
886 if (fl
== -1 || fcntl (sock
, F_SETFL
, fl
| O_NONBLOCK
) == -1)
888 dbg_log (_("cannot change socket to nonblocking mode: %s"),
890 do_exit (1, 0, NULL
);
893 /* The descriptor needs to be closed on exec. */
894 if (paranoia
&& fcntl (sock
, F_SETFD
, FD_CLOEXEC
) == -1)
896 dbg_log (_("cannot set socket to close on exec: %s"),
898 do_exit (1, 0, NULL
);
903 /* Set permissions for the socket. */
904 chmod (_PATH_NSCDSOCKET
, DEFFILEMODE
);
906 /* Set the socket up to accept connections. */
907 if (listen (sock
, SOMAXCONN
) < 0)
909 dbg_log (_("cannot enable socket to accept connections: %s"),
911 do_exit (1, 0, NULL
);
915 if (dbs
[hstdb
].enabled
)
917 /* Try to open netlink socket to monitor network setting changes. */
918 nl_status_fd
= socket (AF_NETLINK
,
919 SOCK_RAW
| SOCK_CLOEXEC
| SOCK_NONBLOCK
,
921 if (nl_status_fd
!= -1)
923 struct sockaddr_nl snl
;
924 memset (&snl
, '\0', sizeof (snl
));
925 snl
.nl_family
= AF_NETLINK
;
926 /* XXX Is this the best set to use? */
927 snl
.nl_groups
= (RTMGRP_IPV4_IFADDR
| RTMGRP_TC
| RTMGRP_IPV4_MROUTE
928 | RTMGRP_IPV4_ROUTE
| RTMGRP_IPV4_RULE
929 | RTMGRP_IPV6_IFADDR
| RTMGRP_IPV6_MROUTE
930 | RTMGRP_IPV6_ROUTE
| RTMGRP_IPV6_IFINFO
931 | RTMGRP_IPV6_PREFIX
);
933 if (bind (nl_status_fd
, (struct sockaddr
*) &snl
, sizeof (snl
)) != 0)
935 close (nl_status_fd
);
940 /* Start the timestamp process. */
941 dbs
[hstdb
].head
->extra_data
[NSCD_HST_IDX_CONF_TIMESTAMP
]
942 = __bump_nl_timestamp ();
944 # ifndef __ASSUME_SOCK_CLOEXEC
945 if (have_sock_cloexec
< 0)
947 /* We don't want to get stuck on accept. */
948 int fl
= fcntl (nl_status_fd
, F_GETFL
);
950 || fcntl (nl_status_fd
, F_SETFL
, fl
| O_NONBLOCK
) == -1)
953 cannot change socket to nonblocking mode: %s"),
955 do_exit (1, 0, NULL
);
958 /* The descriptor needs to be closed on exec. */
960 && fcntl (nl_status_fd
, F_SETFD
, FD_CLOEXEC
) == -1)
962 dbg_log (_("cannot set socket to close on exec: %s"),
964 do_exit (1, 0, NULL
);
973 /* Change to unprivileged uid/gid/groups if specified in config file */
974 if (server_user
!= NULL
)
975 finish_drop_privileges ();
979 /* Register the file in FINFO as a traced file for the database DBS[DBIX].
981 We support registering multiple files per database. Each call to
982 register_traced_file adds to the list of registered files.
984 When we prune the database, either through timeout or a request to
985 invalidate, we will check to see if any of the registered files has changed.
986 When we accept new connections to handle a cache request we will also
987 check to see if any of the registered files has changed.
989 If we have inotify support then we install an inotify fd to notify us of
990 file deletion or modification, both of which will require we invalidate
991 the cache for the database. Without inotify support we stat the file and
992 store st_mtime to determine if the file has been modified. */
994 register_traced_file (size_t dbidx
, struct traced_file
*finfo
)
996 /* If the database is disabled or file checking is disabled
997 then ignore the registration. */
998 if (! dbs
[dbidx
].enabled
|| ! dbs
[dbidx
].check_file
)
1001 if (__glibc_unlikely (debug_level
> 0))
1002 dbg_log (_("register trace file %s for database %s"),
1003 finfo
->fname
, dbnames
[dbidx
]);
1007 || (finfo
->inotify_descr
= inotify_add_watch (inotify_fd
, finfo
->fname
,
1012 /* We need the modification date of the file. */
1015 if (stat64 (finfo
->fname
, &st
) < 0)
1017 /* We cannot stat() the file, disable file checking. */
1018 dbg_log (_("cannot stat() file `%s': %s"),
1019 finfo
->fname
, strerror (errno
));
1023 finfo
->inotify_descr
= -1;
1024 finfo
->mtime
= st
.st_mtime
;
1027 /* Queue up the file name. */
1028 finfo
->next
= dbs
[dbidx
].traced_files
;
1029 dbs
[dbidx
].traced_files
= finfo
;
1033 /* Close the connections. */
1035 close_sockets (void)
1042 invalidate_cache (char *key
, int fd
)
1047 for (number
= pwddb
; number
< lastdb
; ++number
)
1048 if (strcmp (key
, dbnames
[number
]) == 0)
1050 if (number
== hstdb
)
1052 struct traced_file
*runp
= dbs
[hstdb
].traced_files
;
1053 while (runp
!= NULL
)
1054 if (runp
->call_res_init
)
1065 if (number
== lastdb
)
1068 writeall (fd
, &resp
, sizeof (resp
));
1072 if (dbs
[number
].enabled
)
1074 pthread_mutex_lock (&dbs
[number
].prune_run_lock
);
1075 prune_cache (&dbs
[number
], LONG_MAX
, fd
);
1076 pthread_mutex_unlock (&dbs
[number
].prune_run_lock
);
1081 writeall (fd
, &resp
, sizeof (resp
));
1088 send_ro_fd (struct database_dyn
*db
, char *key
, int fd
)
1090 /* If we do not have a read-only file descriptor, do nothing. */
1091 if (db
->ro_fd
== -1)
1094 /* We need to send some data along with the descriptor. */
1095 uint64_t mapsize
= (db
->head
->data_size
1096 + roundup (db
->head
->module
* sizeof (ref_t
), ALIGN
)
1097 + sizeof (struct database_pers_head
));
1098 struct iovec iov
[2];
1099 iov
[0].iov_base
= key
;
1100 iov
[0].iov_len
= strlen (key
) + 1;
1101 iov
[1].iov_base
= &mapsize
;
1102 iov
[1].iov_len
= sizeof (mapsize
);
1104 /* Prepare the control message to transfer the descriptor. */
1108 char bytes
[CMSG_SPACE (sizeof (int))];
1110 struct msghdr msg
= { .msg_iov
= iov
, .msg_iovlen
= 2,
1111 .msg_control
= buf
.bytes
,
1112 .msg_controllen
= sizeof (buf
) };
1113 struct cmsghdr
*cmsg
= CMSG_FIRSTHDR (&msg
);
1115 cmsg
->cmsg_level
= SOL_SOCKET
;
1116 cmsg
->cmsg_type
= SCM_RIGHTS
;
1117 cmsg
->cmsg_len
= CMSG_LEN (sizeof (int));
1119 int *ip
= (int *) CMSG_DATA (cmsg
);
1122 msg
.msg_controllen
= cmsg
->cmsg_len
;
1124 /* Send the control message. We repeat when we are interrupted but
1125 everything else is ignored. */
1126 #ifndef MSG_NOSIGNAL
1127 # define MSG_NOSIGNAL 0
1129 (void) TEMP_FAILURE_RETRY (sendmsg (fd
, &msg
, MSG_NOSIGNAL
));
1131 if (__glibc_unlikely (debug_level
> 0))
1132 dbg_log (_("provide access to FD %d, for %s"), db
->ro_fd
, key
);
1134 #endif /* SCM_RIGHTS */
1137 /* Handle new request. */
1139 handle_request (int fd
, request_header
*req
, void *key
, uid_t uid
, pid_t pid
)
1141 if (__builtin_expect (req
->version
, NSCD_VERSION
) != NSCD_VERSION
)
1143 if (debug_level
> 0)
1145 cannot handle old request version %d; current version is %d"),
1146 req
->version
, NSCD_VERSION
);
1150 /* Perform the SELinux check before we go on to the standard checks. */
1151 if (selinux_enabled
&& nscd_request_avc_has_perm (fd
, req
->type
) != 0)
1153 if (debug_level
> 0)
1162 snprintf (buf
, sizeof (buf
), "/proc/%ld/exe", (long int) pid
);
1163 ssize_t n
= readlink (buf
, buf
, sizeof (buf
) - 1);
1167 request from %ld not handled due to missing permission"), (long int) pid
);
1172 request from '%s' [%ld] not handled due to missing permission"),
1173 buf
, (long int) pid
);
1176 dbg_log (_("request not handled due to missing permission"));
1182 struct database_dyn
*db
= reqinfo
[req
->type
].db
;
1184 /* See whether we can service the request from the cache. */
1185 if (__builtin_expect (reqinfo
[req
->type
].data_request
, true))
1187 if (__builtin_expect (debug_level
, 0) > 0)
1189 if (req
->type
== GETHOSTBYADDR
|| req
->type
== GETHOSTBYADDRv6
)
1191 char buf
[INET6_ADDRSTRLEN
];
1193 dbg_log ("\t%s (%s)", serv2str
[req
->type
],
1194 inet_ntop (req
->type
== GETHOSTBYADDR
1195 ? AF_INET
: AF_INET6
,
1196 key
, buf
, sizeof (buf
)));
1199 dbg_log ("\t%s (%s)", serv2str
[req
->type
], (char *) key
);
1202 /* Is this service enabled? */
1203 if (__glibc_unlikely (!db
->enabled
))
1205 /* No, send the prepared record. */
1206 if (TEMP_FAILURE_RETRY (send (fd
, db
->disabled_iov
->iov_base
,
1207 db
->disabled_iov
->iov_len
,
1209 != (ssize_t
) db
->disabled_iov
->iov_len
1210 && __builtin_expect (debug_level
, 0) > 0)
1212 /* We have problems sending the result. */
1214 dbg_log (_("cannot write result: %s"),
1215 strerror_r (errno
, buf
, sizeof (buf
)));
1221 /* Be sure we can read the data. */
1222 if (__glibc_unlikely (pthread_rwlock_tryrdlock (&db
->lock
) != 0))
1224 ++db
->head
->rdlockdelayed
;
1225 pthread_rwlock_rdlock (&db
->lock
);
1228 /* See whether we can handle it from the cache. */
1229 struct datahead
*cached
;
1230 cached
= (struct datahead
*) cache_search (req
->type
, key
, req
->key_len
,
1234 /* Hurray it's in the cache. */
1237 #ifdef HAVE_SENDFILE
1238 if (__glibc_likely (db
->mmap_used
))
1240 assert (db
->wr_fd
!= -1);
1241 assert ((char *) cached
->data
> (char *) db
->data
);
1242 assert ((char *) cached
->data
- (char *) db
->head
1244 <= (sizeof (struct database_pers_head
)
1245 + db
->head
->module
* sizeof (ref_t
)
1246 + db
->head
->data_size
));
1247 nwritten
= sendfileall (fd
, db
->wr_fd
,
1248 (char *) cached
->data
1249 - (char *) db
->head
, cached
->recsize
);
1250 # ifndef __ASSUME_SENDFILE
1251 if (nwritten
== -1 && errno
== ENOSYS
)
1256 # ifndef __ASSUME_SENDFILE
1260 nwritten
= writeall (fd
, cached
->data
, cached
->recsize
);
1262 if (nwritten
!= cached
->recsize
1263 && __builtin_expect (debug_level
, 0) > 0)
1265 /* We have problems sending the result. */
1267 dbg_log (_("cannot write result: %s"),
1268 strerror_r (errno
, buf
, sizeof (buf
)));
1271 pthread_rwlock_unlock (&db
->lock
);
1276 pthread_rwlock_unlock (&db
->lock
);
1278 else if (__builtin_expect (debug_level
, 0) > 0)
1280 if (req
->type
== INVALIDATE
)
1281 dbg_log ("\t%s (%s)", serv2str
[req
->type
], (char *) key
);
1283 dbg_log ("\t%s", serv2str
[req
->type
]);
1286 /* Handle the request. */
1290 addpwbyname (db
, fd
, req
, key
, uid
);
1294 addpwbyuid (db
, fd
, req
, key
, uid
);
1298 addgrbyname (db
, fd
, req
, key
, uid
);
1302 addgrbygid (db
, fd
, req
, key
, uid
);
1306 addhstbyname (db
, fd
, req
, key
, uid
);
1309 case GETHOSTBYNAMEv6
:
1310 addhstbynamev6 (db
, fd
, req
, key
, uid
);
1314 addhstbyaddr (db
, fd
, req
, key
, uid
);
1317 case GETHOSTBYADDRv6
:
1318 addhstbyaddrv6 (db
, fd
, req
, key
, uid
);
1322 addhstai (db
, fd
, req
, key
, uid
);
1326 addinitgroups (db
, fd
, req
, key
, uid
);
1330 addservbyname (db
, fd
, req
, key
, uid
);
1334 addservbyport (db
, fd
, req
, key
, uid
);
1338 addgetnetgrent (db
, fd
, req
, key
, uid
);
1342 addinnetgr (db
, fd
, req
, key
, uid
);
1349 /* Get the caller's credentials. */
1351 struct ucred caller
;
1352 socklen_t optlen
= sizeof (caller
);
1354 if (getsockopt (fd
, SOL_SOCKET
, SO_PEERCRED
, &caller
, &optlen
) < 0)
1358 dbg_log (_("error getting caller's id: %s"),
1359 strerror_r (errno
, buf
, sizeof (buf
)));
1365 /* Some systems have no SO_PEERCRED implementation. They don't
1366 care about security, so we don't either. */
1371 /* Accept shutdown, getstat and invalidate only from root. For
1372 the stat call also allow the user specified in the config file. */
1373 if (req
->type
== GETSTAT
)
1375 if (uid
== 0 || uid
== stat_uid
)
1376 send_stats (fd
, dbs
);
1380 if (req
->type
== INVALIDATE
)
1381 invalidate_cache (key
, fd
);
1383 termination_handler (0);
1393 send_ro_fd (reqinfo
[req
->type
].db
, key
, fd
);
1398 /* Ignore the command, it's nothing we know. */
1404 /* Restart the process.

   Steps visible below:
   - read the original command line from /proc/self/cmdline into an
     alloca buffer (grown with extend_alloca when it fills up) and
     split it at the NUL bytes into an ARGV array;
   - if SERVER_USER is set, switch back to OLD_UID and OLD_GID;
   - change back to the directory OLDCWD;
   - for every enabled database, zero the timestamp, save and zero
     nscd_certainly_running, and msync persistent databases;
   - execv the target of the /proc/self/exe link, then /proc/self/exe
     itself as a fallback.
   Every visible failure message ends with the words disabling
   paranoia mode.  If no exec succeeds the remaining lines restore the
   server identity, change directory to the root and put back the
   saved timestamp state of the databases.
   NOTE(review): the signature line, braces and a number of statements
   are missing from this extract -- confirm details against the full
   file.  */
1408 /* First determine the parameters. We do not use the parameters
1409 passed to main() since in case nscd is started by running the
1410 dynamic linker this will not work. Yes, this is not the usual
1411 case but nscd is part of glibc and we occasionally do this. */
1412 size_t buflen
= 1024;
1413 char *buf
= alloca (buflen
);
1415 int fd
= open ("/proc/self/cmdline", O_RDONLY
);
1419 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1428 ssize_t n
= TEMP_FAILURE_RETRY (read (fd
, buf
+ readlen
,
1433 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1443 if (readlen
< buflen
)
1446 /* We might have to extend the buffer. */
1447 size_t old_buflen
= buflen
;
1448 char *newp
= extend_alloca (buf
, buflen
, 2 * buflen
);
1449 buf
= memmove (newp
, buf
, old_buflen
);
1454 /* Parse the command line. Worst case scenario: every two
1455 characters form one parameter (one character plus NUL). */
1456 char **argv
= alloca ((readlen
/ 2 + 1) * sizeof (argv
[0]));
1460 while (cp
< buf
+ readlen
)
1463 cp
= (char *) rawmemchr (cp
, '\0') + 1;
1467 /* Second, change back to the old user if we changed it. */
1468 if (server_user
!= NULL
)
1470 if (setresuid (old_uid
, old_uid
, old_uid
) != 0)
1473 cannot change to old UID: %s; disabling paranoia mode"),
1480 if (setresgid (old_gid
, old_gid
, old_gid
) != 0)
1483 cannot change to old GID: %s; disabling paranoia mode"),
1486 setuid (server_uid
);
1492 /* Next change back to the old working directory. */
1493 if (chdir (oldcwd
) == -1)
1496 cannot change to old working directory: %s; disabling paranoia mode"),
1499 if (server_user
!= NULL
)
1501 setuid (server_uid
);
1502 setgid (server_gid
);
1508 /* Synchronize memory. */
1509 int32_t certainly
[lastdb
];
1510 for (int cnt
= 0; cnt
< lastdb
; ++cnt
)
1511 if (dbs
[cnt
].enabled
)
1513 /* Make sure nobody keeps using the database. */
1514 dbs
[cnt
].head
->timestamp
= 0;
1515 certainly
[cnt
] = dbs
[cnt
].head
->nscd_certainly_running
;
1516 dbs
[cnt
].head
->nscd_certainly_running
= 0;
1518 if (dbs
[cnt
].persistent
)
1520 msync (dbs
[cnt
].head
, dbs
[cnt
].memsize
, MS_ASYNC
);
1523 /* The preparations are done. */
1525 char pathbuf
[PATH_MAX
];
1529 /* Try to exec the real nscd program so the process name (as reported
1530 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1531 if readlink or the exec with the result of the readlink call fails. */
1532 ssize_t n
= readlink ("/proc/self/exe", pathbuf
, sizeof (pathbuf
) - 1);
1536 execv (pathbuf
, argv
);
1538 execv ("/proc/self/exe", argv
);
1540 /* If we come here, we will never be able to re-exec. */
1541 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1544 if (server_user
!= NULL
)
1546 setuid (server_uid
);
1547 setgid (server_gid
);
1549 if (chdir ("/") != 0)
1550 dbg_log (_("cannot change current working directory to \"/\": %s"),
1554 /* Reenable the databases. */
1555 time_t now
= time (NULL
);
1556 for (int cnt
= 0; cnt
< lastdb
; ++cnt
)
1557 if (dbs
[cnt
].enabled
)
1559 dbs
[cnt
].head
->timestamp
= now
;
1560 dbs
[cnt
].head
->nscd_certainly_running
= certainly
[cnt
];
1565 /* List of file descriptors. Entries are chained through NEXT. The
   code that queues descriptors treats a NULL NEXT as an unused slot
   and links a single queued entry to itself, i.e. the ready entries
   form a circular list.
   NOTE(review): the opening of the type definition and its FD member
   are missing from this extract. */
1569 struct fdlist
*next
;
1571 /* Memory allocated for the list. */
1572 static struct fdlist
*fdlist
;
1573 /* List of currently ready-to-read file descriptors. NULL means that
   nothing is queued. */
1574 static struct fdlist
*readylist
;
1576 /* Conditional variable and mutex to signal availability of entries in
1577 READYLIST. The condvar is initialized dynamically since we might
1578 use a different clock depending on availability.
   NOTE(review): the declaration below uses the static initializer
   PTHREAD_COND_INITIALIZER, so the remark about dynamic initialization
   does not match what is visible here -- confirm whether the comment
   is stale. */
1579 static pthread_cond_t readylist_cond
= PTHREAD_COND_INITIALIZER
;
1580 static pthread_mutex_t readylist_lock
= PTHREAD_MUTEX_INITIALIZER
;
1582 /* The clock to use with the condvar. Starts out as CLOCK_REALTIME;
   start_threads switches it to CLOCK_MONOTONIC when that clock can be
   set on the condition variable attribute. */
1583 static clockid_t timeout_clock
= CLOCK_REALTIME
;
1585 /* Number of threads ready to handle the READYLIST. */
1586 static unsigned long int nready
;
1589 /* Function for the clean-up threads.

   P is not dereferenced: its value, cast to long int, is the index
   into DBS of the database this thread looks after, and that database
   must be enabled.  The function is marked noreturn.  After the setup
   it waits on the prune_cond of its database with an absolute timeout
   and runs prune_cache when the wait timed out, when the recorded
   wakeup_time has been reached, or when clear_cache is set; otherwise
   it only recomputes the remaining wait.
   NOTE(review): the return type line, braces, the loop header, the
   else keywords and a few statements (for instance the one refreshing
   NOW after the wait) are missing from this extract.  */
1591 __attribute__ ((__noreturn__
))
1592 nscd_run_prune (void *p
)
1594 const long int my_number
= (long int) p
;
1595 assert (dbs
[my_number
].enabled
);
1597 int dont_need_update
= setup_thread (&dbs
[my_number
]);
1599 time_t now
= time (NULL
);
1601 /* We are running. */
1602 dbs
[my_number
].head
->timestamp
= now
;
1604 struct timespec prune_ts
;
1605 if (__glibc_unlikely (clock_gettime (timeout_clock
, &prune_ts
) == -1))
1606 /* Should never happen. */
1609 /* Compute the initial timeout time. Prevent all the timers to go
1610 off at the same time by adding a db-based value. */
1611 prune_ts
.tv_sec
+= CACHE_PRUNE_INTERVAL
+ my_number
;
1612 dbs
[my_number
].wakeup_time
= now
+ CACHE_PRUNE_INTERVAL
+ my_number
;
1614 pthread_mutex_t
*prune_lock
= &dbs
[my_number
].prune_lock
;
1615 pthread_mutex_t
*prune_run_lock
= &dbs
[my_number
].prune_run_lock
;
1616 pthread_cond_t
*prune_cond
= &dbs
[my_number
].prune_cond
;
1618 pthread_mutex_lock (prune_lock
);
1621 /* Wait, but not forever. The wait is skipped altogether when a
   cache clear has already been requested. */
1623 if (! dbs
[my_number
].clear_cache
)
1624 e
= pthread_cond_timedwait (prune_cond
, prune_lock
, &prune_ts
);
1625 assert (__builtin_expect (e
== 0 || e
== ETIMEDOUT
, 1));
1629 if (e
== ETIMEDOUT
|| now
>= dbs
[my_number
].wakeup_time
1630 || dbs
[my_number
].clear_cache
)
1632 /* We will determine the new timeout values based on the
1633 cache content. Should there be concurrent additions to
1634 the cache which are not accounted for in the cache
1635 pruning we want to know about it. Therefore set the
1636 timeout to the maximum. It will be decreased when adding
1637 new entries to the cache, if necessary. */
1638 dbs
[my_number
].wakeup_time
= MAX_TIMEOUT_VALUE
;
1640 /* Unconditionally reset the flag. When a clear was requested the
   time handed to prune_cache is LONG_MAX instead of NOW, presumably
   so that every entry counts as expired. */
1641 time_t prune_now
= dbs
[my_number
].clear_cache
? LONG_MAX
: now
;
1642 dbs
[my_number
].clear_cache
= 0;
1644 pthread_mutex_unlock (prune_lock
);
1646 /* We use a separate lock for running the prune function (instead
1647 of keeping prune_lock locked) because this enables concurrent
1648 invocations of cache_add which might modify the timeout value. */
1649 pthread_mutex_lock (prune_run_lock
);
1650 next_wait
= prune_cache (&dbs
[my_number
], prune_now
, -1);
1651 pthread_mutex_unlock (prune_run_lock
);
/* Never wait less than CACHE_PRUNE_INTERVAL between prune runs. */
1653 next_wait
= MAX (next_wait
, CACHE_PRUNE_INTERVAL
);
1654 /* If clients cannot determine for sure whether nscd is running
1655 we need to wake up occasionally to update the timestamp.
1656 Wait 90% of the update period. */
1657 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1658 if (__glibc_unlikely (! dont_need_update
))
1660 next_wait
= MIN (UPDATE_MAPPING_TIMEOUT
, next_wait
);
1661 dbs
[my_number
].head
->timestamp
= now
;
1664 pthread_mutex_lock (prune_lock
);
1666 /* Make it known when we will wake up again. An earlier wakeup_time
   stored while prune_lock was released is kept. */
1667 if (now
+ next_wait
< dbs
[my_number
].wakeup_time
)
1668 dbs
[my_number
].wakeup_time
= now
+ next_wait
;
1670 next_wait
= dbs
[my_number
].wakeup_time
- now
;
1673 /* The cache was just pruned. Do not do it again now. Just
1674 use the new timeout value. */
1675 next_wait
= dbs
[my_number
].wakeup_time
- now
;
1677 if (clock_gettime (timeout_clock
, &prune_ts
) == -1)
1678 /* Should never happen. */
1681 /* Compute next timeout time. */
1682 prune_ts
.tv_sec
+= next_wait
;
1687 /* This is the main loop. It is replicated in different threads but
1688 the use of the ready list makes sure only one thread handles an
1689 incoming connection.

   Visible flow: with readylist_lock held the thread sleeps on
   readylist_cond while READYLIST is NULL, then unlinks the entry
   READYLIST->next and releases the lock.  It reads a request header
   into REQ, checks that the request type lies in
   [GETPWBYNAME, LASTREQ) and that the key length lies in
   [0, MAXKEYLEN], reads the key into KEYBUF, NUL-terminates it and
   passes descriptor, header, key, UID and PID to handle_request.
   Afterwards readylist_lock is taken again.
   NOTE(review): the return type line, braces, the loop header and the
   error exits are missing from this extract; P is not referenced in
   the visible lines.  */
1691 __attribute__ ((__noreturn__
))
1692 nscd_run_worker (void *p
)
1696 /* Initial locking. */
1697 pthread_mutex_lock (&readylist_lock
);
1699 /* One more thread available. */
1704 while (readylist
== NULL
)
1705 pthread_cond_wait (&readylist_cond
, &readylist_lock
);
1707 struct fdlist
*it
= readylist
->next
;
1708 if (readylist
->next
== readylist
)
1709 /* Just one entry on the list. */
1712 readylist
->next
= it
->next
;
1714 /* Extract the information and mark the record ready to be used
1719 /* One more thread available. */
1722 /* We are done with the list. */
1723 pthread_mutex_unlock (&readylist_lock
);
1725 #ifndef __ASSUME_ACCEPT4
1726 if (have_accept4
< 0)
1728 /* We do not want to block on a short read or so. */
1729 int fl
= fcntl (fd
, F_GETFL
);
1730 if (fl
== -1 || fcntl (fd
, F_SETFL
, fl
| O_NONBLOCK
) == -1)
1735 /* Now read the request. */
1737 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd
, &req
, sizeof (req
)))
1738 != sizeof (req
), 0))
1740 /* We failed to read data. Note that this also might mean we
1741 failed because we would have blocked. */
1742 if (debug_level
> 0)
1743 dbg_log (_("short read while reading request: %s"),
1744 strerror_r (errno
, buf
, sizeof (buf
)));
1748 /* Check whether this is a valid request type. */
1749 if (req
.type
< GETPWBYNAME
|| req
.type
>= LASTREQ
)
1752 /* Some systems have no SO_PEERCRED implementation. They don't
1753 care about security so we don't as well. */
1758 if (__glibc_unlikely (debug_level
> 0))
1760 struct ucred caller
;
1761 socklen_t optlen
= sizeof (caller
);
1763 if (getsockopt (fd
, SOL_SOCKET
, SO_PEERCRED
, &caller
, &optlen
) == 0)
1767 const pid_t pid
= 0;
1770 /* It should not be possible to crash the nscd with a silly
1771 request (i.e., a terribly large key). We limit the size to 1kb. */
1772 if (__builtin_expect (req
.key_len
, 1) < 0
1773 || __builtin_expect (req
.key_len
, 1) > MAXKEYLEN
)
1775 if (debug_level
> 0)
1776 dbg_log (_("key length in request too long: %d"), req
.key_len
);
1781 char keybuf
[MAXKEYLEN
+ 1];
1783 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd
, keybuf
,
1787 /* Again, this can also mean we would have blocked. */
1788 if (debug_level
> 0)
1789 dbg_log (_("short read while reading request key: %s"),
1790 strerror_r (errno
, buf
, sizeof (buf
)));
1793 keybuf
[req
.key_len
] = '\0';
1795 if (__builtin_expect (debug_level
, 0) > 0)
1800 handle_request: request received (Version = %d) from PID %ld"),
1801 req
.version
, (long int) pid
);
1805 handle_request: request received (Version = %d)"), req
.version
);
1808 /* Phew, we got all the data, now process it. */
1809 handle_request (fd
, &req
, keybuf
, uid
, pid
);
1817 pthread_mutex_lock (&readylist_lock
);
1819 /* One more thread available. */
/* Number of entries in FDLIST and STARTTIME; start_threads allocates
   both arrays with NCONNS elements. */
1826 static unsigned int nconns
;
/* Queue the descriptor FD for the worker threads: store it in an
   unused FDLIST slot, link that slot into READYLIST and signal
   readylist_cond.  When no worker is ready (NREADY is zero) and
   NTHREADS is still below MAX_NTHREADS, another nscd_run_worker thread
   is created first.
   NOTE(review): the signature line is missing from this extract; the
   callers in the main loops refer to this routine as fd_ready --
   confirm. */
1831 pthread_mutex_lock (&readylist_lock
);
1833 /* Find an empty entry in FDLIST. A NULL NEXT pointer marks a slot
   that is not queued. */
1835 for (inner
= 0; inner
< nconns
; ++inner
)
1836 if (fdlist
[inner
].next
== NULL
)
1838 assert (inner
< nconns
);
1840 fdlist
[inner
].fd
= fd
;
/* READYLIST points at the most recently queued entry and its NEXT at
   the entry the workers take first. An empty list gets a single
   self-linked entry; otherwise the new entry is linked in behind the
   current READYLIST entry and becomes the new READYLIST. */
1842 if (readylist
== NULL
)
1843 readylist
= fdlist
[inner
].next
= &fdlist
[inner
];
1846 fdlist
[inner
].next
= readylist
->next
;
1847 readylist
= readylist
->next
= &fdlist
[inner
];
1850 bool do_signal
= true;
1851 if (__glibc_unlikely (nready
== 0))
1856 /* Try to start another thread to help out. */
1858 if (nthreads
< max_nthreads
1859 && pthread_create (&th
, &attr
, nscd_run_worker
,
1860 (void *) (long int) nthreads
) == 0)
1862 /* We got another thread. */
1864 /* The new thread might need a kick. */
1870 pthread_mutex_unlock (&readylist_lock
);
1872 /* Tell one of the worker threads there is work to do. */
1874 pthread_cond_signal (&readylist_cond
);
1878 /* Check whether restarting should happen. The result is true only
   if all of the following hold: PARANOIA is set, no descriptor is
   queued on READYLIST, NREADY equals NTHREADS (every worker thread is
   counted as ready), and NOW has reached RESTART_TIME.
   NOTE(review): READYLIST and NREADY are read here without taking
   readylist_lock in the visible lines -- confirm that this is
   intended. */
1880 restart_p (time_t now
)
1882 return (paranoia
&& readylist
== NULL
&& nready
== nthreads
1883 && now
>= restart_time
);
1887 /* Array for times a connection was accepted. main_loop_poll indexes
   it by the slot in its pollfd array, main_loop_epoll by the
   descriptor number; the epoll variant stores 0 once a descriptor has
   been handed to a worker. */
1888 static time_t *starttime
;
1891 /* Inotify event for changed file. The member I gives typed access to
   the event header; BUF reserves room for one header plus PATH_MAX
   bytes.
   NOTE(review): the line opening the enclosing type (used elsewhere as
   union __inev) and any guard around the PATH_MAX definition are
   missing from this extract. */
1894 struct inotify_event i
;
1896 # define PATH_MAX 1024
1898 char buf
[sizeof (struct inotify_event
) + PATH_MAX
];
1901 /* Process the inotify event in INEV. If the event matches any of the files
1902 registered with a database then mark that database as requiring its cache
1903 to be cleared. We indicate the cache needs clearing by setting
1904 TO_CLEAR[DBCNT] to true for the matching database.

   TO_CLEAR is indexed by database number and is only ever set to true
   here, never reset.  A match means that the inotify_descr of a traced
   file equals the watch descriptor INEV->i.wd.  */
1906 inotify_check_files (bool *to_clear
, union __inev
*inev
)
1908 /* Check which of the files changed. */
1909 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
/* Walk the linked list of files traced for this database. */
1911 struct traced_file
*finfo
= dbs
[dbcnt
].traced_files
;
1913 while (finfo
!= NULL
)
1915 /* Inotify event watch descriptor matches. */
1916 if (finfo
->inotify_descr
== inev
->i
.wd
)
1918 /* Mark cache as needing to be cleared and reinitialize. */
1919 to_clear
[dbcnt
] = true;
/* NOTE(review): the statement executed when call_res_init is set is
   missing from this extract. */
1920 if (finfo
->call_res_init
)
1925 finfo
= finfo
->next
;
1930 /* If an entry in the array of booleans TO_CLEAR is TRUE then clear the cache
1931 for the associated database, otherwise do nothing. The TO_CLEAR array must
1932 have LASTDB entries.

   The cache is not cleared here directly: the clear_cache flag of the
   database is set while holding its prune_lock and its prune_cond is
   then signalled, which is the condition the clean-up thread of that
   database waits on.  */
1934 clear_db_cache (bool *to_clear
)
1936 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
1937 if (to_clear
[dbcnt
])
1939 pthread_mutex_lock (&dbs
[dbcnt
].prune_lock
);
1940 dbs
[dbcnt
].clear_cache
= 1;
1941 pthread_mutex_unlock (&dbs
[dbcnt
].prune_lock
);
/* The signal is sent after the lock has been released. */
1942 pthread_cond_signal (&dbs
[dbcnt
].prune_cond
);
/* Main loop built on poll.  The function is marked noreturn.  CONNS
   has NCONNS slots: slot 0 is polled for new connections, which are
   accepted from SOCK; slot 1 is used for inotify_fd when that
   descriptor is valid; one further slot is used for nl_status_fd when
   valid.  Accepted connections occupy free slots, with the accept time
   recorded in STARTTIME under the same index.  A readable connection
   is handed to fd_ready; a connection whose accept time is older than
   ACCEPT_TIMEOUT seconds is closed.  At the end of a round restart_p
   is consulted.
   NOTE(review): the return type line, braces, loop headers and a
   number of statements are missing from this extract.  */
1949 __attribute__ ((__noreturn__
))
1950 main_loop_poll (void)
1952 struct pollfd
*conns
= (struct pollfd
*) xmalloc (nconns
1953 * sizeof (conns
[0]));
1956 conns
[0].events
= POLLRDNORM
;
1958 size_t firstfree
= 1;
1961 if (inotify_fd
!= -1)
1963 conns
[1].fd
= inotify_fd
;
1964 conns
[1].events
= POLLRDNORM
;
1971 size_t idx_nl_status_fd
= 0;
1972 if (nl_status_fd
!= -1)
1974 idx_nl_status_fd
= nused
;
1975 conns
[nused
].fd
= nl_status_fd
;
1976 conns
[nused
].events
= POLLRDNORM
;
1984 /* Wait for any event. We wait at most a couple of seconds so
1985 that we can check whether we should close any of the accepted
1986 connections since we have not received a request.
   MAIN_THREAD_TIMEOUT is expressed in milliseconds: it starts from
   MAX_ACCEPT_TIMEOUT seconds and shrinks linearly as NUSED grows
   relative to NCONNS. */
1987 #define MAX_ACCEPT_TIMEOUT 30
1988 #define MIN_ACCEPT_TIMEOUT 5
1989 #define MAIN_THREAD_TIMEOUT \
1990 (MAX_ACCEPT_TIMEOUT * 1000 \
1991 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1993 int n
= poll (conns
, nused
, MAIN_THREAD_TIMEOUT
);
1995 time_t now
= time (NULL
);
1997 /* If there is a descriptor ready for reading or there is a new
1998 connection, process this now. */
2001 if (conns
[0].revents
!= 0)
2003 /* We have a new incoming connection. Accept the connection. */
2006 #ifndef __ASSUME_ACCEPT4
2008 if (have_accept4
>= 0)
2011 fd
= TEMP_FAILURE_RETRY (accept4 (sock
, NULL
, NULL
,
2013 #ifndef __ASSUME_ACCEPT4
2014 if (have_accept4
== 0)
2015 have_accept4
= fd
!= -1 || errno
!= ENOSYS
? 1 : -1;
2018 #ifndef __ASSUME_ACCEPT4
2019 if (have_accept4
< 0)
2020 fd
= TEMP_FAILURE_RETRY (accept (sock
, NULL
, NULL
));
2023 /* Use the descriptor if we have not reached the limit. */
2026 if (firstfree
< nconns
)
2028 conns
[firstfree
].fd
= fd
;
2029 conns
[firstfree
].events
= POLLRDNORM
;
2030 starttime
[firstfree
] = now
;
2031 if (firstfree
>= nused
)
2032 nused
= firstfree
+ 1;
2036 while (firstfree
< nused
&& conns
[firstfree
].fd
!= -1);
2039 /* We cannot use the connection so close it. */
2048 if (inotify_fd
!= -1 && conns
[1].fd
== inotify_fd
)
2050 if (conns
[1].revents
!= 0)
2052 bool to_clear
[lastdb
] = { false, };
2055 /* Read all inotify events for files registered via
2056 register_traced_file(). */
2059 ssize_t nb
= TEMP_FAILURE_RETRY (read (inotify_fd
, &inev
,
2061 if (nb
< (ssize_t
) sizeof (struct inotify_event
))
2063 if (__builtin_expect (nb
== -1 && errno
!= EAGAIN
,
2066 /* Something went wrong when reading the inotify
2067 data. Better disable inotify. */
2069 disabled inotify after read error %d"),
2081 /* Check which of the files changed. */
2082 inotify_check_files (to_clear
, &inev
);
2085 /* Actually perform the cache clearing. */
2086 clear_db_cache (to_clear
);
2096 if (idx_nl_status_fd
!= 0 && conns
[idx_nl_status_fd
].revents
!= 0)
2099 /* Read all the data. We do not interpret it here. */
2100 while (TEMP_FAILURE_RETRY (read (nl_status_fd
, buf
,
2101 sizeof (buf
))) != -1)
2104 dbs
[hstdb
].head
->extra_data
[NSCD_HST_IDX_CONF_TIMESTAMP
]
2105 = __bump_nl_timestamp ();
2109 for (size_t cnt
= first
; cnt
< nused
&& n
> 0; ++cnt
)
2110 if (conns
[cnt
].revents
!= 0)
2112 fd_ready (conns
[cnt
].fd
);
2114 /* Clean up the CONNS array. */
2116 if (cnt
< firstfree
)
2118 if (cnt
== nused
- 1)
2121 while (conns
[nused
- 1].fd
== -1);
2127 /* Now find entries which have timed out. */
2130 /* We make the timeout length depend on the number of file
2131 descriptors currently used. */
2132 #define ACCEPT_TIMEOUT \
2133 (MAX_ACCEPT_TIMEOUT \
2134 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
2135 time_t laststart
= now
- ACCEPT_TIMEOUT
;
2137 for (size_t cnt
= nused
- 1; cnt
> 0; --cnt
)
2139 if (conns
[cnt
].fd
!= -1 && starttime
[cnt
] < laststart
)
2141 /* Remove the entry, it timed out. */
2142 (void) close (conns
[cnt
].fd
);
2145 if (cnt
< firstfree
)
2147 if (cnt
== nused
- 1)
2150 while (conns
[nused
- 1].fd
== -1);
2154 if (restart_p (now
))
/* Main loop built on epoll; EFD is the epoll descriptor.  SOCK and,
   when valid, inotify_fd and nl_status_fd are registered with EFD
   first.  Each round waits for up to MAIN_THREAD_TIMEOUT milliseconds
   for at most 100 events.  An accepted connection is added to EFD and
   its accept time stored in STARTTIME, indexed by the descriptor
   number.  A readable connection is removed from EFD, handed to
   fd_ready and its STARTTIME entry reset to 0.  Connections whose
   accept time is older than ACCEPT_TIMEOUT seconds are removed from
   EFD.  At the end of a round restart_p is consulted.
   NOTE(review): the return type line, braces, loop headers, the
   statements following the comments about epoll being unusable, and
   the maintenance of HIGHEST are missing from this extract.  */
2162 main_loop_epoll (int efd
)
2164 struct epoll_event ev
= { 0, };
2168 /* Add the socket. */
2169 ev
.events
= EPOLLRDNORM
;
2171 if (epoll_ctl (efd
, EPOLL_CTL_ADD
, sock
, &ev
) == -1)
2172 /* We cannot use epoll. */
2175 # ifdef HAVE_INOTIFY
2176 if (inotify_fd
!= -1)
2178 ev
.events
= EPOLLRDNORM
;
2179 ev
.data
.fd
= inotify_fd
;
2180 if (epoll_ctl (efd
, EPOLL_CTL_ADD
, inotify_fd
, &ev
) == -1)
2181 /* We cannot use epoll. */
2187 # ifdef HAVE_NETLINK
2188 if (nl_status_fd
!= -1)
2190 ev
.events
= EPOLLRDNORM
;
2191 ev
.data
.fd
= nl_status_fd
;
2192 if (epoll_ctl (efd
, EPOLL_CTL_ADD
, nl_status_fd
, &ev
) == -1)
2193 /* We cannot use epoll. */
2200 struct epoll_event revs
[100];
2201 # define nrevs (sizeof (revs) / sizeof (revs[0]))
2203 int n
= epoll_wait (efd
, revs
, nrevs
, MAIN_THREAD_TIMEOUT
);
2205 time_t now
= time (NULL
);
2207 for (int cnt
= 0; cnt
< n
; ++cnt
)
2208 if (revs
[cnt
].data
.fd
== sock
)
2210 /* A new connection. */
2213 # ifndef __ASSUME_ACCEPT4
2215 if (have_accept4
>= 0)
2218 fd
= TEMP_FAILURE_RETRY (accept4 (sock
, NULL
, NULL
,
2220 # ifndef __ASSUME_ACCEPT4
2221 if (have_accept4
== 0)
2222 have_accept4
= fd
!= -1 || errno
!= ENOSYS
? 1 : -1;
2225 # ifndef __ASSUME_ACCEPT4
2226 if (have_accept4
< 0)
2227 fd
= TEMP_FAILURE_RETRY (accept (sock
, NULL
, NULL
));
2230 /* Use the descriptor if we have not reached the limit. */
2233 /* Try to add the new descriptor. */
2236 || epoll_ctl (efd
, EPOLL_CTL_ADD
, fd
, &ev
) == -1)
2237 /* The descriptor is too large or something went
2238 wrong. Close the descriptor. */
2242 /* Remember when we accepted the connection. */
2243 starttime
[fd
] = now
;
2252 # ifdef HAVE_INOTIFY
2253 else if (revs
[cnt
].data
.fd
== inotify_fd
)
2255 bool to_clear
[lastdb
] = { false, };
2258 /* Read all inotify events for files registered via
2259 register_traced_file(). */
2262 ssize_t nb
= TEMP_FAILURE_RETRY (read (inotify_fd
, &inev
,
2264 if (nb
< (ssize_t
) sizeof (struct inotify_event
))
2266 if (__glibc_unlikely (nb
== -1 && errno
!= EAGAIN
))
2268 /* Something went wrong when reading the inotify
2269 data. Better disable inotify. */
2270 dbg_log (_("disabled inotify after read error %d"),
2272 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, inotify_fd
,
2280 /* Check which of the files changed. */
2281 inotify_check_files(to_clear
, &inev
);
2284 /* Actually perform the cache clearing. */
2285 clear_db_cache (to_clear
);
2288 # ifdef HAVE_NETLINK
2289 else if (revs
[cnt
].data
.fd
== nl_status_fd
)
2292 /* Read all the data. We do not interpret it here. */
2293 while (TEMP_FAILURE_RETRY (read (nl_status_fd
, buf
,
2294 sizeof (buf
))) != -1)
/* NOTE(review): main_loop_poll stores the result of this call in
   dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]; no such
   store is visible here. Confirm whether the assignment was lost in
   extraction or the returned value is really discarded. */
2297 __bump_nl_timestamp ();
2302 /* Remove the descriptor from the epoll descriptor. */
2303 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, revs
[cnt
].data
.fd
, NULL
);
2305 /* Get a worker to handle the request. */
2306 fd_ready (revs
[cnt
].data
.fd
);
2308 /* Reset the time. */
2309 starttime
[revs
[cnt
].data
.fd
] = 0;
2310 if (revs
[cnt
].data
.fd
== highest
)
2313 while (highest
> 0 && starttime
[highest
] == 0);
2318 /* Now look for descriptors for accepted connections which have
2319 no reply in too long of a time. The three asserts check that the
   descriptors which are not accepted connections never carry an
   accept time. */
2320 time_t laststart
= now
- ACCEPT_TIMEOUT
;
2321 assert (starttime
[sock
] == 0);
2322 assert (inotify_fd
== -1 || starttime
[inotify_fd
] == 0);
2323 assert (nl_status_fd
== -1 || starttime
[nl_status_fd
] == 0);
2324 for (int cnt
= highest
; cnt
> STDERR_FILENO
; --cnt
)
2325 if (starttime
[cnt
] != 0 && starttime
[cnt
] < laststart
)
2327 /* We are waiting for this one for too long. Close it. */
2328 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, cnt
, NULL
);
2336 else if (cnt
!= sock
&& starttime
[cnt
] == 0 && cnt
== highest
)
2339 if (restart_p (now
))
2346 /* Start all the threads we want. The initial process is thread no. 1.

   Visible steps: pick CLOCK_MONOTONIC for the condition variables when
   it is available and can be set on CONDATTR; set up the shared thread
   attribute ATTR (detached, NSCD_THREAD_STACKSIZE stack); unless
   debugging, raise NTHREADS to at least LASTDB; initialize prune_cond
   for each database index below LASTDB and create nscd_run_prune
   threads, passing the index as the thread argument; create NTHREADS
   nscd_run_worker threads; size and allocate FDLIST and STARTTIME with
   NCONNS entries via xcalloc; finally create an epoll descriptor and
   enter main_loop_epoll.  Failure to initialize a condition variable
   or to start a thread ends the process through do_exit.
   NOTE(review): the return type line, braces, several conditions and
   the closing parts of some comments are missing from this extract. */
2348 start_threads (void)
2350 /* Initialize the conditional variable we will use. The only
2351 non-standard attribute we might use is the clock selection. */
2352 pthread_condattr_t condattr
;
2353 pthread_condattr_init (&condattr
);
2355 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2356 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2357 /* Determine whether the monotonic clock is available. */
2358 struct timespec dummy
;
2359 # if _POSIX_MONOTONIC_CLOCK == 0
2360 if (sysconf (_SC_MONOTONIC_CLOCK
) > 0)
2362 # if _POSIX_CLOCK_SELECTION == 0
2363 if (sysconf (_SC_CLOCK_SELECTION
) > 0)
2365 if (clock_getres (CLOCK_MONOTONIC
, &dummy
) == 0
2366 && pthread_condattr_setclock (&condattr
, CLOCK_MONOTONIC
) == 0)
2367 timeout_clock
= CLOCK_MONOTONIC
;
2370 /* Create the attribute for the threads. They are all created
2372 pthread_attr_init (&attr
);
2373 pthread_attr_setdetachstate (&attr
, PTHREAD_CREATE_DETACHED
);
2374 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2375 pthread_attr_setstacksize (&attr
, NSCD_THREAD_STACKSIZE
);
2377 /* We allow less than LASTDB threads only for debugging. */
2378 if (debug_level
== 0)
2379 nthreads
= MAX (nthreads
, lastdb
);
2381 /* Create the threads which prune the databases. */
2382 // XXX Ideally this work would be done by some of the worker threads.
2383 // XXX But this is problematic since we would need to be able to wake
2384 // XXX them up explicitly as well as part of the group handling the
2385 // XXX ready-list. This requires an operation where we can wait on
2386 // XXX two conditional variables at the same time. This operation
2387 // XXX does not exist (yet).
2388 for (long int i
= 0; i
< lastdb
; ++i
)
2390 /* Initialize the conditional variable. */
2391 if (pthread_cond_init (&dbs
[i
].prune_cond
, &condattr
) != 0)
2393 dbg_log (_("could not initialize conditional variable"));
2394 do_exit (1, 0, NULL
);
2399 && pthread_create (&th
, &attr
, nscd_run_prune
, (void *) i
) != 0)
2401 dbg_log (_("could not start clean-up thread; terminating"));
2402 do_exit (1, 0, NULL
);
2406 pthread_condattr_destroy (&condattr
);
2408 for (long int i
= 0; i
< nthreads
; ++i
)
2411 if (pthread_create (&th
, &attr
, nscd_run_worker
, NULL
) != 0)
2415 dbg_log (_("could not start any worker thread; terminating"));
2416 do_exit (1, 0, NULL
);
2423 /* Now it is safe to let the parent know that we're doing fine and it can
2427 /* Determine how much room for descriptors we should initially
2428 allocate. This might need to change later if we cap the number
2430 const long int nfds
= sysconf (_SC_OPEN_MAX
);
2432 #define MAXCONN 16384
2433 if (nfds
== -1 || nfds
> MAXCONN
)
2435 else if (nfds
< MINCONN
)
2440 /* We need memory to pass descriptors on to the worker threads. */
2441 fdlist
= (struct fdlist
*) xcalloc (nconns
, sizeof (fdlist
[0]));
2442 /* Array to keep track when connection was accepted. */
2443 starttime
= (time_t *) xcalloc (nconns
, sizeof (starttime
[0]));
2445 /* In the main thread we execute the loop which handles incoming
2448 int efd
= epoll_create (100);
2451 main_loop_epoll (efd
);
2460 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2461 this function is called, we are not listening on the nscd socket yet so
2462 we can just use the ordinary lookup functions without causing a lockup

   Results are stored in the file-level variables SERVER_UID,
   SERVER_GID, SERVER_NGROUPS and SERVER_GROUPS; the current real IDs
   are saved in OLD_UID and OLD_GID.  getgrouplist is called twice:
   first with a NULL buffer, where a return value of 0 is treated as a
   fatal error, then with a buffer of SERVER_NGROUPS entries allocated
   with xmalloc.  Every failure is logged and ends the process through
   do_exit.
   NOTE(review): the return type line, braces and some conditions are
   missing from this extract.  */
2464 begin_drop_privileges (void)
2466 struct passwd
*pwd
= getpwnam (server_user
);
2470 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2471 do_exit (EXIT_FAILURE
, 0,
2472 _("Failed to run nscd as user '%s'"), server_user
);
2475 server_uid
= pwd
->pw_uid
;
2476 server_gid
= pwd
->pw_gid
;
2478 /* Save the old UID/GID if we have to change back. */
2481 old_uid
= getuid ();
2482 old_gid
= getgid ();
2485 if (getgrouplist (server_user
, server_gid
, NULL
, &server_ngroups
) == 0)
2487 /* This really must never happen. */
2488 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2489 do_exit (EXIT_FAILURE
, errno
,
2490 _("initial getgrouplist failed"));
2493 server_groups
= (gid_t
*) xmalloc (server_ngroups
* sizeof (gid_t
));
2495 if (getgrouplist (server_user
, server_gid
, server_groups
, &server_ngroups
)
2498 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2499 do_exit (EXIT_FAILURE
, errno
, _("getgrouplist failed"));
2504 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2505 run nscd as the user specified in the configuration file.

   The order is: supplementary groups first, then the group ID, then
   the user ID.  For both IDs two variants are visible: setresgid and
   setresuid, which keep OLD_GID and OLD_UID as the saved IDs, and
   plain setgid and setuid.  The condition selecting between them is
   not part of this extract -- NOTE(review): presumably the saved IDs
   are kept only when the process may have to switch back for a
   restart; confirm.  Every failure is logged and ends the process
   through do_exit.  When both HAVE_LIBAUDIT and HAVE_LIBCAP are
   defined, capabilities obtained from preserve_capabilities before the
   switch are passed to install_real_capabilities afterwards.  */
2507 finish_drop_privileges (void)
2509 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2510 /* We need to preserve the capabilities to connect to the audit daemon. */
2511 cap_t new_caps
= preserve_capabilities ();
2514 if (setgroups (server_ngroups
, server_groups
) == -1)
2516 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2517 do_exit (EXIT_FAILURE
, errno
, _("setgroups failed"));
2522 res
= setresgid (server_gid
, server_gid
, old_gid
);
2524 res
= setgid (server_gid
);
2527 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2528 do_exit (4, errno
, "setgid");
2532 res
= setresuid (server_uid
, server_uid
, old_uid
);
2534 res
= setuid (server_uid
);
2537 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2538 do_exit (4, errno
, "setuid");
2541 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2542 /* Remove the temporary capabilities. */
2543 install_real_capabilities (new_caps
);