1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2015 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>. */
35 #include <arpa/inet.h>
37 # include <linux/netlink.h>
38 # include <linux/rtnetlink.h>
41 # include <sys/epoll.h>
44 # include <sys/inotify.h>
47 #include <sys/param.h>
50 # include <sys/sendfile.h>
52 #include <sys/socket.h>
59 #include <resolv/resolv.h>
61 #include <kernel-features.h>
62 #include <libc-internal.h>
65 /* Support to run nscd as an unprivileged user */
66 const char *server_user
;
67 static uid_t server_uid
;
68 static gid_t server_gid
;
69 const char *stat_user
;
71 static gid_t
*server_groups
;
75 static int server_ngroups
;
77 static pthread_attr_t attr
;
79 static void begin_drop_privileges (void);
80 static void finish_drop_privileges (void);
82 /* Map request type to a string. */
83 const char *const serv2str
[LASTREQ
] =
85 [GETPWBYNAME
] = "GETPWBYNAME",
86 [GETPWBYUID
] = "GETPWBYUID",
87 [GETGRBYNAME
] = "GETGRBYNAME",
88 [GETGRBYGID
] = "GETGRBYGID",
89 [GETHOSTBYNAME
] = "GETHOSTBYNAME",
90 [GETHOSTBYNAMEv6
] = "GETHOSTBYNAMEv6",
91 [GETHOSTBYADDR
] = "GETHOSTBYADDR",
92 [GETHOSTBYADDRv6
] = "GETHOSTBYADDRv6",
93 [SHUTDOWN
] = "SHUTDOWN",
94 [GETSTAT
] = "GETSTAT",
95 [INVALIDATE
] = "INVALIDATE",
96 [GETFDPW
] = "GETFDPW",
97 [GETFDGR
] = "GETFDGR",
98 [GETFDHST
] = "GETFDHST",
100 [INITGROUPS
] = "INITGROUPS",
101 [GETSERVBYNAME
] = "GETSERVBYNAME",
102 [GETSERVBYPORT
] = "GETSERVBYPORT",
103 [GETFDSERV
] = "GETFDSERV",
104 [GETNETGRENT
] = "GETNETGRENT",
105 [INNETGR
] = "INNETGR",
106 [GETFDNETGR
] = "GETFDNETGR"
109 /* The control data structures for the services. */
110 struct database_dyn dbs
[lastdb
] =
113 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
114 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
115 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
121 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
122 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
123 .db_filename
= _PATH_NSCD_PASSWD_DB
,
124 .disabled_iov
= &pwd_iov_disabled
,
132 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
133 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
134 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
140 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
141 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
142 .db_filename
= _PATH_NSCD_GROUP_DB
,
143 .disabled_iov
= &grp_iov_disabled
,
151 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
152 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
153 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
157 .propagate
= 0, /* Not used. */
159 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
160 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
161 .db_filename
= _PATH_NSCD_HOSTS_DB
,
162 .disabled_iov
= &hst_iov_disabled
,
170 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
171 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
172 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
176 .propagate
= 0, /* Not used. */
178 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
179 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
180 .db_filename
= _PATH_NSCD_SERVICES_DB
,
181 .disabled_iov
= &serv_iov_disabled
,
189 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
190 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
191 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
195 .propagate
= 0, /* Not used. */
197 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
198 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
199 .db_filename
= _PATH_NSCD_NETGROUP_DB
,
200 .disabled_iov
= &netgroup_iov_disabled
,
210 /* Mapping of request type to database. */
214 struct database_dyn
*db
;
215 } const reqinfo
[LASTREQ
] =
217 [GETPWBYNAME
] = { true, &dbs
[pwddb
] },
218 [GETPWBYUID
] = { true, &dbs
[pwddb
] },
219 [GETGRBYNAME
] = { true, &dbs
[grpdb
] },
220 [GETGRBYGID
] = { true, &dbs
[grpdb
] },
221 [GETHOSTBYNAME
] = { true, &dbs
[hstdb
] },
222 [GETHOSTBYNAMEv6
] = { true, &dbs
[hstdb
] },
223 [GETHOSTBYADDR
] = { true, &dbs
[hstdb
] },
224 [GETHOSTBYADDRv6
] = { true, &dbs
[hstdb
] },
225 [SHUTDOWN
] = { false, NULL
},
226 [GETSTAT
] = { false, NULL
},
227 [INVALIDATE
] = { false, NULL
},
228 [GETFDPW
] = { false, &dbs
[pwddb
] },
229 [GETFDGR
] = { false, &dbs
[grpdb
] },
230 [GETFDHST
] = { false, &dbs
[hstdb
] },
231 [GETAI
] = { true, &dbs
[hstdb
] },
232 [INITGROUPS
] = { true, &dbs
[grpdb
] },
233 [GETSERVBYNAME
] = { true, &dbs
[servdb
] },
234 [GETSERVBYPORT
] = { true, &dbs
[servdb
] },
235 [GETFDSERV
] = { false, &dbs
[servdb
] },
236 [GETNETGRENT
] = { true, &dbs
[netgrdb
] },
237 [INNETGR
] = { true, &dbs
[netgrdb
] },
238 [GETFDNETGR
] = { false, &dbs
[netgrdb
] }
242 /* Initial number of threads to use. */
244 /* Maximum number of threads to use. */
245 int max_nthreads
= 32;
247 /* Socket for incoming connections. */
251 /* Inotify descriptor. */
256 /* Descriptor for netlink status updates. */
257 static int nl_status_fd
= -1;
260 #ifndef __ASSUME_SOCK_CLOEXEC
261 /* Negative if SOCK_CLOEXEC is not supported, positive if it is, zero
262 before we know the result. */
263 static int have_sock_cloexec
;
265 #ifndef __ASSUME_ACCEPT4
266 static int have_accept4
;
269 /* Number of times clients had to wait. */
270 unsigned long int client_queued
;
274 writeall (int fd
, const void *buf
, size_t len
)
280 ret
= TEMP_FAILURE_RETRY (send (fd
, buf
, n
, MSG_NOSIGNAL
));
283 buf
= (const char *) buf
+ ret
;
287 return ret
< 0 ? ret
: len
- n
;
293 sendfileall (int tofd
, int fromfd
, off_t off
, size_t len
)
300 ret
= TEMP_FAILURE_RETRY (sendfile (tofd
, fromfd
, &off
, n
));
306 return ret
< 0 ? ret
: len
- n
;
314 /* The following three are not really used, they are symbolic constants. */
320 use_he_begin
= use_he
| use_begin
,
321 use_he_end
= use_he
| use_end
,
323 use_data_begin
= use_data
| use_begin
,
324 use_data_end
= use_data
| use_end
,
325 use_data_first
= use_data_begin
| use_first
330 check_use (const char *data
, nscd_ssize_t first_free
, uint8_t *usemap
,
331 enum usekey use
, ref_t start
, size_t len
)
335 if (start
> first_free
|| start
+ len
> first_free
336 || (start
& BLOCK_ALIGN_M1
))
339 if (usemap
[start
] == use_not
)
341 /* Add the start marker. */
342 usemap
[start
] = use
| use_begin
;
346 if (usemap
[++start
] != use_not
)
351 /* Add the end marker. */
352 usemap
[start
] = use
| use_end
;
354 else if ((usemap
[start
] & ~use_first
) == ((use
| use_begin
) & ~use_first
))
356 /* Hash entries can't be shared. */
360 usemap
[start
] |= (use
& use_first
);
364 if (usemap
[++start
] != use
)
367 if (usemap
[++start
] != (use
| use_end
))
371 /* Points to a wrong object or somewhere in the middle. */
378 /* Verify data in persistent database. */
380 verify_persistent_db (void *mem
, struct database_pers_head
*readhead
, int dbnr
)
382 assert (dbnr
== pwddb
|| dbnr
== grpdb
|| dbnr
== hstdb
|| dbnr
== servdb
385 time_t now
= time (NULL
);
387 struct database_pers_head
*head
= mem
;
388 struct database_pers_head head_copy
= *head
;
390 /* Check that the header that was read matches the head in the database. */
391 if (memcmp (head
, readhead
, sizeof (*head
)) != 0)
394 /* First some easy tests: make sure the database header is sane. */
395 if (head
->version
!= DB_VERSION
396 || head
->header_size
!= sizeof (*head
)
397 /* We allow a timestamp to be one hour ahead of the current time.
398 This should cover daylight saving time changes. */
399 || head
->timestamp
> now
+ 60 * 60 + 60
400 || (head
->gc_cycle
& 1)
402 || (size_t) head
->module
> INT32_MAX
/ sizeof (ref_t
)
403 || (size_t) head
->data_size
> INT32_MAX
- head
->module
* sizeof (ref_t
)
404 || head
->first_free
< 0
405 || head
->first_free
> head
->data_size
406 || (head
->first_free
& BLOCK_ALIGN_M1
) != 0
407 || head
->maxnentries
< 0
408 || head
->maxnsearched
< 0)
411 uint8_t *usemap
= calloc (head
->first_free
, 1);
415 const char *data
= (char *) &head
->array
[roundup (head
->module
,
416 ALIGN
/ sizeof (ref_t
))];
418 nscd_ssize_t he_cnt
= 0;
419 for (nscd_ssize_t cnt
= 0; cnt
< head
->module
; ++cnt
)
421 ref_t trail
= head
->array
[cnt
];
425 while (work
!= ENDREF
)
427 if (! check_use (data
, head
->first_free
, usemap
, use_he
, work
,
428 sizeof (struct hashentry
)))
431 /* Now we know we can dereference the record. */
432 struct hashentry
*here
= (struct hashentry
*) (data
+ work
);
436 /* Make sure the record is for this type of service. */
437 if (here
->type
>= LASTREQ
438 || reqinfo
[here
->type
].db
!= &dbs
[dbnr
])
441 /* Validate boolean field value. */
442 if (here
->first
!= false && here
->first
!= true)
450 || here
->packet
> head
->first_free
451 || here
->packet
+ sizeof (struct datahead
) > head
->first_free
)
454 struct datahead
*dh
= (struct datahead
*) (data
+ here
->packet
);
456 if (! check_use (data
, head
->first_free
, usemap
,
457 use_data
| (here
->first
? use_first
: 0),
458 here
->packet
, dh
->allocsize
))
461 if (dh
->allocsize
< sizeof (struct datahead
)
462 || dh
->recsize
> dh
->allocsize
463 || (dh
->notfound
!= false && dh
->notfound
!= true)
464 || (dh
->usable
!= false && dh
->usable
!= true))
467 if (here
->key
< here
->packet
+ sizeof (struct datahead
)
468 || here
->key
> here
->packet
+ dh
->allocsize
469 || here
->key
+ here
->len
> here
->packet
+ dh
->allocsize
)
475 /* A circular list, this must not happen. */
478 trail
= ((struct hashentry
*) (data
+ trail
))->next
;
483 if (he_cnt
!= head
->nentries
)
486 /* See if all data and keys had at least one reference from
487 he->first == true hashentry. */
488 for (ref_t idx
= 0; idx
< head
->first_free
; ++idx
)
490 if (usemap
[idx
] == use_data_begin
)
494 /* Finally, make sure the database hasn't changed since the first test. */
495 if (memcmp (mem
, &head_copy
, sizeof (*head
)) != 0)
508 # define EXTRA_O_FLAGS O_CLOEXEC
510 # define EXTRA_O_FLAGS 0
514 /* Initialize database information structures. */
518 /* Look up unprivileged uid/gid/groups before we start listening on the
520 if (server_user
!= NULL
)
521 begin_drop_privileges ();
524 /* No configuration for this value, assume a default. */
527 for (size_t cnt
= 0; cnt
< lastdb
; ++cnt
)
528 if (dbs
[cnt
].enabled
)
530 pthread_rwlock_init (&dbs
[cnt
].lock
, NULL
);
531 pthread_mutex_init (&dbs
[cnt
].memlock
, NULL
);
533 if (dbs
[cnt
].persistent
)
535 /* Try to open the appropriate file on disk. */
536 int fd
= open (dbs
[cnt
].db_filename
, O_RDWR
| EXTRA_O_FLAGS
);
543 struct database_pers_head head
;
544 ssize_t n
= TEMP_FAILURE_RETRY (read (fd
, &head
,
546 if (n
!= sizeof (head
) || fstat64 (fd
, &st
) != 0)
549 /* The code is single-threaded at this point so
550 using strerror is just fine. */
551 msg
= strerror (errno
);
553 dbg_log (_("invalid persistent database file \"%s\": %s"),
554 dbs
[cnt
].db_filename
, msg
);
555 unlink (dbs
[cnt
].db_filename
);
557 else if (head
.module
== 0 && head
.data_size
== 0)
559 /* The file has been created, but the head has not
560 been initialized yet. */
561 msg
= _("uninitialized header");
564 else if (head
.header_size
!= (int) sizeof (head
))
566 msg
= _("header size does not match");
569 else if ((total
= (sizeof (head
)
570 + roundup (head
.module
* sizeof (ref_t
),
574 || total
< sizeof (head
))
576 msg
= _("file size does not match");
579 /* Note we map with the maximum size allowed for the
580 database. This is likely much larger than the
581 actual file size. This is OK on most OSes since
582 extensions of the underlying file will
583 automatically translate more pages available for
585 else if ((mem
= mmap (NULL
, dbs
[cnt
].max_db_size
,
586 PROT_READ
| PROT_WRITE
,
590 else if (!verify_persistent_db (mem
, &head
, cnt
))
593 msg
= _("verification failed");
598 /* Success. We have the database. */
600 dbs
[cnt
].memsize
= total
;
601 dbs
[cnt
].data
= (char *)
602 &dbs
[cnt
].head
->array
[roundup (dbs
[cnt
].head
->module
,
603 ALIGN
/ sizeof (ref_t
))];
604 dbs
[cnt
].mmap_used
= true;
606 if (dbs
[cnt
].suggested_module
> head
.module
)
607 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
612 /* We also need a read-only descriptor. */
615 dbs
[cnt
].ro_fd
= open (dbs
[cnt
].db_filename
,
616 O_RDONLY
| EXTRA_O_FLAGS
);
617 if (dbs
[cnt
].ro_fd
== -1)
619 cannot create read-only descriptor for \"%s\"; no mmap"),
620 dbs
[cnt
].db_filename
);
623 // XXX Shall we test whether the descriptors actually
624 // XXX point to the same file?
627 /* Close the file descriptors in case something went
628 wrong in which case the variables have not been
633 else if (errno
== EACCES
)
634 do_exit (EXIT_FAILURE
, 0, _("cannot access '%s'"),
635 dbs
[cnt
].db_filename
);
638 if (dbs
[cnt
].head
== NULL
)
640 /* No database loaded. Allocate the data structure,
642 struct database_pers_head head
;
643 size_t total
= (sizeof (head
)
644 + roundup (dbs
[cnt
].suggested_module
645 * sizeof (ref_t
), ALIGN
)
646 + (dbs
[cnt
].suggested_module
647 * DEFAULT_DATASIZE_PER_BUCKET
));
649 /* Try to create the database. If we do not need a
650 persistent database create a temporary file. */
653 if (dbs
[cnt
].persistent
)
655 fd
= open (dbs
[cnt
].db_filename
,
656 O_RDWR
| O_CREAT
| O_EXCL
| O_TRUNC
| EXTRA_O_FLAGS
,
658 if (fd
!= -1 && dbs
[cnt
].shared
)
659 ro_fd
= open (dbs
[cnt
].db_filename
,
660 O_RDONLY
| EXTRA_O_FLAGS
);
664 char fname
[] = _PATH_NSCD_XYZ_DB_TMP
;
665 fd
= mkostemp (fname
, EXTRA_O_FLAGS
);
667 /* We do not need the file name anymore after we
668 opened another file descriptor in read-only mode. */
672 ro_fd
= open (fname
, O_RDONLY
| EXTRA_O_FLAGS
);
682 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
683 dbnames
[cnt
], dbs
[cnt
].db_filename
);
684 do_exit (1, 0, NULL
);
687 if (dbs
[cnt
].persistent
)
688 dbg_log (_("cannot create %s; no persistent database used"),
689 dbs
[cnt
].db_filename
);
691 dbg_log (_("cannot create %s; no sharing possible"),
692 dbs
[cnt
].db_filename
);
694 dbs
[cnt
].persistent
= 0;
695 // XXX remember: no mmap
699 /* Tell the user if we could not create the read-only
701 if (ro_fd
== -1 && dbs
[cnt
].shared
)
703 cannot create read-only descriptor for \"%s\"; no mmap"),
704 dbs
[cnt
].db_filename
);
706 /* Before we create the header, initialize the hash
707 table. That way if we get interrupted while writing
708 the header we can recognize a partially initialized
710 size_t ps
= sysconf (_SC_PAGESIZE
);
712 assert (~ENDREF
== 0);
713 memset (tmpbuf
, '\xff', ps
);
715 size_t remaining
= dbs
[cnt
].suggested_module
* sizeof (ref_t
);
716 off_t offset
= sizeof (head
);
719 if (offset
% ps
!= 0)
721 towrite
= MIN (remaining
, ps
- (offset
% ps
));
722 if (pwrite (fd
, tmpbuf
, towrite
, offset
) != towrite
)
725 remaining
-= towrite
;
728 while (remaining
> ps
)
730 if (pwrite (fd
, tmpbuf
, ps
, offset
) == -1)
737 && pwrite (fd
, tmpbuf
, remaining
, offset
) != remaining
)
740 /* Create the header of the file. */
741 struct database_pers_head head
=
743 .version
= DB_VERSION
,
744 .header_size
= sizeof (head
),
745 .module
= dbs
[cnt
].suggested_module
,
746 .data_size
= (dbs
[cnt
].suggested_module
747 * DEFAULT_DATASIZE_PER_BUCKET
),
752 if ((TEMP_FAILURE_RETRY (write (fd
, &head
, sizeof (head
)))
754 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd
, 0, total
))
756 || (mem
= mmap (NULL
, dbs
[cnt
].max_db_size
,
757 PROT_READ
| PROT_WRITE
,
758 MAP_SHARED
, fd
, 0)) == MAP_FAILED
)
761 unlink (dbs
[cnt
].db_filename
);
762 dbg_log (_("cannot write to database file %s: %s"),
763 dbs
[cnt
].db_filename
, strerror (errno
));
764 dbs
[cnt
].persistent
= 0;
770 dbs
[cnt
].data
= (char *)
771 &dbs
[cnt
].head
->array
[roundup (dbs
[cnt
].head
->module
,
772 ALIGN
/ sizeof (ref_t
))];
773 dbs
[cnt
].memsize
= total
;
774 dbs
[cnt
].mmap_used
= true;
776 /* Remember the descriptors. */
778 dbs
[cnt
].ro_fd
= ro_fd
;
790 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
791 /* We do not check here whether the O_CLOEXEC provided to the
792 open call was successful or not. The two fcntl calls are
793 only performed once each per process start-up and therefore
794 is not noticeable at all. */
796 && ((dbs
[cnt
].wr_fd
!= -1
797 && fcntl (dbs
[cnt
].wr_fd
, F_SETFD
, FD_CLOEXEC
) == -1)
798 || (dbs
[cnt
].ro_fd
!= -1
799 && fcntl (dbs
[cnt
].ro_fd
, F_SETFD
, FD_CLOEXEC
) == -1)))
802 cannot set socket to close on exec: %s; disabling paranoia mode"),
808 if (dbs
[cnt
].head
== NULL
)
810 /* We do not use the persistent database. Just
811 create an in-memory data structure. */
812 assert (! dbs
[cnt
].persistent
);
814 dbs
[cnt
].head
= xmalloc (sizeof (struct database_pers_head
)
815 + (dbs
[cnt
].suggested_module
817 memset (dbs
[cnt
].head
, '\0', sizeof (struct database_pers_head
));
818 assert (~ENDREF
== 0);
819 memset (dbs
[cnt
].head
->array
, '\xff',
820 dbs
[cnt
].suggested_module
* sizeof (ref_t
));
821 dbs
[cnt
].head
->module
= dbs
[cnt
].suggested_module
;
822 dbs
[cnt
].head
->data_size
= (DEFAULT_DATASIZE_PER_BUCKET
823 * dbs
[cnt
].head
->module
);
824 dbs
[cnt
].data
= xmalloc (dbs
[cnt
].head
->data_size
);
825 dbs
[cnt
].head
->first_free
= 0;
828 assert (dbs
[cnt
].ro_fd
== -1);
832 /* Create the socket. */
833 #ifndef __ASSUME_SOCK_CLOEXEC
835 if (have_sock_cloexec
>= 0)
838 sock
= socket (AF_UNIX
, SOCK_STREAM
| SOCK_CLOEXEC
| SOCK_NONBLOCK
, 0);
839 #ifndef __ASSUME_SOCK_CLOEXEC
840 if (have_sock_cloexec
== 0)
841 have_sock_cloexec
= sock
!= -1 || errno
!= EINVAL
? 1 : -1;
844 #ifndef __ASSUME_SOCK_CLOEXEC
845 if (have_sock_cloexec
< 0)
846 sock
= socket (AF_UNIX
, SOCK_STREAM
, 0);
850 dbg_log (_("cannot open socket: %s"), strerror (errno
));
851 do_exit (errno
== EACCES
? 4 : 1, 0, NULL
);
853 /* Bind a name to the socket. */
854 struct sockaddr_un sock_addr
;
855 sock_addr
.sun_family
= AF_UNIX
;
856 strcpy (sock_addr
.sun_path
, _PATH_NSCDSOCKET
);
857 if (bind (sock
, (struct sockaddr
*) &sock_addr
, sizeof (sock_addr
)) < 0)
859 dbg_log ("%s: %s", _PATH_NSCDSOCKET
, strerror (errno
));
860 do_exit (errno
== EACCES
? 4 : 1, 0, NULL
);
863 #ifndef __ASSUME_SOCK_CLOEXEC
864 if (have_sock_cloexec
< 0)
866 /* We don't want to get stuck on accept. */
867 int fl
= fcntl (sock
, F_GETFL
);
868 if (fl
== -1 || fcntl (sock
, F_SETFL
, fl
| O_NONBLOCK
) == -1)
870 dbg_log (_("cannot change socket to nonblocking mode: %s"),
872 do_exit (1, 0, NULL
);
875 /* The descriptor needs to be closed on exec. */
876 if (paranoia
&& fcntl (sock
, F_SETFD
, FD_CLOEXEC
) == -1)
878 dbg_log (_("cannot set socket to close on exec: %s"),
880 do_exit (1, 0, NULL
);
885 /* Set permissions for the socket. */
886 chmod (_PATH_NSCDSOCKET
, DEFFILEMODE
);
888 /* Set the socket up to accept connections. */
889 if (listen (sock
, SOMAXCONN
) < 0)
891 dbg_log (_("cannot enable socket to accept connections: %s"),
893 do_exit (1, 0, NULL
);
897 if (dbs
[hstdb
].enabled
)
899 /* Try to open netlink socket to monitor network setting changes. */
900 nl_status_fd
= socket (AF_NETLINK
,
901 SOCK_RAW
| SOCK_CLOEXEC
| SOCK_NONBLOCK
,
903 if (nl_status_fd
!= -1)
905 struct sockaddr_nl snl
;
906 memset (&snl
, '\0', sizeof (snl
));
907 snl
.nl_family
= AF_NETLINK
;
908 /* XXX Is this the best set to use? */
909 snl
.nl_groups
= (RTMGRP_IPV4_IFADDR
| RTMGRP_TC
| RTMGRP_IPV4_MROUTE
910 | RTMGRP_IPV4_ROUTE
| RTMGRP_IPV4_RULE
911 | RTMGRP_IPV6_IFADDR
| RTMGRP_IPV6_MROUTE
912 | RTMGRP_IPV6_ROUTE
| RTMGRP_IPV6_IFINFO
913 | RTMGRP_IPV6_PREFIX
);
915 if (bind (nl_status_fd
, (struct sockaddr
*) &snl
, sizeof (snl
)) != 0)
917 close (nl_status_fd
);
922 /* Start the timestamp process. */
923 dbs
[hstdb
].head
->extra_data
[NSCD_HST_IDX_CONF_TIMESTAMP
]
924 = __bump_nl_timestamp ();
926 # ifndef __ASSUME_SOCK_CLOEXEC
927 if (have_sock_cloexec
< 0)
929 /* We don't want to get stuck on accept. */
930 int fl
= fcntl (nl_status_fd
, F_GETFL
);
932 || fcntl (nl_status_fd
, F_SETFL
, fl
| O_NONBLOCK
) == -1)
935 cannot change socket to nonblocking mode: %s"),
937 do_exit (1, 0, NULL
);
940 /* The descriptor needs to be closed on exec. */
942 && fcntl (nl_status_fd
, F_SETFD
, FD_CLOEXEC
) == -1)
944 dbg_log (_("cannot set socket to close on exec: %s"),
946 do_exit (1, 0, NULL
);
955 /* Change to unprivileged uid/gid/groups if specified in config file */
956 if (server_user
!= NULL
)
957 finish_drop_privileges ();
961 /* Register the file in FINFO as a traced file for the database DBS[DBIX].
963 We support registering multiple files per database. Each call to
964 register_traced_file adds to the list of registered files.
966 When we prune the database, either through timeout or a request to
967 invalidate, we will check to see if any of the registered files has changed.
968 When we accept new connections to handle a cache request we will also
969 check to see if any of the registered files has changed.
971 If we have inotify support then we install an inotify fd to notify us of
972 file deletion or modification, both of which will require we invalidate
973 the cache for the database. Without inotify support we stat the file and
974 store st_mtime to determine if the file has been modified. */
976 register_traced_file (size_t dbidx
, struct traced_file
*finfo
)
978 /* If the database is disabled or file checking is disabled
979 then ignore the registration. */
980 if (! dbs
[dbidx
].enabled
|| ! dbs
[dbidx
].check_file
)
983 if (__glibc_unlikely (debug_level
> 0))
984 dbg_log (_("register trace file %s for database %s"),
985 finfo
->fname
, dbnames
[dbidx
]);
989 || (finfo
->inotify_descr
= inotify_add_watch (inotify_fd
, finfo
->fname
,
994 /* We need the modification date of the file. */
997 if (stat64 (finfo
->fname
, &st
) < 0)
999 /* We cannot stat() the file, disable file checking. */
1000 dbg_log (_("cannot stat() file `%s': %s"),
1001 finfo
->fname
, strerror (errno
));
1005 finfo
->inotify_descr
= -1;
1006 finfo
->mtime
= st
.st_mtime
;
1009 /* Queue up the file name. */
1010 finfo
->next
= dbs
[dbidx
].traced_files
;
1011 dbs
[dbidx
].traced_files
= finfo
;
1015 /* Close the connections. */
1017 close_sockets (void)
1024 invalidate_cache (char *key
, int fd
)
1029 for (number
= pwddb
; number
< lastdb
; ++number
)
1030 if (strcmp (key
, dbnames
[number
]) == 0)
1032 if (number
== hstdb
)
1034 struct traced_file
*runp
= dbs
[hstdb
].traced_files
;
1035 while (runp
!= NULL
)
1036 if (runp
->call_res_init
)
1047 if (number
== lastdb
)
1050 writeall (fd
, &resp
, sizeof (resp
));
1054 if (dbs
[number
].enabled
)
1056 pthread_mutex_lock (&dbs
[number
].prune_run_lock
);
1057 prune_cache (&dbs
[number
], LONG_MAX
, fd
);
1058 pthread_mutex_unlock (&dbs
[number
].prune_run_lock
);
1063 writeall (fd
, &resp
, sizeof (resp
));
1070 send_ro_fd (struct database_dyn
*db
, char *key
, int fd
)
1072 /* If we do not have a read-only file descriptor do nothing. */
1073 if (db
->ro_fd
== -1)
1076 /* We need to send some data along with the descriptor. */
1077 uint64_t mapsize
= (db
->head
->data_size
1078 + roundup (db
->head
->module
* sizeof (ref_t
), ALIGN
)
1079 + sizeof (struct database_pers_head
));
1080 struct iovec iov
[2];
1081 iov
[0].iov_base
= key
;
1082 iov
[0].iov_len
= strlen (key
) + 1;
1083 iov
[1].iov_base
= &mapsize
;
1084 iov
[1].iov_len
= sizeof (mapsize
);
1086 /* Prepare the control message to transfer the descriptor. */
1090 char bytes
[CMSG_SPACE (sizeof (int))];
1092 struct msghdr msg
= { .msg_iov
= iov
, .msg_iovlen
= 2,
1093 .msg_control
= buf
.bytes
,
1094 .msg_controllen
= sizeof (buf
) };
1095 struct cmsghdr
*cmsg
= CMSG_FIRSTHDR (&msg
);
1097 cmsg
->cmsg_level
= SOL_SOCKET
;
1098 cmsg
->cmsg_type
= SCM_RIGHTS
;
1099 cmsg
->cmsg_len
= CMSG_LEN (sizeof (int));
1101 int *ip
= (int *) CMSG_DATA (cmsg
);
1104 msg
.msg_controllen
= cmsg
->cmsg_len
;
1106 /* Send the control message. We repeat when we are interrupted but
1107 everything else is ignored. */
1108 #ifndef MSG_NOSIGNAL
1109 # define MSG_NOSIGNAL 0
1111 (void) TEMP_FAILURE_RETRY (sendmsg (fd
, &msg
, MSG_NOSIGNAL
));
1113 if (__glibc_unlikely (debug_level
> 0))
1114 dbg_log (_("provide access to FD %d, for %s"), db
->ro_fd
, key
);
1116 #endif /* SCM_RIGHTS */
1119 /* Handle new request. */
1121 handle_request (int fd
, request_header
*req
, void *key
, uid_t uid
, pid_t pid
)
1123 if (__builtin_expect (req
->version
, NSCD_VERSION
) != NSCD_VERSION
)
1125 if (debug_level
> 0)
1127 cannot handle old request version %d; current version is %d"),
1128 req
->version
, NSCD_VERSION
);
1132 /* Perform the SELinux check before we go on to the standard checks. */
1133 if (selinux_enabled
&& nscd_request_avc_has_perm (fd
, req
->type
) != 0)
1135 if (debug_level
> 0)
1144 snprintf (buf
, sizeof (buf
), "/proc/%ld/exe", (long int) pid
);
1145 ssize_t n
= readlink (buf
, buf
, sizeof (buf
) - 1);
1149 request from %ld not handled due to missing permission"), (long int) pid
);
1154 request from '%s' [%ld] not handled due to missing permission"),
1155 buf
, (long int) pid
);
1158 dbg_log (_("request not handled due to missing permission"));
1164 struct database_dyn
*db
= reqinfo
[req
->type
].db
;
1166 /* See whether we can service the request from the cache. */
1167 if (__builtin_expect (reqinfo
[req
->type
].data_request
, true))
1169 if (__builtin_expect (debug_level
, 0) > 0)
1171 if (req
->type
== GETHOSTBYADDR
|| req
->type
== GETHOSTBYADDRv6
)
1173 char buf
[INET6_ADDRSTRLEN
];
1175 dbg_log ("\t%s (%s)", serv2str
[req
->type
],
1176 inet_ntop (req
->type
== GETHOSTBYADDR
1177 ? AF_INET
: AF_INET6
,
1178 key
, buf
, sizeof (buf
)));
1181 dbg_log ("\t%s (%s)", serv2str
[req
->type
], (char *) key
);
1184 /* Is this service enabled? */
1185 if (__glibc_unlikely (!db
->enabled
))
1187 /* No, send the prepared record. */
1188 if (TEMP_FAILURE_RETRY (send (fd
, db
->disabled_iov
->iov_base
,
1189 db
->disabled_iov
->iov_len
,
1191 != (ssize_t
) db
->disabled_iov
->iov_len
1192 && __builtin_expect (debug_level
, 0) > 0)
1194 /* We have problems sending the result. */
1196 dbg_log (_("cannot write result: %s"),
1197 strerror_r (errno
, buf
, sizeof (buf
)));
1203 /* Be sure we can read the data. */
1204 if (__glibc_unlikely (pthread_rwlock_tryrdlock (&db
->lock
) != 0))
1206 ++db
->head
->rdlockdelayed
;
1207 pthread_rwlock_rdlock (&db
->lock
);
1210 /* See whether we can handle it from the cache. */
1211 struct datahead
*cached
;
1212 cached
= (struct datahead
*) cache_search (req
->type
, key
, req
->key_len
,
1216 /* Hurray it's in the cache. */
1219 #ifdef HAVE_SENDFILE
1220 if (__glibc_likely (db
->mmap_used
))
1222 assert (db
->wr_fd
!= -1);
1223 assert ((char *) cached
->data
> (char *) db
->data
);
1224 assert ((char *) cached
->data
- (char *) db
->head
1226 <= (sizeof (struct database_pers_head
)
1227 + db
->head
->module
* sizeof (ref_t
)
1228 + db
->head
->data_size
));
1229 nwritten
= sendfileall (fd
, db
->wr_fd
,
1230 (char *) cached
->data
1231 - (char *) db
->head
, cached
->recsize
);
1232 # ifndef __ASSUME_SENDFILE
1233 if (nwritten
== -1 && errno
== ENOSYS
)
1238 # ifndef __ASSUME_SENDFILE
1242 nwritten
= writeall (fd
, cached
->data
, cached
->recsize
);
1244 if (nwritten
!= cached
->recsize
1245 && __builtin_expect (debug_level
, 0) > 0)
1247 /* We have problems sending the result. */
1249 dbg_log (_("cannot write result: %s"),
1250 strerror_r (errno
, buf
, sizeof (buf
)));
1253 pthread_rwlock_unlock (&db
->lock
);
1258 pthread_rwlock_unlock (&db
->lock
);
1260 else if (__builtin_expect (debug_level
, 0) > 0)
1262 if (req
->type
== INVALIDATE
)
1263 dbg_log ("\t%s (%s)", serv2str
[req
->type
], (char *) key
);
1265 dbg_log ("\t%s", serv2str
[req
->type
]);
1268 /* Handle the request. */
1272 addpwbyname (db
, fd
, req
, key
, uid
);
1276 addpwbyuid (db
, fd
, req
, key
, uid
);
1280 addgrbyname (db
, fd
, req
, key
, uid
);
1284 addgrbygid (db
, fd
, req
, key
, uid
);
1288 addhstbyname (db
, fd
, req
, key
, uid
);
1291 case GETHOSTBYNAMEv6
:
1292 addhstbynamev6 (db
, fd
, req
, key
, uid
);
1296 addhstbyaddr (db
, fd
, req
, key
, uid
);
1299 case GETHOSTBYADDRv6
:
1300 addhstbyaddrv6 (db
, fd
, req
, key
, uid
);
1304 addhstai (db
, fd
, req
, key
, uid
);
1308 addinitgroups (db
, fd
, req
, key
, uid
);
1312 addservbyname (db
, fd
, req
, key
, uid
);
1316 addservbyport (db
, fd
, req
, key
, uid
);
1320 addgetnetgrent (db
, fd
, req
, key
, uid
);
1324 addinnetgr (db
, fd
, req
, key
, uid
);
1331 /* Get the caller's credentials. */
1333 struct ucred caller
;
1334 socklen_t optlen
= sizeof (caller
);
1336 if (getsockopt (fd
, SOL_SOCKET
, SO_PEERCRED
, &caller
, &optlen
) < 0)
1340 dbg_log (_("error getting caller's id: %s"),
1341 strerror_r (errno
, buf
, sizeof (buf
)));
1347 /* Some systems have no SO_PEERCRED implementation. They don't
1348 care about security so we don't either. */
1353 /* Accept shutdown, getstat and invalidate only from root. For
1354 the stat call also allow the user specified in the config file. */
1355 if (req
->type
== GETSTAT
)
1357 if (uid
== 0 || uid
== stat_uid
)
1358 send_stats (fd
, dbs
);
1362 if (req
->type
== INVALIDATE
)
1363 invalidate_cache (key
, fd
);
1365 termination_handler (0);
1375 send_ro_fd (reqinfo
[req
->type
].db
, key
, fd
);
1380 /* Ignore the command, it's nothing we know. */
1386 /* Restart the process. */
1390 /* First determine the parameters. We do not use the parameters
1391 passed to main() since in case nscd is started by running the
1392 dynamic linker this will not work. Yes, this is not the usual
1393 case but nscd is part of glibc and we occasionally do this. */
1394 size_t buflen
= 1024;
1395 char *buf
= alloca (buflen
);
1397 int fd
= open ("/proc/self/cmdline", O_RDONLY
);
1401 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1410 ssize_t n
= TEMP_FAILURE_RETRY (read (fd
, buf
+ readlen
,
1415 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1425 if (readlen
< buflen
)
1428 /* We might have to extend the buffer. */
1429 size_t old_buflen
= buflen
;
1430 char *newp
= extend_alloca (buf
, buflen
, 2 * buflen
);
1431 buf
= memmove (newp
, buf
, old_buflen
);
1436 /* Parse the command line. Worst case scenario: every two
1437 characters form one parameter (one character plus NUL). */
1438 char **argv
= alloca ((readlen
/ 2 + 1) * sizeof (argv
[0]));
1442 while (cp
< buf
+ readlen
)
1445 cp
= (char *) rawmemchr (cp
, '\0') + 1;
1449 /* Second, change back to the old user if we changed it. */
1450 if (server_user
!= NULL
)
1452 if (setresuid (old_uid
, old_uid
, old_uid
) != 0)
1455 cannot change to old UID: %s; disabling paranoia mode"),
1462 if (setresgid (old_gid
, old_gid
, old_gid
) != 0)
1465 cannot change to old GID: %s; disabling paranoia mode"),
1468 ignore_value (setuid (server_uid
));
1474 /* Next change back to the old working directory. */
1475 if (chdir (oldcwd
) == -1)
1478 cannot change to old working directory: %s; disabling paranoia mode"),
1481 if (server_user
!= NULL
)
1483 ignore_value (setuid (server_uid
));
1484 ignore_value (setgid (server_gid
));
1490 /* Synchronize memory. */
1491 int32_t certainly
[lastdb
];
1492 for (int cnt
= 0; cnt
< lastdb
; ++cnt
)
1493 if (dbs
[cnt
].enabled
)
1495 /* Make sure nobody keeps using the database. */
1496 dbs
[cnt
].head
->timestamp
= 0;
1497 certainly
[cnt
] = dbs
[cnt
].head
->nscd_certainly_running
;
1498 dbs
[cnt
].head
->nscd_certainly_running
= 0;
1500 if (dbs
[cnt
].persistent
)
1502 msync (dbs
[cnt
].head
, dbs
[cnt
].memsize
, MS_ASYNC
);
1505 /* The preparations are done. */
1507 char pathbuf
[PATH_MAX
];
1511 /* Try to exec the real nscd program so the process name (as reported
1512 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1513 if readlink or the exec with the result of the readlink call fails. */
1514 ssize_t n
= readlink ("/proc/self/exe", pathbuf
, sizeof (pathbuf
) - 1);
1518 execv (pathbuf
, argv
);
1520 execv ("/proc/self/exe", argv
);
1522 /* If we come here, we will never be able to re-exec. */
1523 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1526 if (server_user
!= NULL
)
1528 ignore_value (setuid (server_uid
));
1529 ignore_value (setgid (server_gid
));
1531 if (chdir ("/") != 0)
1532 dbg_log (_("cannot change current working directory to \"/\": %s"),
1536 /* Reenable the databases. */
1537 time_t now
= time (NULL
);
1538 for (int cnt
= 0; cnt
< lastdb
; ++cnt
)
1539 if (dbs
[cnt
].enabled
)
1541 dbs
[cnt
].head
->timestamp
= now
;
1542 dbs
[cnt
].head
->nscd_certainly_running
= certainly
[cnt
];
1547 /* List of file descriptors. Entries are chained through NEXT. */
1551 struct fdlist
*next
;
1553 /* Memory allocated for the list. */
1554 static struct fdlist
*fdlist
;
1555 /* List of currently ready-to-read file descriptors. */
1556 static struct fdlist
*readylist
;
1558 /* Condition variable and mutex to signal availability of entries in
1559 READYLIST. Both objects are initialized statically; worker threads
1560 block on the condvar without a timeout. */
1561 static pthread_cond_t readylist_cond
= PTHREAD_COND_INITIALIZER
;
1562 static pthread_mutex_t readylist_lock
= PTHREAD_MUTEX_INITIALIZER
;
1564 /* The clock used for the timed waits on the per-database prune
condvars. It defaults to CLOCK_REALTIME; start_threads switches it to
CLOCK_MONOTONIC when that clock can be selected. */
1565 static clockid_t timeout_clock
= CLOCK_REALTIME
;
1567 /* Number of threads ready to handle the READYLIST. */
1568 static unsigned long int nready
;
1571 /* Function for the clean-up threads. P carries the index into DBS of the
database this thread looks after. The thread waits on the PRUNE_COND
of that database with a timeout, calls prune_cache when the wait timed
out, the wakeup time has passed or a cache clear was requested, and
then computes the next timeout. */
1573 __attribute__ ((__noreturn__
))
1574 nscd_run_prune (void *p
)
1576 const long int my_number
= (long int) p
;
1577 assert (dbs
[my_number
].enabled
);
1579 int dont_need_update
= setup_thread (&dbs
[my_number
]);
1581 time_t now
= time (NULL
);
1583 /* We are running. */
1584 dbs
[my_number
].head
->timestamp
= now
;
1586 struct timespec prune_ts
;
1587 if (__glibc_unlikely (clock_gettime (timeout_clock
, &prune_ts
) == -1))
1588 /* Should never happen. */
1591 /* Compute the initial timeout time. Prevent all the timers from going
1592 off at the same time by adding a db-based value. */
1593 prune_ts
.tv_sec
+= CACHE_PRUNE_INTERVAL
+ my_number
;
1594 dbs
[my_number
].wakeup_time
= now
+ CACHE_PRUNE_INTERVAL
+ my_number
;
1596 pthread_mutex_t
*prune_lock
= &dbs
[my_number
].prune_lock
;
1597 pthread_mutex_t
*prune_run_lock
= &dbs
[my_number
].prune_run_lock
;
1598 pthread_cond_t
*prune_cond
= &dbs
[my_number
].prune_cond
;
1600 pthread_mutex_lock (prune_lock
);
1603 /* Wait, but not forever. The wait is skipped altogether when a
cache clear is already pending. */
1605 if (! dbs
[my_number
].clear_cache
)
1606 e
= pthread_cond_timedwait (prune_cond
, prune_lock
, &prune_ts
);
1607 assert (__builtin_expect (e
== 0 || e
== ETIMEDOUT
, 1));
1611 if (e
== ETIMEDOUT
|| now
>= dbs
[my_number
].wakeup_time
1612 || dbs
[my_number
].clear_cache
)
1614 /* We will determine the new timeout values based on the
1615 cache content. Should there be concurrent additions to
1616 the cache which are not accounted for in the cache
1617 pruning we want to know about it. Therefore set the
1618 timeout to the maximum. It will be decreased when adding
1619 new entries to the cache, if necessary. */
1620 dbs
[my_number
].wakeup_time
= MAX_TIMEOUT_VALUE
;
1622 /* Unconditionally reset the flag. A pending clear request makes
the prune run use LONG_MAX instead of NOW as its notion of the
current time (presumably so that every entry counts as expired). */
1623 time_t prune_now
= dbs
[my_number
].clear_cache
? LONG_MAX
: now
;
1624 dbs
[my_number
].clear_cache
= 0;
1626 pthread_mutex_unlock (prune_lock
);
1628 /* We use a separate lock for running the prune function (instead
1629 of keeping prune_lock locked) because this enables concurrent
1630 invocations of cache_add which might modify the timeout value. */
1631 pthread_mutex_lock (prune_run_lock
);
1632 next_wait
= prune_cache (&dbs
[my_number
], prune_now
, -1);
1633 pthread_mutex_unlock (prune_run_lock
);
/* Never wait less than CACHE_PRUNE_INTERVAL between prune runs. */
1635 next_wait
= MAX (next_wait
, CACHE_PRUNE_INTERVAL
);
1636 /* If clients cannot determine for sure whether nscd is running
1637 we need to wake up occasionally to update the timestamp.
1638 Wait 90% of the update period. */
1639 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1640 if (__glibc_unlikely (! dont_need_update
))
1642 next_wait
= MIN (UPDATE_MAPPING_TIMEOUT
, next_wait
);
1643 dbs
[my_number
].head
->timestamp
= now
;
1646 pthread_mutex_lock (prune_lock
);
1648 /* Make it known when we will wake up again. An earlier wakeup
time that was stored in the meantime is kept. */
1649 if (now
+ next_wait
< dbs
[my_number
].wakeup_time
)
1650 dbs
[my_number
].wakeup_time
= now
+ next_wait
;
1652 next_wait
= dbs
[my_number
].wakeup_time
- now
;
1655 /* The cache was just pruned. Do not do it again now. Just
1656 use the new timeout value. */
1657 next_wait
= dbs
[my_number
].wakeup_time
- now
;
1659 if (clock_gettime (timeout_clock
, &prune_ts
) == -1)
1660 /* Should never happen. */
1663 /* Compute next timeout time. */
1664 prune_ts
.tv_sec
+= next_wait
;
1669 /* This is the main loop. It is replicated in different threads but
1670 the use of the ready list makes sure only one thread handles an
1671 incoming connection. Each round waits until READYLIST is non-empty,
takes the entry READYLIST->next off the list, reads the request
header and the key from its descriptor and hands both to
handle_request. */
1673 __attribute__ ((__noreturn__
))
1674 nscd_run_worker (void *p
)
1678 /* Initial locking. */
1679 pthread_mutex_lock (&readylist_lock
);
1681 /* One more thread available. */
1686 while (readylist
== NULL
)
1687 pthread_cond_wait (&readylist_cond
, &readylist_lock
);
1689 struct fdlist
*it
= readylist
->next
;
1690 if (readylist
->next
== readylist
)
1691 /* Just one entry on the list. */
1694 readylist
->next
= it
->next
;
1696 /* Extract the information and mark the record ready to be used
1701 /* One more thread available. */
1704 /* We are done with the list. */
1705 pthread_mutex_unlock (&readylist_lock
);
1707 #ifndef __ASSUME_ACCEPT4
1708 if (have_accept4
< 0)
1710 /* We do not want to block on a short read or so. */
1711 int fl
= fcntl (fd
, F_GETFL
);
1712 if (fl
== -1 || fcntl (fd
, F_SETFL
, fl
| O_NONBLOCK
) == -1)
1717 /* Now read the request. */
1719 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd
, &req
, sizeof (req
)))
1720 != sizeof (req
), 0))
1722 /* We failed to read data. Note that this also might mean we
1723 failed because we would have blocked. */
1724 if (debug_level
> 0)
1725 dbg_log (_("short read while reading request: %s"),
1726 strerror_r (errno
, buf
, sizeof (buf
)));
1730 /* Check whether this is a valid request type. */
1731 if (req
.type
< GETPWBYNAME
|| req
.type
>= LASTREQ
)
1734 /* Some systems have no SO_PEERCRED implementation. They don't
1735 care about security so we don't as well. */
1740 if (__glibc_unlikely (debug_level
> 0))
1742 struct ucred caller
;
1743 socklen_t optlen
= sizeof (caller
);
1745 if (getsockopt (fd
, SOL_SOCKET
, SO_PEERCRED
, &caller
, &optlen
) == 0)
1749 const pid_t pid
= 0;
1752 /* It should not be possible to crash the nscd with a silly
1753 request (i.e., a terribly large key). Negative key lengths and
key lengths above MAXKEYLEN are rejected. */
1754 if (__builtin_expect (req
.key_len
, 1) < 0
1755 || __builtin_expect (req
.key_len
, 1) > MAXKEYLEN
)
1757 if (debug_level
> 0)
1758 dbg_log (_("key length in request too long: %d"), req
.key_len
);
/* One extra byte so that the key can always be NUL-terminated below. */
1763 char keybuf
[MAXKEYLEN
+ 1];
1765 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd
, keybuf
,
1769 /* Again, this can also mean we would have blocked. */
1770 if (debug_level
> 0)
1771 dbg_log (_("short read while reading request key: %s"),
1772 strerror_r (errno
, buf
, sizeof (buf
)));
1775 keybuf
[req
.key_len
] = '\0';
1777 if (__builtin_expect (debug_level
, 0) > 0)
1782 handle_request: request received (Version = %d) from PID %ld"),
1783 req
.version
, (long int) pid
);
1787 handle_request: request received (Version = %d)"), req
.version
);
1790 /* Phew, we got all the data, now process it. */
1791 handle_request (fd
, &req
, keybuf
, uid
, pid
);
1799 pthread_mutex_lock (&readylist_lock
);
1801 /* One more thread available. */
/* Number of entries allocated for FDLIST and STARTTIME, and for the
pollfd array in main_loop_poll. */
1808 static unsigned int nconns
;
/* The statements below queue the descriptor FD for the worker threads:
claim a free FDLIST slot, link it into READYLIST and make sure a
worker notices. */
1813 pthread_mutex_lock (&readylist_lock
);
1815 /* Find an empty entry in FDLIST. A slot is free while its NEXT
pointer is NULL. */
1817 for (inner
= 0; inner
< nconns
; ++inner
)
1818 if (fdlist
[inner
].next
== NULL
)
1820 assert (inner
< nconns
);
1822 fdlist
[inner
].fd
= fd
;
/* READYLIST is a circular list. READYLIST itself names the entry
queued last and READYLIST->next the entry queued first, which is the
one the workers take. A sole entry points to itself. */
1824 if (readylist
== NULL
)
1825 readylist
= fdlist
[inner
].next
= &fdlist
[inner
];
1828 fdlist
[inner
].next
= readylist
->next
;
1829 readylist
= readylist
->next
= &fdlist
[inner
];
1832 bool do_signal
= true;
1833 if (__glibc_unlikely (nready
== 0))
1838 /* Try to start another thread to help out. This is only
attempted while NTHREADS is below MAX_NTHREADS. */
1840 if (nthreads
< max_nthreads
1841 && pthread_create (&th
, &attr
, nscd_run_worker
,
1842 (void *) (long int) nthreads
) == 0)
1844 /* We got another thread. */
1846 /* The new thread might need a kick. */
1852 pthread_mutex_unlock (&readylist_lock
);
1854 /* Tell one of the worker threads there is work to do. */
1856 pthread_cond_signal (&readylist_cond
);
1860 /* Check whether restarting should happen. That is the case only when
PARANOIA is set, no descriptor is queued on READYLIST, NREADY equals
NTHREADS (every thread is counted as ready) and NOW has reached
RESTART_TIME. */
1862 restart_p (time_t now
)
1864 return (paranoia
&& readylist
== NULL
&& nready
== nthreads
1865 && now
>= restart_time
);
1869 /* Array for times a connection was accepted. main_loop_poll indexes
it by CONNS slot, main_loop_epoll by descriptor number; in the epoll
loop a value of 0 marks a descriptor that is not being timed. */
1870 static time_t *starttime
;
1873 /* Inotify event for changed file. I gives typed access to the event
header; BUF is sized to hold one header plus PATH_MAX further bytes.
NOTE(review): presumably both are members of union __inev, which
inotify_check_files takes a pointer to -- confirm. */
1876 struct inotify_event i
;
1878 # define PATH_MAX 1024
1880 char buf
[sizeof (struct inotify_event
) + PATH_MAX
];
1883 /* Process the inotify event in INEV. If the event matches any of the files
1884 registered with a database then mark that database as requiring its cache
1885 to be cleared. We indicate the cache needs clearing by setting
1886 TO_CLEAR[DBCNT] to true for the matching database. TO_CLEAR must have
LASTDB entries; entries are only ever set here, never reset, so the
caller has to initialize the array. */
1888 inotify_check_files (bool *to_clear
, union __inev
*inev
)
1890 /* Check which of the files changed. Every traced file of every
database is compared, so one event can mark several databases. */
1891 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
1893 struct traced_file
*finfo
= dbs
[dbcnt
].traced_files
;
1895 while (finfo
!= NULL
)
1897 /* Inotify event watch descriptor matches. */
1898 if (finfo
->inotify_descr
== inev
->i
.wd
)
1900 /* Mark cache as needing to be cleared and reinitialize. */
1901 to_clear
[dbcnt
] = true;
1902 if (finfo
->call_res_init
)
1907 finfo
= finfo
->next
;
1912 /* If an entry in the array of booleans TO_CLEAR is TRUE then request that
1913 the cache of the associated database be cleared, otherwise do nothing.
1914 The request is made by setting CLEAR_CACHE under PRUNE_LOCK and
signalling PRUNE_COND; the clearing itself is left to the thread
waiting on that condvar (see nscd_run_prune). The TO_CLEAR array
must have LASTDB entries. */
1916 clear_db_cache (bool *to_clear
)
1918 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
1919 if (to_clear
[dbcnt
])
1921 pthread_mutex_lock (&dbs
[dbcnt
].prune_lock
);
1922 dbs
[dbcnt
].clear_cache
= 1;
1923 pthread_mutex_unlock (&dbs
[dbcnt
].prune_lock
);
1924 pthread_cond_signal (&dbs
[dbcnt
].prune_cond
);
/* poll-based main loop. Slot 0 of CONNS is polled for new connections,
which are accepted from SOCK; slot 1 is used for INOTIFY_FD when that
is available, and NL_STATUS_FD gets the slot recorded in
IDX_NL_STATUS_FD. Accepted connections occupy further slots until
they become readable (and are passed to fd_ready) or time out. */
1931 __attribute__ ((__noreturn__
))
1932 main_loop_poll (void)
1934 struct pollfd
*conns
= (struct pollfd
*) xmalloc (nconns
1935 * sizeof (conns
[0]));
1938 conns
[0].events
= POLLRDNORM
;
1940 size_t firstfree
= 1;
1943 if (inotify_fd
!= -1)
1945 conns
[1].fd
= inotify_fd
;
1946 conns
[1].events
= POLLRDNORM
;
/* An index of 0 means that NL_STATUS_FD is not being polled. */
1953 size_t idx_nl_status_fd
= 0;
1954 if (nl_status_fd
!= -1)
1956 idx_nl_status_fd
= nused
;
1957 conns
[nused
].fd
= nl_status_fd
;
1958 conns
[nused
].events
= POLLRDNORM
;
1966 /* Wait for any event. The timeout is bounded so that we can
1967 periodically check whether we should close any of the accepted
1968 connections since we have not received a request. It starts at
MAX_ACCEPT_TIMEOUT seconds and shrinks as NUSED grows, down to the
midpoint between MAX_ACCEPT_TIMEOUT and MIN_ACCEPT_TIMEOUT when
NUSED equals NCONNS. */
1969 #define MAX_ACCEPT_TIMEOUT 30
1970 #define MIN_ACCEPT_TIMEOUT 5
1971 #define MAIN_THREAD_TIMEOUT \
1972 (MAX_ACCEPT_TIMEOUT * 1000 \
1973 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1975 int n
= poll (conns
, nused
, MAIN_THREAD_TIMEOUT
);
1977 time_t now
= time (NULL
);
1979 /* If there is a descriptor ready for reading or there is a new
1980 connection, process this now. */
1983 if (conns
[0].revents
!= 0)
1985 /* We have a new incoming connection. Accept the connection. */
1988 #ifndef __ASSUME_ACCEPT4
1990 if (have_accept4
>= 0)
1993 fd
= TEMP_FAILURE_RETRY (accept4 (sock
, NULL
, NULL
,
1995 #ifndef __ASSUME_ACCEPT4
1996 if (have_accept4
== 0)
1997 have_accept4
= fd
!= -1 || errno
!= ENOSYS
? 1 : -1;
2000 #ifndef __ASSUME_ACCEPT4
2001 if (have_accept4
< 0)
2002 fd
= TEMP_FAILURE_RETRY (accept (sock
, NULL
, NULL
));
2005 /* Use the descriptor if we have not reached the limit. */
2008 if (firstfree
< nconns
)
2010 conns
[firstfree
].fd
= fd
;
2011 conns
[firstfree
].events
= POLLRDNORM
;
2012 starttime
[firstfree
] = now
;
2013 if (firstfree
>= nused
)
2014 nused
= firstfree
+ 1;
2018 while (firstfree
< nused
&& conns
[firstfree
].fd
!= -1);
2021 /* We cannot use the connection so close it. */
2030 if (inotify_fd
!= -1 && conns
[1].fd
== inotify_fd
)
2032 if (conns
[1].revents
!= 0)
2034 bool to_clear
[lastdb
] = { false, };
2037 /* Read all inotify events for files registered via
2038 register_traced_file(). */
2041 ssize_t nb
= TEMP_FAILURE_RETRY (read (inotify_fd
, &inev
,
2043 if (nb
< (ssize_t
) sizeof (struct inotify_event
))
2045 if (__builtin_expect (nb
== -1 && errno
!= EAGAIN
,
2048 /* Something went wrong when reading the inotify
2049 data. Better disable inotify. */
2051 disabled inotify after read error %d"),
2063 /* Check which of the files changed. */
2064 inotify_check_files (to_clear
, &inev
);
2067 /* Actually perform the cache clearing. */
2068 clear_db_cache (to_clear
);
2078 if (idx_nl_status_fd
!= 0 && conns
[idx_nl_status_fd
].revents
!= 0)
2081 /* Read all the data. We do not interpret it here. */
2082 while (TEMP_FAILURE_RETRY (read (nl_status_fd
, buf
,
2083 sizeof (buf
))) != -1)
2086 dbs
[hstdb
].head
->extra_data
[NSCD_HST_IDX_CONF_TIMESTAMP
]
2087 = __bump_nl_timestamp ();
2091 for (size_t cnt
= first
; cnt
< nused
&& n
> 0; ++cnt
)
2092 if (conns
[cnt
].revents
!= 0)
2094 fd_ready (conns
[cnt
].fd
);
2096 /* Clean up the CONNS array. */
2098 if (cnt
< firstfree
)
2100 if (cnt
== nused
- 1)
2103 while (conns
[nused
- 1].fd
== -1);
2109 /* Now find entries which have timed out. */
2112 /* We make the timeout length depend on the number of file
2113 descriptors currently used: it shrinks from MAX_ACCEPT_TIMEOUT
toward MIN_ACCEPT_TIMEOUT seconds as NUSED approaches NCONNS. */
2114 #define ACCEPT_TIMEOUT \
2115 (MAX_ACCEPT_TIMEOUT \
2116 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
2117 time_t laststart
= now
- ACCEPT_TIMEOUT
;
/* NOTE(review): this scan goes down to slot 1 and so covers the slots
that can hold INOTIFY_FD or NL_STATUS_FD. No visible code sets
STARTTIME for those slots, and unlike main_loop_epoll the test below
does not skip entries whose STARTTIME is 0 -- confirm that these
descriptors cannot be closed here as timed out. */
2119 for (size_t cnt
= nused
- 1; cnt
> 0; --cnt
)
2121 if (conns
[cnt
].fd
!= -1 && starttime
[cnt
] < laststart
)
2123 /* Remove the entry, it timed out. */
2124 (void) close (conns
[cnt
].fd
);
2127 if (cnt
< firstfree
)
2129 if (cnt
== nused
- 1)
2132 while (conns
[nused
- 1].fd
== -1);
2136 if (restart_p (now
))
/* epoll-based main loop working on the epoll descriptor EFD. SOCK and,
when available, INOTIFY_FD and NL_STATUS_FD are registered first;
accepted connections are added as they arrive and removed from EFD
again before being passed to fd_ready. STARTTIME is indexed by
descriptor number here. NOTE(review): the comments below suggest the
function gives up when a descriptor cannot be registered -- the
statements doing so are not part of this listing, confirm. */
2144 main_loop_epoll (int efd
)
2146 struct epoll_event ev
= { 0, };
2150 /* Add the socket. */
2151 ev
.events
= EPOLLRDNORM
;
2153 if (epoll_ctl (efd
, EPOLL_CTL_ADD
, sock
, &ev
) == -1)
2154 /* We cannot use epoll. */
2157 # ifdef HAVE_INOTIFY
2158 if (inotify_fd
!= -1)
2160 ev
.events
= EPOLLRDNORM
;
2161 ev
.data
.fd
= inotify_fd
;
2162 if (epoll_ctl (efd
, EPOLL_CTL_ADD
, inotify_fd
, &ev
) == -1)
2163 /* We cannot use epoll. */
2169 # ifdef HAVE_NETLINK
2170 if (nl_status_fd
!= -1)
2172 ev
.events
= EPOLLRDNORM
;
2173 ev
.data
.fd
= nl_status_fd
;
2174 if (epoll_ctl (efd
, EPOLL_CTL_ADD
, nl_status_fd
, &ev
) == -1)
2175 /* We cannot use epoll. */
/* At most NREVS events are fetched per epoll_wait call. */
2182 struct epoll_event revs
[100];
2183 # define nrevs (sizeof (revs) / sizeof (revs[0]))
2185 int n
= epoll_wait (efd
, revs
, nrevs
, MAIN_THREAD_TIMEOUT
);
2187 time_t now
= time (NULL
);
2189 for (int cnt
= 0; cnt
< n
; ++cnt
)
2190 if (revs
[cnt
].data
.fd
== sock
)
2192 /* A new connection. */
2195 # ifndef __ASSUME_ACCEPT4
2197 if (have_accept4
>= 0)
2200 fd
= TEMP_FAILURE_RETRY (accept4 (sock
, NULL
, NULL
,
2202 # ifndef __ASSUME_ACCEPT4
2203 if (have_accept4
== 0)
2204 have_accept4
= fd
!= -1 || errno
!= ENOSYS
? 1 : -1;
2207 # ifndef __ASSUME_ACCEPT4
2208 if (have_accept4
< 0)
2209 fd
= TEMP_FAILURE_RETRY (accept (sock
, NULL
, NULL
));
2212 /* Use the descriptor if we have not reached the limit. */
2215 /* Try to add the new descriptor. */
2218 || epoll_ctl (efd
, EPOLL_CTL_ADD
, fd
, &ev
) == -1)
2219 /* The descriptor is too large or something went
2220 wrong. Close the descriptor. */
2224 /* Remember when we accepted the connection. */
2225 starttime
[fd
] = now
;
2234 # ifdef HAVE_INOTIFY
2235 else if (revs
[cnt
].data
.fd
== inotify_fd
)
2237 bool to_clear
[lastdb
] = { false, };
2240 /* Read all inotify events for files registered via
2241 register_traced_file(). */
2244 ssize_t nb
= TEMP_FAILURE_RETRY (read (inotify_fd
, &inev
,
2246 if (nb
< (ssize_t
) sizeof (struct inotify_event
))
2248 if (__glibc_unlikely (nb
== -1 && errno
!= EAGAIN
))
2250 /* Something went wrong when reading the inotify
2251 data. Better disable inotify. */
2252 dbg_log (_("disabled inotify after read error %d"),
2254 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, inotify_fd
,
2262 /* Check which of the files changed. */
2263 inotify_check_files(to_clear
, &inev
);
2266 /* Actually perform the cache clearing. */
2267 clear_db_cache (to_clear
);
2270 # ifdef HAVE_NETLINK
2271 else if (revs
[cnt
].data
.fd
== nl_status_fd
)
2274 /* Read all the data. We do not interpret it here. */
2275 while (TEMP_FAILURE_RETRY (read (nl_status_fd
, buf
,
2276 sizeof (buf
))) != -1)
/* NOTE(review): main_loop_poll stores the value returned by
__bump_nl_timestamp in
dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]; here the
return value is discarded -- confirm whether that is intended. */
2279 __bump_nl_timestamp ();
2284 /* Remove the descriptor from the epoll descriptor. */
2285 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, revs
[cnt
].data
.fd
, NULL
);
2287 /* Get a worker to handle the request. */
2288 fd_ready (revs
[cnt
].data
.fd
);
2290 /* Reset the time. */
2291 starttime
[revs
[cnt
].data
.fd
] = 0;
2292 if (revs
[cnt
].data
.fd
== highest
)
2295 while (highest
> 0 && starttime
[highest
] == 0);
2300 /* Now look for descriptors of accepted connections on which no
2301 request arrived for too long. STARTTIME must be 0 for the
listening socket and for the inotify and netlink descriptors so
that the scan below never treats them as timed out. */
2302 time_t laststart
= now
- ACCEPT_TIMEOUT
;
2303 assert (starttime
[sock
] == 0);
2304 assert (inotify_fd
== -1 || starttime
[inotify_fd
] == 0);
2305 assert (nl_status_fd
== -1 || starttime
[nl_status_fd
] == 0);
2306 for (int cnt
= highest
; cnt
> STDERR_FILENO
; --cnt
)
2307 if (starttime
[cnt
] != 0 && starttime
[cnt
] < laststart
)
2309 /* We are waiting for this one for too long. Close it. */
2310 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, cnt
, NULL
);
2318 else if (cnt
!= sock
&& starttime
[cnt
] == 0 && cnt
== highest
)
2321 if (restart_p (now
))
2328 /* Start all the threads we want. The initial process is thread no. 1.
The function selects the clock for the prune condvars, creates the
prune threads (nscd_run_prune, passing the database index) and
NTHREADS worker threads (nscd_run_worker), allocates FDLIST and
STARTTIME with NCONNS entries each and finally calls main_loop_epoll
on a fresh epoll descriptor. */
2330 start_threads (void)
2332 /* Initialize the condition variable attribute we will use. The only
2333 non-standard attribute we might use is the clock selection. */
2334 pthread_condattr_t condattr
;
2335 pthread_condattr_init (&condattr
);
2337 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2338 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2339 /* Determine whether the monotonic clock is available. */
2340 struct timespec dummy
;
2341 # if _POSIX_MONOTONIC_CLOCK == 0
2342 if (sysconf (_SC_MONOTONIC_CLOCK
) > 0)
2344 # if _POSIX_CLOCK_SELECTION == 0
2345 if (sysconf (_SC_CLOCK_SELECTION
) > 0)
2347 if (clock_getres (CLOCK_MONOTONIC
, &dummy
) == 0
2348 && pthread_condattr_setclock (&condattr
, CLOCK_MONOTONIC
) == 0)
2349 timeout_clock
= CLOCK_MONOTONIC
;
2352 /* Create the attribute for the threads. They are all created
2354 pthread_attr_init (&attr
);
2355 pthread_attr_setdetachstate (&attr
, PTHREAD_CREATE_DETACHED
);
2356 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2357 pthread_attr_setstacksize (&attr
, NSCD_THREAD_STACKSIZE
);
2359 /* We allow fewer than LASTDB threads only for debugging. */
2360 if (debug_level
== 0)
2361 nthreads
= MAX (nthreads
, lastdb
);
2363 /* Create the threads which prune the databases. */
2364 // XXX Ideally this work would be done by some of the worker threads.
2365 // XXX But this is problematic since we would need to be able to wake
2366 // XXX them up explicitly as well as part of the group handling the
2367 // XXX ready-list. This requires an operation where we can wait on
2368 // XXX two condition variables at the same time. This operation
2369 // XXX does not exist (yet).
2370 for (long int i
= 0; i
< lastdb
; ++i
)
2372 /* Initialize the condition variable. CONDATTR carries the clock
selected above. */
2373 if (pthread_cond_init (&dbs
[i
].prune_cond
, &condattr
) != 0)
2375 dbg_log (_("could not initialize conditional variable"));
2376 do_exit (1, 0, NULL
);
2381 && pthread_create (&th
, &attr
, nscd_run_prune
, (void *) i
) != 0)
2383 dbg_log (_("could not start clean-up thread; terminating"));
2384 do_exit (1, 0, NULL
);
2388 pthread_condattr_destroy (&condattr
);
2390 for (long int i
= 0; i
< nthreads
; ++i
)
2393 if (pthread_create (&th
, &attr
, nscd_run_worker
, NULL
) != 0)
2397 dbg_log (_("could not start any worker thread; terminating"));
2398 do_exit (1, 0, NULL
);
2405 /* Now it is safe to let the parent know that we're doing fine and it can
2409 /* Determine how much room for descriptors we should initially
2410 allocate. This might need to change later if we cap the number
2412 const long int nfds
= sysconf (_SC_OPEN_MAX
);
2414 #define MAXCONN 16384
2415 if (nfds
== -1 || nfds
> MAXCONN
)
2417 else if (nfds
< MINCONN
)
2422 /* We need memory to pass descriptors on to the worker threads. */
2423 fdlist
= (struct fdlist
*) xcalloc (nconns
, sizeof (fdlist
[0]));
2424 /* Array to keep track of when each connection was accepted. */
2425 starttime
= (time_t *) xcalloc (nconns
, sizeof (starttime
[0]));
2427 /* In the main thread we execute the loop which handles incoming
2430 int efd
= epoll_create (100);
2433 main_loop_epoll (efd
);
2442 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2443 this function is called, we are not listening on the nscd socket yet so
2444 we can just use the ordinary lookup functions without causing a lockup.
The results are stored in SERVER_UID, SERVER_GID, SERVER_GROUPS and
SERVER_NGROUPS for finish_drop_privileges to use; every lookup
failure is logged and handed to do_exit. */
2446 begin_drop_privileges (void)
2448 struct passwd
*pwd
= getpwnam (server_user
);
2452 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2453 do_exit (EXIT_FAILURE
, 0,
2454 _("Failed to run nscd as user '%s'"), server_user
);
2457 server_uid
= pwd
->pw_uid
;
2458 server_gid
= pwd
->pw_gid
;
2460 /* Save the old UID/GID if we have to change back. */
2463 old_uid
= getuid ();
2464 old_gid
= getgid ();
/* Probe with a NULL buffer so that getgrouplist reports the required
count in SERVER_NGROUPS; a return value of 0 from this probe is
treated as fatal. */
2467 if (getgrouplist (server_user
, server_gid
, NULL
, &server_ngroups
) == 0)
2469 /* This really must never happen. */
2470 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2471 do_exit (EXIT_FAILURE
, errno
,
2472 _("initial getgrouplist failed"));
/* Second pass: fetch the groups into an array of the reported size. */
2475 server_groups
= (gid_t
*) xmalloc (server_ngroups
* sizeof (gid_t
));
2477 if (getgrouplist (server_user
, server_gid
, server_groups
, &server_ngroups
)
2480 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2481 do_exit (EXIT_FAILURE
, errno
, _("getgrouplist failed"));
2486 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2487 run nscd as the user specified in the configuration file. The
supplementary groups are installed first, then the GID, then the
UID, using the values held in SERVER_NGROUPS, SERVER_GROUPS,
SERVER_GID and SERVER_UID. Any failure is logged and handed to
do_exit. */
2489 finish_drop_privileges (void)
2491 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2492 /* We need to preserve the capabilities to connect to the audit daemon. */
2493 cap_t new_caps
= preserve_capabilities ();
2496 if (setgroups (server_ngroups
, server_groups
) == -1)
2498 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2499 do_exit (EXIT_FAILURE
, errno
, _("setgroups failed"));
/* Two ways of changing the GID follow. The setresgid form passes
OLD_GID as the saved set-group-ID. NOTE(review): presumably that form
is chosen when the old IDs must stay recoverable (paranoia mode) --
confirm against the selecting condition. */
2504 res
= setresgid (server_gid
, server_gid
, old_gid
);
2506 res
= setgid (server_gid
);
2509 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2510 do_exit (4, errno
, "setgid");
/* Same choice for the UID, with OLD_UID as the saved set-user-ID in
the setresuid form. */
2514 res
= setresuid (server_uid
, server_uid
, old_uid
);
2516 res
= setuid (server_uid
);
2519 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2520 do_exit (4, errno
, "setuid");
2523 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2524 /* Remove the temporary capabilities. */
2525 install_real_capabilities (new_caps
);