1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2018 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>. */
35 #include <arpa/inet.h>
37 # include <linux/netlink.h>
38 # include <linux/rtnetlink.h>
41 # include <sys/epoll.h>
44 # include <sys/inotify.h>
47 #include <sys/param.h>
49 #include <sys/socket.h>
56 #include <resolv/resolv.h>
58 #include <kernel-features.h>
59 #include <libc-diag.h>
62 /* Support to run nscd as an unprivileged user */
63 const char *server_user
;
64 static uid_t server_uid
;
65 static gid_t server_gid
;
66 const char *stat_user
;
68 static gid_t
*server_groups
;
72 static int server_ngroups
;
74 static pthread_attr_t attr
;
76 static void begin_drop_privileges (void);
77 static void finish_drop_privileges (void);
79 /* Map request type to a string. */
80 const char *const serv2str
[LASTREQ
] =
82 [GETPWBYNAME
] = "GETPWBYNAME",
83 [GETPWBYUID
] = "GETPWBYUID",
84 [GETGRBYNAME
] = "GETGRBYNAME",
85 [GETGRBYGID
] = "GETGRBYGID",
86 [GETHOSTBYNAME
] = "GETHOSTBYNAME",
87 [GETHOSTBYNAMEv6
] = "GETHOSTBYNAMEv6",
88 [GETHOSTBYADDR
] = "GETHOSTBYADDR",
89 [GETHOSTBYADDRv6
] = "GETHOSTBYADDRv6",
90 [SHUTDOWN
] = "SHUTDOWN",
91 [GETSTAT
] = "GETSTAT",
92 [INVALIDATE
] = "INVALIDATE",
93 [GETFDPW
] = "GETFDPW",
94 [GETFDGR
] = "GETFDGR",
95 [GETFDHST
] = "GETFDHST",
97 [INITGROUPS
] = "INITGROUPS",
98 [GETSERVBYNAME
] = "GETSERVBYNAME",
99 [GETSERVBYPORT
] = "GETSERVBYPORT",
100 [GETFDSERV
] = "GETFDSERV",
101 [GETNETGRENT
] = "GETNETGRENT",
102 [INNETGR
] = "INNETGR",
103 [GETFDNETGR
] = "GETFDNETGR"
106 #ifdef PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
107 # define RWLOCK_INITIALIZER PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
109 # define RWLOCK_INITIALIZER PTHREAD_RWLOCK_INITIALIZER
112 /* The control data structures for the services. */
113 struct database_dyn dbs
[lastdb
] =
116 .lock
= RWLOCK_INITIALIZER
,
117 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
118 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
124 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
125 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
126 .db_filename
= _PATH_NSCD_PASSWD_DB
,
127 .disabled_iov
= &pwd_iov_disabled
,
135 .lock
= RWLOCK_INITIALIZER
,
136 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
137 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
143 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
144 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
145 .db_filename
= _PATH_NSCD_GROUP_DB
,
146 .disabled_iov
= &grp_iov_disabled
,
154 .lock
= RWLOCK_INITIALIZER
,
155 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
156 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
160 .propagate
= 0, /* Not used. */
162 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
163 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
164 .db_filename
= _PATH_NSCD_HOSTS_DB
,
165 .disabled_iov
= &hst_iov_disabled
,
173 .lock
= RWLOCK_INITIALIZER
,
174 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
175 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
179 .propagate
= 0, /* Not used. */
181 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
182 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
183 .db_filename
= _PATH_NSCD_SERVICES_DB
,
184 .disabled_iov
= &serv_iov_disabled
,
192 .lock
= RWLOCK_INITIALIZER
,
193 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
194 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
198 .propagate
= 0, /* Not used. */
200 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
201 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
202 .db_filename
= _PATH_NSCD_NETGROUP_DB
,
203 .disabled_iov
= &netgroup_iov_disabled
,
213 /* Mapping of request type to database.  Each entry pairs a flag saying
   whether the request is a data request that may be answered from the
   cache (the `data_request' member tested in handle_request) with the
   database the request refers to, or NULL when it refers to none.  */
217 struct database_dyn
*db
;
218 } const reqinfo
[LASTREQ
] =
220 [GETPWBYNAME
] = { true, &dbs
[pwddb
] },
221 [GETPWBYUID
] = { true, &dbs
[pwddb
] },
222 [GETGRBYNAME
] = { true, &dbs
[grpdb
] },
223 [GETGRBYGID
] = { true, &dbs
[grpdb
] },
224 [GETHOSTBYNAME
] = { true, &dbs
[hstdb
] },
225 [GETHOSTBYNAMEv6
] = { true, &dbs
[hstdb
] },
226 [GETHOSTBYADDR
] = { true, &dbs
[hstdb
] },
227 [GETHOSTBYADDRv6
] = { true, &dbs
[hstdb
] },
/* Control requests: not data requests and not tied to any database.
   Each request type must be listed exactly once; a repeated designator
   silently overrides the earlier entry.  */
228 [SHUTDOWN
] = { false, NULL
},
229 [GETSTAT
] = { false, NULL
},
230 [INVALIDATE
] = { false, NULL
},
231 [GETFDPW
] = { false, &dbs
[pwddb
] },
232 [GETFDGR
] = { false, &dbs
[grpdb
] },
233 [GETFDHST
] = { false, &dbs
[hstdb
] },
234 [GETAI
] = { true, &dbs
[hstdb
] },
235 [INITGROUPS
] = { true, &dbs
[grpdb
] },
236 [GETSERVBYNAME
] = { true, &dbs
[servdb
] },
237 [GETSERVBYPORT
] = { true, &dbs
[servdb
] },
238 [GETFDSERV
] = { false, &dbs
[servdb
] },
239 [GETNETGRENT
] = { true, &dbs
[netgrdb
] },
240 [INNETGR
] = { true, &dbs
[netgrdb
] },
241 [GETFDNETGR
] = { false, &dbs
[netgrdb
] }
245 /* Initial number of threads to use. */
247 /* Maximum number of threads to use. */
248 int max_nthreads
= 32;
250 /* Socket for incoming connections. */
254 /* Inotify descriptor. */
259 /* Descriptor for netlink status updates. */
260 static int nl_status_fd
= -1;
263 /* Number of times clients had to wait. */
264 unsigned long int client_queued
;
/* Write LEN bytes starting at BUF to the socket FD.  Data is handed to
   send() with MSG_NOSIGNAL, wrapped in TEMP_FAILURE_RETRY, and BUF is
   advanced by the amount each call reports as sent.  The result is the
   negative send() return value if the last call failed, otherwise LEN
   minus N, where N is the length passed to send().
   NOTE(review): the loop around the send() call and the bookkeeping of N
   are not visible in this excerpt -- confirm against the full file.  */
268 writeall (int fd
, const void *buf
, size_t len
)
274 ret
= TEMP_FAILURE_RETRY (send (fd
, buf
, n
, MSG_NOSIGNAL
));
/* Skip over the part that has been sent already.  */
277 buf
= (const char *) buf
+ ret
;
/* A negative RET is passed through unchanged; otherwise report how much
   of LEN is no longer outstanding.  */
281 return ret
< 0 ? ret
: len
- n
;
288 /* The following three are not really used, they are symbolic constants. */
294 use_he_begin
= use_he
| use_begin
,
295 use_he_end
= use_he
| use_end
,
297 use_data_begin
= use_data
| use_begin
,
298 use_data_end
= use_data
| use_end
,
299 use_data_first
= use_data_begin
| use_first
/* Account in USEMAP for the LEN bytes at offset START being used by an
   object of kind USE.  The function first tests that START and
   START + LEN do not exceed FIRST_FREE and that START has none of the
   BLOCK_ALIGN_M1 bits set.  If the first byte is still use_not the range
   is claimed: the first byte gets USE | use_begin and the last one
   USE | use_end.  If the first byte already carries the begin marker of
   the same kind (ignoring use_first) the existing marking is compared
   against USE and only the use_first bit is merged in.
   NOTE(review): the return statements and loop headers are not visible
   in this excerpt; callers test the result with `! check_use (...)', so
   a zero result presumably reports an inconsistency -- confirm.  */
304 check_use (const char *data
, nscd_ssize_t first_free
, uint8_t *usemap
,
305 enum usekey use
, ref_t start
, size_t len
)
/* Range and alignment check.  */
310 if (start
> first_free
|| start
+ len
> first_free
311 || (start
& BLOCK_ALIGN_M1
))
/* Case 1: the range has not been seen before.  */
314 if (usemap
[start
] == use_not
)
316 /* Add the start marker. */
317 usemap
[start
] = use
| use_begin
;
/* Every following byte must still be unused.  */
321 if (usemap
[++start
] != use_not
)
326 /* Add the end marker. */
327 usemap
[start
] = use
| use_end
;
/* Case 2: the range starts where an object of the same kind was already
   recorded; use_first is masked out on both sides of the comparison.  */
329 else if ((usemap
[start
] & ~use_first
) == ((use
| use_begin
) & ~use_first
))
331 /* Hash entries can't be shared. */
/* Remember if any reference to this object had the use_first bit set.  */
335 usemap
[start
] |= (use
& use_first
);
/* The interior and the end marker must match the earlier marking.  */
339 if (usemap
[++start
] != use
)
342 if (usemap
[++start
] != (use
| use_end
))
346 /* Points to a wrong object or somewhere in the middle. */
353 /* Verify data in persistent database. */
355 verify_persistent_db (void *mem
, struct database_pers_head
*readhead
, int dbnr
)
357 assert (dbnr
== pwddb
|| dbnr
== grpdb
|| dbnr
== hstdb
|| dbnr
== servdb
360 time_t now
= time (NULL
);
362 struct database_pers_head
*head
= mem
;
363 struct database_pers_head head_copy
= *head
;
365 /* Check that the header that was read matches the head in the database. */
366 if (memcmp (head
, readhead
, sizeof (*head
)) != 0)
369 /* First some easy tests: make sure the database header is sane. */
370 if (head
->version
!= DB_VERSION
371 || head
->header_size
!= sizeof (*head
)
372 /* We allow a timestamp to be one hour ahead of the current time.
373 This should cover daylight saving time changes. */
374 || head
->timestamp
> now
+ 60 * 60 + 60
375 || (head
->gc_cycle
& 1)
377 || (size_t) head
->module
> INT32_MAX
/ sizeof (ref_t
)
378 || (size_t) head
->data_size
> INT32_MAX
- head
->module
* sizeof (ref_t
)
379 || head
->first_free
< 0
380 || head
->first_free
> head
->data_size
381 || (head
->first_free
& BLOCK_ALIGN_M1
) != 0
382 || head
->maxnentries
< 0
383 || head
->maxnsearched
< 0)
386 uint8_t *usemap
= calloc (head
->first_free
, 1);
390 const char *data
= (char *) &head
->array
[roundup (head
->module
,
391 ALIGN
/ sizeof (ref_t
))];
393 nscd_ssize_t he_cnt
= 0;
394 for (nscd_ssize_t cnt
= 0; cnt
< head
->module
; ++cnt
)
396 ref_t trail
= head
->array
[cnt
];
400 while (work
!= ENDREF
)
402 if (! check_use (data
, head
->first_free
, usemap
, use_he
, work
,
403 sizeof (struct hashentry
)))
406 /* Now we know we can dereference the record. */
407 struct hashentry
*here
= (struct hashentry
*) (data
+ work
);
411 /* Make sure the record is for this type of service. */
412 if (here
->type
>= LASTREQ
413 || reqinfo
[here
->type
].db
!= &dbs
[dbnr
])
416 /* Validate boolean field value. */
417 if (here
->first
!= false && here
->first
!= true)
425 || here
->packet
> head
->first_free
426 || here
->packet
+ sizeof (struct datahead
) > head
->first_free
)
429 struct datahead
*dh
= (struct datahead
*) (data
+ here
->packet
);
431 if (! check_use (data
, head
->first_free
, usemap
,
432 use_data
| (here
->first
? use_first
: 0),
433 here
->packet
, dh
->allocsize
))
436 if (dh
->allocsize
< sizeof (struct datahead
)
437 || dh
->recsize
> dh
->allocsize
438 || (dh
->notfound
!= false && dh
->notfound
!= true)
439 || (dh
->usable
!= false && dh
->usable
!= true))
442 if (here
->key
< here
->packet
+ sizeof (struct datahead
)
443 || here
->key
> here
->packet
+ dh
->allocsize
444 || here
->key
+ here
->len
> here
->packet
+ dh
->allocsize
)
450 /* A circular list, this must not happen. */
453 trail
= ((struct hashentry
*) (data
+ trail
))->next
;
458 if (he_cnt
!= head
->nentries
)
461 /* See if all data and keys had at least one reference from
462 he->first == true hashentry. */
463 for (ref_t idx
= 0; idx
< head
->first_free
; ++idx
)
465 if (usemap
[idx
] == use_data_begin
)
469 /* Finally, make sure the database hasn't changed since the first test. */
470 if (memcmp (mem
, &head_copy
, sizeof (*head
)) != 0)
482 /* Initialize database information structures. */
486 /* Look up unprivileged uid/gid/groups before we start listening on the
488 if (server_user
!= NULL
)
489 begin_drop_privileges ();
492 /* No configuration for this value, assume a default. */
495 for (size_t cnt
= 0; cnt
< lastdb
; ++cnt
)
496 if (dbs
[cnt
].enabled
)
498 pthread_rwlock_init (&dbs
[cnt
].lock
, NULL
);
499 pthread_mutex_init (&dbs
[cnt
].memlock
, NULL
);
501 if (dbs
[cnt
].persistent
)
503 /* Try to open the appropriate file on disk. */
504 int fd
= open (dbs
[cnt
].db_filename
, O_RDWR
| O_CLOEXEC
);
511 struct database_pers_head head
;
512 ssize_t n
= TEMP_FAILURE_RETRY (read (fd
, &head
,
514 if (n
!= sizeof (head
) || fstat64 (fd
, &st
) != 0)
517 /* The code is single-threaded at this point so
518 using strerror is just fine. */
519 msg
= strerror (errno
);
521 dbg_log (_("invalid persistent database file \"%s\": %s"),
522 dbs
[cnt
].db_filename
, msg
);
523 unlink (dbs
[cnt
].db_filename
);
525 else if (head
.module
== 0 && head
.data_size
== 0)
527 /* The file has been created, but the head has not
528 been initialized yet. */
529 msg
= _("uninitialized header");
532 else if (head
.header_size
!= (int) sizeof (head
))
534 msg
= _("header size does not match");
537 else if ((total
= (sizeof (head
)
538 + roundup (head
.module
* sizeof (ref_t
),
542 || total
< sizeof (head
))
544 msg
= _("file size does not match");
547 /* Note we map with the maximum size allowed for the
548 database. This is likely much larger than the
549 actual file size. This is OK on most OSes since
550 extensions of the underlying file will
551 automatically translate more pages available for
553 else if ((mem
= mmap (NULL
, dbs
[cnt
].max_db_size
,
554 PROT_READ
| PROT_WRITE
,
558 else if (!verify_persistent_db (mem
, &head
, cnt
))
561 msg
= _("verification failed");
566 /* Success. We have the database. */
568 dbs
[cnt
].memsize
= total
;
569 dbs
[cnt
].data
= (char *)
570 &dbs
[cnt
].head
->array
[roundup (dbs
[cnt
].head
->module
,
571 ALIGN
/ sizeof (ref_t
))];
572 dbs
[cnt
].mmap_used
= true;
574 if (dbs
[cnt
].suggested_module
> head
.module
)
575 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
580 /* We also need a read-only descriptor. */
583 dbs
[cnt
].ro_fd
= open (dbs
[cnt
].db_filename
,
584 O_RDONLY
| O_CLOEXEC
);
585 if (dbs
[cnt
].ro_fd
== -1)
587 cannot create read-only descriptor for \"%s\"; no mmap"),
588 dbs
[cnt
].db_filename
);
591 // XXX Shall we test whether the descriptors actually
592 // XXX point to the same file?
595 /* Close the file descriptors in case something went
596 wrong in which case the variables have not been
601 else if (errno
== EACCES
)
602 do_exit (EXIT_FAILURE
, 0, _("cannot access '%s'"),
603 dbs
[cnt
].db_filename
);
606 if (dbs
[cnt
].head
== NULL
)
608 /* No database loaded. Allocate the data structure,
610 struct database_pers_head head
;
611 size_t total
= (sizeof (head
)
612 + roundup (dbs
[cnt
].suggested_module
613 * sizeof (ref_t
), ALIGN
)
614 + (dbs
[cnt
].suggested_module
615 * DEFAULT_DATASIZE_PER_BUCKET
));
617 /* Try to create the database. If we do not need a
618 persistent database create a temporary file. */
621 if (dbs
[cnt
].persistent
)
623 fd
= open (dbs
[cnt
].db_filename
,
624 O_RDWR
| O_CREAT
| O_EXCL
| O_TRUNC
| O_CLOEXEC
,
626 if (fd
!= -1 && dbs
[cnt
].shared
)
627 ro_fd
= open (dbs
[cnt
].db_filename
,
628 O_RDONLY
| O_CLOEXEC
);
632 char fname
[] = _PATH_NSCD_XYZ_DB_TMP
;
633 fd
= mkostemp (fname
, O_CLOEXEC
);
635 /* We do not need the file name anymore after we
636 opened another file descriptor in read-only mode. */
640 ro_fd
= open (fname
, O_RDONLY
| O_CLOEXEC
);
650 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
651 dbnames
[cnt
], dbs
[cnt
].db_filename
);
652 do_exit (1, 0, NULL
);
655 if (dbs
[cnt
].persistent
)
656 dbg_log (_("cannot create %s; no persistent database used"),
657 dbs
[cnt
].db_filename
);
659 dbg_log (_("cannot create %s; no sharing possible"),
660 dbs
[cnt
].db_filename
);
662 dbs
[cnt
].persistent
= 0;
663 // XXX remember: no mmap
667 /* Tell the user if we could not create the read-only
669 if (ro_fd
== -1 && dbs
[cnt
].shared
)
671 cannot create read-only descriptor for \"%s\"; no mmap"),
672 dbs
[cnt
].db_filename
);
674 /* Before we create the header, initialize the hash
675 table. That way if we get interrupted while writing
676 the header we can recognize a partially initialized
678 size_t ps
= sysconf (_SC_PAGESIZE
);
680 assert (~ENDREF
== 0);
681 memset (tmpbuf
, '\xff', ps
);
683 size_t remaining
= dbs
[cnt
].suggested_module
* sizeof (ref_t
);
684 off_t offset
= sizeof (head
);
687 if (offset
% ps
!= 0)
689 towrite
= MIN (remaining
, ps
- (offset
% ps
));
690 if (pwrite (fd
, tmpbuf
, towrite
, offset
) != towrite
)
693 remaining
-= towrite
;
696 while (remaining
> ps
)
698 if (pwrite (fd
, tmpbuf
, ps
, offset
) == -1)
705 && pwrite (fd
, tmpbuf
, remaining
, offset
) != remaining
)
708 /* Create the header of the file. */
709 struct database_pers_head head
=
711 .version
= DB_VERSION
,
712 .header_size
= sizeof (head
),
713 .module
= dbs
[cnt
].suggested_module
,
714 .data_size
= (dbs
[cnt
].suggested_module
715 * DEFAULT_DATASIZE_PER_BUCKET
),
720 if ((TEMP_FAILURE_RETRY (write (fd
, &head
, sizeof (head
)))
722 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd
, 0, total
))
724 || (mem
= mmap (NULL
, dbs
[cnt
].max_db_size
,
725 PROT_READ
| PROT_WRITE
,
726 MAP_SHARED
, fd
, 0)) == MAP_FAILED
)
729 unlink (dbs
[cnt
].db_filename
);
730 dbg_log (_("cannot write to database file %s: %s"),
731 dbs
[cnt
].db_filename
, strerror (errno
));
732 dbs
[cnt
].persistent
= 0;
738 dbs
[cnt
].data
= (char *)
739 &dbs
[cnt
].head
->array
[roundup (dbs
[cnt
].head
->module
,
740 ALIGN
/ sizeof (ref_t
))];
741 dbs
[cnt
].memsize
= total
;
742 dbs
[cnt
].mmap_used
= true;
744 /* Remember the descriptors. */
746 dbs
[cnt
].ro_fd
= ro_fd
;
758 if (dbs
[cnt
].head
== NULL
)
760 /* We do not use the persistent database. Just
761 create an in-memory data structure. */
762 assert (! dbs
[cnt
].persistent
);
764 dbs
[cnt
].head
= xmalloc (sizeof (struct database_pers_head
)
765 + (dbs
[cnt
].suggested_module
767 memset (dbs
[cnt
].head
, '\0', sizeof (struct database_pers_head
));
768 assert (~ENDREF
== 0);
769 memset (dbs
[cnt
].head
->array
, '\xff',
770 dbs
[cnt
].suggested_module
* sizeof (ref_t
));
771 dbs
[cnt
].head
->module
= dbs
[cnt
].suggested_module
;
772 dbs
[cnt
].head
->data_size
= (DEFAULT_DATASIZE_PER_BUCKET
773 * dbs
[cnt
].head
->module
);
774 dbs
[cnt
].data
= xmalloc (dbs
[cnt
].head
->data_size
);
775 dbs
[cnt
].head
->first_free
= 0;
778 assert (dbs
[cnt
].ro_fd
== -1);
782 /* Create the socket. */
783 sock
= socket (AF_UNIX
, SOCK_STREAM
| SOCK_CLOEXEC
| SOCK_NONBLOCK
, 0);
786 dbg_log (_("cannot open socket: %s"), strerror (errno
));
787 do_exit (errno
== EACCES
? 4 : 1, 0, NULL
);
789 /* Bind a name to the socket. */
790 struct sockaddr_un sock_addr
;
791 sock_addr
.sun_family
= AF_UNIX
;
792 strcpy (sock_addr
.sun_path
, _PATH_NSCDSOCKET
);
793 if (bind (sock
, (struct sockaddr
*) &sock_addr
, sizeof (sock_addr
)) < 0)
795 dbg_log ("%s: %s", _PATH_NSCDSOCKET
, strerror (errno
));
796 do_exit (errno
== EACCES
? 4 : 1, 0, NULL
);
799 /* Set permissions for the socket. */
800 chmod (_PATH_NSCDSOCKET
, DEFFILEMODE
);
802 /* Set the socket up to accept connections. */
803 if (listen (sock
, SOMAXCONN
) < 0)
805 dbg_log (_("cannot enable socket to accept connections: %s"),
807 do_exit (1, 0, NULL
);
811 if (dbs
[hstdb
].enabled
)
813 /* Try to open netlink socket to monitor network setting changes. */
814 nl_status_fd
= socket (AF_NETLINK
,
815 SOCK_RAW
| SOCK_CLOEXEC
| SOCK_NONBLOCK
,
817 if (nl_status_fd
!= -1)
819 struct sockaddr_nl snl
;
820 memset (&snl
, '\0', sizeof (snl
));
821 snl
.nl_family
= AF_NETLINK
;
822 /* XXX Is this the best set to use? */
823 snl
.nl_groups
= (RTMGRP_IPV4_IFADDR
| RTMGRP_TC
| RTMGRP_IPV4_MROUTE
824 | RTMGRP_IPV4_ROUTE
| RTMGRP_IPV4_RULE
825 | RTMGRP_IPV6_IFADDR
| RTMGRP_IPV6_MROUTE
826 | RTMGRP_IPV6_ROUTE
| RTMGRP_IPV6_IFINFO
827 | RTMGRP_IPV6_PREFIX
);
829 if (bind (nl_status_fd
, (struct sockaddr
*) &snl
, sizeof (snl
)) != 0)
831 close (nl_status_fd
);
836 /* Start the timestamp process. */
837 dbs
[hstdb
].head
->extra_data
[NSCD_HST_IDX_CONF_TIMESTAMP
]
838 = __bump_nl_timestamp ();
844 /* Change to unprivileged uid/gid/groups if specified in config file */
845 if (server_user
!= NULL
)
846 finish_drop_privileges ();
850 #define TRACED_FILE_MASK (IN_DELETE_SELF | IN_CLOSE_WRITE | IN_MOVE_SELF)
851 #define TRACED_DIR_MASK (IN_DELETE_SELF | IN_CREATE | IN_MOVED_TO | IN_MOVE_SELF)
/* Install the inotify watches for the traced file FINFO.  A watch is
   requested through inotify_add_watch on INOTIFY_FD only for those slots
   of FINFO->inotify_descr (TRACED_FILE and TRACED_DIR) that are still
   negative, so calling this again keeps existing watches.  The outcome
   of each attempt is reported through dbg_log; the file messages name
   FINFO->fname and the directory messages name FINFO->dname.  */
853 install_watches (struct traced_file
*finfo
)
855 /* Use inotify support if we have it. */
856 if (finfo
->inotify_descr
[TRACED_FILE
] < 0)
857 finfo
->inotify_descr
[TRACED_FILE
] = inotify_add_watch (inotify_fd
,
860 if (finfo
->inotify_descr
[TRACED_FILE
] < 0)
862 dbg_log (_("disabled inotify-based monitoring for file `%s': %s"),
863 finfo
->fname
, strerror (errno
));
866 dbg_log (_("monitoring file `%s` (%d)"),
867 finfo
->fname
, finfo
->inotify_descr
[TRACED_FILE
]);
868 /* Additionally listen for events in the file's parent directory.
869 We do this because the file to be watched might be
870 deleted and then added back again. When it is added back again
871 we must re-add the watch. We must also cover IN_MOVED_TO to
872 detect a file being moved into the directory. */
873 if (finfo
->inotify_descr
[TRACED_DIR
] < 0)
874 finfo
->inotify_descr
[TRACED_DIR
] = inotify_add_watch (inotify_fd
,
877 if (finfo
->inotify_descr
[TRACED_DIR
] < 0)
/* Report the directory, not the file: this is the directory watch that
   could not be installed.  */
879 dbg_log (_("disabled inotify-based monitoring for directory `%s': %s"),
880 finfo
->dname
, strerror (errno
));
883 dbg_log (_("monitoring directory `%s` (%d)"),
884 finfo
->dname
, finfo
->inotify_descr
[TRACED_DIR
]);
888 /* Register the file in FINFO as a traced file for the database DBS[DBIX].
890 We support registering multiple files per database. Each call to
891 register_traced_file adds to the list of registered files.
893 When we prune the database, either through timeout or a request to
894 invalidate, we will check to see if any of the registered files has changed.
895 When we accept new connections to handle a cache request we will also
896 check to see if any of the registered files has changed.
898 If we have inotify support then we install an inotify fd to notify us of
899 file deletion or modification, both of which will require we invalidate
900 the cache for the database. Without inotify support we stat the file and
901 store st_mtime to determine if the file has been modified. */
903 register_traced_file (size_t dbidx
, struct traced_file
*finfo
)
905 /* If the database is disabled or file checking is disabled
906 then ignore the registration. */
907 if (! dbs
[dbidx
].enabled
|| ! dbs
[dbidx
].check_file
)
910 if (__glibc_unlikely (debug_level
> 0))
911 dbg_log (_("monitoring file %s for database %s"),
912 finfo
->fname
, dbnames
[dbidx
]);
915 install_watches (finfo
);
918 if (stat64 (finfo
->fname
, &st
) < 0)
920 /* We cannot stat() the file. Set mtime to zero and try again later. */
921 dbg_log (_("stat failed for file `%s'; will try again later: %s"),
922 finfo
->fname
, strerror (errno
));
926 finfo
->mtime
= st
.st_mtime
;
928 /* Queue up the file name. */
929 finfo
->next
= dbs
[dbidx
].traced_files
;
930 dbs
[dbidx
].traced_files
= finfo
;
934 /* Close the connections. */
943 invalidate_cache (char *key
, int fd
)
948 for (number
= pwddb
; number
< lastdb
; ++number
)
949 if (strcmp (key
, dbnames
[number
]) == 0)
951 struct traced_file
*runp
= dbs
[number
].traced_files
;
954 /* Make sure we reload from file when checking mtime. */
957 /* During an invalidation we try to reload the traced
958 file watches. This allows the user to re-sync if
959 inotify events were lost. Similar to what we do during
961 install_watches (runp
);
963 if (runp
->call_res_init
)
973 if (number
== lastdb
)
976 writeall (fd
, &resp
, sizeof (resp
));
980 if (dbs
[number
].enabled
)
982 pthread_mutex_lock (&dbs
[number
].prune_run_lock
);
983 prune_cache (&dbs
[number
], LONG_MAX
, fd
);
984 pthread_mutex_unlock (&dbs
[number
].prune_run_lock
);
989 writeall (fd
, &resp
, sizeof (resp
));
996 send_ro_fd (struct database_dyn
*db
, char *key
, int fd
)
998 /* If we do not have a read-only file descriptor do nothing. */
1002 /* We need to send some data along with the descriptor. */
1003 uint64_t mapsize
= (db
->head
->data_size
1004 + roundup (db
->head
->module
* sizeof (ref_t
), ALIGN
)
1005 + sizeof (struct database_pers_head
));
1006 struct iovec iov
[2];
1007 iov
[0].iov_base
= key
;
1008 iov
[0].iov_len
= strlen (key
) + 1;
1009 iov
[1].iov_base
= &mapsize
;
1010 iov
[1].iov_len
= sizeof (mapsize
);
1012 /* Prepare the control message to transfer the descriptor. */
1016 char bytes
[CMSG_SPACE (sizeof (int))];
1018 struct msghdr msg
= { .msg_iov
= iov
, .msg_iovlen
= 2,
1019 .msg_control
= buf
.bytes
,
1020 .msg_controllen
= sizeof (buf
) };
1021 struct cmsghdr
*cmsg
= CMSG_FIRSTHDR (&msg
);
1023 cmsg
->cmsg_level
= SOL_SOCKET
;
1024 cmsg
->cmsg_type
= SCM_RIGHTS
;
1025 cmsg
->cmsg_len
= CMSG_LEN (sizeof (int));
1027 int *ip
= (int *) CMSG_DATA (cmsg
);
1030 msg
.msg_controllen
= cmsg
->cmsg_len
;
1032 /* Send the control message. We repeat when we are interrupted but
1033 everything else is ignored. */
1034 #ifndef MSG_NOSIGNAL
1035 # define MSG_NOSIGNAL 0
1037 (void) TEMP_FAILURE_RETRY (sendmsg (fd
, &msg
, MSG_NOSIGNAL
));
1039 if (__glibc_unlikely (debug_level
> 0))
1040 dbg_log (_("provide access to FD %d, for %s"), db
->ro_fd
, key
);
1042 #endif /* SCM_RIGHTS */
1045 /* Handle new request. */
1047 handle_request (int fd
, request_header
*req
, void *key
, uid_t uid
, pid_t pid
)
1049 if (__builtin_expect (req
->version
, NSCD_VERSION
) != NSCD_VERSION
)
1051 if (debug_level
> 0)
1053 cannot handle old request version %d; current version is %d"),
1054 req
->version
, NSCD_VERSION
);
1058 /* Perform the SELinux check before we go on to the standard checks. */
1059 if (selinux_enabled
&& nscd_request_avc_has_perm (fd
, req
->type
) != 0)
1061 if (debug_level
> 0)
1064 char pbuf
[sizeof ("/proc//exe") + 3 * sizeof (long int)];
1071 snprintf (pbuf
, sizeof (pbuf
), "/proc/%ld/exe", (long int) pid
);
1072 ssize_t n
= readlink (pbuf
, buf
, sizeof (buf
) - 1);
1076 request from %ld not handled due to missing permission"), (long int) pid
);
1081 request from '%s' [%ld] not handled due to missing permission"),
1082 buf
, (long int) pid
);
1085 dbg_log (_("request not handled due to missing permission"));
1091 struct database_dyn
*db
= reqinfo
[req
->type
].db
;
1093 /* See whether we can service the request from the cache. */
1094 if (__builtin_expect (reqinfo
[req
->type
].data_request
, true))
1096 if (__builtin_expect (debug_level
, 0) > 0)
1098 if (req
->type
== GETHOSTBYADDR
|| req
->type
== GETHOSTBYADDRv6
)
1100 char buf
[INET6_ADDRSTRLEN
];
1102 dbg_log ("\t%s (%s)", serv2str
[req
->type
],
1103 inet_ntop (req
->type
== GETHOSTBYADDR
1104 ? AF_INET
: AF_INET6
,
1105 key
, buf
, sizeof (buf
)));
1108 dbg_log ("\t%s (%s)", serv2str
[req
->type
], (char *) key
);
1111 /* Is this service enabled? */
1112 if (__glibc_unlikely (!db
->enabled
))
1114 /* No, send the prepared record. */
1115 if (TEMP_FAILURE_RETRY (send (fd
, db
->disabled_iov
->iov_base
,
1116 db
->disabled_iov
->iov_len
,
1118 != (ssize_t
) db
->disabled_iov
->iov_len
1119 && __builtin_expect (debug_level
, 0) > 0)
1121 /* We have problems sending the result. */
1123 dbg_log (_("cannot write result: %s"),
1124 strerror_r (errno
, buf
, sizeof (buf
)));
1130 /* Be sure we can read the data. */
1131 if (__glibc_unlikely (pthread_rwlock_tryrdlock (&db
->lock
) != 0))
1133 ++db
->head
->rdlockdelayed
;
1134 pthread_rwlock_rdlock (&db
->lock
);
1137 /* See whether we can handle it from the cache. */
1138 struct datahead
*cached
;
1139 cached
= (struct datahead
*) cache_search (req
->type
, key
, req
->key_len
,
1143 /* Hurray it's in the cache. */
1144 if (writeall (fd
, cached
->data
, cached
->recsize
) != cached
->recsize
1145 && __glibc_unlikely (debug_level
> 0))
1147 /* We have problems sending the result. */
1149 dbg_log (_("cannot write result: %s"),
1150 strerror_r (errno
, buf
, sizeof (buf
)));
1153 pthread_rwlock_unlock (&db
->lock
);
1158 pthread_rwlock_unlock (&db
->lock
);
1160 else if (__builtin_expect (debug_level
, 0) > 0)
1162 if (req
->type
== INVALIDATE
)
1163 dbg_log ("\t%s (%s)", serv2str
[req
->type
], (char *) key
);
1165 dbg_log ("\t%s", serv2str
[req
->type
]);
1168 /* Handle the request. */
1172 addpwbyname (db
, fd
, req
, key
, uid
);
1176 addpwbyuid (db
, fd
, req
, key
, uid
);
1180 addgrbyname (db
, fd
, req
, key
, uid
);
1184 addgrbygid (db
, fd
, req
, key
, uid
);
1188 addhstbyname (db
, fd
, req
, key
, uid
);
1191 case GETHOSTBYNAMEv6
:
1192 addhstbynamev6 (db
, fd
, req
, key
, uid
);
1196 addhstbyaddr (db
, fd
, req
, key
, uid
);
1199 case GETHOSTBYADDRv6
:
1200 addhstbyaddrv6 (db
, fd
, req
, key
, uid
);
1204 addhstai (db
, fd
, req
, key
, uid
);
1208 addinitgroups (db
, fd
, req
, key
, uid
);
1212 addservbyname (db
, fd
, req
, key
, uid
);
1216 addservbyport (db
, fd
, req
, key
, uid
);
1220 addgetnetgrent (db
, fd
, req
, key
, uid
);
1224 addinnetgr (db
, fd
, req
, key
, uid
);
1231 /* Get the caller's credentials. */
1233 struct ucred caller
;
1234 socklen_t optlen
= sizeof (caller
);
1236 if (getsockopt (fd
, SOL_SOCKET
, SO_PEERCRED
, &caller
, &optlen
) < 0)
1240 dbg_log (_("error getting caller's id: %s"),
1241 strerror_r (errno
, buf
, sizeof (buf
)));
1247 /* Some systems have no SO_PEERCRED implementation. They don't
1248 care about security so we don't as well. */
1253 /* Accept shutdown, getstat and invalidate only from root. For
1254 the stat call also allow the user specified in the config file. */
1255 if (req
->type
== GETSTAT
)
1257 if (uid
== 0 || uid
== stat_uid
)
1258 send_stats (fd
, dbs
);
1262 if (req
->type
== INVALIDATE
)
1263 invalidate_cache (key
, fd
);
1265 termination_handler (0);
1275 send_ro_fd (reqinfo
[req
->type
].db
, key
, fd
);
1280 /* Ignore the command, it's nothing we know. */
1286 read_cmdline (size_t *size
)
1288 int fd
= open ("/proc/self/cmdline", O_RDONLY
);
1292 size_t limit
= 1024;
1293 char *buffer
= malloc (limit
);
1302 if (current
== limit
)
1305 if (2 * limit
< limit
1306 || (newptr
= realloc (buffer
, 2 * limit
)) == NULL
)
1317 ssize_t n
= TEMP_FAILURE_RETRY (read (fd
, buffer
+ current
,
1338 /* Restart the process. */
1342 /* First determine the parameters. We do not use the parameters
1343 passed to main because then nscd would use the system libc after
1344 restarting even if it was started by a non-system dynamic linker
1345 during glibc testing. */
1347 char *cmdline
= read_cmdline (&readlen
);
1348 if (cmdline
== NULL
)
1351 cannot open /proc/self/cmdline: %m; disabling paranoia mode"));
1356 /* Parse the command line. Worst case scenario: every two
1357 characters form one parameter (one character plus NUL). */
1358 char **argv
= alloca ((readlen
/ 2 + 1) * sizeof (argv
[0]));
1361 for (char *cp
= cmdline
; cp
< cmdline
+ readlen
;)
1364 cp
= (char *) rawmemchr (cp
, '\0') + 1;
1368 /* Second, change back to the old user if we changed it. */
1369 if (server_user
!= NULL
)
1371 if (setresuid (old_uid
, old_uid
, old_uid
) != 0)
1374 cannot change to old UID: %s; disabling paranoia mode"),
1382 if (setresgid (old_gid
, old_gid
, old_gid
) != 0)
1385 cannot change to old GID: %s; disabling paranoia mode"),
1388 ignore_value (setuid (server_uid
));
1395 /* Next change back to the old working directory. */
1396 if (chdir (oldcwd
) == -1)
1399 cannot change to old working directory: %s; disabling paranoia mode"),
1402 if (server_user
!= NULL
)
1404 ignore_value (setuid (server_uid
));
1405 ignore_value (setgid (server_gid
));
1412 /* Synchronize memory. */
1413 int32_t certainly
[lastdb
];
1414 for (int cnt
= 0; cnt
< lastdb
; ++cnt
)
1415 if (dbs
[cnt
].enabled
)
1417 /* Make sure nobody keeps using the database. */
1418 dbs
[cnt
].head
->timestamp
= 0;
1419 certainly
[cnt
] = dbs
[cnt
].head
->nscd_certainly_running
;
1420 dbs
[cnt
].head
->nscd_certainly_running
= 0;
1422 if (dbs
[cnt
].persistent
)
1424 msync (dbs
[cnt
].head
, dbs
[cnt
].memsize
, MS_ASYNC
);
1427 /* The preparations are done. */
1429 char pathbuf
[PATH_MAX
];
1433 /* Try to exec the real nscd program so the process name (as reported
1434 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1435 if readlink or the exec with the result of the readlink call fails. */
1436 ssize_t n
= readlink ("/proc/self/exe", pathbuf
, sizeof (pathbuf
) - 1);
1440 execv (pathbuf
, argv
);
1442 execv ("/proc/self/exe", argv
);
1444 /* If we come here, we will never be able to re-exec. */
1445 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1448 if (server_user
!= NULL
)
1450 ignore_value (setuid (server_uid
));
1451 ignore_value (setgid (server_gid
));
1453 if (chdir ("/") != 0)
1454 dbg_log (_("cannot change current working directory to \"/\": %s"),
1459 /* Reenable the databases. */
1460 time_t now
= time (NULL
);
1461 for (int cnt
= 0; cnt
< lastdb
; ++cnt
)
1462 if (dbs
[cnt
].enabled
)
1464 dbs
[cnt
].head
->timestamp
= now
;
1465 dbs
[cnt
].head
->nscd_certainly_running
= certainly
[cnt
];
1470 /* List of file descriptors. */
1474 struct fdlist
*next
;
1476 /* Memory allocated for the list. */
1477 static struct fdlist
*fdlist
;
1478 /* List of currently ready-to-read file descriptors. */
1479 static struct fdlist
*readylist
;
1481 /* Conditional variable and mutex to signal availability of entries in
1482 READYLIST. The condvar is initialized dynamically since we might
1483 use a different clock depending on availability. */
1484 static pthread_cond_t readylist_cond
= PTHREAD_COND_INITIALIZER
;
1485 static pthread_mutex_t readylist_lock
= PTHREAD_MUTEX_INITIALIZER
;
1487 /* The clock to use with the condvar. */
1488 static clockid_t timeout_clock
= CLOCK_REALTIME
;
1490 /* Number of threads ready to handle the READYLIST. */
1491 static unsigned long int nready
;
1494 /* Function for the clean-up threads. */
1496 __attribute__ ((__noreturn__
))
1497 nscd_run_prune (void *p
)
1499 const long int my_number
= (long int) p
;
1500 assert (dbs
[my_number
].enabled
);
1502 int dont_need_update
= setup_thread (&dbs
[my_number
]);
1504 time_t now
= time (NULL
);
1506 /* We are running. */
1507 dbs
[my_number
].head
->timestamp
= now
;
1509 struct timespec prune_ts
;
1510 if (__glibc_unlikely (clock_gettime (timeout_clock
, &prune_ts
) == -1))
1511 /* Should never happen. */
1514 /* Compute the initial timeout time. Prevent all the timers to go
1515 off at the same time by adding a db-based value. */
1516 prune_ts
.tv_sec
+= CACHE_PRUNE_INTERVAL
+ my_number
;
1517 dbs
[my_number
].wakeup_time
= now
+ CACHE_PRUNE_INTERVAL
+ my_number
;
1519 pthread_mutex_t
*prune_lock
= &dbs
[my_number
].prune_lock
;
1520 pthread_mutex_t
*prune_run_lock
= &dbs
[my_number
].prune_run_lock
;
1521 pthread_cond_t
*prune_cond
= &dbs
[my_number
].prune_cond
;
1523 pthread_mutex_lock (prune_lock
);
1526 /* Wait, but not forever. */
1528 if (! dbs
[my_number
].clear_cache
)
1529 e
= pthread_cond_timedwait (prune_cond
, prune_lock
, &prune_ts
);
1530 assert (__builtin_expect (e
== 0 || e
== ETIMEDOUT
, 1));
1534 if (e
== ETIMEDOUT
|| now
>= dbs
[my_number
].wakeup_time
1535 || dbs
[my_number
].clear_cache
)
1537 /* We will determine the new timout values based on the
1538 cache content. Should there be concurrent additions to
1539 the cache which are not accounted for in the cache
1540 pruning we want to know about it. Therefore set the
1541 timeout to the maximum. It will be descreased when adding
1542 new entries to the cache, if necessary. */
1543 dbs
[my_number
].wakeup_time
= MAX_TIMEOUT_VALUE
;
1545 /* Unconditionally reset the flag. */
1546 time_t prune_now
= dbs
[my_number
].clear_cache
? LONG_MAX
: now
;
1547 dbs
[my_number
].clear_cache
= 0;
1549 pthread_mutex_unlock (prune_lock
);
1551 /* We use a separate lock for running the prune function (instead
1552 of keeping prune_lock locked) because this enables concurrent
1553 invocations of cache_add which might modify the timeout value. */
1554 pthread_mutex_lock (prune_run_lock
);
1555 next_wait
= prune_cache (&dbs
[my_number
], prune_now
, -1);
1556 pthread_mutex_unlock (prune_run_lock
);
1558 next_wait
= MAX (next_wait
, CACHE_PRUNE_INTERVAL
);
1559 /* If clients cannot determine for sure whether nscd is running
1560 we need to wake up occasionally to update the timestamp.
1561 Wait 90% of the update period. */
1562 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1563 if (__glibc_unlikely (! dont_need_update
))
1565 next_wait
= MIN (UPDATE_MAPPING_TIMEOUT
, next_wait
);
1566 dbs
[my_number
].head
->timestamp
= now
;
1569 pthread_mutex_lock (prune_lock
);
1571 /* Make it known when we will wake up again. */
1572 if (now
+ next_wait
< dbs
[my_number
].wakeup_time
)
1573 dbs
[my_number
].wakeup_time
= now
+ next_wait
;
1575 next_wait
= dbs
[my_number
].wakeup_time
- now
;
1578 /* The cache was just pruned. Do not do it again now. Just
1579 use the new timeout value. */
1580 next_wait
= dbs
[my_number
].wakeup_time
- now
;
1582 if (clock_gettime (timeout_clock
, &prune_ts
) == -1)
1583 /* Should never happen. */
1586 /* Compute next timeout time. */
1587 prune_ts
.tv_sec
+= next_wait
;
1592 /* This is the main loop. It is replicated in different threads but
1593 the use of the ready list makes sure only one thread handles an
1594 incoming connection. */
1596 __attribute__ ((__noreturn__
))
1597 nscd_run_worker (void *p
)
1601 /* Initial locking. */
1602 pthread_mutex_lock (&readylist_lock
);
1604 /* One more thread available. */
1609 while (readylist
== NULL
)
1610 pthread_cond_wait (&readylist_cond
, &readylist_lock
);
1612 struct fdlist
*it
= readylist
->next
;
1613 if (readylist
->next
== readylist
)
1614 /* Just one entry on the list. */
1617 readylist
->next
= it
->next
;
1619 /* Extract the information and mark the record ready to be used
1624 /* One more thread available. */
1627 /* We are done with the list. */
1628 pthread_mutex_unlock (&readylist_lock
);
1630 /* Now read the request. */
1632 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd
, &req
, sizeof (req
)))
1633 != sizeof (req
), 0))
1635 /* We failed to read data. Note that this also might mean we
1636 failed because we would have blocked. */
1637 if (debug_level
> 0)
1638 dbg_log (_("short read while reading request: %s"),
1639 strerror_r (errno
, buf
, sizeof (buf
)));
1643 /* Check whether this is a valid request type. */
1644 if (req
.type
< GETPWBYNAME
|| req
.type
>= LASTREQ
)
1647 /* Some systems have no SO_PEERCRED implementation. They don't
1648 care about security so we don't as well. */
1653 if (__glibc_unlikely (debug_level
> 0))
1655 struct ucred caller
;
1656 socklen_t optlen
= sizeof (caller
);
1658 if (getsockopt (fd
, SOL_SOCKET
, SO_PEERCRED
, &caller
, &optlen
) == 0)
1662 const pid_t pid
= 0;
1665 /* It should not be possible to crash the nscd with a silly
1666 request (i.e., a terribly large key). We limit the size to 1kb. */
1667 if (__builtin_expect (req
.key_len
, 1) < 0
1668 || __builtin_expect (req
.key_len
, 1) > MAXKEYLEN
)
1670 if (debug_level
> 0)
1671 dbg_log (_("key length in request too long: %d"), req
.key_len
);
1676 char keybuf
[MAXKEYLEN
+ 1];
1678 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd
, keybuf
,
1682 /* Again, this can also mean we would have blocked. */
1683 if (debug_level
> 0)
1684 dbg_log (_("short read while reading request key: %s"),
1685 strerror_r (errno
, buf
, sizeof (buf
)));
1688 keybuf
[req
.key_len
] = '\0';
1690 if (__builtin_expect (debug_level
, 0) > 0)
1695 handle_request: request received (Version = %d) from PID %ld"),
1696 req
.version
, (long int) pid
);
1700 handle_request: request received (Version = %d)"), req
.version
);
1703 /* Phew, we got all the data, now process it. */
1704 handle_request (fd
, &req
, keybuf
, uid
, pid
);
1712 pthread_mutex_lock (&readylist_lock
);
1714 /* One more thread available. */
1721 static unsigned int nconns
;
1726 pthread_mutex_lock (&readylist_lock
);
1728 /* Find an empty entry in FDLIST. */
1730 for (inner
= 0; inner
< nconns
; ++inner
)
1731 if (fdlist
[inner
].next
== NULL
)
1733 assert (inner
< nconns
);
1735 fdlist
[inner
].fd
= fd
;
1737 if (readylist
== NULL
)
1738 readylist
= fdlist
[inner
].next
= &fdlist
[inner
];
1741 fdlist
[inner
].next
= readylist
->next
;
1742 readylist
= readylist
->next
= &fdlist
[inner
];
1745 bool do_signal
= true;
1746 if (__glibc_unlikely (nready
== 0))
1751 /* Try to start another thread to help out. */
1753 if (nthreads
< max_nthreads
1754 && pthread_create (&th
, &attr
, nscd_run_worker
,
1755 (void *) (long int) nthreads
) == 0)
1757 /* We got another thread. */
1759 /* The new thread might need a kick. */
1765 pthread_mutex_unlock (&readylist_lock
);
1767 /* Tell one of the worker threads there is work to do. */
1769 pthread_cond_signal (&readylist_cond
);
1773 /* Check whether restarting should happen. */
1775 restart_p (time_t now
)
1777 return (paranoia
&& readylist
== NULL
&& nready
== nthreads
1778 && now
>= restart_time
);
1782 /* Array for times a connection was accepted. */
1783 static time_t *starttime
;
#ifdef HAVE_INOTIFY
/* Buffer for data read from the inotify descriptor.  Member I gives
   typed access to the event stored at the start of BUF; BUF leaves
   room for the event header plus a name of up to PATH_MAX bytes.  */
union __inev
{
  struct inotify_event i;
# ifndef PATH_MAX
#  define PATH_MAX 1024
# endif
  char buf[sizeof (struct inotify_event) + PATH_MAX];
};
#endif
#ifdef HAVE_INOTIFY
/* Return 0 if the file named by FINFO->fname can be stat'ed,
   otherwise -1.

   The mtime is deliberately not compared here.  The caller,
   inotify_check_files, is responsible for re-adding watches and
   invalidating the database; should enough inotify events have been
   lost, the next pruning or invalidation performs the stat and mtime
   check.  Keeping this function to a plain existence test keeps the
   logic simple.  */
static int
check_file (struct traced_file *finfo)
{
  struct stat64 st;

  return stat64 (finfo->fname, &st) < 0 ? -1 : 0;
}
#endif
#ifdef HAVE_INOTIFY
/* Process the inotify event in INEV.  If the event matches any of the
   files registered with a database, mark that database as requiring
   its cache to be cleared by setting TO_CLEAR[DBCNT] to true.  The
   TO_CLEAR array must have LASTDB entries.  Watches are removed or
   re-added as the event requires, and res_init is called for files
   which request it.  */
static void
inotify_check_files (bool *to_clear, union __inev *inev)
{
  /* Check which of the files changed.  */
  for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
    {
      struct traced_file *finfo = dbs[dbcnt].traced_files;

      while (finfo != NULL)
        {
          /* The configuration file was moved or deleted.
             We stop watching it at that point, and reinitialize.  */
          if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
              && ((inev->i.mask & IN_MOVE_SELF)
                  || (inev->i.mask & IN_DELETE_SELF)
                  || (inev->i.mask & IN_IGNORED)))
            {
              int ret;
              bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;

              /* If the path still resolves, keep the watch and leave
                 the cache alone.  */
              if (check_file (finfo) == 0)
                {
                  dbg_log (_("ignored inotify event for `%s` (file exists)"),
                           finfo->fname);
                  return;
                }

              dbg_log (_("monitored file `%s` was %s, removing watch"),
                       finfo->fname, moved ? "moved" : "deleted");
              /* File was moved out, remove the watch.  Watches are
                 automatically removed when the file is deleted.  */
              if (moved)
                {
                  ret = inotify_rm_watch (inotify_fd, inev->i.wd);
                  if (ret < 0)
                    dbg_log (_("failed to remove file watch `%s`: %s"),
                             finfo->fname, strerror (errno));
                }
              finfo->inotify_descr[TRACED_FILE] = -1;
              to_clear[dbcnt] = true;
              if (finfo->call_res_init)
                res_init ();
              return;
            }
          /* The configuration file was open for writing and has just
             closed.  We reset the cache and reinitialize.  */
          if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
              && inev->i.mask & IN_CLOSE_WRITE)
            {
              /* Mark cache as needing to be cleared and reinitialize.  */
              dbg_log (_("monitored file `%s` was written to"), finfo->fname);
              to_clear[dbcnt] = true;
              if (finfo->call_res_init)
                res_init ();
              return;
            }
          /* The parent directory was moved or deleted.  We trigger one
             last invalidation.  At the next pruning or invalidation we
             may add this watch back if the file is present again.  */
          if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
              && ((inev->i.mask & IN_DELETE_SELF)
                  || (inev->i.mask & IN_MOVE_SELF)
                  || (inev->i.mask & IN_IGNORED)))
            {
              bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
              /* The directory watch may have already been removed
                 but we don't know so we just remove it again and
                 ignore the error.  Then we remove the file watch.
                 Note: watches are automatically removed for deleted
                 files.  */
              if (moved)
                inotify_rm_watch (inotify_fd, inev->i.wd);
              if (finfo->inotify_descr[TRACED_FILE] != -1)
                {
                  dbg_log (_("monitored parent directory `%s` was %s, removing watch on `%s`"),
                           finfo->dname, moved ? "moved" : "deleted", finfo->fname);
                  /* The watch being removed belongs to the file, so
                     report the file name on failure.  */
                  if (inotify_rm_watch (inotify_fd, finfo->inotify_descr[TRACED_FILE]) < 0)
                    dbg_log (_("failed to remove file watch `%s`: %s"),
                             finfo->fname, strerror (errno));
                }
              finfo->inotify_descr[TRACED_FILE] = -1;
              finfo->inotify_descr[TRACED_DIR] = -1;
              to_clear[dbcnt] = true;
              if (finfo->call_res_init)
                res_init ();
              /* Continue with the loop since this might be the parent
                 directory for multiple registered files and we want to
                 remove watches for all registered files.  Both
                 descriptors of this entry are -1 now, so it no longer
                 matches and the next pass advances past it.  */
              continue;
            }
          /* The parent directory had a create or moved to event.  */
          if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
              && ((inev->i.mask & IN_MOVED_TO)
                  || (inev->i.mask & IN_CREATE))
              && strcmp (inev->i.name, finfo->sfname) == 0)
            {
              /* We detected a directory change.  We look for the
                 creation of the file we are tracking or the move of
                 the same file into the directory.  */
              int ret;
              dbg_log (_("monitored file `%s` was %s, adding watch"),
                       finfo->fname,
                       inev->i.mask & IN_CREATE ? "created" : "moved into place");
              /* File was moved in or created.  Regenerate the watch.  */
              if (finfo->inotify_descr[TRACED_FILE] != -1)
                inotify_rm_watch (inotify_fd,
                                  finfo->inotify_descr[TRACED_FILE]);

              ret = inotify_add_watch (inotify_fd,
                                       finfo->fname,
                                       TRACED_FILE_MASK);
              if (ret < 0)
                dbg_log (_("failed to add file watch `%s`: %s"),
                         finfo->fname, strerror (errno));

              /* On failure RET is negative, which marks the file as
                 unwatched.  */
              finfo->inotify_descr[TRACED_FILE] = ret;

              /* The file is new or moved so mark cache as needing to
                 be cleared and reinitialize.  */
              to_clear[dbcnt] = true;
              if (finfo->call_res_init)
                res_init ();

              /* Done re-adding the watch.  Don't return, we may still
                 have other files in this same directory, same watch
                 descriptor, and need to process them.  */
            }
          /* Other events are ignored, and we move on to the next file.  */
          finfo = finfo->next;
        }
    }
}
#endif
#ifdef HAVE_INOTIFY
/* For every database whose entry in TO_CLEAR is true, set its
   clear_cache flag and signal its prune condition variable.  Other
   databases are left untouched.  TO_CLEAR must have LASTDB
   entries.  */
static inline void
clear_db_cache (bool *to_clear)
{
  for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
    {
      if (! to_clear[dbcnt])
        continue;

      /* The flag is protected by the database's prune lock.  */
      pthread_mutex_lock (&dbs[dbcnt].prune_lock);
      dbs[dbcnt].clear_cache = 1;
      pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
      pthread_cond_signal (&dbs[dbcnt].prune_cond);
    }
}
#endif
#ifdef HAVE_INOTIFY
/* Read and process all pending inotify events for files registered
   via register_traced_file(), then clear the caches of the databases
   the events affect.  Returns 0 on success and -1 if reading the
   inotify descriptor failed with an error other than EAGAIN; in the
   latter case no cache is cleared.  */
int
handle_inotify_events (void)
{
  bool to_clear[lastdb] = { false, };
  union __inev inev;

  while (1)
    {
      /* Potentially read multiple events into buf.  */
      ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd,
                                             &inev.buf,
                                             sizeof (inev)));
      if (nb < (ssize_t) sizeof (struct inotify_event))
        {
          /* Not even 1 event.  */
          if (__glibc_unlikely (nb == -1 && errno != EAGAIN))
            return -1;
          /* Done reading events that are ready.  */
          break;
        }

      /* Process all events.  The normal inotify interface delivers
         complete events on a read and never a partial event.  The
         event to handle is always the one at the start of INEV.  */
      while (1)
        {
          /* Check which of the files changed.  */
          inotify_check_files (to_clear, &inev);

          /* Size of the event just handled, including its name.  */
          ssize_t count = sizeof (struct inotify_event) + inev.i.len;
          nb -= count;
          if (nb < (ssize_t) sizeof (struct inotify_event))
            break;

          /* Shift the remaining events to the front.  Source and
             destination both lie inside INEV and may overlap, so
             memmove is required, and the source is always COUNT bytes
             past the start because earlier shifts already moved the
             data.  */
          memmove (&inev, &inev.buf[count], nb);
        }
    }

  /* Actually perform the cache clearing.  */
  clear_db_cache (to_clear);
  return 0;
}
#endif
2014 __attribute__ ((__noreturn__
))
2015 main_loop_poll (void)
2017 struct pollfd
*conns
= (struct pollfd
*) xmalloc (nconns
2018 * sizeof (conns
[0]));
2021 conns
[0].events
= POLLRDNORM
;
2023 size_t firstfree
= 1;
2026 if (inotify_fd
!= -1)
2028 conns
[1].fd
= inotify_fd
;
2029 conns
[1].events
= POLLRDNORM
;
2036 size_t idx_nl_status_fd
= 0;
2037 if (nl_status_fd
!= -1)
2039 idx_nl_status_fd
= nused
;
2040 conns
[nused
].fd
= nl_status_fd
;
2041 conns
[nused
].events
= POLLRDNORM
;
2049 /* Wait for any event. We wait at most a couple of seconds so
2050 that we can check whether we should close any of the accepted
2051 connections since we have not received a request. */
2052 #define MAX_ACCEPT_TIMEOUT 30
2053 #define MIN_ACCEPT_TIMEOUT 5
2054 #define MAIN_THREAD_TIMEOUT \
2055 (MAX_ACCEPT_TIMEOUT * 1000 \
2056 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
2058 int n
= poll (conns
, nused
, MAIN_THREAD_TIMEOUT
);
2060 time_t now
= time (NULL
);
2062 /* If there is a descriptor ready for reading or there is a new
2063 connection, process this now. */
2066 if (conns
[0].revents
!= 0)
2068 /* We have a new incoming connection. Accept the connection. */
2069 int fd
= TEMP_FAILURE_RETRY (accept4 (sock
, NULL
, NULL
,
2072 /* Use the descriptor if we have not reached the limit. */
2075 if (firstfree
< nconns
)
2077 conns
[firstfree
].fd
= fd
;
2078 conns
[firstfree
].events
= POLLRDNORM
;
2079 starttime
[firstfree
] = now
;
2080 if (firstfree
>= nused
)
2081 nused
= firstfree
+ 1;
2085 while (firstfree
< nused
&& conns
[firstfree
].fd
!= -1);
2088 /* We cannot use the connection so close it. */
2097 if (inotify_fd
!= -1 && conns
[1].fd
== inotify_fd
)
2099 if (conns
[1].revents
!= 0)
2102 ret
= handle_inotify_events ();
2105 /* Something went wrong when reading the inotify
2106 data. Better disable inotify. */
2107 dbg_log (_("disabled inotify-based monitoring after read error %d"), errno
);
2123 if (idx_nl_status_fd
!= 0 && conns
[idx_nl_status_fd
].revents
!= 0)
2126 /* Read all the data. We do not interpret it here. */
2127 while (TEMP_FAILURE_RETRY (read (nl_status_fd
, buf
,
2128 sizeof (buf
))) != -1)
2131 dbs
[hstdb
].head
->extra_data
[NSCD_HST_IDX_CONF_TIMESTAMP
]
2132 = __bump_nl_timestamp ();
2136 for (size_t cnt
= first
; cnt
< nused
&& n
> 0; ++cnt
)
2137 if (conns
[cnt
].revents
!= 0)
2139 fd_ready (conns
[cnt
].fd
);
2141 /* Clean up the CONNS array. */
2143 if (cnt
< firstfree
)
2145 if (cnt
== nused
- 1)
2148 while (conns
[nused
- 1].fd
== -1);
2154 /* Now find entries which have timed out. */
2157 /* We make the timeout length depend on the number of file
2158 descriptors currently used. */
2159 #define ACCEPT_TIMEOUT \
2160 (MAX_ACCEPT_TIMEOUT \
2161 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
2162 time_t laststart
= now
- ACCEPT_TIMEOUT
;
2164 for (size_t cnt
= nused
- 1; cnt
> 0; --cnt
)
2166 if (conns
[cnt
].fd
!= -1 && starttime
[cnt
] < laststart
)
2168 /* Remove the entry, it timed out. */
2169 (void) close (conns
[cnt
].fd
);
2172 if (cnt
< firstfree
)
2174 if (cnt
== nused
- 1)
2177 while (conns
[nused
- 1].fd
== -1);
2181 if (restart_p (now
))
#ifdef HAVE_EPOLL
/* Main loop based on epoll, using the epoll descriptor EFD.  The
   function returns only if one of the initial descriptors cannot be
   registered with EFD, in which case the caller is expected to fall
   back to another mechanism; otherwise it loops forever.  STARTTIME
   is indexed by file descriptor here and holds a nonzero accept time
   for connections which have not sent a request yet.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  /* Number of descriptors tracked; feeds the timeout computations in
     MAIN_THREAD_TIMEOUT and ACCEPT_TIMEOUT.  */
  int nused = 1;
  /* Upper bound for descriptors with a nonzero STARTTIME entry.  */
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

# ifdef HAVE_INOTIFY
  if (inotify_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = inotify_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
        /* We cannot use epoll.  */
        return;
      nused = 2;
    }
# endif

# ifdef HAVE_NETLINK
  if (nl_status_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = nl_status_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, nl_status_fd, &ev) == -1)
        /* We cannot use epoll.  */
        return;
    }
# endif

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
                                                  SOCK_NONBLOCK));

            /* Use the descriptor if we have not reached the limit.  */
            if (fd >= 0)
              {
                /* Try to add the new descriptor.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
# ifdef HAVE_INOTIFY
        else if (revs[cnt].data.fd == inotify_fd)
          {
            int ret;
            ret = handle_inotify_events ();
            if (ret == -1)
              {
                /* Something went wrong when reading the inotify
                   data.  Better disable inotify.  */
                dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
                (void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd, NULL);
                close (inotify_fd);
                inotify_fd = -1;
                break;
              }
          }
# endif
# ifdef HAVE_NETLINK
        else if (revs[cnt].data.fd == nl_status_fd)
          {
            char buf[4096];
            /* Read all the data.  We do not interpret it here.  */
            while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
                                             sizeof (buf))) != -1)
              ;

            __bump_nl_timestamp ();
          }
# endif
        else
          {
            /* Remove the descriptor from the epoll descriptor.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      assert (starttime[sock] == 0);
# ifdef HAVE_INOTIFY
      assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
# endif
      assert (nl_status_fd == -1 || starttime[nl_status_fd] == 0);
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;

            /* The descriptor is gone just as if it had been handed to
               a worker, so it must no longer be counted.  Otherwise
               NUSED only ever grows on timeouts and skews the timeout
               computations.  */
            --nused;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
2340 /* Start all the threads we want. The initial process is thread no. 1. */
2342 start_threads (void)
2344 /* Initialize the conditional variable we will use. The only
2345 non-standard attribute we might use is the clock selection. */
2346 pthread_condattr_t condattr
;
2347 pthread_condattr_init (&condattr
);
2349 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2350 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2351 /* Determine whether the monotonous clock is available. */
2352 struct timespec dummy
;
2353 # if _POSIX_MONOTONIC_CLOCK == 0
2354 if (sysconf (_SC_MONOTONIC_CLOCK
) > 0)
2356 # if _POSIX_CLOCK_SELECTION == 0
2357 if (sysconf (_SC_CLOCK_SELECTION
) > 0)
2359 if (clock_getres (CLOCK_MONOTONIC
, &dummy
) == 0
2360 && pthread_condattr_setclock (&condattr
, CLOCK_MONOTONIC
) == 0)
2361 timeout_clock
= CLOCK_MONOTONIC
;
2364 /* Create the attribute for the threads. They are all created
2366 pthread_attr_init (&attr
);
2367 pthread_attr_setdetachstate (&attr
, PTHREAD_CREATE_DETACHED
);
2368 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2369 pthread_attr_setstacksize (&attr
, NSCD_THREAD_STACKSIZE
);
2371 /* We allow less than LASTDB threads only for debugging. */
2372 if (debug_level
== 0)
2373 nthreads
= MAX (nthreads
, lastdb
);
2375 /* Create the threads which prune the databases. */
2376 // XXX Ideally this work would be done by some of the worker threads.
2377 // XXX But this is problematic since we would need to be able to wake
2378 // XXX them up explicitly as well as part of the group handling the
2379 // XXX ready-list. This requires an operation where we can wait on
2380 // XXX two conditional variables at the same time. This operation
2381 // XXX does not exist (yet).
2382 for (long int i
= 0; i
< lastdb
; ++i
)
2384 /* Initialize the conditional variable. */
2385 if (pthread_cond_init (&dbs
[i
].prune_cond
, &condattr
) != 0)
2387 dbg_log (_("could not initialize conditional variable"));
2388 do_exit (1, 0, NULL
);
2393 && pthread_create (&th
, &attr
, nscd_run_prune
, (void *) i
) != 0)
2395 dbg_log (_("could not start clean-up thread; terminating"));
2396 do_exit (1, 0, NULL
);
2400 pthread_condattr_destroy (&condattr
);
2402 for (long int i
= 0; i
< nthreads
; ++i
)
2405 if (pthread_create (&th
, &attr
, nscd_run_worker
, NULL
) != 0)
2409 dbg_log (_("could not start any worker thread; terminating"));
2410 do_exit (1, 0, NULL
);
2417 /* Now it is safe to let the parent know that we're doing fine and it can
2421 /* Determine how much room for descriptors we should initially
2422 allocate. This might need to change later if we cap the number
2424 const long int nfds
= sysconf (_SC_OPEN_MAX
);
2426 #define MAXCONN 16384
2427 if (nfds
== -1 || nfds
> MAXCONN
)
2429 else if (nfds
< MINCONN
)
2434 /* We need memory to pass descriptors on to the worker threads. */
2435 fdlist
= (struct fdlist
*) xcalloc (nconns
, sizeof (fdlist
[0]));
2436 /* Array to keep track when connection was accepted. */
2437 starttime
= (time_t *) xcalloc (nconns
, sizeof (starttime
[0]));
2439 /* In the main thread we execute the loop which handles incoming
2442 int efd
= epoll_create (100);
2445 main_loop_epoll (efd
);
2454 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2455 this function is called, we are not listening on the nscd socket yet so
2456 we can just use the ordinary lookup functions without causing a lockup */
2458 begin_drop_privileges (void)
2460 struct passwd
*pwd
= getpwnam (server_user
);
2464 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2465 do_exit (EXIT_FAILURE
, 0,
2466 _("Failed to run nscd as user '%s'"), server_user
);
2469 server_uid
= pwd
->pw_uid
;
2470 server_gid
= pwd
->pw_gid
;
2472 /* Save the old UID/GID if we have to change back. */
2475 old_uid
= getuid ();
2476 old_gid
= getgid ();
2479 if (getgrouplist (server_user
, server_gid
, NULL
, &server_ngroups
) == 0)
2481 /* This really must never happen. */
2482 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2483 do_exit (EXIT_FAILURE
, errno
,
2484 _("initial getgrouplist failed"));
2487 server_groups
= (gid_t
*) xmalloc (server_ngroups
* sizeof (gid_t
));
2489 if (getgrouplist (server_user
, server_gid
, server_groups
, &server_ngroups
)
2492 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2493 do_exit (EXIT_FAILURE
, errno
, _("getgrouplist failed"));
2498 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2499 run nscd as the user specified in the configuration file. */
2501 finish_drop_privileges (void)
2503 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2504 /* We need to preserve the capabilities to connect to the audit daemon. */
2505 cap_t new_caps
= preserve_capabilities ();
2508 if (setgroups (server_ngroups
, server_groups
) == -1)
2510 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2511 do_exit (EXIT_FAILURE
, errno
, _("setgroups failed"));
2516 res
= setresgid (server_gid
, server_gid
, old_gid
);
2518 res
= setgid (server_gid
);
2521 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2522 do_exit (4, errno
, "setgid");
2526 res
= setresuid (server_uid
, server_uid
, old_uid
);
2528 res
= setuid (server_uid
);
2531 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2532 do_exit (4, errno
, "setuid");
2535 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2536 /* Remove the temporary capabilities. */
2537 install_real_capabilities (new_caps
);