1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2018 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>. */
35 #include <arpa/inet.h>
37 # include <linux/netlink.h>
38 # include <linux/rtnetlink.h>
41 # include <sys/epoll.h>
44 # include <sys/inotify.h>
47 #include <sys/param.h>
50 # include <sys/sendfile.h>
52 #include <sys/socket.h>
59 #include <resolv/resolv.h>
61 #include <kernel-features.h>
62 #include <libc-diag.h>
65 /* Support to run nscd as an unprivileged user */
66 const char *server_user
;
67 static uid_t server_uid
;
68 static gid_t server_gid
;
69 const char *stat_user
;
71 static gid_t
*server_groups
;
75 static int server_ngroups
;
77 static pthread_attr_t attr
;
79 static void begin_drop_privileges (void);
80 static void finish_drop_privileges (void);
82 /* Map request type to a string. */
83 const char *const serv2str
[LASTREQ
] =
85 [GETPWBYNAME
] = "GETPWBYNAME",
86 [GETPWBYUID
] = "GETPWBYUID",
87 [GETGRBYNAME
] = "GETGRBYNAME",
88 [GETGRBYGID
] = "GETGRBYGID",
89 [GETHOSTBYNAME
] = "GETHOSTBYNAME",
90 [GETHOSTBYNAMEv6
] = "GETHOSTBYNAMEv6",
91 [GETHOSTBYADDR
] = "GETHOSTBYADDR",
92 [GETHOSTBYADDRv6
] = "GETHOSTBYADDRv6",
93 [SHUTDOWN
] = "SHUTDOWN",
94 [GETSTAT
] = "GETSTAT",
95 [INVALIDATE
] = "INVALIDATE",
96 [GETFDPW
] = "GETFDPW",
97 [GETFDGR
] = "GETFDGR",
98 [GETFDHST
] = "GETFDHST",
100 [INITGROUPS
] = "INITGROUPS",
101 [GETSERVBYNAME
] = "GETSERVBYNAME",
102 [GETSERVBYPORT
] = "GETSERVBYPORT",
103 [GETFDSERV
] = "GETFDSERV",
104 [GETNETGRENT
] = "GETNETGRENT",
105 [INNETGR
] = "INNETGR",
106 [GETFDNETGR
] = "GETFDNETGR"
109 /* The control data structures for the services. */
110 struct database_dyn dbs
[lastdb
] =
113 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
114 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
115 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
121 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
122 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
123 .db_filename
= _PATH_NSCD_PASSWD_DB
,
124 .disabled_iov
= &pwd_iov_disabled
,
132 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
133 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
134 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
140 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
141 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
142 .db_filename
= _PATH_NSCD_GROUP_DB
,
143 .disabled_iov
= &grp_iov_disabled
,
151 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
152 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
153 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
157 .propagate
= 0, /* Not used. */
159 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
160 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
161 .db_filename
= _PATH_NSCD_HOSTS_DB
,
162 .disabled_iov
= &hst_iov_disabled
,
170 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
171 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
172 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
176 .propagate
= 0, /* Not used. */
178 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
179 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
180 .db_filename
= _PATH_NSCD_SERVICES_DB
,
181 .disabled_iov
= &serv_iov_disabled
,
189 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
190 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
191 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
195 .propagate
= 0, /* Not used. */
197 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
198 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
199 .db_filename
= _PATH_NSCD_NETGROUP_DB
,
200 .disabled_iov
= &netgroup_iov_disabled
,
210 /* Mapping of request type to database. */
214 struct database_dyn
*db
;
215 } const reqinfo
[LASTREQ
] =
217 [GETPWBYNAME
] = { true, &dbs
[pwddb
] },
218 [GETPWBYUID
] = { true, &dbs
[pwddb
] },
219 [GETGRBYNAME
] = { true, &dbs
[grpdb
] },
220 [GETGRBYGID
] = { true, &dbs
[grpdb
] },
221 [GETHOSTBYNAME
] = { true, &dbs
[hstdb
] },
222 [GETHOSTBYNAMEv6
] = { true, &dbs
[hstdb
] },
223 [GETHOSTBYADDR
] = { true, &dbs
[hstdb
] },
224 [GETHOSTBYADDRv6
] = { true, &dbs
[hstdb
] },
225 [SHUTDOWN
] = { false, NULL
},
226 [GETSTAT
] = { false, NULL
},
227 [SHUTDOWN
] = { false, NULL
},
228 [GETFDPW
] = { false, &dbs
[pwddb
] },
229 [GETFDGR
] = { false, &dbs
[grpdb
] },
230 [GETFDHST
] = { false, &dbs
[hstdb
] },
231 [GETAI
] = { true, &dbs
[hstdb
] },
232 [INITGROUPS
] = { true, &dbs
[grpdb
] },
233 [GETSERVBYNAME
] = { true, &dbs
[servdb
] },
234 [GETSERVBYPORT
] = { true, &dbs
[servdb
] },
235 [GETFDSERV
] = { false, &dbs
[servdb
] },
236 [GETNETGRENT
] = { true, &dbs
[netgrdb
] },
237 [INNETGR
] = { true, &dbs
[netgrdb
] },
238 [GETFDNETGR
] = { false, &dbs
[netgrdb
] }
242 /* Initial number of threads to use. */
244 /* Maximum number of threads to use. */
245 int max_nthreads
= 32;
247 /* Socket for incoming connections. */
251 /* Inotify descriptor. */
256 /* Descriptor for netlink status updates. */
257 static int nl_status_fd
= -1;
260 /* Number of times clients had to wait. */
261 unsigned long int client_queued
;
/* Send the LEN bytes starting at BUF over the socket FD, continuing
   after partial sends until everything is out or send() reports an
   error or zero progress.  Returns the negative send() result on
   error, otherwise the number of bytes actually sent.  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cursor = buf;
  size_t left = len;
  ssize_t rc;

  for (;;)
    {
      /* MSG_NOSIGNAL: a vanished peer must not raise SIGPIPE.  */
      rc = TEMP_FAILURE_RETRY (send (fd, cursor, left, MSG_NOSIGNAL));
      if (rc <= 0)
        break;

      cursor += rc;
      left -= rc;
      if (left == 0)
        break;
    }

  return rc < 0 ? rc : len - left;
}
/* Copy LEN bytes starting at offset OFF of the file FROMFD to TOFD
   using sendfile, continuing after partial transfers until everything
   is out or sendfile() reports an error or zero progress.  Returns
   the negative sendfile() result on error, otherwise the number of
   bytes actually transferred.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t left = len;
  ssize_t rc;

  for (;;)
    {
      /* sendfile advances OFF itself, so only the count is adjusted.  */
      rc = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, left));
      if (rc <= 0)
        break;

      left -= rc;
      if (left <= 0)
        break;
    }

  return rc < 0 ? rc : len - left;
}
305 /* The following three are not really used, they are symbolic constants. */
311 use_he_begin
= use_he
| use_begin
,
312 use_he_end
= use_he
| use_end
,
314 use_data_begin
= use_data
| use_begin
,
315 use_data_end
= use_data
| use_end
,
316 use_data_first
= use_data_begin
| use_first
321 check_use (const char *data
, nscd_ssize_t first_free
, uint8_t *usemap
,
322 enum usekey use
, ref_t start
, size_t len
)
326 if (start
> first_free
|| start
+ len
> first_free
327 || (start
& BLOCK_ALIGN_M1
))
330 if (usemap
[start
] == use_not
)
332 /* Add the start marker. */
333 usemap
[start
] = use
| use_begin
;
337 if (usemap
[++start
] != use_not
)
342 /* Add the end marker. */
343 usemap
[start
] = use
| use_end
;
345 else if ((usemap
[start
] & ~use_first
) == ((use
| use_begin
) & ~use_first
))
347 /* Hash entries can't be shared. */
351 usemap
[start
] |= (use
& use_first
);
355 if (usemap
[++start
] != use
)
358 if (usemap
[++start
] != (use
| use_end
))
362 /* Points to a wrong object or somewhere in the middle. */
369 /* Verify data in persistent database. */
371 verify_persistent_db (void *mem
, struct database_pers_head
*readhead
, int dbnr
)
373 assert (dbnr
== pwddb
|| dbnr
== grpdb
|| dbnr
== hstdb
|| dbnr
== servdb
376 time_t now
= time (NULL
);
378 struct database_pers_head
*head
= mem
;
379 struct database_pers_head head_copy
= *head
;
381 /* Check that the header that was read matches the head in the database. */
382 if (memcmp (head
, readhead
, sizeof (*head
)) != 0)
385 /* First some easy tests: make sure the database header is sane. */
386 if (head
->version
!= DB_VERSION
387 || head
->header_size
!= sizeof (*head
)
388 /* We allow a timestamp to be one hour ahead of the current time.
389 This should cover daylight saving time changes. */
390 || head
->timestamp
> now
+ 60 * 60 + 60
391 || (head
->gc_cycle
& 1)
393 || (size_t) head
->module
> INT32_MAX
/ sizeof (ref_t
)
394 || (size_t) head
->data_size
> INT32_MAX
- head
->module
* sizeof (ref_t
)
395 || head
->first_free
< 0
396 || head
->first_free
> head
->data_size
397 || (head
->first_free
& BLOCK_ALIGN_M1
) != 0
398 || head
->maxnentries
< 0
399 || head
->maxnsearched
< 0)
402 uint8_t *usemap
= calloc (head
->first_free
, 1);
406 const char *data
= (char *) &head
->array
[roundup (head
->module
,
407 ALIGN
/ sizeof (ref_t
))];
409 nscd_ssize_t he_cnt
= 0;
410 for (nscd_ssize_t cnt
= 0; cnt
< head
->module
; ++cnt
)
412 ref_t trail
= head
->array
[cnt
];
416 while (work
!= ENDREF
)
418 if (! check_use (data
, head
->first_free
, usemap
, use_he
, work
,
419 sizeof (struct hashentry
)))
422 /* Now we know we can dereference the record. */
423 struct hashentry
*here
= (struct hashentry
*) (data
+ work
);
427 /* Make sure the record is for this type of service. */
428 if (here
->type
>= LASTREQ
429 || reqinfo
[here
->type
].db
!= &dbs
[dbnr
])
432 /* Validate boolean field value. */
433 if (here
->first
!= false && here
->first
!= true)
441 || here
->packet
> head
->first_free
442 || here
->packet
+ sizeof (struct datahead
) > head
->first_free
)
445 struct datahead
*dh
= (struct datahead
*) (data
+ here
->packet
);
447 if (! check_use (data
, head
->first_free
, usemap
,
448 use_data
| (here
->first
? use_first
: 0),
449 here
->packet
, dh
->allocsize
))
452 if (dh
->allocsize
< sizeof (struct datahead
)
453 || dh
->recsize
> dh
->allocsize
454 || (dh
->notfound
!= false && dh
->notfound
!= true)
455 || (dh
->usable
!= false && dh
->usable
!= true))
458 if (here
->key
< here
->packet
+ sizeof (struct datahead
)
459 || here
->key
> here
->packet
+ dh
->allocsize
460 || here
->key
+ here
->len
> here
->packet
+ dh
->allocsize
)
466 /* A circular list, this must not happen. */
469 trail
= ((struct hashentry
*) (data
+ trail
))->next
;
474 if (he_cnt
!= head
->nentries
)
477 /* See if all data and keys had at least one reference from
478 he->first == true hashentry. */
479 for (ref_t idx
= 0; idx
< head
->first_free
; ++idx
)
481 if (usemap
[idx
] == use_data_begin
)
485 /* Finally, make sure the database hasn't changed since the first test. */
486 if (memcmp (mem
, &head_copy
, sizeof (*head
)) != 0)
498 /* Initialize database information structures. */
502 /* Look up unprivileged uid/gid/groups before we start listening on the
504 if (server_user
!= NULL
)
505 begin_drop_privileges ();
508 /* No configuration for this value, assume a default. */
511 for (size_t cnt
= 0; cnt
< lastdb
; ++cnt
)
512 if (dbs
[cnt
].enabled
)
514 pthread_rwlock_init (&dbs
[cnt
].lock
, NULL
);
515 pthread_mutex_init (&dbs
[cnt
].memlock
, NULL
);
517 if (dbs
[cnt
].persistent
)
519 /* Try to open the appropriate file on disk. */
520 int fd
= open (dbs
[cnt
].db_filename
, O_RDWR
| O_CLOEXEC
);
527 struct database_pers_head head
;
528 ssize_t n
= TEMP_FAILURE_RETRY (read (fd
, &head
,
530 if (n
!= sizeof (head
) || fstat64 (fd
, &st
) != 0)
533 /* The code is single-threaded at this point so
534 using strerror is just fine. */
535 msg
= strerror (errno
);
537 dbg_log (_("invalid persistent database file \"%s\": %s"),
538 dbs
[cnt
].db_filename
, msg
);
539 unlink (dbs
[cnt
].db_filename
);
541 else if (head
.module
== 0 && head
.data_size
== 0)
543 /* The file has been created, but the head has not
544 been initialized yet. */
545 msg
= _("uninitialized header");
548 else if (head
.header_size
!= (int) sizeof (head
))
550 msg
= _("header size does not match");
553 else if ((total
= (sizeof (head
)
554 + roundup (head
.module
* sizeof (ref_t
),
558 || total
< sizeof (head
))
560 msg
= _("file size does not match");
563 /* Note we map with the maximum size allowed for the
564 database. This is likely much larger than the
565 actual file size. This is OK on most OSes since
566 extensions of the underlying file will
567 automatically translate more pages available for
569 else if ((mem
= mmap (NULL
, dbs
[cnt
].max_db_size
,
570 PROT_READ
| PROT_WRITE
,
574 else if (!verify_persistent_db (mem
, &head
, cnt
))
577 msg
= _("verification failed");
582 /* Success. We have the database. */
584 dbs
[cnt
].memsize
= total
;
585 dbs
[cnt
].data
= (char *)
586 &dbs
[cnt
].head
->array
[roundup (dbs
[cnt
].head
->module
,
587 ALIGN
/ sizeof (ref_t
))];
588 dbs
[cnt
].mmap_used
= true;
590 if (dbs
[cnt
].suggested_module
> head
.module
)
591 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
596 /* We also need a read-only descriptor. */
599 dbs
[cnt
].ro_fd
= open (dbs
[cnt
].db_filename
,
600 O_RDONLY
| O_CLOEXEC
);
601 if (dbs
[cnt
].ro_fd
== -1)
603 cannot create read-only descriptor for \"%s\"; no mmap"),
604 dbs
[cnt
].db_filename
);
607 // XXX Shall we test whether the descriptors actually
608 // XXX point to the same file?
611 /* Close the file descriptors in case something went
612 wrong in which case the variable have not been
617 else if (errno
== EACCES
)
618 do_exit (EXIT_FAILURE
, 0, _("cannot access '%s'"),
619 dbs
[cnt
].db_filename
);
622 if (dbs
[cnt
].head
== NULL
)
624 /* No database loaded. Allocate the data structure,
626 struct database_pers_head head
;
627 size_t total
= (sizeof (head
)
628 + roundup (dbs
[cnt
].suggested_module
629 * sizeof (ref_t
), ALIGN
)
630 + (dbs
[cnt
].suggested_module
631 * DEFAULT_DATASIZE_PER_BUCKET
));
633 /* Try to create the database. If we do not need a
634 persistent database create a temporary file. */
637 if (dbs
[cnt
].persistent
)
639 fd
= open (dbs
[cnt
].db_filename
,
640 O_RDWR
| O_CREAT
| O_EXCL
| O_TRUNC
| O_CLOEXEC
,
642 if (fd
!= -1 && dbs
[cnt
].shared
)
643 ro_fd
= open (dbs
[cnt
].db_filename
,
644 O_RDONLY
| O_CLOEXEC
);
648 char fname
[] = _PATH_NSCD_XYZ_DB_TMP
;
649 fd
= mkostemp (fname
, O_CLOEXEC
);
651 /* We do not need the file name anymore after we
652 opened another file descriptor in read-only mode. */
656 ro_fd
= open (fname
, O_RDONLY
| O_CLOEXEC
);
666 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
667 dbnames
[cnt
], dbs
[cnt
].db_filename
);
668 do_exit (1, 0, NULL
);
671 if (dbs
[cnt
].persistent
)
672 dbg_log (_("cannot create %s; no persistent database used"),
673 dbs
[cnt
].db_filename
);
675 dbg_log (_("cannot create %s; no sharing possible"),
676 dbs
[cnt
].db_filename
);
678 dbs
[cnt
].persistent
= 0;
679 // XXX remember: no mmap
683 /* Tell the user if we could not create the read-only
685 if (ro_fd
== -1 && dbs
[cnt
].shared
)
687 cannot create read-only descriptor for \"%s\"; no mmap"),
688 dbs
[cnt
].db_filename
);
690 /* Before we create the header, initialize the hash
691 table. That way if we get interrupted while writing
692 the header we can recognize a partially initialized
694 size_t ps
= sysconf (_SC_PAGESIZE
);
696 assert (~ENDREF
== 0);
697 memset (tmpbuf
, '\xff', ps
);
699 size_t remaining
= dbs
[cnt
].suggested_module
* sizeof (ref_t
);
700 off_t offset
= sizeof (head
);
703 if (offset
% ps
!= 0)
705 towrite
= MIN (remaining
, ps
- (offset
% ps
));
706 if (pwrite (fd
, tmpbuf
, towrite
, offset
) != towrite
)
709 remaining
-= towrite
;
712 while (remaining
> ps
)
714 if (pwrite (fd
, tmpbuf
, ps
, offset
) == -1)
721 && pwrite (fd
, tmpbuf
, remaining
, offset
) != remaining
)
724 /* Create the header of the file. */
725 struct database_pers_head head
=
727 .version
= DB_VERSION
,
728 .header_size
= sizeof (head
),
729 .module
= dbs
[cnt
].suggested_module
,
730 .data_size
= (dbs
[cnt
].suggested_module
731 * DEFAULT_DATASIZE_PER_BUCKET
),
736 if ((TEMP_FAILURE_RETRY (write (fd
, &head
, sizeof (head
)))
738 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd
, 0, total
))
740 || (mem
= mmap (NULL
, dbs
[cnt
].max_db_size
,
741 PROT_READ
| PROT_WRITE
,
742 MAP_SHARED
, fd
, 0)) == MAP_FAILED
)
745 unlink (dbs
[cnt
].db_filename
);
746 dbg_log (_("cannot write to database file %s: %s"),
747 dbs
[cnt
].db_filename
, strerror (errno
));
748 dbs
[cnt
].persistent
= 0;
754 dbs
[cnt
].data
= (char *)
755 &dbs
[cnt
].head
->array
[roundup (dbs
[cnt
].head
->module
,
756 ALIGN
/ sizeof (ref_t
))];
757 dbs
[cnt
].memsize
= total
;
758 dbs
[cnt
].mmap_used
= true;
760 /* Remember the descriptors. */
762 dbs
[cnt
].ro_fd
= ro_fd
;
774 if (dbs
[cnt
].head
== NULL
)
776 /* We do not use the persistent database. Just
777 create an in-memory data structure. */
778 assert (! dbs
[cnt
].persistent
);
780 dbs
[cnt
].head
= xmalloc (sizeof (struct database_pers_head
)
781 + (dbs
[cnt
].suggested_module
783 memset (dbs
[cnt
].head
, '\0', sizeof (struct database_pers_head
));
784 assert (~ENDREF
== 0);
785 memset (dbs
[cnt
].head
->array
, '\xff',
786 dbs
[cnt
].suggested_module
* sizeof (ref_t
));
787 dbs
[cnt
].head
->module
= dbs
[cnt
].suggested_module
;
788 dbs
[cnt
].head
->data_size
= (DEFAULT_DATASIZE_PER_BUCKET
789 * dbs
[cnt
].head
->module
);
790 dbs
[cnt
].data
= xmalloc (dbs
[cnt
].head
->data_size
);
791 dbs
[cnt
].head
->first_free
= 0;
794 assert (dbs
[cnt
].ro_fd
== -1);
798 /* Create the socket. */
799 sock
= socket (AF_UNIX
, SOCK_STREAM
| SOCK_CLOEXEC
| SOCK_NONBLOCK
, 0);
802 dbg_log (_("cannot open socket: %s"), strerror (errno
));
803 do_exit (errno
== EACCES
? 4 : 1, 0, NULL
);
805 /* Bind a name to the socket. */
806 struct sockaddr_un sock_addr
;
807 sock_addr
.sun_family
= AF_UNIX
;
808 strcpy (sock_addr
.sun_path
, _PATH_NSCDSOCKET
);
809 if (bind (sock
, (struct sockaddr
*) &sock_addr
, sizeof (sock_addr
)) < 0)
811 dbg_log ("%s: %s", _PATH_NSCDSOCKET
, strerror (errno
));
812 do_exit (errno
== EACCES
? 4 : 1, 0, NULL
);
815 /* Set permissions for the socket. */
816 chmod (_PATH_NSCDSOCKET
, DEFFILEMODE
);
818 /* Set the socket up to accept connections. */
819 if (listen (sock
, SOMAXCONN
) < 0)
821 dbg_log (_("cannot enable socket to accept connections: %s"),
823 do_exit (1, 0, NULL
);
827 if (dbs
[hstdb
].enabled
)
829 /* Try to open netlink socket to monitor network setting changes. */
830 nl_status_fd
= socket (AF_NETLINK
,
831 SOCK_RAW
| SOCK_CLOEXEC
| SOCK_NONBLOCK
,
833 if (nl_status_fd
!= -1)
835 struct sockaddr_nl snl
;
836 memset (&snl
, '\0', sizeof (snl
));
837 snl
.nl_family
= AF_NETLINK
;
838 /* XXX Is this the best set to use? */
839 snl
.nl_groups
= (RTMGRP_IPV4_IFADDR
| RTMGRP_TC
| RTMGRP_IPV4_MROUTE
840 | RTMGRP_IPV4_ROUTE
| RTMGRP_IPV4_RULE
841 | RTMGRP_IPV6_IFADDR
| RTMGRP_IPV6_MROUTE
842 | RTMGRP_IPV6_ROUTE
| RTMGRP_IPV6_IFINFO
843 | RTMGRP_IPV6_PREFIX
);
845 if (bind (nl_status_fd
, (struct sockaddr
*) &snl
, sizeof (snl
)) != 0)
847 close (nl_status_fd
);
852 /* Start the timestamp process. */
853 dbs
[hstdb
].head
->extra_data
[NSCD_HST_IDX_CONF_TIMESTAMP
]
854 = __bump_nl_timestamp ();
860 /* Change to unprivileged uid/gid/groups if specified in config file */
861 if (server_user
!= NULL
)
862 finish_drop_privileges ();
866 #define TRACED_FILE_MASK (IN_DELETE_SELF | IN_CLOSE_WRITE | IN_MOVE_SELF)
867 #define TRACED_DIR_MASK (IN_DELETE_SELF | IN_CREATE | IN_MOVED_TO | IN_MOVE_SELF)
869 install_watches (struct traced_file
*finfo
)
871 /* Use inotify support if we have it. */
872 if (finfo
->inotify_descr
[TRACED_FILE
] < 0)
873 finfo
->inotify_descr
[TRACED_FILE
] = inotify_add_watch (inotify_fd
,
876 if (finfo
->inotify_descr
[TRACED_FILE
] < 0)
878 dbg_log (_("disabled inotify-based monitoring for file `%s': %s"),
879 finfo
->fname
, strerror (errno
));
882 dbg_log (_("monitoring file `%s` (%d)"),
883 finfo
->fname
, finfo
->inotify_descr
[TRACED_FILE
]);
884 /* Additionally listen for events in the file's parent directory.
885 We do this because the file to be watched might be
886 deleted and then added back again. When it is added back again
887 we must re-add the watch. We must also cover IN_MOVED_TO to
888 detect a file being moved into the directory. */
889 if (finfo
->inotify_descr
[TRACED_DIR
] < 0)
890 finfo
->inotify_descr
[TRACED_DIR
] = inotify_add_watch (inotify_fd
,
893 if (finfo
->inotify_descr
[TRACED_DIR
] < 0)
895 dbg_log (_("disabled inotify-based monitoring for directory `%s': %s"),
896 finfo
->fname
, strerror (errno
));
899 dbg_log (_("monitoring directory `%s` (%d)"),
900 finfo
->dname
, finfo
->inotify_descr
[TRACED_DIR
]);
904 /* Register the file in FINFO as a traced file for the database DBS[DBIX].
906 We support registering multiple files per database. Each call to
907 register_traced_file adds to the list of registered files.
909 When we prune the database, either through timeout or a request to
910 invalidate, we will check to see if any of the registered files has changed.
911 When we accept new connections to handle a cache request we will also
912 check to see if any of the registered files has changed.
914 If we have inotify support then we install an inotify fd to notify us of
915 file deletion or modification, both of which will require we invalidate
916 the cache for the database. Without inotify support we stat the file and
917 store st_mtime to determine if the file has been modified. */
919 register_traced_file (size_t dbidx
, struct traced_file
*finfo
)
921 /* If the database is disabled or file checking is disabled
922 then ignore the registration. */
923 if (! dbs
[dbidx
].enabled
|| ! dbs
[dbidx
].check_file
)
926 if (__glibc_unlikely (debug_level
> 0))
927 dbg_log (_("monitoring file %s for database %s"),
928 finfo
->fname
, dbnames
[dbidx
]);
931 install_watches (finfo
);
934 if (stat64 (finfo
->fname
, &st
) < 0)
936 /* We cannot stat() the file. Set mtime to zero and try again later. */
937 dbg_log (_("stat failed for file `%s'; will try again later: %s"),
938 finfo
->fname
, strerror (errno
));
942 finfo
->mtime
= st
.st_mtime
;
944 /* Queue up the file name. */
945 finfo
->next
= dbs
[dbidx
].traced_files
;
946 dbs
[dbidx
].traced_files
= finfo
;
950 /* Close the connections. */
959 invalidate_cache (char *key
, int fd
)
964 for (number
= pwddb
; number
< lastdb
; ++number
)
965 if (strcmp (key
, dbnames
[number
]) == 0)
967 struct traced_file
*runp
= dbs
[number
].traced_files
;
970 /* Make sure we reload from file when checking mtime. */
973 /* During an invalidation we try to reload the traced
974 file watches. This allows the user to re-sync if
975 inotify events were lost. Similar to what we do during
977 install_watches (runp
);
979 if (runp
->call_res_init
)
989 if (number
== lastdb
)
992 writeall (fd
, &resp
, sizeof (resp
));
996 if (dbs
[number
].enabled
)
998 pthread_mutex_lock (&dbs
[number
].prune_run_lock
);
999 prune_cache (&dbs
[number
], LONG_MAX
, fd
);
1000 pthread_mutex_unlock (&dbs
[number
].prune_run_lock
);
1005 writeall (fd
, &resp
, sizeof (resp
));
1012 send_ro_fd (struct database_dyn
*db
, char *key
, int fd
)
  /* If we do not have a read-only file descriptor do nothing.  */
1015 if (db
->ro_fd
== -1)
1018 /* We need to send some data along with the descriptor. */
1019 uint64_t mapsize
= (db
->head
->data_size
1020 + roundup (db
->head
->module
* sizeof (ref_t
), ALIGN
)
1021 + sizeof (struct database_pers_head
));
1022 struct iovec iov
[2];
1023 iov
[0].iov_base
= key
;
1024 iov
[0].iov_len
= strlen (key
) + 1;
1025 iov
[1].iov_base
= &mapsize
;
1026 iov
[1].iov_len
= sizeof (mapsize
);
1028 /* Prepare the control message to transfer the descriptor. */
1032 char bytes
[CMSG_SPACE (sizeof (int))];
1034 struct msghdr msg
= { .msg_iov
= iov
, .msg_iovlen
= 2,
1035 .msg_control
= buf
.bytes
,
1036 .msg_controllen
= sizeof (buf
) };
1037 struct cmsghdr
*cmsg
= CMSG_FIRSTHDR (&msg
);
1039 cmsg
->cmsg_level
= SOL_SOCKET
;
1040 cmsg
->cmsg_type
= SCM_RIGHTS
;
1041 cmsg
->cmsg_len
= CMSG_LEN (sizeof (int));
1043 int *ip
= (int *) CMSG_DATA (cmsg
);
1046 msg
.msg_controllen
= cmsg
->cmsg_len
;
1048 /* Send the control message. We repeat when we are interrupted but
1049 everything else is ignored. */
1050 #ifndef MSG_NOSIGNAL
1051 # define MSG_NOSIGNAL 0
1053 (void) TEMP_FAILURE_RETRY (sendmsg (fd
, &msg
, MSG_NOSIGNAL
));
1055 if (__glibc_unlikely (debug_level
> 0))
1056 dbg_log (_("provide access to FD %d, for %s"), db
->ro_fd
, key
);
1058 #endif /* SCM_RIGHTS */
1061 /* Handle new request. */
1063 handle_request (int fd
, request_header
*req
, void *key
, uid_t uid
, pid_t pid
)
1065 if (__builtin_expect (req
->version
, NSCD_VERSION
) != NSCD_VERSION
)
1067 if (debug_level
> 0)
1069 cannot handle old request version %d; current version is %d"),
1070 req
->version
, NSCD_VERSION
);
1074 /* Perform the SELinux check before we go on to the standard checks. */
1075 if (selinux_enabled
&& nscd_request_avc_has_perm (fd
, req
->type
) != 0)
1077 if (debug_level
> 0)
1080 char pbuf
[sizeof ("/proc//exe") + 3 * sizeof (long int)];
1087 snprintf (pbuf
, sizeof (pbuf
), "/proc/%ld/exe", (long int) pid
);
1088 ssize_t n
= readlink (pbuf
, buf
, sizeof (buf
) - 1);
1092 request from %ld not handled due to missing permission"), (long int) pid
);
1097 request from '%s' [%ld] not handled due to missing permission"),
1098 buf
, (long int) pid
);
1101 dbg_log (_("request not handled due to missing permission"));
1107 struct database_dyn
*db
= reqinfo
[req
->type
].db
;
1109 /* See whether we can service the request from the cache. */
1110 if (__builtin_expect (reqinfo
[req
->type
].data_request
, true))
1112 if (__builtin_expect (debug_level
, 0) > 0)
1114 if (req
->type
== GETHOSTBYADDR
|| req
->type
== GETHOSTBYADDRv6
)
1116 char buf
[INET6_ADDRSTRLEN
];
1118 dbg_log ("\t%s (%s)", serv2str
[req
->type
],
1119 inet_ntop (req
->type
== GETHOSTBYADDR
1120 ? AF_INET
: AF_INET6
,
1121 key
, buf
, sizeof (buf
)));
1124 dbg_log ("\t%s (%s)", serv2str
[req
->type
], (char *) key
);
1127 /* Is this service enabled? */
1128 if (__glibc_unlikely (!db
->enabled
))
	  /* No, send the prepared record.  */
1131 if (TEMP_FAILURE_RETRY (send (fd
, db
->disabled_iov
->iov_base
,
1132 db
->disabled_iov
->iov_len
,
1134 != (ssize_t
) db
->disabled_iov
->iov_len
1135 && __builtin_expect (debug_level
, 0) > 0)
1137 /* We have problems sending the result. */
1139 dbg_log (_("cannot write result: %s"),
1140 strerror_r (errno
, buf
, sizeof (buf
)));
1146 /* Be sure we can read the data. */
1147 if (__glibc_unlikely (pthread_rwlock_tryrdlock (&db
->lock
) != 0))
1149 ++db
->head
->rdlockdelayed
;
1150 pthread_rwlock_rdlock (&db
->lock
);
1153 /* See whether we can handle it from the cache. */
1154 struct datahead
*cached
;
1155 cached
= (struct datahead
*) cache_search (req
->type
, key
, req
->key_len
,
1159 /* Hurray it's in the cache. */
1162 #ifdef HAVE_SENDFILE
1163 if (__glibc_likely (db
->mmap_used
))
1165 assert (db
->wr_fd
!= -1);
1166 assert ((char *) cached
->data
> (char *) db
->data
);
1167 assert ((char *) cached
->data
- (char *) db
->head
1169 <= (sizeof (struct database_pers_head
)
1170 + db
->head
->module
* sizeof (ref_t
)
1171 + db
->head
->data_size
));
1172 nwritten
= sendfileall (fd
, db
->wr_fd
,
1173 (char *) cached
->data
1174 - (char *) db
->head
, cached
->recsize
);
1175 # ifndef __ASSUME_SENDFILE
1176 if (nwritten
== -1 && errno
== ENOSYS
)
1181 # ifndef __ASSUME_SENDFILE
1185 nwritten
= writeall (fd
, cached
->data
, cached
->recsize
);
1187 if (nwritten
!= cached
->recsize
1188 && __builtin_expect (debug_level
, 0) > 0)
1190 /* We have problems sending the result. */
1192 dbg_log (_("cannot write result: %s"),
1193 strerror_r (errno
, buf
, sizeof (buf
)));
1196 pthread_rwlock_unlock (&db
->lock
);
1201 pthread_rwlock_unlock (&db
->lock
);
1203 else if (__builtin_expect (debug_level
, 0) > 0)
1205 if (req
->type
== INVALIDATE
)
1206 dbg_log ("\t%s (%s)", serv2str
[req
->type
], (char *) key
);
1208 dbg_log ("\t%s", serv2str
[req
->type
]);
1211 /* Handle the request. */
1215 addpwbyname (db
, fd
, req
, key
, uid
);
1219 addpwbyuid (db
, fd
, req
, key
, uid
);
1223 addgrbyname (db
, fd
, req
, key
, uid
);
1227 addgrbygid (db
, fd
, req
, key
, uid
);
1231 addhstbyname (db
, fd
, req
, key
, uid
);
1234 case GETHOSTBYNAMEv6
:
1235 addhstbynamev6 (db
, fd
, req
, key
, uid
);
1239 addhstbyaddr (db
, fd
, req
, key
, uid
);
1242 case GETHOSTBYADDRv6
:
1243 addhstbyaddrv6 (db
, fd
, req
, key
, uid
);
1247 addhstai (db
, fd
, req
, key
, uid
);
1251 addinitgroups (db
, fd
, req
, key
, uid
);
1255 addservbyname (db
, fd
, req
, key
, uid
);
1259 addservbyport (db
, fd
, req
, key
, uid
);
1263 addgetnetgrent (db
, fd
, req
, key
, uid
);
1267 addinnetgr (db
, fd
, req
, key
, uid
);
	/* Get the caller's credentials.  */
1276 struct ucred caller
;
1277 socklen_t optlen
= sizeof (caller
);
1279 if (getsockopt (fd
, SOL_SOCKET
, SO_PEERCRED
, &caller
, &optlen
) < 0)
1283 dbg_log (_("error getting caller's id: %s"),
1284 strerror_r (errno
, buf
, sizeof (buf
)));
1290 /* Some systems have no SO_PEERCRED implementation. They don't
1291 care about security so we don't as well. */
1296 /* Accept shutdown, getstat and invalidate only from root. For
1297 the stat call also allow the user specified in the config file. */
1298 if (req
->type
== GETSTAT
)
1300 if (uid
== 0 || uid
== stat_uid
)
1301 send_stats (fd
, dbs
);
1305 if (req
->type
== INVALIDATE
)
1306 invalidate_cache (key
, fd
);
1308 termination_handler (0);
1318 send_ro_fd (reqinfo
[req
->type
].db
, key
, fd
);
1323 /* Ignore the command, it's nothing we know. */
1329 /* Restart the process. */
1333 /* First determine the parameters. We do not use the parameters
1334 passed to main() since in case nscd is started by running the
1335 dynamic linker this will not work. Yes, this is not the usual
1336 case but nscd is part of glibc and we occasionally do this. */
1337 size_t buflen
= 1024;
1338 char *buf
= alloca (buflen
);
1340 int fd
= open ("/proc/self/cmdline", O_RDONLY
);
1344 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1353 ssize_t n
= TEMP_FAILURE_RETRY (read (fd
, buf
+ readlen
,
1358 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1368 if (readlen
< buflen
)
1371 /* We might have to extend the buffer. */
1372 size_t old_buflen
= buflen
;
1373 char *newp
= extend_alloca (buf
, buflen
, 2 * buflen
);
1374 buf
= memmove (newp
, buf
, old_buflen
);
1379 /* Parse the command line. Worst case scenario: every two
1380 characters form one parameter (one character plus NUL). */
1381 char **argv
= alloca ((readlen
/ 2 + 1) * sizeof (argv
[0]));
1385 while (cp
< buf
+ readlen
)
1388 cp
= (char *) rawmemchr (cp
, '\0') + 1;
1392 /* Second, change back to the old user if we changed it. */
1393 if (server_user
!= NULL
)
1395 if (setresuid (old_uid
, old_uid
, old_uid
) != 0)
1398 cannot change to old UID: %s; disabling paranoia mode"),
1405 if (setresgid (old_gid
, old_gid
, old_gid
) != 0)
1408 cannot change to old GID: %s; disabling paranoia mode"),
1411 ignore_value (setuid (server_uid
));
1417 /* Next change back to the old working directory. */
1418 if (chdir (oldcwd
) == -1)
1421 cannot change to old working directory: %s; disabling paranoia mode"),
1424 if (server_user
!= NULL
)
1426 ignore_value (setuid (server_uid
));
1427 ignore_value (setgid (server_gid
));
1433 /* Synchronize memory. */
1434 int32_t certainly
[lastdb
];
1435 for (int cnt
= 0; cnt
< lastdb
; ++cnt
)
1436 if (dbs
[cnt
].enabled
)
1438 /* Make sure nobody keeps using the database. */
1439 dbs
[cnt
].head
->timestamp
= 0;
1440 certainly
[cnt
] = dbs
[cnt
].head
->nscd_certainly_running
;
1441 dbs
[cnt
].head
->nscd_certainly_running
= 0;
1443 if (dbs
[cnt
].persistent
)
1445 msync (dbs
[cnt
].head
, dbs
[cnt
].memsize
, MS_ASYNC
);
1448 /* The preparations are done. */
1450 char pathbuf
[PATH_MAX
];
1454 /* Try to exec the real nscd program so the process name (as reported
1455 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1456 if readlink or the exec with the result of the readlink call fails. */
1457 ssize_t n
= readlink ("/proc/self/exe", pathbuf
, sizeof (pathbuf
) - 1);
1461 execv (pathbuf
, argv
);
1463 execv ("/proc/self/exe", argv
);
1465 /* If we come here, we will never be able to re-exec. */
1466 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1469 if (server_user
!= NULL
)
1471 ignore_value (setuid (server_uid
));
1472 ignore_value (setgid (server_gid
));
1474 if (chdir ("/") != 0)
1475 dbg_log (_("cannot change current working directory to \"/\": %s"),
1479 /* Reenable the databases. */
1480 time_t now
= time (NULL
);
1481 for (int cnt
= 0; cnt
< lastdb
; ++cnt
)
1482 if (dbs
[cnt
].enabled
)
1484 dbs
[cnt
].head
->timestamp
= now
;
1485 dbs
[cnt
].head
->nscd_certainly_running
= certainly
[cnt
];
1490 /* List of file descriptors. Entries are linked through NEXT; the
enqueue code in this file treats an entry whose NEXT is NULL as unused. */
1494 struct fdlist
*next
;
1496 /* Memory allocated for the list. */
1497 static struct fdlist
*fdlist
;
1498 /* List of currently ready-to-read file descriptors. The list is
circular: READYLIST points at the entry added last and READYLIST->next
is the entry a worker thread takes next. */
1499 static struct fdlist
*readylist
;
1501 /* Condition variable and mutex to signal availability of entries in
1502 READYLIST. The condvar is initialized dynamically since we might
1503 use a different clock depending on availability.
NOTE(review): readylist_cond is statically initialized right below; the
condition variables that are initialized at run time with a clock
attribute are the per-database prune_cond objects in start_threads.
Confirm whether the sentence above is stale. */
1504 static pthread_cond_t readylist_cond
= PTHREAD_COND_INITIALIZER
;
1505 static pthread_mutex_t readylist_lock
= PTHREAD_MUTEX_INITIALIZER
;
1507 /* The clock to use with the condvar. It starts as CLOCK_REALTIME;
start_threads switches it to CLOCK_MONOTONIC when that clock can be set
on the condition variable attribute. nscd_run_prune reads it through
clock_gettime to compute its wait deadlines. */
1508 static clockid_t timeout_clock
= CLOCK_REALTIME
;
1510 /* Number of threads ready to handle the READYLIST. */
1511 static unsigned long int nready
;
/* nscd_run_prune: body of the per-database clean-up thread.
   P carries the index into DBS (cast to long int); the database must be
   enabled. The thread waits on the PRUNE_COND of its database with a
   deadline taken from TIMEOUT_CLOCK. When the wait times out, the
   recorded WAKEUP_TIME has been reached, or CLEAR_CACHE is set, it
   calls prune_cache while holding PRUNE_RUN_LOCK; with CLEAR_CACHE set
   the time passed to prune_cache is LONG_MAX instead of NOW. The value
   returned by prune_cache is raised to at least CACHE_PRUNE_INTERVAL.
   When setup_thread returned zero the wait is also capped at
   UPDATE_MAPPING_TIMEOUT and the mapping timestamp is refreshed.
   The function is declared __noreturn__. */
1514 /* Function for the clean-up threads. */
1516 __attribute__ ((__noreturn__
))
1517 nscd_run_prune (void *p
)
1519 const long int my_number
= (long int) p
;
1520 assert (dbs
[my_number
].enabled
);
1522 int dont_need_update
= setup_thread (&dbs
[my_number
]);
1524 time_t now
= time (NULL
);
1526 /* We are running. */
1527 dbs
[my_number
].head
->timestamp
= now
;
1529 struct timespec prune_ts
;
1530 if (__glibc_unlikely (clock_gettime (timeout_clock
, &prune_ts
) == -1))
1531 /* Should never happen. */
1534 /* Compute the initial timeout time. Prevent all the timers from going
1535 off at the same time by adding a db-based value. */
1536 prune_ts
.tv_sec
+= CACHE_PRUNE_INTERVAL
+ my_number
;
1537 dbs
[my_number
].wakeup_time
= now
+ CACHE_PRUNE_INTERVAL
+ my_number
;
1539 pthread_mutex_t
*prune_lock
= &dbs
[my_number
].prune_lock
;
1540 pthread_mutex_t
*prune_run_lock
= &dbs
[my_number
].prune_run_lock
;
1541 pthread_cond_t
*prune_cond
= &dbs
[my_number
].prune_cond
;
1543 pthread_mutex_lock (prune_lock
);
1546 /* Wait, but not forever. */
1548 if (! dbs
[my_number
].clear_cache
)
1549 e
= pthread_cond_timedwait (prune_cond
, prune_lock
, &prune_ts
);
1550 assert (__builtin_expect (e
== 0 || e
== ETIMEDOUT
, 1));
1554 if (e
== ETIMEDOUT
|| now
>= dbs
[my_number
].wakeup_time
1555 || dbs
[my_number
].clear_cache
)
1557 /* We will determine the new timeout values based on the
1558 cache content. Should there be concurrent additions to
1559 the cache which are not accounted for in the cache
1560 pruning we want to know about it. Therefore set the
1561 timeout to the maximum. It will be decreased when adding
1562 new entries to the cache, if necessary. */
1563 dbs
[my_number
].wakeup_time
= MAX_TIMEOUT_VALUE
;
1565 /* Unconditionally reset the flag. */
1566 time_t prune_now
= dbs
[my_number
].clear_cache
? LONG_MAX
: now
;
1567 dbs
[my_number
].clear_cache
= 0;
1569 pthread_mutex_unlock (prune_lock
);
1571 /* We use a separate lock for running the prune function (instead
1572 of keeping prune_lock locked) because this enables concurrent
1573 invocations of cache_add which might modify the timeout value. */
1574 pthread_mutex_lock (prune_run_lock
);
1575 next_wait
= prune_cache (&dbs
[my_number
], prune_now
, -1);
1576 pthread_mutex_unlock (prune_run_lock
);
1578 next_wait
= MAX (next_wait
, CACHE_PRUNE_INTERVAL
);
1579 /* If clients cannot determine for sure whether nscd is running
1580 we need to wake up occasionally to update the timestamp.
1581 Wait 90% of the update period. */
1582 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1583 if (__glibc_unlikely (! dont_need_update
))
1585 next_wait
= MIN (UPDATE_MAPPING_TIMEOUT
, next_wait
);
1586 dbs
[my_number
].head
->timestamp
= now
;
1589 pthread_mutex_lock (prune_lock
);
1591 /* Make it known when we will wake up again. */
1592 if (now
+ next_wait
< dbs
[my_number
].wakeup_time
)
1593 dbs
[my_number
].wakeup_time
= now
+ next_wait
;
1595 next_wait
= dbs
[my_number
].wakeup_time
- now
;
1598 /* The cache was just pruned. Do not do it again now. Just
1599 use the new timeout value. */
1600 next_wait
= dbs
[my_number
].wakeup_time
- now
;
1602 if (clock_gettime (timeout_clock
, &prune_ts
) == -1)
1603 /* Should never happen. */
1606 /* Compute next timeout time. */
1607 prune_ts
.tv_sec
+= next_wait
;
/* nscd_run_worker: body of a request-handling thread.
   While holding READYLIST_LOCK it waits on READYLIST_COND until
   READYLIST is non-empty and takes the entry READYLIST->next. After
   releasing the lock it reads a request header from the descriptor,
   checks that the request type lies in [GETPWBYNAME, LASTREQ) and that
   the key length lies in [0, MAXKEYLEN], reads the key into a local
   buffer of MAXKEYLEN + 1 bytes, NUL-terminates it and passes
   descriptor, header, key, uid and pid to handle_request. It then takes
   READYLIST_LOCK again. The function is declared __noreturn__. */
1612 /* This is the main loop. It is replicated in different threads but
1613 the use of the ready list makes sure only one thread handles an
1614 incoming connection. */
1616 __attribute__ ((__noreturn__
))
1617 nscd_run_worker (void *p
)
1621 /* Initial locking. */
1622 pthread_mutex_lock (&readylist_lock
);
1624 /* One more thread available. */
1629 while (readylist
== NULL
)
1630 pthread_cond_wait (&readylist_cond
, &readylist_lock
);
1632 struct fdlist
*it
= readylist
->next
;
1633 if (readylist
->next
== readylist
)
1634 /* Just one entry on the list. */
1637 readylist
->next
= it
->next
;
1639 /* Extract the information and mark the record ready to be used
1644 /* One more thread available. */
1647 /* We are done with the list. */
1648 pthread_mutex_unlock (&readylist_lock
);
1650 /* Now read the request. */
1652 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd
, &req
, sizeof (req
)))
1653 != sizeof (req
), 0))
1655 /* We failed to read data. Note that this also might mean we
1656 failed because we would have blocked. */
1657 if (debug_level
> 0)
1658 dbg_log (_("short read while reading request: %s"),
1659 strerror_r (errno
, buf
, sizeof (buf
)));
1663 /* Check whether this is a valid request type. */
1664 if (req
.type
< GETPWBYNAME
|| req
.type
>= LASTREQ
)
1667 /* Some systems have no SO_PEERCRED implementation. They don't
1668 care about security so we don't either. */
1673 if (__glibc_unlikely (debug_level
> 0))
1675 struct ucred caller
;
1676 socklen_t optlen
= sizeof (caller
);
1678 if (getsockopt (fd
, SOL_SOCKET
, SO_PEERCRED
, &caller
, &optlen
) == 0)
1682 const pid_t pid
= 0;
1685 /* It should not be possible to crash the nscd with a silly
1686 request (i.e., a terribly large key). We limit the size to 1kb. */
1687 if (__builtin_expect (req
.key_len
, 1) < 0
1688 || __builtin_expect (req
.key_len
, 1) > MAXKEYLEN
)
1690 if (debug_level
> 0)
1691 dbg_log (_("key length in request too long: %d"), req
.key_len
);
1696 char keybuf
[MAXKEYLEN
+ 1];
1698 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd
, keybuf
,
1702 /* Again, this can also mean we would have blocked. */
1703 if (debug_level
> 0)
1704 dbg_log (_("short read while reading request key: %s"),
1705 strerror_r (errno
, buf
, sizeof (buf
)));
1708 keybuf
[req
.key_len
] = '\0';
1710 if (__builtin_expect (debug_level
, 0) > 0)
1715 handle_request: request received (Version = %d) from PID %ld"),
1716 req
.version
, (long int) pid
);
1720 handle_request: request received (Version = %d)"), req
.version
);
1723 /* Phew, we got all the data, now process it. */
1724 handle_request (fd
, &req
, keybuf
, uid
, pid
);
1732 pthread_mutex_lock (&readylist_lock
);
1734 /* One more thread available. */
/* Number of entries in FDLIST and STARTTIME; start_threads allocates
   both arrays with NCONNS elements. */
1741 static unsigned int nconns
;
/* NOTE(review): the signature line of the routine below is missing from
   this text; the main loops call fd_ready with a descriptor, which is
   presumably this routine -- confirm.
   While holding READYLIST_LOCK the code looks for an FDLIST slot whose
   NEXT is NULL, stores FD in it and links it into the circular
   READYLIST so that READYLIST points at the new entry. When no worker
   is ready (NREADY == 0) and NTHREADS is below MAX_NTHREADS it tries to
   create another nscd_run_worker thread. After dropping the lock it
   signals READYLIST_COND (the condition guarding the signal is not
   visible in this text). */
1746 pthread_mutex_lock (&readylist_lock
);
1748 /* Find an empty entry in FDLIST. */
1750 for (inner
= 0; inner
< nconns
; ++inner
)
1751 if (fdlist
[inner
].next
== NULL
)
1753 assert (inner
< nconns
);
1755 fdlist
[inner
].fd
= fd
;
1757 if (readylist
== NULL
)
1758 readylist
= fdlist
[inner
].next
= &fdlist
[inner
];
1761 fdlist
[inner
].next
= readylist
->next
;
1762 readylist
= readylist
->next
= &fdlist
[inner
];
1765 bool do_signal
= true;
1766 if (__glibc_unlikely (nready
== 0))
1771 /* Try to start another thread to help out. */
1773 if (nthreads
< max_nthreads
1774 && pthread_create (&th
, &attr
, nscd_run_worker
,
1775 (void *) (long int) nthreads
) == 0)
1777 /* We got another thread. */
1779 /* The new thread might need a kick. */
1785 pthread_mutex_unlock (&readylist_lock
);
1787 /* Tell one of the worker threads there is work to do. */
1789 pthread_cond_signal (&readylist_cond
);
1793 /* Check whether restarting should happen. The result is true only
when PARANOIA is set, READYLIST is empty, every thread is ready
(NREADY == NTHREADS) and NOW has reached RESTART_TIME. */
1795 restart_p (time_t now
)
1797 return (paranoia
&& readylist
== NULL
&& nready
== nthreads
1798 && now
>= restart_time
);
1802 /* Array for times a connection was accepted. main_loop_poll indexes
it by slot in its CONNS array, main_loop_epoll indexes it by descriptor
number and uses 0 to mean that no connection is pending for that entry. */
1803 static time_t *starttime
;
1806 /* Inotify event for changed file. The event header I is followed by a
byte buffer BUF sized for one struct inotify_event plus PATH_MAX bytes. */
1809 struct inotify_event i
;
1811 # define PATH_MAX 1024
1813 char buf
[sizeof (struct inotify_event
) + PATH_MAX
];
1816 /* Returns 0 if the file is there otherwise -1. Presence is tested
with stat64 on FINFO->fname; nothing else about the file is examined. */
1818 check_file (struct traced_file
*finfo
)
1821 /* We could check mtime and if different re-add
1822 the watches, and invalidate the database, but we
1823 don't because we are called from inotify_check_files
1824 which should be doing that work. If sufficient inotify
1825 events were lost then the next pruning or invalidation
1826 will do the stat and mtime check. We don't do it here to
1827 keep the logic simple. */
1828 if (stat64 (finfo
->fname
, &st
) < 0)
/* inotify_check_files: match one inotify event against every traced
   file of every database. For each entry four situations are tested:
   (1) the file watch reports IN_MOVE_SELF, IN_DELETE_SELF or IN_IGNORED;
   (2) the file watch reports IN_CLOSE_WRITE;
   (3) the directory watch reports IN_DELETE_SELF, IN_MOVE_SELF or
       IN_IGNORED;
   (4) the directory watch reports IN_MOVED_TO or IN_CREATE and the event
       name equals the traced short file name SFNAME.
   Watch descriptors that are dropped are recorded as -1 in
   INOTIFY_DESCR, and a re-added file watch stores the value returned by
   inotify_add_watch.
   NOTE(review): in situation (3) the message about a failed removal of
   the file watch prints FINFO->dname although the descriptor being
   removed is the TRACED_FILE one -- confirm whether FNAME was meant. */
1833 /* Process the inotify event in INEV. If the event matches any of the files
1834 registered with a database then mark that database as requiring its cache
1835 to be cleared. We indicate the cache needs clearing by setting
1836 TO_CLEAR[DBCNT] to true for the matching database. */
1838 inotify_check_files (bool *to_clear
, union __inev
*inev
)
1840 /* Check which of the files changed. */
1841 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
1843 struct traced_file
*finfo
= dbs
[dbcnt
].traced_files
;
1845 while (finfo
!= NULL
)
1847 /* The configuration file was moved or deleted.
1848 We stop watching it at that point, and reinitialize. */
1849 if (finfo
->inotify_descr
[TRACED_FILE
] == inev
->i
.wd
1850 && ((inev
->i
.mask
& IN_MOVE_SELF
)
1851 || (inev
->i
.mask
& IN_DELETE_SELF
)
1852 || (inev
->i
.mask
& IN_IGNORED
)))
1855 bool moved
= (inev
->i
.mask
& IN_MOVE_SELF
) != 0;
1857 if (check_file (finfo
) == 0)
1859 dbg_log (_("ignored inotify event for `%s` (file exists)"),
1864 dbg_log (_("monitored file `%s` was %s, removing watch"),
1865 finfo
->fname
, moved
? "moved" : "deleted");
1866 /* File was moved out, remove the watch. Watches are
1867 automatically removed when the file is deleted. */
1870 ret
= inotify_rm_watch (inotify_fd
, inev
->i
.wd
);
1872 dbg_log (_("failed to remove file watch `%s`: %s"),
1873 finfo
->fname
, strerror (errno
));
1875 finfo
->inotify_descr
[TRACED_FILE
] = -1;
1876 to_clear
[dbcnt
] = true;
1877 if (finfo
->call_res_init
)
1881 /* The configuration file was open for writing and has just closed.
1882 We reset the cache and reinitialize. */
1883 if (finfo
->inotify_descr
[TRACED_FILE
] == inev
->i
.wd
1884 && inev
->i
.mask
& IN_CLOSE_WRITE
)
1886 /* Mark cache as needing to be cleared and reinitialize. */
1887 dbg_log (_("monitored file `%s` was written to"), finfo
->fname
);
1888 to_clear
[dbcnt
] = true;
1889 if (finfo
->call_res_init
)
1893 /* The parent directory was moved or deleted. We trigger one last
1894 invalidation. At the next pruning or invalidation we may add
1895 this watch back if the file is present again. */
1896 if (finfo
->inotify_descr
[TRACED_DIR
] == inev
->i
.wd
1897 && ((inev
->i
.mask
& IN_DELETE_SELF
)
1898 || (inev
->i
.mask
& IN_MOVE_SELF
)
1899 || (inev
->i
.mask
& IN_IGNORED
)))
1901 bool moved
= (inev
->i
.mask
& IN_MOVE_SELF
) != 0;
1902 /* The directory watch may have already been removed
1903 but we don't know so we just remove it again and
1904 ignore the error. Then we remove the file watch.
1905 Note: watches are automatically removed for deleted
1908 inotify_rm_watch (inotify_fd
, inev
->i
.wd
);
1909 if (finfo
->inotify_descr
[TRACED_FILE
] != -1)
1911 dbg_log (_("monitored parent directory `%s` was %s, removing watch on `%s`"),
1912 finfo
->dname
, moved
? "moved" : "deleted", finfo
->fname
);
1913 if (inotify_rm_watch (inotify_fd
, finfo
->inotify_descr
[TRACED_FILE
]) < 0)
1914 dbg_log (_("failed to remove file watch `%s`: %s"),
1915 finfo
->dname
, strerror (errno
));
1917 finfo
->inotify_descr
[TRACED_FILE
] = -1;
1918 finfo
->inotify_descr
[TRACED_DIR
] = -1;
1919 to_clear
[dbcnt
] = true;
1920 if (finfo
->call_res_init
)
1922 /* Continue to the next entry since this might be the
1923 parent directory for multiple registered files and
1924 we want to remove watches for all registered files. */
1927 /* The parent directory had a create or moved to event. */
1928 if (finfo
->inotify_descr
[TRACED_DIR
] == inev
->i
.wd
1929 && ((inev
->i
.mask
& IN_MOVED_TO
)
1930 || (inev
->i
.mask
& IN_CREATE
))
1931 && strcmp (inev
->i
.name
, finfo
->sfname
) == 0)
1933 /* We detected a directory change. We look for the creation
1934 of the file we are tracking or the move of the same file
1935 into the directory. */
1937 dbg_log (_("monitored file `%s` was %s, adding watch"),
1939 inev
->i
.mask
& IN_CREATE
? "created" : "moved into place");
1940 /* File was moved in or created. Regenerate the watch. */
1941 if (finfo
->inotify_descr
[TRACED_FILE
] != -1)
1942 inotify_rm_watch (inotify_fd
,
1943 finfo
->inotify_descr
[TRACED_FILE
]);
1945 ret
= inotify_add_watch (inotify_fd
,
1949 dbg_log (_("failed to add file watch `%s`: %s"),
1950 finfo
->fname
, strerror (errno
));
1952 finfo
->inotify_descr
[TRACED_FILE
] = ret
;
1954 /* The file is new or moved so mark cache as needing to
1955 be cleared and reinitialize. */
1956 to_clear
[dbcnt
] = true;
1957 if (finfo
->call_res_init
)
1960 /* Done re-adding the watch. Don't return, we may still
1961 have other files in this same directory, same watch
1962 descriptor, and need to process them. */
1964 /* Other events are ignored, and we move on to the next file. */
1965 finfo
= finfo
->next
;
1970 /* If an entry in the array of booleans TO_CLEAR is TRUE then clear the cache
1971 for the associated database, otherwise do nothing. The TO_CLEAR array must
1972 have LASTDB entries. The clearing itself is not done here: the function
sets CLEAR_CACHE to 1 while holding the PRUNE_LOCK of the database and
then signals its PRUNE_COND, which is the condition variable
nscd_run_prune waits on. */
1974 clear_db_cache (bool *to_clear
)
1976 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
1977 if (to_clear
[dbcnt
])
1979 pthread_mutex_lock (&dbs
[dbcnt
].prune_lock
);
1980 dbs
[dbcnt
].clear_cache
= 1;
1981 pthread_mutex_unlock (&dbs
[dbcnt
].prune_lock
);
1982 pthread_cond_signal (&dbs
[dbcnt
].prune_cond
);
/* handle_inotify_events: read pending events from INOTIFY_FD, pass each
   one to inotify_check_files and finally call clear_db_cache with the
   accumulated TO_CLEAR flags (one per database, all initially false).
   A read that yields less than one struct inotify_event ends the
   reading; a result of -1 with errno other than EAGAIN is treated
   separately from the plain no-more-data case. The callers compare the
   return value against an error indication and disable inotify
   monitoring in that case (the return statements are not visible in
   this text).
   NOTE(review): EPTR is initialized to point into INEV.buf and the
   memcpy below copies NB bytes from EPTR to the start of INEV, so source
   and destination lie in the same object and may overlap -- confirm
   whether memmove is required here. */
1987 handle_inotify_events (void)
1989 bool to_clear
[lastdb
] = { false, };
1992 /* Read all inotify events for files registered via
1993 register_traced_file(). */
1996 /* Potentially read multiple events into buf. */
1997 ssize_t nb
= TEMP_FAILURE_RETRY (read (inotify_fd
,
2000 if (nb
< (ssize_t
) sizeof (struct inotify_event
))
2002 /* Not even 1 event. */
2003 if (__glibc_unlikely (nb
== -1 && errno
!= EAGAIN
))
2005 /* Done reading events that are ready. */
2008 /* Process all events. The normal inotify interface delivers
2009 complete events on a read and never a partial event. */
2010 char *eptr
= &inev
.buf
[0];
2014 /* Check which of the files changed. */
2015 inotify_check_files (to_clear
, &inev
);
2016 count
= sizeof (struct inotify_event
) + inev
.i
.len
;
2019 if (nb
>= (ssize_t
) sizeof (struct inotify_event
))
2020 memcpy (&inev
, eptr
, nb
);
2026 /* Actually perform the cache clearing. */
2027 clear_db_cache (to_clear
);
/* main_loop_poll: main-thread event loop built on poll.
   CONNS holds NCONNS pollfd entries. Slot 0 is checked for new incoming
   connections, which are accepted with accept4 on SOCK; slot 1 is used
   for INOTIFY_FD when that descriptor is valid, and a further slot is
   used for NL_STATUS_FD when valid. An accepted descriptor is stored at
   FIRSTFREE with its accept time in STARTTIME, provided FIRSTFREE is
   below NCONNS. Descriptors with pending events are handed to fd_ready,
   and entries whose STARTTIME is older than ACCEPT_TIMEOUT seconds are
   closed. The poll timeout MAIN_THREAD_TIMEOUT and ACCEPT_TIMEOUT both
   shrink as NUSED grows relative to NCONNS. Each round ends with a
   restart_p check. The function is declared __noreturn__.
   NOTE(review): the readiness scan starts at FIRST while the time-out
   scan runs from NUSED - 1 down to slot 1; if slots below FIRST hold the
   inotify or netlink descriptors, whose STARTTIME entries are never set
   in the visible code, they would also satisfy the time-out test --
   confirm. */
2034 __attribute__ ((__noreturn__
))
2035 main_loop_poll (void)
2037 struct pollfd
*conns
= (struct pollfd
*) xmalloc (nconns
2038 * sizeof (conns
[0]));
2041 conns
[0].events
= POLLRDNORM
;
2043 size_t firstfree
= 1;
2046 if (inotify_fd
!= -1)
2048 conns
[1].fd
= inotify_fd
;
2049 conns
[1].events
= POLLRDNORM
;
2056 size_t idx_nl_status_fd
= 0;
2057 if (nl_status_fd
!= -1)
2059 idx_nl_status_fd
= nused
;
2060 conns
[nused
].fd
= nl_status_fd
;
2061 conns
[nused
].events
= POLLRDNORM
;
2069 /* Wait for any event. We wait at most a couple of seconds so
2070 that we can check whether we should close any of the accepted
2071 connections since we have not received a request. */
2072 #define MAX_ACCEPT_TIMEOUT 30
2073 #define MIN_ACCEPT_TIMEOUT 5
2074 #define MAIN_THREAD_TIMEOUT \
2075 (MAX_ACCEPT_TIMEOUT * 1000 \
2076 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
2078 int n
= poll (conns
, nused
, MAIN_THREAD_TIMEOUT
);
2080 time_t now
= time (NULL
);
2082 /* If there is a descriptor ready for reading or there is a new
2083 connection, process this now. */
2086 if (conns
[0].revents
!= 0)
2088 /* We have a new incoming connection. Accept the connection. */
2089 int fd
= TEMP_FAILURE_RETRY (accept4 (sock
, NULL
, NULL
,
2092 /* Use the descriptor if we have not reached the limit. */
2095 if (firstfree
< nconns
)
2097 conns
[firstfree
].fd
= fd
;
2098 conns
[firstfree
].events
= POLLRDNORM
;
2099 starttime
[firstfree
] = now
;
2100 if (firstfree
>= nused
)
2101 nused
= firstfree
+ 1;
2105 while (firstfree
< nused
&& conns
[firstfree
].fd
!= -1);
2108 /* We cannot use the connection so close it. */
2117 if (inotify_fd
!= -1 && conns
[1].fd
== inotify_fd
)
2119 if (conns
[1].revents
!= 0)
2122 ret
= handle_inotify_events ();
2125 /* Something went wrong when reading the inotify
2126 data. Better disable inotify. */
2127 dbg_log (_("disabled inotify-based monitoring after read error %d"), errno
);
2143 if (idx_nl_status_fd
!= 0 && conns
[idx_nl_status_fd
].revents
!= 0)
2146 /* Read all the data. We do not interpret it here. */
2147 while (TEMP_FAILURE_RETRY (read (nl_status_fd
, buf
,
2148 sizeof (buf
))) != -1)
2151 dbs
[hstdb
].head
->extra_data
[NSCD_HST_IDX_CONF_TIMESTAMP
]
2152 = __bump_nl_timestamp ();
2156 for (size_t cnt
= first
; cnt
< nused
&& n
> 0; ++cnt
)
2157 if (conns
[cnt
].revents
!= 0)
2159 fd_ready (conns
[cnt
].fd
);
2161 /* Clean up the CONNS array. */
2163 if (cnt
< firstfree
)
2165 if (cnt
== nused
- 1)
2168 while (conns
[nused
- 1].fd
== -1);
2174 /* Now find entries which have timed out. */
2177 /* We make the timeout length depend on the number of file
2178 descriptors currently used. */
2179 #define ACCEPT_TIMEOUT \
2180 (MAX_ACCEPT_TIMEOUT \
2181 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
2182 time_t laststart
= now
- ACCEPT_TIMEOUT
;
2184 for (size_t cnt
= nused
- 1; cnt
> 0; --cnt
)
2186 if (conns
[cnt
].fd
!= -1 && starttime
[cnt
] < laststart
)
2188 /* Remove the entry, it timed out. */
2189 (void) close (conns
[cnt
].fd
);
2192 if (cnt
< firstfree
)
2194 if (cnt
== nused
- 1)
2197 while (conns
[nused
- 1].fd
== -1);
2201 if (restart_p (now
))
/* main_loop_epoll: main-thread event loop built on the epoll descriptor
   EFD. SOCK is registered for EPOLLRDNORM, and so are INOTIFY_FD (under
   HAVE_INOTIFY) and NL_STATUS_FD (under HAVE_NETLINK) when they are
   valid; the existing comments state that epoll cannot be used when one
   of these registrations fails. Each epoll_wait call receives at most
   NREVS events. An event on SOCK accepts a connection with accept4,
   adds the new descriptor to EFD and records the accept time in
   STARTTIME[fd]. An event on INOTIFY_FD is passed to
   handle_inotify_events, and a read error there removes INOTIFY_FD from
   EFD. An event on NL_STATUS_FD drains the descriptor. Any other
   descriptor is removed from EFD, handed to fd_ready and its STARTTIME
   entry reset to 0. Afterwards descriptors from HIGHEST down to just
   above STDERR_FILENO whose STARTTIME is non-zero and older than
   ACCEPT_TIMEOUT are removed from EFD, and restart_p is consulted.
   NOTE(review): main_loop_poll stores the result of __bump_nl_timestamp
   in the host database header, while the call visible here does not
   appear to use the result -- confirm whether that is intended. */
2209 main_loop_epoll (int efd
)
2211 struct epoll_event ev
= { 0, };
2215 /* Add the socket. */
2216 ev
.events
= EPOLLRDNORM
;
2218 if (epoll_ctl (efd
, EPOLL_CTL_ADD
, sock
, &ev
) == -1)
2219 /* We cannot use epoll. */
2222 # ifdef HAVE_INOTIFY
2223 if (inotify_fd
!= -1)
2225 ev
.events
= EPOLLRDNORM
;
2226 ev
.data
.fd
= inotify_fd
;
2227 if (epoll_ctl (efd
, EPOLL_CTL_ADD
, inotify_fd
, &ev
) == -1)
2228 /* We cannot use epoll. */
2234 # ifdef HAVE_NETLINK
2235 if (nl_status_fd
!= -1)
2237 ev
.events
= EPOLLRDNORM
;
2238 ev
.data
.fd
= nl_status_fd
;
2239 if (epoll_ctl (efd
, EPOLL_CTL_ADD
, nl_status_fd
, &ev
) == -1)
2240 /* We cannot use epoll. */
2247 struct epoll_event revs
[100];
2248 # define nrevs (sizeof (revs) / sizeof (revs[0]))
2250 int n
= epoll_wait (efd
, revs
, nrevs
, MAIN_THREAD_TIMEOUT
);
2252 time_t now
= time (NULL
);
2254 for (int cnt
= 0; cnt
< n
; ++cnt
)
2255 if (revs
[cnt
].data
.fd
== sock
)
2257 /* A new connection. */
2258 int fd
= TEMP_FAILURE_RETRY (accept4 (sock
, NULL
, NULL
,
2261 /* Use the descriptor if we have not reached the limit. */
2264 /* Try to add the new descriptor. */
2267 || epoll_ctl (efd
, EPOLL_CTL_ADD
, fd
, &ev
) == -1)
2268 /* The descriptor is too large or something went
2269 wrong. Close the descriptor. */
2273 /* Remember when we accepted the connection. */
2274 starttime
[fd
] = now
;
2283 # ifdef HAVE_INOTIFY
2284 else if (revs
[cnt
].data
.fd
== inotify_fd
)
2287 ret
= handle_inotify_events ();
2290 /* Something went wrong when reading the inotify
2291 data. Better disable inotify. */
2292 dbg_log (_("disabled inotify-based monitoring after read error %d"), errno
);
2293 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, inotify_fd
, NULL
);
2300 # ifdef HAVE_NETLINK
2301 else if (revs
[cnt
].data
.fd
== nl_status_fd
)
2304 /* Read all the data. We do not interpret it here. */
2305 while (TEMP_FAILURE_RETRY (read (nl_status_fd
, buf
,
2306 sizeof (buf
))) != -1)
2309 __bump_nl_timestamp ();
2314 /* Remove the descriptor from the epoll descriptor. */
2315 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, revs
[cnt
].data
.fd
, NULL
);
2317 /* Get a worker to handle the request. */
2318 fd_ready (revs
[cnt
].data
.fd
);
2320 /* Reset the time. */
2321 starttime
[revs
[cnt
].data
.fd
] = 0;
2322 if (revs
[cnt
].data
.fd
== highest
)
2325 while (highest
> 0 && starttime
[highest
] == 0);
2330 /* Now look for descriptors for accepted connections which have
2331 no reply in too long of a time. */
2332 time_t laststart
= now
- ACCEPT_TIMEOUT
;
2333 assert (starttime
[sock
] == 0);
2334 # ifdef HAVE_INOTIFY
2335 assert (inotify_fd
== -1 || starttime
[inotify_fd
] == 0);
2337 assert (nl_status_fd
== -1 || starttime
[nl_status_fd
] == 0);
2338 for (int cnt
= highest
; cnt
> STDERR_FILENO
; --cnt
)
2339 if (starttime
[cnt
] != 0 && starttime
[cnt
] < laststart
)
2341 /* We are waiting for this one for too long. Close it. */
2342 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, cnt
, NULL
);
2350 else if (cnt
!= sock
&& starttime
[cnt
] == 0 && cnt
== highest
)
2353 if (restart_p (now
))
/* start_threads: create the prune and worker threads and enter the main
   loop. Steps visible in this text:
   - build a condition variable attribute and, when CLOCK_MONOTONIC is
     usable and can be set on it, record that clock in TIMEOUT_CLOCK;
   - set up the shared thread attribute ATTR (detached, stack size
     NSCD_THREAD_STACKSIZE);
   - unless DEBUG_LEVEL is non-zero, raise NTHREADS to at least LASTDB;
   - for every database initialize PRUNE_COND with the attribute and
     start an nscd_run_prune thread, exiting through do_exit on failure;
   - start NTHREADS nscd_run_worker threads;
   - size NCONNS from sysconf (_SC_OPEN_MAX) bounded by MINCONN and
     MAXCONN, and allocate FDLIST and STARTTIME with that many entries;
   - create an epoll descriptor and call main_loop_epoll. */
2360 /* Start all the threads we want. The initial process is thread no. 1. */
2362 start_threads (void)
2364 /* Initialize the condition variable we will use. The only
2365 non-standard attribute we might use is the clock selection. */
2366 pthread_condattr_t condattr
;
2367 pthread_condattr_init (&condattr
);
2369 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2370 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2371 /* Determine whether the monotonic clock is available. */
2372 struct timespec dummy
;
2373 # if _POSIX_MONOTONIC_CLOCK == 0
2374 if (sysconf (_SC_MONOTONIC_CLOCK
) > 0)
2376 # if _POSIX_CLOCK_SELECTION == 0
2377 if (sysconf (_SC_CLOCK_SELECTION
) > 0)
2379 if (clock_getres (CLOCK_MONOTONIC
, &dummy
) == 0
2380 && pthread_condattr_setclock (&condattr
, CLOCK_MONOTONIC
) == 0)
2381 timeout_clock
= CLOCK_MONOTONIC
;
2384 /* Create the attribute for the threads. They are all created
2386 pthread_attr_init (&attr
);
2387 pthread_attr_setdetachstate (&attr
, PTHREAD_CREATE_DETACHED
);
2388 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2389 pthread_attr_setstacksize (&attr
, NSCD_THREAD_STACKSIZE
);
2391 /* We allow less than LASTDB threads only for debugging. */
2392 if (debug_level
== 0)
2393 nthreads
= MAX (nthreads
, lastdb
);
2395 /* Create the threads which prune the databases. */
2396 // XXX Ideally this work would be done by some of the worker threads.
2397 // XXX But this is problematic since we would need to be able to wake
2398 // XXX them up explicitly as well as part of the group handling the
2399 // XXX ready-list. This requires an operation where we can wait on
2400 // XXX two conditional variables at the same time. This operation
2401 // XXX does not exist (yet).
2402 for (long int i
= 0; i
< lastdb
; ++i
)
2404 /* Initialize the condition variable. */
2405 if (pthread_cond_init (&dbs
[i
].prune_cond
, &condattr
) != 0)
2407 dbg_log (_("could not initialize conditional variable"));
2408 do_exit (1, 0, NULL
);
2413 && pthread_create (&th
, &attr
, nscd_run_prune
, (void *) i
) != 0)
2415 dbg_log (_("could not start clean-up thread; terminating"));
2416 do_exit (1, 0, NULL
);
2420 pthread_condattr_destroy (&condattr
);
2422 for (long int i
= 0; i
< nthreads
; ++i
)
2425 if (pthread_create (&th
, &attr
, nscd_run_worker
, NULL
) != 0)
2429 dbg_log (_("could not start any worker thread; terminating"));
2430 do_exit (1, 0, NULL
);
2437 /* Now it is safe to let the parent know that we're doing fine and it can
2441 /* Determine how much room for descriptors we should initially
2442 allocate. This might need to change later if we cap the number
2444 const long int nfds
= sysconf (_SC_OPEN_MAX
);
2446 #define MAXCONN 16384
2447 if (nfds
== -1 || nfds
> MAXCONN
)
2449 else if (nfds
< MINCONN
)
2454 /* We need memory to pass descriptors on to the worker threads. */
2455 fdlist
= (struct fdlist
*) xcalloc (nconns
, sizeof (fdlist
[0]));
2456 /* Array to keep track when connection was accepted. */
2457 starttime
= (time_t *) xcalloc (nconns
, sizeof (starttime
[0]));
2459 /* In the main thread we execute the loop which handles incoming
2462 int efd
= epoll_create (100);
2465 main_loop_epoll (efd
);
2474 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2475 this function is called, we are not listening on the nscd socket yet so
2476 we can just use the ordinary lookup functions without causing a lockup.
The passwd entry for SERVER_USER supplies SERVER_UID and SERVER_GID;
OLD_UID and OLD_GID are filled from getuid and getgid. getgrouplist is
called twice: first with a NULL buffer, where a return value of 0 is
treated as a fatal error, then with SERVER_GROUPS allocated for
SERVER_NGROUPS entries. Every failure is logged with dbg_log and ends
the process through do_exit. */
2478 begin_drop_privileges (void)
2480 struct passwd
*pwd
= getpwnam (server_user
);
2484 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2485 do_exit (EXIT_FAILURE
, 0,
2486 _("Failed to run nscd as user '%s'"), server_user
);
2489 server_uid
= pwd
->pw_uid
;
2490 server_gid
= pwd
->pw_gid
;
2492 /* Save the old UID/GID if we have to change back. */
2495 old_uid
= getuid ();
2496 old_gid
= getgid ();
2499 if (getgrouplist (server_user
, server_gid
, NULL
, &server_ngroups
) == 0)
2501 /* This really must never happen. */
2502 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2503 do_exit (EXIT_FAILURE
, errno
,
2504 _("initial getgrouplist failed"));
2507 server_groups
= (gid_t
*) xmalloc (server_ngroups
* sizeof (gid_t
));
2509 if (getgrouplist (server_user
, server_gid
, server_groups
, &server_ngroups
)
2512 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2513 do_exit (EXIT_FAILURE
, errno
, _("getgrouplist failed"));
2518 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2519 run nscd as the user specified in the configuration file. */
2521 finish_drop_privileges (void)
2523 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2524 /* We need to preserve the capabilities to connect to the audit daemon. */
2525 cap_t new_caps
= preserve_capabilities ();
2528 if (setgroups (server_ngroups
, server_groups
) == -1)
2530 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2531 do_exit (EXIT_FAILURE
, errno
, _("setgroups failed"));
2536 res
= setresgid (server_gid
, server_gid
, old_gid
);
2538 res
= setgid (server_gid
);
2541 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2542 do_exit (4, errno
, "setgid");
2546 res
= setresuid (server_uid
, server_uid
, old_uid
);
2548 res
= setuid (server_uid
);
2551 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2552 do_exit (4, errno
, "setuid");
2555 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2556 /* Remove the temporary capabilities. */
2557 install_real_capabilities (new_caps
);