1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2007, 2008 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
34 #include <arpa/inet.h>
36 # include <sys/epoll.h>
39 # include <sys/inotify.h>
42 #include <sys/param.h>
45 # include <sys/sendfile.h>
47 #include <sys/socket.h>
54 #include <resolv/resolv.h>
56 # include <kernel-features.h>
60 /* Wrapper functions with error checking for standard functions. */
61 extern void *xmalloc (size_t n
);
62 extern void *xcalloc (size_t n
, size_t s
);
63 extern void *xrealloc (void *o
, size_t n
);
65 /* Support to run nscd as an unprivileged user */
66 const char *server_user
;
67 static uid_t server_uid
;
68 static gid_t server_gid
;
69 const char *stat_user
;
71 static gid_t
*server_groups
;
75 static int server_ngroups
;
77 static pthread_attr_t attr
;
79 static void begin_drop_privileges (void);
80 static void finish_drop_privileges (void);
82 /* Map request type to a string. */
83 const char *const serv2str
[LASTREQ
] =
85 [GETPWBYNAME
] = "GETPWBYNAME",
86 [GETPWBYUID
] = "GETPWBYUID",
87 [GETGRBYNAME
] = "GETGRBYNAME",
88 [GETGRBYGID
] = "GETGRBYGID",
89 [GETHOSTBYNAME
] = "GETHOSTBYNAME",
90 [GETHOSTBYNAMEv6
] = "GETHOSTBYNAMEv6",
91 [GETHOSTBYADDR
] = "GETHOSTBYADDR",
92 [GETHOSTBYADDRv6
] = "GETHOSTBYADDRv6",
93 [SHUTDOWN
] = "SHUTDOWN",
94 [GETSTAT
] = "GETSTAT",
95 [INVALIDATE
] = "INVALIDATE",
96 [GETFDPW
] = "GETFDPW",
97 [GETFDGR
] = "GETFDGR",
98 [GETFDHST
] = "GETFDHST",
100 [INITGROUPS
] = "INITGROUPS",
101 [GETSERVBYNAME
] = "GETSERVBYNAME",
102 [GETSERVBYPORT
] = "GETSERVBYPORT",
103 [GETFDSERV
] = "GETFDSERV"
106 /* The control data structures for the services. */
107 struct database_dyn dbs
[lastdb
] =
110 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
111 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
117 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
118 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
120 .filename
= "/etc/passwd",
121 .db_filename
= _PATH_NSCD_PASSWD_DB
,
122 .disabled_iov
= &pwd_iov_disabled
,
130 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
131 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
137 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
138 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
140 .filename
= "/etc/group",
141 .db_filename
= _PATH_NSCD_GROUP_DB
,
142 .disabled_iov
= &grp_iov_disabled
,
150 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
151 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
155 .propagate
= 0, /* Not used. */
157 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
158 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
160 .filename
= "/etc/hosts",
161 .db_filename
= _PATH_NSCD_HOSTS_DB
,
162 .disabled_iov
= &hst_iov_disabled
,
170 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
171 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
175 .propagate
= 0, /* Not used. */
177 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
178 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
180 .filename
= "/etc/services",
181 .db_filename
= _PATH_NSCD_SERVICES_DB
,
182 .disabled_iov
= &serv_iov_disabled
,
192 /* Mapping of request type to database. */
196 struct database_dyn
*db
;
197 } const reqinfo
[LASTREQ
] =
199 [GETPWBYNAME
] = { true, &dbs
[pwddb
] },
200 [GETPWBYUID
] = { true, &dbs
[pwddb
] },
201 [GETGRBYNAME
] = { true, &dbs
[grpdb
] },
202 [GETGRBYGID
] = { true, &dbs
[grpdb
] },
203 [GETHOSTBYNAME
] = { true, &dbs
[hstdb
] },
204 [GETHOSTBYNAMEv6
] = { true, &dbs
[hstdb
] },
205 [GETHOSTBYADDR
] = { true, &dbs
[hstdb
] },
206 [GETHOSTBYADDRv6
] = { true, &dbs
[hstdb
] },
207 [SHUTDOWN
] = { false, NULL
},
208 [GETSTAT
] = { false, NULL
},
209 [SHUTDOWN
] = { false, NULL
},
210 [GETFDPW
] = { false, &dbs
[pwddb
] },
211 [GETFDGR
] = { false, &dbs
[grpdb
] },
212 [GETFDHST
] = { false, &dbs
[hstdb
] },
213 [GETAI
] = { true, &dbs
[hstdb
] },
214 [INITGROUPS
] = { true, &dbs
[grpdb
] },
215 [GETSERVBYNAME
] = { true, &dbs
[servdb
] },
216 [GETSERVBYPORT
] = { true, &dbs
[servdb
] },
217 [GETFDSERV
] = { false, &dbs
[servdb
] }
221 /* Initial number of threads to use. */
223 /* Maximum number of threads to use. */
224 int max_nthreads
= 32;
226 /* Socket for incoming connections. */
230 /* Inotify descriptor. */
231 static int inotify_fd
= -1;
233 /* Watch descriptor for resolver configuration file. */
234 static int resolv_conf_descr
= -1;
237 #ifndef __ASSUME_SOCK_CLOEXEC
238 /* Negative if SOCK_CLOEXEC is not supported, positive if it is, zero
239 before we know the result. */
240 static int have_sock_cloexec
;
242 #ifndef __ASSUME_ACCEPT4
243 static int have_accept4
;
246 /* Number of times clients had to wait. */
247 unsigned long int client_queued
;
249 /* Data structure for recording in-flight memory allocation. */
250 __thread
struct mem_in_flight mem_in_flight attribute_tls_model_ie
;
251 /* Global list of the mem_in_flight variables of all the threads. */
252 struct mem_in_flight
*mem_in_flight_list
;
/* Send LEN bytes starting at BUF over the socket FD.
   The result is the negative send() return value if a send failed,
   otherwise LEN minus the count N that is still outstanding.
   NOTE(review): the declaration of N and the enclosing loop are not
   visible here; N presumably starts at LEN and is reduced by each
   successful send -- confirm against the full source. */
256 writeall (int fd
, const void *buf
, size_t len
)
/* MSG_NOSIGNAL is passed to send(); TEMP_FAILURE_RETRY wraps the call
   so that it is restarted when interrupted. */
262 ret
= TEMP_FAILURE_RETRY (send (fd
, buf
, n
, MSG_NOSIGNAL
));
/* Step past the bytes that were just sent. */
265 buf
= (const char *) buf
+ ret
;
/* A negative RET is passed through unchanged; otherwise report how
   much of LEN was consumed. */
269 return ret
< 0 ? ret
: len
- n
;
/* Transfer LEN bytes, starting at offset OFF of descriptor FROMFD, to
   descriptor TOFD using sendfile().
   The result is the negative sendfile() return value if a call failed,
   otherwise LEN minus the count N that is still outstanding.
   NOTE(review): the declaration of N and the enclosing loop are not
   visible here -- confirm against the full source. */
275 sendfileall (int tofd
, int fromfd
, off_t off
, size_t len
)
/* OFF is passed by address to sendfile(); the call is wrapped in
   TEMP_FAILURE_RETRY so that it is restarted when interrupted. */
282 ret
= TEMP_FAILURE_RETRY (sendfile (tofd
, fromfd
, &off
, n
));
/* A negative RET is passed through unchanged; otherwise report how
   much of LEN was consumed. */
288 return ret
< 0 ? ret
: len
- n
;
296 /* The following three are not really used, they are symbolic constants. */
302 use_he_begin
= use_he
| use_begin
,
303 use_he_end
= use_he
| use_end
,
306 use_key_begin
= use_key
| use_begin
,
307 use_key_end
= use_key
| use_end
,
308 use_key_first
= use_key_begin
| use_first
,
311 use_data_begin
= use_data
| use_begin
,
312 use_data_end
= use_data
| use_end
,
313 use_data_first
= use_data_begin
| use_first
/* Record in USEMAP that the LEN bytes at offset START of the data area
   are used for an object of kind USE, and check that this is consistent
   with what USEMAP already records for that range.
   FIRST_FREE is the upper bound of the valid data area.
   NOTE(review): the return statements are not visible here; callers
   test the result with `!', so presumably nonzero means the range is
   acceptable -- confirm against the full source. */
318 check_use (const char *data
, nscd_ssize_t first_free
, uint8_t *usemap
,
319 enum usekey use
, ref_t start
, size_t len
)
/* The range must lie entirely below FIRST_FREE and START must have none
   of the BLOCK_ALIGN_M1 bits set. */
323 if (start
> first_free
|| start
+ len
> first_free
324 || (start
& BLOCK_ALIGN_M1
))
/* Case 1: the first byte is not marked yet, i.e. this is the first
   reference to the range. */
327 if (usemap
[start
] == use_not
)
329 /* Add the start marker. */
330 usemap
[start
] = use
| use_begin
;
/* Every following byte of the range is expected to be unmarked too. */
334 if (usemap
[++start
] != use_not
)
339 /* Add the end marker. */
340 usemap
[start
] = use
| use_end
;
/* Case 2: the first byte already carries a begin marker of the same
   kind (the use_first bit is ignored in the comparison). */
342 else if ((usemap
[start
] & ~use_first
) == ((use
| use_begin
) & ~use_first
))
344 /* Hash entries can't be shared. */
/* Carry over the use_first bit of USE into the existing marker. */
348 usemap
[start
] |= (use
& use_first
);
/* The bytes after the begin marker must be marked with plain USE ... */
352 if (usemap
[++start
] != use
)
/* ... and the last one with the matching end marker. */
355 if (usemap
[++start
] != (use
| use_end
))
359 /* Points to a wrong object or somewhere in the middle. */
366 /* Verify data in persistent database. */
368 verify_persistent_db (void *mem
, struct database_pers_head
*readhead
, int dbnr
)
370 assert (dbnr
== pwddb
|| dbnr
== grpdb
|| dbnr
== hstdb
|| dbnr
== servdb
);
372 time_t now
= time (NULL
);
374 struct database_pers_head
*head
= mem
;
375 struct database_pers_head head_copy
= *head
;
377 /* Check that the header that was read matches the head in the database. */
378 if (memcmp (head
, readhead
, sizeof (*head
)) != 0)
381 /* First some easy tests: make sure the database header is sane. */
382 if (head
->version
!= DB_VERSION
383 || head
->header_size
!= sizeof (*head
)
384 /* We allow a timestamp to be one hour ahead of the current time.
385 This should cover daylight saving time changes. */
386 || head
->timestamp
> now
+ 60 * 60 + 60
387 || (head
->gc_cycle
& 1)
389 || (size_t) head
->module
> INT32_MAX
/ sizeof (ref_t
)
390 || (size_t) head
->data_size
> INT32_MAX
- head
->module
* sizeof (ref_t
)
391 || head
->first_free
< 0
392 || head
->first_free
> head
->data_size
393 || (head
->first_free
& BLOCK_ALIGN_M1
) != 0
394 || head
->maxnentries
< 0
395 || head
->maxnsearched
< 0)
398 uint8_t *usemap
= calloc (head
->first_free
, 1);
402 const char *data
= (char *) &head
->array
[roundup (head
->module
,
403 ALIGN
/ sizeof (ref_t
))];
405 nscd_ssize_t he_cnt
= 0;
406 for (nscd_ssize_t cnt
= 0; cnt
< head
->module
; ++cnt
)
408 ref_t trail
= head
->array
[cnt
];
412 while (work
!= ENDREF
)
414 if (! check_use (data
, head
->first_free
, usemap
, use_he
, work
,
415 sizeof (struct hashentry
)))
418 /* Now we know we can dereference the record. */
419 struct hashentry
*here
= (struct hashentry
*) (data
+ work
);
423 /* Make sure the record is for this type of service. */
424 if (here
->type
>= LASTREQ
425 || reqinfo
[here
->type
].db
!= &dbs
[dbnr
])
428 /* Validate boolean field value. */
429 if (here
->first
!= false && here
->first
!= true)
437 || here
->packet
> head
->first_free
438 || here
->packet
+ sizeof (struct datahead
) > head
->first_free
)
441 struct datahead
*dh
= (struct datahead
*) (data
+ here
->packet
);
443 if (! check_use (data
, head
->first_free
, usemap
,
444 use_data
| (here
->first
? use_first
: 0),
445 here
->packet
, dh
->allocsize
))
448 if (dh
->allocsize
< sizeof (struct datahead
)
449 || dh
->recsize
> dh
->allocsize
450 || (dh
->notfound
!= false && dh
->notfound
!= true)
451 || (dh
->usable
!= false && dh
->usable
!= true))
454 if (here
->key
< here
->packet
+ sizeof (struct datahead
)
455 || here
->key
> here
->packet
+ dh
->allocsize
456 || here
->key
+ here
->len
> here
->packet
+ dh
->allocsize
)
459 /* If keys can appear outside of data, this should be done
460 instead. But gc doesn't mark the data in that case. */
461 if (! check_use (data
, head
->first_free
, usemap
,
462 use_key
| (here
->first
? use_first
: 0),
463 here
->key
, here
->len
))
471 /* A circular list, this must not happen. */
474 trail
= ((struct hashentry
*) (data
+ trail
))->next
;
479 if (he_cnt
!= head
->nentries
)
482 /* See if all data and keys had at least one reference from
483 he->first == true hashentry. */
484 for (ref_t idx
= 0; idx
< head
->first_free
; ++idx
)
487 if (usemap
[idx
] == use_key_begin
)
490 if (usemap
[idx
] == use_data_begin
)
494 /* Finally, make sure the database hasn't changed since the first test. */
495 if (memcmp (mem
, &head_copy
, sizeof (*head
)) != 0)
508 # define EXTRA_O_FLAGS O_CLOEXEC
510 # define EXTRA_O_FLAGS 0
514 /* Initialize database information structures. */
518 /* Look up unprivileged uid/gid/groups before we start listening on the
520 if (server_user
!= NULL
)
521 begin_drop_privileges ();
524 /* No configuration for this value, assume a default. */
528 /* Use inotify to recognize changed files. */
529 inotify_fd
= inotify_init1 (IN_NONBLOCK
);
530 # ifndef __ASSUME_IN_NONBLOCK
531 if (inotify_fd
== -1 && errno
== ENOSYS
)
533 inotify_fd
= inotify_init ();
534 if (inotify_fd
!= -1)
535 fcntl (inotify_fd
, F_SETFL
, O_RDONLY
| O_NONBLOCK
);
540 for (size_t cnt
= 0; cnt
< lastdb
; ++cnt
)
541 if (dbs
[cnt
].enabled
)
543 pthread_rwlock_init (&dbs
[cnt
].lock
, NULL
);
544 pthread_mutex_init (&dbs
[cnt
].memlock
, NULL
);
546 if (dbs
[cnt
].persistent
)
548 /* Try to open the appropriate file on disk. */
549 int fd
= open (dbs
[cnt
].db_filename
, O_RDWR
| EXTRA_O_FLAGS
);
556 struct database_pers_head head
;
557 ssize_t n
= TEMP_FAILURE_RETRY (read (fd
, &head
,
559 if (n
!= sizeof (head
) || fstat64 (fd
, &st
) != 0)
562 /* The code is single-threaded at this point so
563 using strerror is just fine. */
564 msg
= strerror (errno
);
566 dbg_log (_("invalid persistent database file \"%s\": %s"),
567 dbs
[cnt
].db_filename
, msg
);
568 unlink (dbs
[cnt
].db_filename
);
570 else if (head
.module
== 0 && head
.data_size
== 0)
572 /* The file has been created, but the head has not
573 been initialized yet. */
574 msg
= _("uninitialized header");
577 else if (head
.header_size
!= (int) sizeof (head
))
579 msg
= _("header size does not match");
582 else if ((total
= (sizeof (head
)
583 + roundup (head
.module
* sizeof (ref_t
),
587 || total
< sizeof (head
))
589 msg
= _("file size does not match");
592 /* Note we map with the maximum size allowed for the
593 database. This is likely much larger than the
594 actual file size. This is OK on most OSes since
595 extensions of the underlying file will
596 automatically translate more pages available for
598 else if ((mem
= mmap (NULL
, dbs
[cnt
].max_db_size
,
599 PROT_READ
| PROT_WRITE
,
603 else if (!verify_persistent_db (mem
, &head
, cnt
))
606 msg
= _("verification failed");
611 /* Success. We have the database. */
613 dbs
[cnt
].memsize
= total
;
614 dbs
[cnt
].data
= (char *)
615 &dbs
[cnt
].head
->array
[roundup (dbs
[cnt
].head
->module
,
616 ALIGN
/ sizeof (ref_t
))];
617 dbs
[cnt
].mmap_used
= true;
619 if (dbs
[cnt
].suggested_module
> head
.module
)
620 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
625 /* We also need a read-only descriptor. */
628 dbs
[cnt
].ro_fd
= open (dbs
[cnt
].db_filename
,
629 O_RDONLY
| EXTRA_O_FLAGS
);
630 if (dbs
[cnt
].ro_fd
== -1)
632 cannot create read-only descriptor for \"%s\"; no mmap"),
633 dbs
[cnt
].db_filename
);
636 // XXX Shall we test whether the descriptors actually
637 // XXX point to the same file?
640 /* Close the file descriptors in case something went
641 wrong in which case the variables have not been
648 if (dbs
[cnt
].head
== NULL
)
650 /* No database loaded. Allocate the data structure,
652 struct database_pers_head head
;
653 size_t total
= (sizeof (head
)
654 + roundup (dbs
[cnt
].suggested_module
655 * sizeof (ref_t
), ALIGN
)
656 + (dbs
[cnt
].suggested_module
657 * DEFAULT_DATASIZE_PER_BUCKET
));
659 /* Try to create the database. If we do not need a
660 persistent database create a temporary file. */
663 if (dbs
[cnt
].persistent
)
665 fd
= open (dbs
[cnt
].db_filename
,
666 O_RDWR
| O_CREAT
| O_EXCL
| O_TRUNC
| EXTRA_O_FLAGS
,
668 if (fd
!= -1 && dbs
[cnt
].shared
)
669 ro_fd
= open (dbs
[cnt
].db_filename
,
670 O_RDONLY
| EXTRA_O_FLAGS
);
674 char fname
[] = _PATH_NSCD_XYZ_DB_TMP
;
675 fd
= mkostemp (fname
, EXTRA_O_FLAGS
);
677 /* We do not need the file name anymore after we
678 opened another file descriptor in read-only mode. */
682 ro_fd
= open (fname
, O_RDONLY
| EXTRA_O_FLAGS
);
692 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
693 dbnames
[cnt
], dbs
[cnt
].db_filename
);
694 // XXX Correct way to terminate?
698 if (dbs
[cnt
].persistent
)
699 dbg_log (_("cannot create %s; no persistent database used"),
700 dbs
[cnt
].db_filename
);
702 dbg_log (_("cannot create %s; no sharing possible"),
703 dbs
[cnt
].db_filename
);
705 dbs
[cnt
].persistent
= 0;
706 // XXX remember: no mmap
710 /* Tell the user if we could not create the read-only
712 if (ro_fd
== -1 && dbs
[cnt
].shared
)
714 cannot create read-only descriptor for \"%s\"; no mmap"),
715 dbs
[cnt
].db_filename
);
717 /* Before we create the header, initialize the hash
718 table. So that if we get interrupted while writing
719 the header we can recognize a partially initialized
721 size_t ps
= sysconf (_SC_PAGESIZE
);
723 assert (~ENDREF
== 0);
724 memset (tmpbuf
, '\xff', ps
);
726 size_t remaining
= dbs
[cnt
].suggested_module
* sizeof (ref_t
);
727 off_t offset
= sizeof (head
);
730 if (offset
% ps
!= 0)
732 towrite
= MIN (remaining
, ps
- (offset
% ps
));
733 if (pwrite (fd
, tmpbuf
, towrite
, offset
) != towrite
)
736 remaining
-= towrite
;
739 while (remaining
> ps
)
741 if (pwrite (fd
, tmpbuf
, ps
, offset
) == -1)
748 && pwrite (fd
, tmpbuf
, remaining
, offset
) != remaining
)
751 /* Create the header of the file. */
752 struct database_pers_head head
=
754 .version
= DB_VERSION
,
755 .header_size
= sizeof (head
),
756 .module
= dbs
[cnt
].suggested_module
,
757 .data_size
= (dbs
[cnt
].suggested_module
758 * DEFAULT_DATASIZE_PER_BUCKET
),
763 if ((TEMP_FAILURE_RETRY (write (fd
, &head
, sizeof (head
)))
765 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd
, 0, total
))
767 || (mem
= mmap (NULL
, dbs
[cnt
].max_db_size
,
768 PROT_READ
| PROT_WRITE
,
769 MAP_SHARED
, fd
, 0)) == MAP_FAILED
)
772 unlink (dbs
[cnt
].db_filename
);
773 dbg_log (_("cannot write to database file %s: %s"),
774 dbs
[cnt
].db_filename
, strerror (errno
));
775 dbs
[cnt
].persistent
= 0;
781 dbs
[cnt
].data
= (char *)
782 &dbs
[cnt
].head
->array
[roundup (dbs
[cnt
].head
->module
,
783 ALIGN
/ sizeof (ref_t
))];
784 dbs
[cnt
].memsize
= total
;
785 dbs
[cnt
].mmap_used
= true;
787 /* Remember the descriptors. */
789 dbs
[cnt
].ro_fd
= ro_fd
;
801 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
802 /* We do not check here whether the O_CLOEXEC provided to the
803 open call was successful or not. The two fcntl calls are
804 only performed once each per process start-up and therefore
805 are not noticeable at all. */
807 && ((dbs
[cnt
].wr_fd
!= -1
808 && fcntl (dbs
[cnt
].wr_fd
, F_SETFD
, FD_CLOEXEC
) == -1)
809 || (dbs
[cnt
].ro_fd
!= -1
810 && fcntl (dbs
[cnt
].ro_fd
, F_SETFD
, FD_CLOEXEC
) == -1)))
813 cannot set socket to close on exec: %s; disabling paranoia mode"),
819 if (dbs
[cnt
].head
== NULL
)
821 /* We do not use the persistent database. Just
822 create an in-memory data structure. */
823 assert (! dbs
[cnt
].persistent
);
825 dbs
[cnt
].head
= xmalloc (sizeof (struct database_pers_head
)
826 + (dbs
[cnt
].suggested_module
828 memset (dbs
[cnt
].head
, '\0', sizeof (struct database_pers_head
));
829 assert (~ENDREF
== 0);
830 memset (dbs
[cnt
].head
->array
, '\xff',
831 dbs
[cnt
].suggested_module
* sizeof (ref_t
));
832 dbs
[cnt
].head
->module
= dbs
[cnt
].suggested_module
;
833 dbs
[cnt
].head
->data_size
= (DEFAULT_DATASIZE_PER_BUCKET
834 * dbs
[cnt
].head
->module
);
835 dbs
[cnt
].data
= xmalloc (dbs
[cnt
].head
->data_size
);
836 dbs
[cnt
].head
->first_free
= 0;
839 assert (dbs
[cnt
].ro_fd
== -1);
842 dbs
[cnt
].inotify_descr
= -1;
843 if (dbs
[cnt
].check_file
)
847 || (dbs
[cnt
].inotify_descr
848 = inotify_add_watch (inotify_fd
, dbs
[cnt
].filename
,
849 IN_DELETE_SELF
| IN_MODIFY
)) < 0)
850 /* We cannot notice changes in the main thread. */
853 /* We need the modification date of the file. */
856 if (stat64 (dbs
[cnt
].filename
, &st
) < 0)
858 /* We cannot stat() the file, disable file checking. */
859 dbg_log (_("cannot stat() file `%s': %s"),
860 dbs
[cnt
].filename
, strerror (errno
));
861 dbs
[cnt
].check_file
= 0;
864 dbs
[cnt
].file_mtime
= st
.st_mtime
;
869 if (cnt
== hstdb
&& inotify_fd
>= 0)
870 /* We also monitor the resolver configuration file. This is only possible when an inotify descriptor exists: inotify_fd keeps its initial value -1 when inotify could not be set up, and the watch must not be requested on that invalid descriptor. */
871 resolv_conf_descr
= inotify_add_watch (inotify_fd
,
873 IN_DELETE_SELF
| IN_MODIFY
);
877 /* Create the socket. */
878 #ifndef __ASSUME_SOCK_CLOEXEC
880 if (have_sock_cloexec
>= 0)
883 sock
= socket (AF_UNIX
, SOCK_STREAM
| SOCK_CLOEXEC
| SOCK_NONBLOCK
, 0);
884 #ifndef __ASSUME_SOCK_CLOEXEC
885 if (have_sock_cloexec
== 0)
886 have_sock_cloexec
= sock
!= -1 || errno
!= EINVAL
? 1 : -1;
889 #ifndef __ASSUME_SOCK_CLOEXEC
890 if (have_sock_cloexec
< 0)
891 sock
= socket (AF_UNIX
, SOCK_STREAM
, 0);
895 dbg_log (_("cannot open socket: %s"), strerror (errno
));
896 exit (errno
== EACCES
? 4 : 1);
898 /* Bind a name to the socket. */
899 struct sockaddr_un sock_addr
;
900 sock_addr
.sun_family
= AF_UNIX
;
901 strcpy (sock_addr
.sun_path
, _PATH_NSCDSOCKET
);
902 if (bind (sock
, (struct sockaddr
*) &sock_addr
, sizeof (sock_addr
)) < 0)
904 dbg_log ("%s: %s", _PATH_NSCDSOCKET
, strerror (errno
));
905 exit (errno
== EACCES
? 4 : 1);
908 #ifndef __ASSUME_SOCK_CLOEXEC
909 if (have_sock_cloexec
< 0)
911 /* We don't want to get stuck on accept. */
912 int fl
= fcntl (sock
, F_GETFL
);
913 if (fl
== -1 || fcntl (sock
, F_SETFL
, fl
| O_NONBLOCK
) == -1)
915 dbg_log (_("cannot change socket to nonblocking mode: %s"),
920 /* The descriptor needs to be closed on exec. */
921 if (paranoia
&& fcntl (sock
, F_SETFD
, FD_CLOEXEC
) == -1)
923 dbg_log (_("cannot set socket to close on exec: %s"),
930 /* Set permissions for the socket. */
931 chmod (_PATH_NSCDSOCKET
, DEFFILEMODE
);
933 /* Set the socket up to accept connections. */
934 if (listen (sock
, SOMAXCONN
) < 0)
936 dbg_log (_("cannot enable socket to accept connections: %s"),
941 /* Change to unprivileged uid/gid/groups if specified in config file */
942 if (server_user
!= NULL
)
943 finish_drop_privileges ();
947 /* Close the connections. */
/* Invalidate the cache of the database whose name equals KEY and report
   the outcome to the client connected on FD.
   NOTE(review): the declaration and values of RESP are not visible
   here; it is the status record written back to FD -- confirm against
   the full source. */
956 invalidate_cache (char *key
, int fd
)
/* Look up KEY in DBNAMES; NUMBER ends up as the index of the matching
   database, or as LASTDB if no name matches. */
961 for (number
= pwddb
; number
< lastdb
; ++number
)
962 if (strcmp (key
, dbnames
[number
]) == 0)
/* Databases with the reset_res flag get extra handling here.
   NOTE(review): the statement guarded by this test is not visible. */
964 if (dbs
[number
].reset_res
)
/* No database of that name: answer the client right away. */
970 if (number
== lastdb
)
973 writeall (fd
, &resp
, sizeof (resp
));
/* Only an enabled database has a cache to prune. */
977 if (dbs
[number
].enabled
)
/* prune_cache() is called with prune_lock held and with LONG_MAX as the
   time argument; FD is handed on to it. */
979 pthread_mutex_lock (&dbs
[number
].prune_lock
);
980 prune_cache (&dbs
[number
], LONG_MAX
, fd
);
981 pthread_mutex_unlock (&dbs
[number
].prune_lock
);
/* Send RESP to the client. */
986 writeall (fd
, &resp
, sizeof (resp
));
/* Pass the read-only descriptor of database DB to the client connected
   on FD, using an SCM_RIGHTS control message. The regular payload of
   the message is KEY (including its terminating NUL byte) followed by
   the 64-bit size of the database mapping. Errors from sendmsg() are
   deliberately ignored. */
993 send_ro_fd (struct database_dyn
*db
, char *key
, int fd
)
995 /* If we do not have a read-only file descriptor do nothing. */
999 /* We need to send some data along with the descriptor. */
/* The mapping size is the data area plus the hash table (rounded up to
   ALIGN) plus the persistent header. */
1000 uint64_t mapsize
= (db
->head
->data_size
1001 + roundup (db
->head
->module
* sizeof (ref_t
), ALIGN
)
1002 + sizeof (struct database_pers_head
));
/* Two payload pieces: the key string and the mapping size. */
1003 struct iovec iov
[2];
1004 iov
[0].iov_base
= key
;
1005 iov
[0].iov_len
= strlen (key
) + 1;
1006 iov
[1].iov_base
= &mapsize
;
1007 iov
[1].iov_len
= sizeof (mapsize
);
1009 /* Prepare the control message to transfer the descriptor. */
/* The control buffer has room for exactly one int, the descriptor. */
1013 char bytes
[CMSG_SPACE (sizeof (int))];
1015 struct msghdr msg
= { .msg_iov
= iov
, .msg_iovlen
= 2,
1016 .msg_control
= buf
.bytes
,
1017 .msg_controllen
= sizeof (buf
) };
1018 struct cmsghdr
*cmsg
= CMSG_FIRSTHDR (&msg
);
1020 cmsg
->cmsg_level
= SOL_SOCKET
;
1021 cmsg
->cmsg_type
= SCM_RIGHTS
;
1022 cmsg
->cmsg_len
= CMSG_LEN (sizeof (int));
/* The descriptor being handed over is the database's read-only one. */
1024 *(int *) CMSG_DATA (cmsg
) = db
->ro_fd
;
/* Shrink the advertised control length to the one header in use. */
1026 msg
.msg_controllen
= cmsg
->cmsg_len
;
1028 /* Send the control message. We repeat when we are interrupted but
1029 everything else is ignored. */
/* Fall back to a zero flag where MSG_NOSIGNAL is not defined. */
1030 #ifndef MSG_NOSIGNAL
1031 # define MSG_NOSIGNAL 0
1033 (void) TEMP_FAILURE_RETRY (sendmsg (fd
, &msg
, MSG_NOSIGNAL
));
/* Log the hand-over only when debugging output is enabled. */
1035 if (__builtin_expect (debug_level
> 0, 0))
1036 dbg_log (_("provide access to FD %d, for %s"), db
->ro_fd
, key
);
1038 #endif /* SCM_RIGHTS */
1041 /* Handle new request. */
1043 handle_request (int fd
, request_header
*req
, void *key
, uid_t uid
, pid_t pid
)
1045 if (__builtin_expect (req
->version
, NSCD_VERSION
) != NSCD_VERSION
)
1047 if (debug_level
> 0)
1049 cannot handle old request version %d; current version is %d"),
1050 req
->version
, NSCD_VERSION
);
1054 /* Perform the SELinux check before we go on to the standard checks. */
1055 if (selinux_enabled
&& nscd_request_avc_has_perm (fd
, req
->type
) != 0)
1057 if (debug_level
> 0)
1066 snprintf (buf
, sizeof (buf
), "/proc/%ld/exe", (long int) pid
);
1067 ssize_t n
= readlink (buf
, buf
, sizeof (buf
) - 1);
1071 request from %ld not handled due to missing permission"), (long int) pid
);
1076 request from '%s' [%ld] not handled due to missing permission"),
1077 buf
, (long int) pid
);
1080 dbg_log (_("request not handled due to missing permission"));
1086 struct database_dyn
*db
= reqinfo
[req
->type
].db
;
1088 /* See whether we can service the request from the cache. */
1089 if (__builtin_expect (reqinfo
[req
->type
].data_request
, true))
1091 if (__builtin_expect (debug_level
, 0) > 0)
1093 if (req
->type
== GETHOSTBYADDR
|| req
->type
== GETHOSTBYADDRv6
)
1095 char buf
[INET6_ADDRSTRLEN
];
1097 dbg_log ("\t%s (%s)", serv2str
[req
->type
],
1098 inet_ntop (req
->type
== GETHOSTBYADDR
1099 ? AF_INET
: AF_INET6
,
1100 key
, buf
, sizeof (buf
)));
1103 dbg_log ("\t%s (%s)", serv2str
[req
->type
], (char *) key
);
1106 /* Is this service enabled? */
1107 if (__builtin_expect (!db
->enabled
, 0))
1109 /* No, send the prepared record. */
1110 if (TEMP_FAILURE_RETRY (send (fd
, db
->disabled_iov
->iov_base
,
1111 db
->disabled_iov
->iov_len
,
1113 != (ssize_t
) db
->disabled_iov
->iov_len
1114 && __builtin_expect (debug_level
, 0) > 0)
1116 /* We have problems sending the result. */
1118 dbg_log (_("cannot write result: %s"),
1119 strerror_r (errno
, buf
, sizeof (buf
)));
1125 /* Be sure we can read the data. */
1126 if (__builtin_expect (pthread_rwlock_tryrdlock (&db
->lock
) != 0, 0))
1128 ++db
->head
->rdlockdelayed
;
1129 pthread_rwlock_rdlock (&db
->lock
);
1132 /* See whether we can handle it from the cache. */
1133 struct datahead
*cached
;
1134 cached
= (struct datahead
*) cache_search (req
->type
, key
, req
->key_len
,
1138 /* Hurray it's in the cache. */
1141 #ifdef HAVE_SENDFILE
1142 if (__builtin_expect (db
->mmap_used
, 1))
1144 assert (db
->wr_fd
!= -1);
1145 assert ((char *) cached
->data
> (char *) db
->data
);
1146 assert ((char *) cached
->data
- (char *) db
->head
1148 <= (sizeof (struct database_pers_head
)
1149 + db
->head
->module
* sizeof (ref_t
)
1150 + db
->head
->data_size
));
1151 nwritten
= sendfileall (fd
, db
->wr_fd
,
1152 (char *) cached
->data
1153 - (char *) db
->head
, cached
->recsize
);
1154 # ifndef __ASSUME_SENDFILE
1155 if (nwritten
== -1 && errno
== ENOSYS
)
1160 # ifndef __ASSUME_SENDFILE
1164 nwritten
= writeall (fd
, cached
->data
, cached
->recsize
);
1166 if (nwritten
!= cached
->recsize
1167 && __builtin_expect (debug_level
, 0) > 0)
1169 /* We have problems sending the result. */
1171 dbg_log (_("cannot write result: %s"),
1172 strerror_r (errno
, buf
, sizeof (buf
)));
1175 pthread_rwlock_unlock (&db
->lock
);
1180 pthread_rwlock_unlock (&db
->lock
);
1182 else if (__builtin_expect (debug_level
, 0) > 0)
1184 if (req
->type
== INVALIDATE
)
1185 dbg_log ("\t%s (%s)", serv2str
[req
->type
], (char *) key
);
1187 dbg_log ("\t%s", serv2str
[req
->type
]);
1190 /* Handle the request. */
1194 addpwbyname (db
, fd
, req
, key
, uid
);
1198 addpwbyuid (db
, fd
, req
, key
, uid
);
1202 addgrbyname (db
, fd
, req
, key
, uid
);
1206 addgrbygid (db
, fd
, req
, key
, uid
);
1210 addhstbyname (db
, fd
, req
, key
, uid
);
1213 case GETHOSTBYNAMEv6
:
1214 addhstbynamev6 (db
, fd
, req
, key
, uid
);
1218 addhstbyaddr (db
, fd
, req
, key
, uid
);
1221 case GETHOSTBYADDRv6
:
1222 addhstbyaddrv6 (db
, fd
, req
, key
, uid
);
1226 addhstai (db
, fd
, req
, key
, uid
);
1230 addinitgroups (db
, fd
, req
, key
, uid
);
1234 addservbyname (db
, fd
, req
, key
, uid
);
1238 addservbyport (db
, fd
, req
, key
, uid
);
1245 /* Get the caller's credentials. */
1247 struct ucred caller
;
1248 socklen_t optlen
= sizeof (caller
);
1250 if (getsockopt (fd
, SOL_SOCKET
, SO_PEERCRED
, &caller
, &optlen
) < 0)
1254 dbg_log (_("error getting caller's id: %s"),
1255 strerror_r (errno
, buf
, sizeof (buf
)));
1261 /* Some systems have no SO_PEERCRED implementation. They don't
1262 care about security so neither do we. */
1267 /* Accept shutdown, getstat and invalidate only from root. For
1268 the stat call also allow the user specified in the config file. */
1269 if (req
->type
== GETSTAT
)
1271 if (uid
== 0 || uid
== stat_uid
)
1272 send_stats (fd
, dbs
);
1276 if (req
->type
== INVALIDATE
)
1277 invalidate_cache (key
, fd
);
1279 termination_handler (0);
1288 send_ro_fd (reqinfo
[req
->type
].db
, key
, fd
);
1293 /* Ignore the command, it's nothing we know. */
1299 /* Restart the process. */
1303 /* First determine the parameters. We do not use the parameters
1304 passed to main() since in case nscd is started by running the
1305 dynamic linker this will not work. Yes, this is not the usual
1306 case but nscd is part of glibc and we occasionally do this. */
1307 size_t buflen
= 1024;
1308 char *buf
= alloca (buflen
);
1310 int fd
= open ("/proc/self/cmdline", O_RDONLY
);
1314 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1323 ssize_t n
= TEMP_FAILURE_RETRY (read (fd
, buf
+ readlen
,
1328 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1338 if (readlen
< buflen
)
1341 /* We might have to extend the buffer. */
1342 size_t old_buflen
= buflen
;
1343 char *newp
= extend_alloca (buf
, buflen
, 2 * buflen
);
1344 buf
= memmove (newp
, buf
, old_buflen
);
1349 /* Parse the command line. Worst case scenario: every two
1350 characters form one parameter (one character plus NUL). */
1351 char **argv
= alloca ((readlen
/ 2 + 1) * sizeof (argv
[0]));
1355 while (cp
< buf
+ readlen
)
1358 cp
= (char *) rawmemchr (cp
, '\0') + 1;
1362 /* Second, change back to the old user if we changed it. */
1363 if (server_user
!= NULL
)
1365 if (setresuid (old_uid
, old_uid
, old_uid
) != 0)
1368 cannot change to old UID: %s; disabling paranoia mode"),
1375 if (setresgid (old_gid
, old_gid
, old_gid
) != 0)
1378 cannot change to old GID: %s; disabling paranoia mode"),
1381 setuid (server_uid
);
1387 /* Next change back to the old working directory. */
1388 if (chdir (oldcwd
) == -1)
1391 cannot change to old working directory: %s; disabling paranoia mode"),
1394 if (server_user
!= NULL
)
1396 setuid (server_uid
);
1397 setgid (server_gid
);
1403 /* Synchronize memory. */
1404 int32_t certainly
[lastdb
];
1405 for (int cnt
= 0; cnt
< lastdb
; ++cnt
)
1406 if (dbs
[cnt
].enabled
)
1408 /* Make sure nobody keeps using the database. */
1409 dbs
[cnt
].head
->timestamp
= 0;
1410 certainly
[cnt
] = dbs
[cnt
].head
->nscd_certainly_running
;
1411 dbs
[cnt
].head
->nscd_certainly_running
= 0;
1413 if (dbs
[cnt
].persistent
)
1415 msync (dbs
[cnt
].head
, dbs
[cnt
].memsize
, MS_ASYNC
);
1418 /* The preparations are done. */
1419 execv ("/proc/self/exe", argv
);
1421 /* If we come here, we will never be able to re-exec. */
1422 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1425 if (server_user
!= NULL
)
1427 setuid (server_uid
);
1428 setgid (server_gid
);
1430 if (chdir ("/") != 0)
1431 dbg_log (_("cannot change current working directory to \"/\": %s"),
1435 /* Reenable the databases. */
1436 time_t now
= time (NULL
);
1437 for (int cnt
= 0; cnt
< lastdb
; ++cnt
)
1438 if (dbs
[cnt
].enabled
)
1440 dbs
[cnt
].head
->timestamp
= now
;
1441 dbs
[cnt
].head
->nscd_certainly_running
= certainly
[cnt
];
1446 /* List of file descriptors. */
1450 struct fdlist
*next
;
1452 /* Memory allocated for the list. */
1453 static struct fdlist
*fdlist
;
1454 /* List of currently ready-to-read file descriptors. */
1455 static struct fdlist
*readylist
;
1457 /* Condition variable and mutex to signal availability of entries in
1458 READYLIST. The condvar is initialized dynamically since we might
1459 use a different clock depending on availability. */
1460 static pthread_cond_t readylist_cond
= PTHREAD_COND_INITIALIZER
;
1461 static pthread_mutex_t readylist_lock
= PTHREAD_MUTEX_INITIALIZER
;
1463 /* The clock to use with the condvar. */
1464 static clockid_t timeout_clock
= CLOCK_REALTIME
;
1466 /* Number of threads ready to handle the READYLIST. */
1467 static unsigned long int nready
;
1470 /* Function for the clean-up threads. */
1472 __attribute__ ((__noreturn__
))
1473 nscd_run_prune (void *p
)
1475 const long int my_number
= (long int) p
;
1476 assert (dbs
[my_number
].enabled
);
1478 int dont_need_update
= setup_thread (&dbs
[my_number
]);
1480 time_t now
= time (NULL
);
1482 /* We are running. */
1483 dbs
[my_number
].head
->timestamp
= now
;
1485 struct timespec prune_ts
;
1486 if (__builtin_expect (clock_gettime (timeout_clock
, &prune_ts
) == -1, 0))
1487 /* Should never happen. */
1490 /* Compute the initial timeout time. Prevent all the timers from going
1491 off at the same time by adding a db-based value. */
1492 prune_ts
.tv_sec
+= CACHE_PRUNE_INTERVAL
+ my_number
;
1493 dbs
[my_number
].wakeup_time
= now
+ CACHE_PRUNE_INTERVAL
+ my_number
;
1495 pthread_mutex_t
*prune_lock
= &dbs
[my_number
].prune_lock
;
1496 pthread_cond_t
*prune_cond
= &dbs
[my_number
].prune_cond
;
1498 pthread_mutex_lock (prune_lock
);
1501 /* Wait, but not forever. */
1503 if (! dbs
[my_number
].clear_cache
)
1504 e
= pthread_cond_timedwait (prune_cond
, prune_lock
, &prune_ts
);
1505 assert (__builtin_expect (e
== 0 || e
== ETIMEDOUT
, 1));
1509 if (e
== ETIMEDOUT
|| now
>= dbs
[my_number
].wakeup_time
1510 || dbs
[my_number
].clear_cache
)
1512 /* We will determine the new timout values based on the
1513 cache content. Should there be concurrent additions to
1514 the cache which are not accounted for in the cache
1515 pruning we want to know about it. Therefore set the
1516 timeout to the maximum. It will be descreased when adding
1517 new entries to the cache, if necessary. */
1518 if (sizeof (time_t) == sizeof (long int))
1519 dbs
[my_number
].wakeup_time
= LONG_MAX
;
1521 dbs
[my_number
].wakeup_time
= INT_MAX
;
1523 /* Unconditionally reset the flag. */
1524 time_t prune_now
= dbs
[my_number
].clear_cache
? LONG_MAX
: now
;
1525 dbs
[my_number
].clear_cache
= 0;
1527 pthread_mutex_unlock (prune_lock
);
1529 next_wait
= prune_cache (&dbs
[my_number
], prune_now
, -1);
1531 next_wait
= MAX (next_wait
, CACHE_PRUNE_INTERVAL
);
1532 /* If clients cannot determine for sure whether nscd is running
1533 we need to wake up occasionally to update the timestamp.
1534 Wait 90% of the update period. */
1535 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1536 if (__builtin_expect (! dont_need_update
, 0))
1538 next_wait
= MIN (UPDATE_MAPPING_TIMEOUT
, next_wait
);
1539 dbs
[my_number
].head
->timestamp
= now
;
1542 pthread_mutex_lock (prune_lock
);
1544 /* Make it known when we will wake up again. */
1545 if (now
+ next_wait
< dbs
[my_number
].wakeup_time
)
1546 dbs
[my_number
].wakeup_time
= now
+ next_wait
;
1548 next_wait
= dbs
[my_number
].wakeup_time
- now
;
1551 /* The cache was just pruned. Do not do it again now. Just
1552 use the new timeout value. */
1553 next_wait
= dbs
[my_number
].wakeup_time
- now
;
1555 if (clock_gettime (timeout_clock
, &prune_ts
) == -1)
1556 /* Should never happen. */
1559 /* Compute next timeout time. */
1560 prune_ts
.tv_sec
+= next_wait
;
/* Thread body for the request workers.

   Each worker first marks every block of its mem_in_flight record as
   unused (dbidx of -1) and pushes the record onto mem_in_flight_list
   with a compare-and-exchange loop.  It then takes readylist_lock,
   sleeps on readylist_cond while READYLIST is empty, and removes the
   entry after the list tail.  With the lock released it reads the
   request header, rejects request types outside
   [GETPWBYNAME, LASTREQ) and key lengths that are negative or above
   MAXKEYLEN, reads the key, and passes everything to handle_request
   before taking the lock again.

   NOTE(review): this listing omits several lines of the function
   (braces, the loop header, declarations such as fd, req, buf and
   uid); read it together with the complete file.  */
1565 /* This is the main loop. It is replicated in different threads but
1566 the use of the ready list makes sure only one thread handles an
1567 incoming connection. */
1569 __attribute__ ((__noreturn__
))
1570 nscd_run_worker (void *p
)
1574 /* Initialize the memory-in-flight list. */
1575 for (enum in_flight idx
= 0; idx
< IDX_last
; ++idx
)
1576 mem_in_flight
.block
[idx
].dbidx
= -1;
1577 /* And queue the structure of this thread. */
1579 mem_in_flight
.next
= mem_in_flight_list
;
1580 while (atomic_compare_and_exchange_bool_acq (&mem_in_flight_list
,
1582 mem_in_flight
.next
) != 0);
1584 /* Initial locking. */
1585 pthread_mutex_lock (&readylist_lock
);
1587 /* One more thread available. */
1592 while (readylist
== NULL
)
1593 pthread_cond_wait (&readylist_cond
, &readylist_lock
);
1595 struct fdlist
*it
= readylist
->next
;
1596 if (readylist
->next
== readylist
)
1597 /* Just one entry on the list. */
1600 readylist
->next
= it
->next
;
1602 /* Extract the information and mark the record ready to be used
1607 /* One more thread available. */
1610 /* We are done with the list. */
1611 pthread_mutex_unlock (&readylist_lock
);
1613 #ifndef __ASSUME_ACCEPT4
1614 if (have_accept4
< 0)
1616 /* We do not want to block on a short read or so. */
1617 int fl
= fcntl (fd
, F_GETFL
);
1618 if (fl
== -1 || fcntl (fd
, F_SETFL
, fl
| O_NONBLOCK
) == -1)
1623 /* Now read the request. */
1625 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd
, &req
, sizeof (req
)))
1626 != sizeof (req
), 0))
1628 /* We failed to read data. Note that this also might mean we
1629 failed because we would have blocked. */
1630 if (debug_level
> 0)
1631 dbg_log (_("short read while reading request: %s"),
1632 strerror_r (errno
, buf
, sizeof (buf
)));
1636 /* Check whether this is a valid request type. */
1637 if (req
.type
< GETPWBYNAME
|| req
.type
>= LASTREQ
)
1640 /* Some systems have no SO_PEERCRED implementation. They don't
1641 care about security so we don't as well. */
1646 if (__builtin_expect (debug_level
> 0, 0))
1648 struct ucred caller
;
1649 socklen_t optlen
= sizeof (caller
);
1651 if (getsockopt (fd
, SOL_SOCKET
, SO_PEERCRED
, &caller
, &optlen
) == 0)
1655 const pid_t pid
= 0;
1658 /* It should not be possible to crash the nscd with a silly
1659 request (i.e., a terribly large key). We limit the size to 1kb. */
1660 if (__builtin_expect (req
.key_len
, 1) < 0
1661 || __builtin_expect (req
.key_len
, 1) > MAXKEYLEN
)
1663 if (debug_level
> 0)
1664 dbg_log (_("key length in request too long: %d"), req
.key_len
);
1669 char keybuf
[MAXKEYLEN
];
1671 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd
, keybuf
,
1675 /* Again, this can also mean we would have blocked. */
1676 if (debug_level
> 0)
1677 dbg_log (_("short read while reading request key: %s"),
1678 strerror_r (errno
, buf
, sizeof (buf
)));
1682 if (__builtin_expect (debug_level
, 0) > 0)
1687 handle_request: request received (Version = %d) from PID %ld"),
1688 req
.version
, (long int) pid
);
1692 handle_request: request received (Version = %d)"), req
.version
);
1695 /* Phew, we got all the data, now process it. */
1696 handle_request (fd
, &req
, keybuf
, uid
, pid
);
1704 pthread_mutex_lock (&readylist_lock
);
1706 /* One more thread available. */
/* Number of entries allocated for FDLIST and STARTTIME; start_threads
   passes it to xcalloc for both arrays.  */
1712 static unsigned int nconns
;
/* Queue a descriptor for the worker threads.

   NOTE(review): the function header is missing from this listing.
   Presumably this is the body of fd_ready, which both main loops call
   with a ready descriptor; confirm against the complete file.

   Under readylist_lock the code finds an unused FDLIST slot
   (next == NULL), stores the descriptor there and links the slot in
   after the current tail of READYLIST, making it the new tail.  When
   no worker is idle (nready == 0) and nthreads is below max_nthreads
   it tries to start another worker.  After the lock is released one
   waiter on readylist_cond is signalled.  */
1717 pthread_mutex_lock (&readylist_lock
);
1719 /* Find an empty entry in FDLIST. */
1721 for (inner
= 0; inner
< nconns
; ++inner
)
1722 if (fdlist
[inner
].next
== NULL
)
1724 assert (inner
< nconns
);
1726 fdlist
[inner
].fd
= fd
;
1728 if (readylist
== NULL
)
1729 readylist
= fdlist
[inner
].next
= &fdlist
[inner
];
1732 fdlist
[inner
].next
= readylist
->next
;
1733 readylist
= readylist
->next
= &fdlist
[inner
];
1736 bool do_signal
= true;
1737 if (__builtin_expect (nready
== 0, 0))
1742 /* Try to start another thread to help out. */
1744 if (nthreads
< max_nthreads
1745 && pthread_create (&th
, &attr
, nscd_run_worker
,
1746 (void *) (long int) nthreads
) == 0)
1748 /* We got another thread. */
1750 /* The new thread might need a kick. */
1756 pthread_mutex_unlock (&readylist_lock
);
1758 /* Tell one of the worker threads there is work to do. */
1760 pthread_cond_signal (&readylist_cond
);
/* Return nonzero when a restart is due at time NOW.  All of the
   following must hold: paranoia is set, READYLIST is empty, every
   thread counted in nthreads is idle (nready == nthreads), and NOW
   has reached restart_time.
   NOTE(review): the return type and the braces are missing from this
   listing.  */
1764 /* Check whether restarting should happen. */
1766 restart_p (time_t now
)
1768 return (paranoia
&& readylist
== NULL
&& nready
== nthreads
1769 && now
>= restart_time
);
/* Allocated in start_threads with nconns zeroed entries.  The poll
   based loop indexes it by slot in its pollfd array, the epoll based
   loop indexes it by descriptor number and uses 0 for no pending
   connection.  */
1773 /* Array for times a connection was accepted. */
1774 static time_t *starttime
;
/* Main loop variant built on poll.

   An array of nconns pollfd entries is allocated.  Slot 0 is polled
   for new connections and slot 1 is given to inotify_fd when that
   descriptor is valid.  Each round waits at most MAIN_THREAD_TIMEOUT
   milliseconds, a value that shrinks as more slots are in use.  The
   loop then
     - accepts a new connection (accept4 when available, otherwise
       accept) and stores it in the first free slot together with its
       accept time,
     - reads inotify events, sets clear_cache for every database whose
       watch descriptor matches (hstdb for resolv_conf_descr) and
       signals the prune_cond of that database,
     - passes descriptors with pending events to fd_ready and frees
       their slots,
     - closes connections accepted more than ACCEPT_TIMEOUT seconds
       ago,
     - checks restart_p.

   NOTE(review): this listing omits many lines of the function
   (braces, loop headers, several statements); read it together with
   the complete file.  */
1778 __attribute__ ((__noreturn__
))
1779 main_loop_poll (void)
1781 struct pollfd
*conns
= (struct pollfd
*) xmalloc (nconns
1782 * sizeof (conns
[0]));
1785 conns
[0].events
= POLLRDNORM
;
1787 size_t firstfree
= 1;
1790 if (inotify_fd
!= -1)
1792 conns
[1].fd
= inotify_fd
;
1793 conns
[1].events
= POLLRDNORM
;
1801 /* Wait for any event. We wait at most a couple of seconds so
1802 that we can check whether we should close any of the accepted
1803 connections since we have not received a request. */
1804 #define MAX_ACCEPT_TIMEOUT 30
1805 #define MIN_ACCEPT_TIMEOUT 5
1806 #define MAIN_THREAD_TIMEOUT \
1807 (MAX_ACCEPT_TIMEOUT * 1000 \
1808 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1810 int n
= poll (conns
, nused
, MAIN_THREAD_TIMEOUT
);
1812 time_t now
= time (NULL
);
1814 /* If there is a descriptor ready for reading or there is a new
1815 connection, process this now. */
1818 if (conns
[0].revents
!= 0)
1820 /* We have a new incoming connection. Accept the connection. */
1823 #ifndef __ASSUME_ACCEPT4
1825 if (have_accept4
>= 0)
1828 fd
= TEMP_FAILURE_RETRY (accept4 (sock
, NULL
, NULL
,
1830 #ifndef __ASSUME_ACCEPT4
1831 if (have_accept4
== 0)
1832 have_accept4
= fd
!= -1 || errno
!= ENOSYS
? 1 : -1;
1835 #ifndef __ASSUME_ACCEPT4
1836 if (have_accept4
< 0)
1837 fd
= TEMP_FAILURE_RETRY (accept (sock
, NULL
, NULL
));
1840 /* Use the descriptor if we have not reached the limit. */
1843 if (firstfree
< nconns
)
1845 conns
[firstfree
].fd
= fd
;
1846 conns
[firstfree
].events
= POLLRDNORM
;
1847 starttime
[firstfree
] = now
;
1848 if (firstfree
>= nused
)
1849 nused
= firstfree
+ 1;
1853 while (firstfree
< nused
&& conns
[firstfree
].fd
!= -1);
1856 /* We cannot use the connection so close it. */
1865 if (inotify_fd
!= -1 && conns
[1].fd
== inotify_fd
)
1867 if (conns
[1].revents
!= 0)
1869 bool to_clear
[lastdb
] = { false, };
1873 # define PATH_MAX 1024
1875 struct inotify_event i
;
1876 char buf
[sizeof (struct inotify_event
) + PATH_MAX
];
1881 ssize_t nb
= TEMP_FAILURE_RETRY (read (inotify_fd
, &inev
,
1883 if (nb
< (ssize_t
) sizeof (struct inotify_event
))
1885 if (__builtin_expect (nb
== -1 && errno
!= EAGAIN
,
1888 /* Something went wrong when reading the inotify
1889 data. Better disable inotify. */
1891 disabled inotify after read error %d"),
1903 /* Check which of the files changed. */
1904 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
1905 if (inev
.i
.wd
== dbs
[dbcnt
].inotify_descr
)
1907 to_clear
[dbcnt
] = true;
1911 if (inev
.i
.wd
== resolv_conf_descr
)
1914 to_clear
[hstdb
] = true;
1919 /* Actually perform the cache clearing. */
1920 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
1921 if (to_clear
[dbcnt
])
1923 pthread_mutex_lock (&dbs
[dbcnt
].prune_lock
);
1924 dbs
[dbcnt
].clear_cache
= 1;
1925 pthread_mutex_unlock (&dbs
[dbcnt
].prune_lock
);
1926 pthread_cond_signal (&dbs
[dbcnt
].prune_cond
);
1936 for (size_t cnt
= first
; cnt
< nused
&& n
> 0; ++cnt
)
1937 if (conns
[cnt
].revents
!= 0)
1939 fd_ready (conns
[cnt
].fd
);
1941 /* Clean up the CONNS array. */
1943 if (cnt
< firstfree
)
1945 if (cnt
== nused
- 1)
1948 while (conns
[nused
- 1].fd
== -1);
1954 /* Now find entries which have timed out. */
1957 /* We make the timeout length depend on the number of file
1958 descriptors currently used. */
1959 #define ACCEPT_TIMEOUT \
1960 (MAX_ACCEPT_TIMEOUT \
1961 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
1962 time_t laststart
= now
- ACCEPT_TIMEOUT
;
1964 for (size_t cnt
= nused
- 1; cnt
> 0; --cnt
)
1966 if (conns
[cnt
].fd
!= -1 && starttime
[cnt
] < laststart
)
1968 /* Remove the entry, it timed out. */
1969 (void) close (conns
[cnt
].fd
);
1972 if (cnt
< firstfree
)
1974 if (cnt
== nused
- 1)
1977 while (conns
[nused
- 1].fd
== -1);
1981 if (restart_p (now
))
/* Main loop variant built on epoll; EFD is the epoll descriptor.

   The listening socket and, under HAVE_INOTIFY, a valid inotify_fd
   are registered for EPOLLRDNORM.  The comments on the failure paths
   say that epoll cannot be used in that case.  Each round collects up
   to 100 events with a timeout of MAIN_THREAD_TIMEOUT and then, per
   event,
     - accepts a new connection on the socket (accept4 when available,
       otherwise accept), adds it to the epoll set and records the
       accept time in starttime indexed by descriptor,
     - for inotify events sets clear_cache for every database whose
       watch descriptor matches (hstdb for resolv_conf_descr) and
       signals the prune_cond of that database,
     - for any other descriptor removes it from the epoll set, passes
       it to fd_ready and resets its starttime entry to 0.
   Afterwards descriptors above STDERR_FILENO whose accept time is
   older than ACCEPT_TIMEOUT are removed from the epoll set, and
   restart_p is checked.

   NOTE(review): this listing omits many lines of the function
   (braces, loop headers, several statements, the highest and nused
   bookkeeping); read it together with the complete file.  */
1989 main_loop_epoll (int efd
)
1991 struct epoll_event ev
= { 0, };
1995 /* Add the socket. */
1996 ev
.events
= EPOLLRDNORM
;
1998 if (epoll_ctl (efd
, EPOLL_CTL_ADD
, sock
, &ev
) == -1)
1999 /* We cannot use epoll. */
2002 # ifdef HAVE_INOTIFY
2003 if (inotify_fd
!= -1)
2005 ev
.events
= EPOLLRDNORM
;
2006 ev
.data
.fd
= inotify_fd
;
2007 if (epoll_ctl (efd
, EPOLL_CTL_ADD
, inotify_fd
, &ev
) == -1)
2008 /* We cannot use epoll. */
2016 struct epoll_event revs
[100];
2017 # define nrevs (sizeof (revs) / sizeof (revs[0]))
2019 int n
= epoll_wait (efd
, revs
, nrevs
, MAIN_THREAD_TIMEOUT
);
2021 time_t now
= time (NULL
);
2023 for (int cnt
= 0; cnt
< n
; ++cnt
)
2024 if (revs
[cnt
].data
.fd
== sock
)
2026 /* A new connection. */
2029 # ifndef __ASSUME_ACCEPT4
2031 if (have_accept4
>= 0)
2034 fd
= TEMP_FAILURE_RETRY (accept4 (sock
, NULL
, NULL
,
2036 # ifndef __ASSUME_ACCEPT4
2037 if (have_accept4
== 0)
2038 have_accept4
= fd
!= -1 || errno
!= ENOSYS
? 1 : -1;
2041 # ifndef __ASSUME_ACCEPT4
2042 if (have_accept4
< 0)
2043 fd
= TEMP_FAILURE_RETRY (accept (sock
, NULL
, NULL
));
2046 /* Use the descriptor if we have not reached the limit. */
2049 /* Try to add the new descriptor. */
2052 || epoll_ctl (efd
, EPOLL_CTL_ADD
, fd
, &ev
) == -1)
2053 /* The descriptor is too large or something went
2054 wrong. Close the descriptor. */
2058 /* Remember when we accepted the connection. */
2059 starttime
[fd
] = now
;
2068 # ifdef HAVE_INOTIFY
2069 else if (revs
[cnt
].data
.fd
== inotify_fd
)
2071 bool to_clear
[lastdb
] = { false, };
2074 struct inotify_event i
;
2075 char buf
[sizeof (struct inotify_event
) + PATH_MAX
];
2080 ssize_t nb
= TEMP_FAILURE_RETRY (read (inotify_fd
, &inev
,
2082 if (nb
< (ssize_t
) sizeof (struct inotify_event
))
2084 if (__builtin_expect (nb
== -1 && errno
!= EAGAIN
, 0))
2086 /* Something went wrong when reading the inotify
2087 data. Better disable inotify. */
2088 dbg_log (_("disabled inotify after read error %d"),
2090 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, inotify_fd
,
2098 /* Check which of the files changed. */
2099 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
2100 if (inev
.i
.wd
== dbs
[dbcnt
].inotify_descr
)
2102 to_clear
[dbcnt
] = true;
2106 if (inev
.i
.wd
== resolv_conf_descr
)
2109 to_clear
[hstdb
] = true;
2114 /* Actually perform the cache clearing. */
2115 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
2116 if (to_clear
[dbcnt
])
2118 pthread_mutex_lock (&dbs
[dbcnt
].prune_lock
);
2119 dbs
[dbcnt
].clear_cache
= 1;
2120 pthread_mutex_unlock (&dbs
[dbcnt
].prune_lock
);
2121 pthread_cond_signal (&dbs
[dbcnt
].prune_cond
);
2127 /* Remove the descriptor from the epoll descriptor. */
2128 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, revs
[cnt
].data
.fd
, NULL
);
2130 /* Get a worker to handle the request. */
2131 fd_ready (revs
[cnt
].data
.fd
);
2133 /* Reset the time. */
2134 starttime
[revs
[cnt
].data
.fd
] = 0;
2135 if (revs
[cnt
].data
.fd
== highest
)
2138 while (highest
> 0 && starttime
[highest
] == 0);
2143 /* Now look for descriptors for accepted connections which have
2144 no reply in too long of a time. */
2145 time_t laststart
= now
- ACCEPT_TIMEOUT
;
2146 assert (starttime
[sock
] == 0);
2147 assert (inotify_fd
== -1 || starttime
[inotify_fd
] == 0);
2148 for (int cnt
= highest
; cnt
> STDERR_FILENO
; --cnt
)
2149 if (starttime
[cnt
] != 0 && starttime
[cnt
] < laststart
)
2151 /* We are waiting for this one for too long. Close it. */
2152 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, cnt
, NULL
);
2160 else if (cnt
!= sock
&& starttime
[cnt
] == 0 && cnt
== highest
)
2163 if (restart_p (now
))
/* Set up and start all threads, then run the main loop.

   Steps visible in this listing:
     - initialize a condition variable attribute and, when the
       monotonic clock and clock selection are available, select
       CLOCK_MONOTONIC on it and record that in timeout_clock,
     - prepare the shared thread attribute: detached threads with a
       stack of NSCD_THREAD_STACKSIZE,
     - unless debugging, raise nthreads to at least lastdb,
     - per database initialize prune_cond with that attribute and
       start a nscd_run_prune thread, passing the index as argument,
     - start nthreads nscd_run_worker threads,
     - size nconns from sysconf (_SC_OPEN_MAX) within the MINCONN and
       MAXCONN bounds and allocate FDLIST and STARTTIME,
     - create an epoll descriptor and call main_loop_epoll.

   NOTE(review): this listing omits many lines of the function
   (braces, conditions, the fallback after main_loop_epoll); read it
   together with the complete file.  */
2170 /* Start all the threads we want. The initial process is thread no. 1. */
2172 start_threads (void)
2174 /* Initialize the condition variable we will use. The only
2175 non-standard attribute we might use is the clock selection. */
2176 pthread_condattr_t condattr
;
2177 pthread_condattr_init (&condattr
);
2179 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2180 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2181 /* Determine whether the monotonic clock is available. */
2182 struct timespec dummy
;
2183 # if _POSIX_MONOTONIC_CLOCK == 0
2184 if (sysconf (_SC_MONOTONIC_CLOCK
) > 0)
2186 # if _POSIX_CLOCK_SELECTION == 0
2187 if (sysconf (_SC_CLOCK_SELECTION
) > 0)
2189 if (clock_getres (CLOCK_MONOTONIC
, &dummy
) == 0
2190 && pthread_condattr_setclock (&condattr
, CLOCK_MONOTONIC
) == 0)
2191 timeout_clock
= CLOCK_MONOTONIC
;
2194 /* Create the attribute for the threads. They are all created
2196 pthread_attr_init (&attr
);
2197 pthread_attr_setdetachstate (&attr
, PTHREAD_CREATE_DETACHED
);
2198 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2199 pthread_attr_setstacksize (&attr
, NSCD_THREAD_STACKSIZE
);
2201 /* We allow less than LASTDB threads only for debugging. */
2202 if (debug_level
== 0)
2203 nthreads
= MAX (nthreads
, lastdb
);
2205 /* Create the threads which prune the databases. */
2206 // XXX Ideally this work would be done by some of the worker threads.
2207 // XXX But this is problematic since we would need to be able to wake
2208 // XXX them up explicitly as well as part of the group handling the
2209 // XXX ready-list. This requires an operation where we can wait on
2210 // XXX two conditional variables at the same time. This operation
2211 // XXX does not exist (yet).
2212 for (long int i
= 0; i
< lastdb
; ++i
)
2214 /* Initialize the conditional variable. */
2215 if (pthread_cond_init (&dbs
[i
].prune_cond
, &condattr
) != 0)
2217 dbg_log (_("could not initialize conditional variable"));
2223 && pthread_create (&th
, &attr
, nscd_run_prune
, (void *) i
) != 0)
2225 dbg_log (_("could not start clean-up thread; terminating"));
2230 pthread_condattr_destroy (&condattr
);
2232 for (long int i
= 0; i
< nthreads
; ++i
)
2235 if (pthread_create (&th
, &attr
, nscd_run_worker
, NULL
) != 0)
2239 dbg_log (_("could not start any worker thread; terminating"));
2247 /* Determine how much room for descriptors we should initially
2248 allocate. This might need to change later if we cap the number
2250 const long int nfds
= sysconf (_SC_OPEN_MAX
);
2252 #define MAXCONN 16384
2253 if (nfds
== -1 || nfds
> MAXCONN
)
2255 else if (nfds
< MINCONN
)
2260 /* We need memory to pass descriptors on to the worker threads. */
2261 fdlist
= (struct fdlist
*) xcalloc (nconns
, sizeof (fdlist
[0]));
2262 /* Array to keep track when connection was accepted. */
2263 starttime
= (time_t *) xcalloc (nconns
, sizeof (starttime
[0]));
2265 /* In the main thread we execute the loop which handles incoming
2268 int efd
= epoll_create (100);
2271 main_loop_epoll (efd
);
/* First half of the privilege drop: collect the identity to switch to.

   Looks up server_user with getpwnam and stores its uid and gid in
   server_uid and server_gid.  The current uid and gid are saved in
   old_uid and old_gid.  getgrouplist is then called twice: once with
   a NULL buffer so that server_ngroups receives the group count (a
   return value of 0 from that call is treated as fatal), and once
   more after server_groups has been allocated with that many entries.
   Each visible failure path logs the problem and calls error with
   EXIT_FAILURE.

   NOTE(review): this listing omits lines of the function (braces and
   some conditions); read it together with the complete file.  */
2280 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2281 this function is called, we are not listening on the nscd socket yet so
2282 we can just use the ordinary lookup functions without causing a lockup */
2284 begin_drop_privileges (void)
2286 struct passwd
*pwd
= getpwnam (server_user
);
2290 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2291 error (EXIT_FAILURE
, 0, _("Failed to run nscd as user '%s'"),
2295 server_uid
= pwd
->pw_uid
;
2296 server_gid
= pwd
->pw_gid
;
2298 /* Save the old UID/GID if we have to change back. */
2301 old_uid
= getuid ();
2302 old_gid
= getgid ();
2305 if (getgrouplist (server_user
, server_gid
, NULL
, &server_ngroups
) == 0)
2307 /* This really must never happen. */
2308 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2309 error (EXIT_FAILURE
, errno
, _("initial getgrouplist failed"));
2312 server_groups
= (gid_t
*) xmalloc (server_ngroups
* sizeof (gid_t
));
2314 if (getgrouplist (server_user
, server_gid
, server_groups
, &server_ngroups
)
2317 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2318 error (EXIT_FAILURE
, errno
, _("getgrouplist failed"));
/* Second half of the privilege drop, using the values collected by
   begin_drop_privileges.

   Order of operations visible here: with HAVE_LIBAUDIT and
   HAVE_LIBCAP the capabilities are preserved first; then setgroups
   installs server_groups; the group id is changed with setresgid
   (keeping old_gid as the saved id) or with setgid; the user id is
   changed with setresuid (keeping old_uid as the saved id) or with
   setuid; finally the temporary capabilities are replaced by
   install_real_capabilities.

   NOTE(review): the conditions that choose between the setres* and
   the plain calls, and most of the failure handling, are missing
   from this listing; confirm against the complete file.  */
2323 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2324 run nscd as the user specified in the configuration file. */
2326 finish_drop_privileges (void)
2328 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2329 /* We need to preserve the capabilities to connect to the audit daemon. */
2330 cap_t new_caps
= preserve_capabilities ();
2333 if (setgroups (server_ngroups
, server_groups
) == -1)
2335 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2336 error (EXIT_FAILURE
, errno
, _("setgroups failed"));
2341 res
= setresgid (server_gid
, server_gid
, old_gid
);
2343 res
= setgid (server_gid
);
2346 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2352 res
= setresuid (server_uid
, server_uid
, old_uid
);
2354 res
= setuid (server_uid
);
2357 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2362 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2363 /* Remove the temporary capabilities. */
2364 install_real_capabilities (new_caps
);