1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2007, 2008, 2009, 2011 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
35 #include <arpa/inet.h>
37 # include <linux/netlink.h>
38 # include <linux/rtnetlink.h>
41 # include <sys/epoll.h>
44 # include <sys/inotify.h>
47 #include <sys/param.h>
50 # include <sys/sendfile.h>
52 #include <sys/socket.h>
59 #include <resolv/resolv.h>
61 # include <kernel-features.h>
65 /* Support to run nscd as an unprivileged user */
66 const char *server_user
;
67 static uid_t server_uid
;
68 static gid_t server_gid
;
69 const char *stat_user
;
71 static gid_t
*server_groups
;
75 static int server_ngroups
;
77 static pthread_attr_t attr
;
79 static void begin_drop_privileges (void);
80 static void finish_drop_privileges (void);
82 /* Map request type to a string. */
83 const char *const serv2str
[LASTREQ
] =
85 [GETPWBYNAME
] = "GETPWBYNAME",
86 [GETPWBYUID
] = "GETPWBYUID",
87 [GETGRBYNAME
] = "GETGRBYNAME",
88 [GETGRBYGID
] = "GETGRBYGID",
89 [GETHOSTBYNAME
] = "GETHOSTBYNAME",
90 [GETHOSTBYNAMEv6
] = "GETHOSTBYNAMEv6",
91 [GETHOSTBYADDR
] = "GETHOSTBYADDR",
92 [GETHOSTBYADDRv6
] = "GETHOSTBYADDRv6",
93 [SHUTDOWN
] = "SHUTDOWN",
94 [GETSTAT
] = "GETSTAT",
95 [INVALIDATE
] = "INVALIDATE",
96 [GETFDPW
] = "GETFDPW",
97 [GETFDGR
] = "GETFDGR",
98 [GETFDHST
] = "GETFDHST",
100 [INITGROUPS
] = "INITGROUPS",
101 [GETSERVBYNAME
] = "GETSERVBYNAME",
102 [GETSERVBYPORT
] = "GETSERVBYPORT",
103 [GETFDSERV
] = "GETFDSERV",
104 [GETNETGRENT
] = "GETNETGRENT",
105 [INNETGR
] = "INNETGR",
106 [GETFDNETGR
] = "GETFDNETGR"
109 /* The control data structures for the services. */
110 struct database_dyn dbs
[lastdb
] =
113 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
114 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
115 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
121 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
122 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
123 .db_filename
= _PATH_NSCD_PASSWD_DB
,
124 .disabled_iov
= &pwd_iov_disabled
,
132 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
133 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
134 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
140 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
141 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
142 .db_filename
= _PATH_NSCD_GROUP_DB
,
143 .disabled_iov
= &grp_iov_disabled
,
151 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
152 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
153 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
157 .propagate
= 0, /* Not used. */
159 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
160 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
161 .db_filename
= _PATH_NSCD_HOSTS_DB
,
162 .disabled_iov
= &hst_iov_disabled
,
170 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
171 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
172 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
176 .propagate
= 0, /* Not used. */
178 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
179 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
180 .db_filename
= _PATH_NSCD_SERVICES_DB
,
181 .disabled_iov
= &serv_iov_disabled
,
189 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
190 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
191 .prune_run_lock
= PTHREAD_MUTEX_INITIALIZER
,
195 .propagate
= 0, /* Not used. */
197 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
198 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
199 .db_filename
= _PATH_NSCD_NETGROUP_DB
,
200 .disabled_iov
= &netgroup_iov_disabled
,
210 /* Mapping of request type to database. */
214 struct database_dyn
*db
;
215 } const reqinfo
[LASTREQ
] =
217 [GETPWBYNAME
] = { true, &dbs
[pwddb
] },
218 [GETPWBYUID
] = { true, &dbs
[pwddb
] },
219 [GETGRBYNAME
] = { true, &dbs
[grpdb
] },
220 [GETGRBYGID
] = { true, &dbs
[grpdb
] },
221 [GETHOSTBYNAME
] = { true, &dbs
[hstdb
] },
222 [GETHOSTBYNAMEv6
] = { true, &dbs
[hstdb
] },
223 [GETHOSTBYADDR
] = { true, &dbs
[hstdb
] },
224 [GETHOSTBYADDRv6
] = { true, &dbs
[hstdb
] },
225 [SHUTDOWN
] = { false, NULL
},
226 [GETSTAT
] = { false, NULL
},
227 [SHUTDOWN
] = { false, NULL
},
228 [GETFDPW
] = { false, &dbs
[pwddb
] },
229 [GETFDGR
] = { false, &dbs
[grpdb
] },
230 [GETFDHST
] = { false, &dbs
[hstdb
] },
231 [GETAI
] = { true, &dbs
[hstdb
] },
232 [INITGROUPS
] = { true, &dbs
[grpdb
] },
233 [GETSERVBYNAME
] = { true, &dbs
[servdb
] },
234 [GETSERVBYPORT
] = { true, &dbs
[servdb
] },
235 [GETFDSERV
] = { false, &dbs
[servdb
] },
236 [GETNETGRENT
] = { true, &dbs
[netgrdb
] },
237 [INNETGR
] = { true, &dbs
[netgrdb
] },
238 [GETFDNETGR
] = { false, &dbs
[netgrdb
] }
242 /* Initial number of threads to use. */
244 /* Maximum number of threads to use. */
245 int max_nthreads
= 32;
247 /* Socket for incoming connections. */
251 /* Inotify descriptor. */
256 /* Descriptor for netlink status updates. */
257 static int nl_status_fd
= -1;
260 #ifndef __ASSUME_SOCK_CLOEXEC
261 /* Negative if SOCK_CLOEXEC is not supported, positive if it is, zero
262 before we know the result. */
263 static int have_sock_cloexec
;
265 #ifndef __ASSUME_ACCEPT4
266 static int have_accept4
;
269 /* Number of times clients had to wait. */
270 unsigned long int client_queued
;
274 writeall (int fd
, const void *buf
, size_t len
)
280 ret
= TEMP_FAILURE_RETRY (send (fd
, buf
, n
, MSG_NOSIGNAL
));
283 buf
= (const char *) buf
+ ret
;
287 return ret
< 0 ? ret
: len
- n
;
293 sendfileall (int tofd
, int fromfd
, off_t off
, size_t len
)
300 ret
= TEMP_FAILURE_RETRY (sendfile (tofd
, fromfd
, &off
, n
));
306 return ret
< 0 ? ret
: len
- n
;
314 /* The following three are not really used, they are symbolic constants. */
320 use_he_begin
= use_he
| use_begin
,
321 use_he_end
= use_he
| use_end
,
324 use_key_begin
= use_key
| use_begin
,
325 use_key_end
= use_key
| use_end
,
326 use_key_first
= use_key_begin
| use_first
,
329 use_data_begin
= use_data
| use_begin
,
330 use_data_end
= use_data
| use_end
,
331 use_data_first
= use_data_begin
| use_first
336 check_use (const char *data
, nscd_ssize_t first_free
, uint8_t *usemap
,
337 enum usekey use
, ref_t start
, size_t len
)
341 if (start
> first_free
|| start
+ len
> first_free
342 || (start
& BLOCK_ALIGN_M1
))
345 if (usemap
[start
] == use_not
)
347 /* Add the start marker. */
348 usemap
[start
] = use
| use_begin
;
352 if (usemap
[++start
] != use_not
)
357 /* Add the end marker. */
358 usemap
[start
] = use
| use_end
;
360 else if ((usemap
[start
] & ~use_first
) == ((use
| use_begin
) & ~use_first
))
362 /* Hash entries can't be shared. */
366 usemap
[start
] |= (use
& use_first
);
370 if (usemap
[++start
] != use
)
373 if (usemap
[++start
] != (use
| use_end
))
377 /* Points to a wrong object or somewhere in the middle. */
384 /* Verify data in persistent database. */
386 verify_persistent_db (void *mem
, struct database_pers_head
*readhead
, int dbnr
)
388 assert (dbnr
== pwddb
|| dbnr
== grpdb
|| dbnr
== hstdb
|| dbnr
== servdb
391 time_t now
= time (NULL
);
393 struct database_pers_head
*head
= mem
;
394 struct database_pers_head head_copy
= *head
;
396 /* Check that the header that was read matches the head in the database. */
397 if (memcmp (head
, readhead
, sizeof (*head
)) != 0)
400 /* First some easy tests: make sure the database header is sane. */
401 if (head
->version
!= DB_VERSION
402 || head
->header_size
!= sizeof (*head
)
403 /* We allow a timestamp to be one hour ahead of the current time.
404 This should cover daylight saving time changes. */
405 || head
->timestamp
> now
+ 60 * 60 + 60
406 || (head
->gc_cycle
& 1)
408 || (size_t) head
->module
> INT32_MAX
/ sizeof (ref_t
)
409 || (size_t) head
->data_size
> INT32_MAX
- head
->module
* sizeof (ref_t
)
410 || head
->first_free
< 0
411 || head
->first_free
> head
->data_size
412 || (head
->first_free
& BLOCK_ALIGN_M1
) != 0
413 || head
->maxnentries
< 0
414 || head
->maxnsearched
< 0)
417 uint8_t *usemap
= calloc (head
->first_free
, 1);
421 const char *data
= (char *) &head
->array
[roundup (head
->module
,
422 ALIGN
/ sizeof (ref_t
))];
424 nscd_ssize_t he_cnt
= 0;
425 for (nscd_ssize_t cnt
= 0; cnt
< head
->module
; ++cnt
)
427 ref_t trail
= head
->array
[cnt
];
431 while (work
!= ENDREF
)
433 if (! check_use (data
, head
->first_free
, usemap
, use_he
, work
,
434 sizeof (struct hashentry
)))
437 /* Now we know we can dereference the record. */
438 struct hashentry
*here
= (struct hashentry
*) (data
+ work
);
442 /* Make sure the record is for this type of service. */
443 if (here
->type
>= LASTREQ
444 || reqinfo
[here
->type
].db
!= &dbs
[dbnr
])
447 /* Validate boolean field value. */
448 if (here
->first
!= false && here
->first
!= true)
456 || here
->packet
> head
->first_free
457 || here
->packet
+ sizeof (struct datahead
) > head
->first_free
)
460 struct datahead
*dh
= (struct datahead
*) (data
+ here
->packet
);
462 if (! check_use (data
, head
->first_free
, usemap
,
463 use_data
| (here
->first
? use_first
: 0),
464 here
->packet
, dh
->allocsize
))
467 if (dh
->allocsize
< sizeof (struct datahead
)
468 || dh
->recsize
> dh
->allocsize
469 || (dh
->notfound
!= false && dh
->notfound
!= true)
470 || (dh
->usable
!= false && dh
->usable
!= true))
473 if (here
->key
< here
->packet
+ sizeof (struct datahead
)
474 || here
->key
> here
->packet
+ dh
->allocsize
475 || here
->key
+ here
->len
> here
->packet
+ dh
->allocsize
)
478 /* If keys can appear outside of data, this should be done
479 instead. But gc doesn't mark the data in that case. */
480 if (! check_use (data
, head
->first_free
, usemap
,
481 use_key
| (here
->first
? use_first
: 0),
482 here
->key
, here
->len
))
490 /* A circular list, this must not happen. */
493 trail
= ((struct hashentry
*) (data
+ trail
))->next
;
498 if (he_cnt
!= head
->nentries
)
501 /* See if all data and keys had at least one reference from
502 he->first == true hashentry. */
503 for (ref_t idx
= 0; idx
< head
->first_free
; ++idx
)
506 if (usemap
[idx
] == use_key_begin
)
509 if (usemap
[idx
] == use_data_begin
)
513 /* Finally, make sure the database hasn't changed since the first test. */
514 if (memcmp (mem
, &head_copy
, sizeof (*head
)) != 0)
527 # define EXTRA_O_FLAGS O_CLOEXEC
529 # define EXTRA_O_FLAGS 0
533 /* Initialize database information structures. */
537 /* Look up unprivileged uid/gid/groups before we start listening on the
539 if (server_user
!= NULL
)
540 begin_drop_privileges ();
543 /* No configuration for this value, assume a default. */
546 for (size_t cnt
= 0; cnt
< lastdb
; ++cnt
)
547 if (dbs
[cnt
].enabled
)
549 pthread_rwlock_init (&dbs
[cnt
].lock
, NULL
);
550 pthread_mutex_init (&dbs
[cnt
].memlock
, NULL
);
552 if (dbs
[cnt
].persistent
)
554 /* Try to open the appropriate file on disk. */
555 int fd
= open (dbs
[cnt
].db_filename
, O_RDWR
| EXTRA_O_FLAGS
);
562 struct database_pers_head head
;
563 ssize_t n
= TEMP_FAILURE_RETRY (read (fd
, &head
,
565 if (n
!= sizeof (head
) || fstat64 (fd
, &st
) != 0)
568 /* The code is single-threaded at this point so
569 using strerror is just fine. */
570 msg
= strerror (errno
);
572 dbg_log (_("invalid persistent database file \"%s\": %s"),
573 dbs
[cnt
].db_filename
, msg
);
574 unlink (dbs
[cnt
].db_filename
);
576 else if (head
.module
== 0 && head
.data_size
== 0)
578 /* The file has been created, but the head has not
579 been initialized yet. */
580 msg
= _("uninitialized header");
583 else if (head
.header_size
!= (int) sizeof (head
))
585 msg
= _("header size does not match");
588 else if ((total
= (sizeof (head
)
589 + roundup (head
.module
* sizeof (ref_t
),
593 || total
< sizeof (head
))
595 msg
= _("file size does not match");
598 /* Note we map with the maximum size allowed for the
599 database. This is likely much larger than the
600 actual file size. This is OK on most OSes since
601 extensions of the underlying file will
602 automatically translate more pages available for
604 else if ((mem
= mmap (NULL
, dbs
[cnt
].max_db_size
,
605 PROT_READ
| PROT_WRITE
,
609 else if (!verify_persistent_db (mem
, &head
, cnt
))
612 msg
= _("verification failed");
617 /* Success. We have the database. */
619 dbs
[cnt
].memsize
= total
;
620 dbs
[cnt
].data
= (char *)
621 &dbs
[cnt
].head
->array
[roundup (dbs
[cnt
].head
->module
,
622 ALIGN
/ sizeof (ref_t
))];
623 dbs
[cnt
].mmap_used
= true;
625 if (dbs
[cnt
].suggested_module
> head
.module
)
626 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
631 /* We also need a read-only descriptor. */
634 dbs
[cnt
].ro_fd
= open (dbs
[cnt
].db_filename
,
635 O_RDONLY
| EXTRA_O_FLAGS
);
636 if (dbs
[cnt
].ro_fd
== -1)
638 cannot create read-only descriptor for \"%s\"; no mmap"),
639 dbs
[cnt
].db_filename
);
642 // XXX Shall we test whether the descriptors actually
643 // XXX point to the same file?
646 /* Close the file descriptors in case something went
647 wrong in which case the variables have not been
652 else if (errno
== EACCES
)
653 error (EXIT_FAILURE
, 0, _("cannot access '%s'"),
654 dbs
[cnt
].db_filename
);
657 if (dbs
[cnt
].head
== NULL
)
659 /* No database loaded. Allocate the data structure,
661 struct database_pers_head head
;
662 size_t total
= (sizeof (head
)
663 + roundup (dbs
[cnt
].suggested_module
664 * sizeof (ref_t
), ALIGN
)
665 + (dbs
[cnt
].suggested_module
666 * DEFAULT_DATASIZE_PER_BUCKET
));
668 /* Try to create the database. If we do not need a
669 persistent database create a temporary file. */
672 if (dbs
[cnt
].persistent
)
674 fd
= open (dbs
[cnt
].db_filename
,
675 O_RDWR
| O_CREAT
| O_EXCL
| O_TRUNC
| EXTRA_O_FLAGS
,
677 if (fd
!= -1 && dbs
[cnt
].shared
)
678 ro_fd
= open (dbs
[cnt
].db_filename
,
679 O_RDONLY
| EXTRA_O_FLAGS
);
683 char fname
[] = _PATH_NSCD_XYZ_DB_TMP
;
684 fd
= mkostemp (fname
, EXTRA_O_FLAGS
);
686 /* We do not need the file name anymore after we
687 opened another file descriptor in read-only mode. */
691 ro_fd
= open (fname
, O_RDONLY
| EXTRA_O_FLAGS
);
701 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
702 dbnames
[cnt
], dbs
[cnt
].db_filename
);
703 // XXX Correct way to terminate?
707 if (dbs
[cnt
].persistent
)
708 dbg_log (_("cannot create %s; no persistent database used"),
709 dbs
[cnt
].db_filename
);
711 dbg_log (_("cannot create %s; no sharing possible"),
712 dbs
[cnt
].db_filename
);
714 dbs
[cnt
].persistent
= 0;
715 // XXX remember: no mmap
719 /* Tell the user if we could not create the read-only
721 if (ro_fd
== -1 && dbs
[cnt
].shared
)
723 cannot create read-only descriptor for \"%s\"; no mmap"),
724 dbs
[cnt
].db_filename
);
726 /* Before we create the header, initialize the hash
727 table. So that if we get interrupted while writing
728 the header we can recognize a partially initialized
730 size_t ps
= sysconf (_SC_PAGESIZE
);
732 assert (~ENDREF
== 0);
733 memset (tmpbuf
, '\xff', ps
);
735 size_t remaining
= dbs
[cnt
].suggested_module
* sizeof (ref_t
);
736 off_t offset
= sizeof (head
);
739 if (offset
% ps
!= 0)
741 towrite
= MIN (remaining
, ps
- (offset
% ps
));
742 if (pwrite (fd
, tmpbuf
, towrite
, offset
) != towrite
)
745 remaining
-= towrite
;
748 while (remaining
> ps
)
750 if (pwrite (fd
, tmpbuf
, ps
, offset
) == -1)
757 && pwrite (fd
, tmpbuf
, remaining
, offset
) != remaining
)
760 /* Create the header of the file. */
761 struct database_pers_head head
=
763 .version
= DB_VERSION
,
764 .header_size
= sizeof (head
),
765 .module
= dbs
[cnt
].suggested_module
,
766 .data_size
= (dbs
[cnt
].suggested_module
767 * DEFAULT_DATASIZE_PER_BUCKET
),
772 if ((TEMP_FAILURE_RETRY (write (fd
, &head
, sizeof (head
)))
774 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd
, 0, total
))
776 || (mem
= mmap (NULL
, dbs
[cnt
].max_db_size
,
777 PROT_READ
| PROT_WRITE
,
778 MAP_SHARED
, fd
, 0)) == MAP_FAILED
)
781 unlink (dbs
[cnt
].db_filename
);
782 dbg_log (_("cannot write to database file %s: %s"),
783 dbs
[cnt
].db_filename
, strerror (errno
));
784 dbs
[cnt
].persistent
= 0;
790 dbs
[cnt
].data
= (char *)
791 &dbs
[cnt
].head
->array
[roundup (dbs
[cnt
].head
->module
,
792 ALIGN
/ sizeof (ref_t
))];
793 dbs
[cnt
].memsize
= total
;
794 dbs
[cnt
].mmap_used
= true;
796 /* Remember the descriptors. */
798 dbs
[cnt
].ro_fd
= ro_fd
;
810 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
811 /* We do not check here whether the O_CLOEXEC provided to the
812 open call was successful or not. The two fcntl calls are
813 only performed once each per process start-up and therefore
814 are not noticeable at all. */
816 && ((dbs
[cnt
].wr_fd
!= -1
817 && fcntl (dbs
[cnt
].wr_fd
, F_SETFD
, FD_CLOEXEC
) == -1)
818 || (dbs
[cnt
].ro_fd
!= -1
819 && fcntl (dbs
[cnt
].ro_fd
, F_SETFD
, FD_CLOEXEC
) == -1)))
822 cannot set socket to close on exec: %s; disabling paranoia mode"),
828 if (dbs
[cnt
].head
== NULL
)
830 /* We do not use the persistent database. Just
831 create an in-memory data structure. */
832 assert (! dbs
[cnt
].persistent
);
834 dbs
[cnt
].head
= xmalloc (sizeof (struct database_pers_head
)
835 + (dbs
[cnt
].suggested_module
837 memset (dbs
[cnt
].head
, '\0', sizeof (struct database_pers_head
));
838 assert (~ENDREF
== 0);
839 memset (dbs
[cnt
].head
->array
, '\xff',
840 dbs
[cnt
].suggested_module
* sizeof (ref_t
));
841 dbs
[cnt
].head
->module
= dbs
[cnt
].suggested_module
;
842 dbs
[cnt
].head
->data_size
= (DEFAULT_DATASIZE_PER_BUCKET
843 * dbs
[cnt
].head
->module
);
844 dbs
[cnt
].data
= xmalloc (dbs
[cnt
].head
->data_size
);
845 dbs
[cnt
].head
->first_free
= 0;
848 assert (dbs
[cnt
].ro_fd
== -1);
852 /* Create the socket. */
853 #ifndef __ASSUME_SOCK_CLOEXEC
855 if (have_sock_cloexec
>= 0)
858 sock
= socket (AF_UNIX
, SOCK_STREAM
| SOCK_CLOEXEC
| SOCK_NONBLOCK
, 0);
859 #ifndef __ASSUME_SOCK_CLOEXEC
860 if (have_sock_cloexec
== 0)
861 have_sock_cloexec
= sock
!= -1 || errno
!= EINVAL
? 1 : -1;
864 #ifndef __ASSUME_SOCK_CLOEXEC
865 if (have_sock_cloexec
< 0)
866 sock
= socket (AF_UNIX
, SOCK_STREAM
, 0);
870 dbg_log (_("cannot open socket: %s"), strerror (errno
));
871 exit (errno
== EACCES
? 4 : 1);
873 /* Bind a name to the socket. */
874 struct sockaddr_un sock_addr
;
875 sock_addr
.sun_family
= AF_UNIX
;
876 strcpy (sock_addr
.sun_path
, _PATH_NSCDSOCKET
);
877 if (bind (sock
, (struct sockaddr
*) &sock_addr
, sizeof (sock_addr
)) < 0)
879 dbg_log ("%s: %s", _PATH_NSCDSOCKET
, strerror (errno
));
880 exit (errno
== EACCES
? 4 : 1);
883 #ifndef __ASSUME_SOCK_CLOEXEC
884 if (have_sock_cloexec
< 0)
886 /* We don't want to get stuck on accept. */
887 int fl
= fcntl (sock
, F_GETFL
);
888 if (fl
== -1 || fcntl (sock
, F_SETFL
, fl
| O_NONBLOCK
) == -1)
890 dbg_log (_("cannot change socket to nonblocking mode: %s"),
895 /* The descriptor needs to be closed on exec. */
896 if (paranoia
&& fcntl (sock
, F_SETFD
, FD_CLOEXEC
) == -1)
898 dbg_log (_("cannot set socket to close on exec: %s"),
905 /* Set permissions for the socket. */
906 chmod (_PATH_NSCDSOCKET
, DEFFILEMODE
);
908 /* Set the socket up to accept connections. */
909 if (listen (sock
, SOMAXCONN
) < 0)
911 dbg_log (_("cannot enable socket to accept connections: %s"),
917 if (dbs
[hstdb
].enabled
)
919 /* Try to open netlink socket to monitor network setting changes. */
920 nl_status_fd
= socket (AF_NETLINK
,
921 SOCK_RAW
| SOCK_CLOEXEC
| SOCK_NONBLOCK
,
923 if (nl_status_fd
!= -1)
925 struct sockaddr_nl snl
;
926 memset (&snl
, '\0', sizeof (snl
));
927 snl
.nl_family
= AF_NETLINK
;
928 /* XXX Is this the best set to use? */
929 snl
.nl_groups
= (RTMGRP_IPV4_IFADDR
| RTMGRP_TC
| RTMGRP_IPV4_MROUTE
930 | RTMGRP_IPV4_ROUTE
| RTMGRP_IPV4_RULE
931 | RTMGRP_IPV6_IFADDR
| RTMGRP_IPV6_MROUTE
932 | RTMGRP_IPV6_ROUTE
| RTMGRP_IPV6_IFINFO
933 | RTMGRP_IPV6_PREFIX
);
935 if (bind (nl_status_fd
, (struct sockaddr
*) &snl
, sizeof (snl
)) != 0)
937 close (nl_status_fd
);
942 /* Start the timestamp process. */
943 dbs
[hstdb
].head
->extra_data
[NSCD_HST_IDX_CONF_TIMESTAMP
]
944 = __bump_nl_timestamp ();
946 # ifndef __ASSUME_SOCK_CLOEXEC
947 if (have_sock_cloexec
< 0)
949 /* We don't want to get stuck on accept. */
950 int fl
= fcntl (nl_status_fd
, F_GETFL
);
952 || fcntl (nl_status_fd
, F_SETFL
, fl
| O_NONBLOCK
) == -1)
955 cannot change socket to nonblocking mode: %s"),
960 /* The descriptor needs to be closed on exec. */
962 && fcntl (nl_status_fd
, F_SETFD
, FD_CLOEXEC
) == -1)
964 dbg_log (_("cannot set socket to close on exec: %s"),
975 /* Change to unprivileged uid/gid/groups if specified in config file */
976 if (server_user
!= NULL
)
977 finish_drop_privileges ();
982 register_traced_file (size_t dbidx
, struct traced_file
*finfo
)
984 if (! dbs
[dbidx
].enabled
|| ! dbs
[dbidx
].check_file
)
987 if (__builtin_expect (debug_level
> 0, 0))
988 dbg_log (_("register trace file %s for database %s"),
989 finfo
->fname
, dbnames
[dbidx
]);
993 || (finfo
->inotify_descr
= inotify_add_watch (inotify_fd
, finfo
->fname
,
998 /* We need the modification date of the file. */
1001 if (stat64 (finfo
->fname
, &st
) < 0)
1003 /* We cannot stat() the file, disable file checking. */
1004 dbg_log (_("cannot stat() file `%s': %s"),
1005 finfo
->fname
, strerror (errno
));
1009 finfo
->inotify_descr
= -1;
1010 finfo
->mtime
= st
.st_mtime
;
1013 /* Queue up the file name. */
1014 finfo
->next
= dbs
[dbidx
].traced_files
;
1015 dbs
[dbidx
].traced_files
= finfo
;
1019 /* Close the connections. */
1021 close_sockets (void)
1028 invalidate_cache (char *key
, int fd
)
1033 for (number
= pwddb
; number
< lastdb
; ++number
)
1034 if (strcmp (key
, dbnames
[number
]) == 0)
1036 if (number
== hstdb
)
1038 struct traced_file
*runp
= dbs
[hstdb
].traced_files
;
1039 while (runp
!= NULL
)
1040 if (runp
->call_res_init
)
1051 if (number
== lastdb
)
1054 writeall (fd
, &resp
, sizeof (resp
));
1058 if (dbs
[number
].enabled
)
1060 pthread_mutex_lock (&dbs
[number
].prune_run_lock
);
1061 prune_cache (&dbs
[number
], LONG_MAX
, fd
);
1062 pthread_mutex_unlock (&dbs
[number
].prune_run_lock
);
1067 writeall (fd
, &resp
, sizeof (resp
));
1074 send_ro_fd (struct database_dyn
*db
, char *key
, int fd
)
1076 /* If we do not have a read-only file descriptor do nothing. */
1077 if (db
->ro_fd
== -1)
1080 /* We need to send some data along with the descriptor. */
1081 uint64_t mapsize
= (db
->head
->data_size
1082 + roundup (db
->head
->module
* sizeof (ref_t
), ALIGN
)
1083 + sizeof (struct database_pers_head
));
1084 struct iovec iov
[2];
1085 iov
[0].iov_base
= key
;
1086 iov
[0].iov_len
= strlen (key
) + 1;
1087 iov
[1].iov_base
= &mapsize
;
1088 iov
[1].iov_len
= sizeof (mapsize
);
1090 /* Prepare the control message to transfer the descriptor. */
1094 char bytes
[CMSG_SPACE (sizeof (int))];
1096 struct msghdr msg
= { .msg_iov
= iov
, .msg_iovlen
= 2,
1097 .msg_control
= buf
.bytes
,
1098 .msg_controllen
= sizeof (buf
) };
1099 struct cmsghdr
*cmsg
= CMSG_FIRSTHDR (&msg
);
1101 cmsg
->cmsg_level
= SOL_SOCKET
;
1102 cmsg
->cmsg_type
= SCM_RIGHTS
;
1103 cmsg
->cmsg_len
= CMSG_LEN (sizeof (int));
1105 int *ip
= (int *) CMSG_DATA (cmsg
);
1108 msg
.msg_controllen
= cmsg
->cmsg_len
;
1110 /* Send the control message. We repeat when we are interrupted but
1111 everything else is ignored. */
1112 #ifndef MSG_NOSIGNAL
1113 # define MSG_NOSIGNAL 0
1115 (void) TEMP_FAILURE_RETRY (sendmsg (fd
, &msg
, MSG_NOSIGNAL
));
1117 if (__builtin_expect (debug_level
> 0, 0))
1118 dbg_log (_("provide access to FD %d, for %s"), db
->ro_fd
, key
);
1120 #endif /* SCM_RIGHTS */
1123 /* Handle new request. */
1125 handle_request (int fd
, request_header
*req
, void *key
, uid_t uid
, pid_t pid
)
1127 if (__builtin_expect (req
->version
, NSCD_VERSION
) != NSCD_VERSION
)
1129 if (debug_level
> 0)
1131 cannot handle old request version %d; current version is %d"),
1132 req
->version
, NSCD_VERSION
);
1136 /* Perform the SELinux check before we go on to the standard checks. */
1137 if (selinux_enabled
&& nscd_request_avc_has_perm (fd
, req
->type
) != 0)
1139 if (debug_level
> 0)
1148 snprintf (buf
, sizeof (buf
), "/proc/%ld/exe", (long int) pid
);
1149 ssize_t n
= readlink (buf
, buf
, sizeof (buf
) - 1);
1153 request from %ld not handled due to missing permission"), (long int) pid
);
1158 request from '%s' [%ld] not handled due to missing permission"),
1159 buf
, (long int) pid
);
1162 dbg_log (_("request not handled due to missing permission"));
1168 struct database_dyn
*db
= reqinfo
[req
->type
].db
;
1170 /* See whether we can service the request from the cache. */
1171 if (__builtin_expect (reqinfo
[req
->type
].data_request
, true))
1173 if (__builtin_expect (debug_level
, 0) > 0)
1175 if (req
->type
== GETHOSTBYADDR
|| req
->type
== GETHOSTBYADDRv6
)
1177 char buf
[INET6_ADDRSTRLEN
];
1179 dbg_log ("\t%s (%s)", serv2str
[req
->type
],
1180 inet_ntop (req
->type
== GETHOSTBYADDR
1181 ? AF_INET
: AF_INET6
,
1182 key
, buf
, sizeof (buf
)));
1185 dbg_log ("\t%s (%s)", serv2str
[req
->type
], (char *) key
);
1188 /* Is this service enabled? */
1189 if (__builtin_expect (!db
->enabled
, 0))
1191 /* No, send the prepared record. */
1192 if (TEMP_FAILURE_RETRY (send (fd
, db
->disabled_iov
->iov_base
,
1193 db
->disabled_iov
->iov_len
,
1195 != (ssize_t
) db
->disabled_iov
->iov_len
1196 && __builtin_expect (debug_level
, 0) > 0)
1198 /* We have problems sending the result. */
1200 dbg_log (_("cannot write result: %s"),
1201 strerror_r (errno
, buf
, sizeof (buf
)));
1207 /* Be sure we can read the data. */
1208 if (__builtin_expect (pthread_rwlock_tryrdlock (&db
->lock
) != 0, 0))
1210 ++db
->head
->rdlockdelayed
;
1211 pthread_rwlock_rdlock (&db
->lock
);
1214 /* See whether we can handle it from the cache. */
1215 struct datahead
*cached
;
1216 cached
= (struct datahead
*) cache_search (req
->type
, key
, req
->key_len
,
1220 /* Hurray it's in the cache. */
1223 #ifdef HAVE_SENDFILE
1224 if (__builtin_expect (db
->mmap_used
, 1))
1226 assert (db
->wr_fd
!= -1);
1227 assert ((char *) cached
->data
> (char *) db
->data
);
1228 assert ((char *) cached
->data
- (char *) db
->head
1230 <= (sizeof (struct database_pers_head
)
1231 + db
->head
->module
* sizeof (ref_t
)
1232 + db
->head
->data_size
));
1233 nwritten
= sendfileall (fd
, db
->wr_fd
,
1234 (char *) cached
->data
1235 - (char *) db
->head
, cached
->recsize
);
1236 # ifndef __ASSUME_SENDFILE
1237 if (nwritten
== -1 && errno
== ENOSYS
)
1242 # ifndef __ASSUME_SENDFILE
1246 nwritten
= writeall (fd
, cached
->data
, cached
->recsize
);
1248 if (nwritten
!= cached
->recsize
1249 && __builtin_expect (debug_level
, 0) > 0)
1251 /* We have problems sending the result. */
1253 dbg_log (_("cannot write result: %s"),
1254 strerror_r (errno
, buf
, sizeof (buf
)));
1257 pthread_rwlock_unlock (&db
->lock
);
1262 pthread_rwlock_unlock (&db
->lock
);
1264 else if (__builtin_expect (debug_level
, 0) > 0)
1266 if (req
->type
== INVALIDATE
)
1267 dbg_log ("\t%s (%s)", serv2str
[req
->type
], (char *) key
);
1269 dbg_log ("\t%s", serv2str
[req
->type
]);
1272 /* Handle the request. */
1276 addpwbyname (db
, fd
, req
, key
, uid
);
1280 addpwbyuid (db
, fd
, req
, key
, uid
);
1284 addgrbyname (db
, fd
, req
, key
, uid
);
1288 addgrbygid (db
, fd
, req
, key
, uid
);
1292 addhstbyname (db
, fd
, req
, key
, uid
);
1295 case GETHOSTBYNAMEv6
:
1296 addhstbynamev6 (db
, fd
, req
, key
, uid
);
1300 addhstbyaddr (db
, fd
, req
, key
, uid
);
1303 case GETHOSTBYADDRv6
:
1304 addhstbyaddrv6 (db
, fd
, req
, key
, uid
);
1308 addhstai (db
, fd
, req
, key
, uid
);
1312 addinitgroups (db
, fd
, req
, key
, uid
);
1316 addservbyname (db
, fd
, req
, key
, uid
);
1320 addservbyport (db
, fd
, req
, key
, uid
);
1324 addgetnetgrent (db
, fd
, req
, key
, uid
);
1328 addinnetgr (db
, fd
, req
, key
, uid
);
1335 /* Get the caller's credentials. */
1337 struct ucred caller
;
1338 socklen_t optlen
= sizeof (caller
);
1340 if (getsockopt (fd
, SOL_SOCKET
, SO_PEERCRED
, &caller
, &optlen
) < 0)
1344 dbg_log (_("error getting caller's id: %s"),
1345 strerror_r (errno
, buf
, sizeof (buf
)));
1351 /* Some systems have no SO_PEERCRED implementation. They don't
1352 care about security so we don't as well. */
1357 /* Accept shutdown, getstat and invalidate only from root. For
1358 the stat call also allow the user specified in the config file. */
1359 if (req
->type
== GETSTAT
)
1361 if (uid
== 0 || uid
== stat_uid
)
1362 send_stats (fd
, dbs
);
1366 if (req
->type
== INVALIDATE
)
1367 invalidate_cache (key
, fd
);
1369 termination_handler (0);
1379 send_ro_fd (reqinfo
[req
->type
].db
, key
, fd
);
1384 /* Ignore the command, it's nothing we know. */
1390 /* Restart the process. */
1394 /* First determine the parameters. We do not use the parameters
1395 passed to main() since in case nscd is started by running the
1396 dynamic linker this will not work. Yes, this is not the usual
1397 case but nscd is part of glibc and we occasionally do this. */
1398 size_t buflen
= 1024;
1399 char *buf
= alloca (buflen
);
1401 int fd
= open ("/proc/self/cmdline", O_RDONLY
);
1405 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1414 ssize_t n
= TEMP_FAILURE_RETRY (read (fd
, buf
+ readlen
,
1419 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1429 if (readlen
< buflen
)
1432 /* We might have to extend the buffer. */
1433 size_t old_buflen
= buflen
;
1434 char *newp
= extend_alloca (buf
, buflen
, 2 * buflen
);
1435 buf
= memmove (newp
, buf
, old_buflen
);
1440 /* Parse the command line. Worst case scenario: every two
1441 characters form one parameter (one character plus NUL). */
1442 char **argv
= alloca ((readlen
/ 2 + 1) * sizeof (argv
[0]));
1446 while (cp
< buf
+ readlen
)
1449 cp
= (char *) rawmemchr (cp
, '\0') + 1;
1453 /* Second, change back to the old user if we changed it. */
1454 if (server_user
!= NULL
)
1456 if (setresuid (old_uid
, old_uid
, old_uid
) != 0)
1459 cannot change to old UID: %s; disabling paranoia mode"),
1466 if (setresgid (old_gid
, old_gid
, old_gid
) != 0)
1469 cannot change to old GID: %s; disabling paranoia mode"),
1472 setuid (server_uid
);
1478 /* Next change back to the old working directory. */
1479 if (chdir (oldcwd
) == -1)
1482 cannot change to old working directory: %s; disabling paranoia mode"),
1485 if (server_user
!= NULL
)
/* Drop the group before the user: once the UID is unprivileged,
   setgid can no longer succeed and the old group would be kept.  */
setgid (server_gid);
setuid (server_uid);
1494 /* Synchronize memory. */
1495 int32_t certainly
[lastdb
];
1496 for (int cnt
= 0; cnt
< lastdb
; ++cnt
)
1497 if (dbs
[cnt
].enabled
)
1499 /* Make sure nobody keeps using the database. */
1500 dbs
[cnt
].head
->timestamp
= 0;
1501 certainly
[cnt
] = dbs
[cnt
].head
->nscd_certainly_running
;
1502 dbs
[cnt
].head
->nscd_certainly_running
= 0;
1504 if (dbs
[cnt
].persistent
)
1506 msync (dbs
[cnt
].head
, dbs
[cnt
].memsize
, MS_ASYNC
);
1509 /* The preparations are done. */
1511 char pathbuf
[PATH_MAX
];
1515 /* Try to exec the real nscd program so the process name (as reported
1516 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1517 if readlink or the exec with the result of the readlink call fails. */
1518 ssize_t n
= readlink ("/proc/self/exe", pathbuf
, sizeof (pathbuf
) - 1);
1522 execv (pathbuf
, argv
);
1524 execv ("/proc/self/exe", argv
);
1526 /* If we come here, we will never be able to re-exec. */
1527 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1530 if (server_user
!= NULL
)
1532 setuid (server_uid
);
1533 setgid (server_gid
);
1535 if (chdir ("/") != 0)
1536 dbg_log (_("cannot change current working directory to \"/\": %s"),
1540 /* Reenable the databases. */
1541 time_t now
= time (NULL
);
1542 for (int cnt
= 0; cnt
< lastdb
; ++cnt
)
1543 if (dbs
[cnt
].enabled
)
1545 dbs
[cnt
].head
->timestamp
= now
;
1546 dbs
[cnt
].head
->nscd_certainly_running
= certainly
[cnt
];
1551 /* List of file descriptors. */
1555 struct fdlist
*next
;
1557 /* Memory allocated for the list. */
1558 static struct fdlist
*fdlist
;
1559 /* List of currently ready-to-read file descriptors. */
1560 static struct fdlist
*readylist
;
1562 /* Conditional variable and mutex to signal availability of entries in
1563 READYLIST. The condvar is initialized dynamically since we might
1564 use a different clock depending on availability. */
1565 static pthread_cond_t readylist_cond
= PTHREAD_COND_INITIALIZER
;
1566 static pthread_mutex_t readylist_lock
= PTHREAD_MUTEX_INITIALIZER
;
1568 /* The clock to use with the condvar. */
1569 static clockid_t timeout_clock
= CLOCK_REALTIME
;
1571 /* Number of threads ready to handle the READYLIST. */
1572 static unsigned long int nready
;
1575 /* Function for the clean-up threads. */
1577 __attribute__ ((__noreturn__
))
1578 nscd_run_prune (void *p
)
1580 const long int my_number
= (long int) p
;
1581 assert (dbs
[my_number
].enabled
);
1583 int dont_need_update
= setup_thread (&dbs
[my_number
]);
1585 time_t now
= time (NULL
);
1587 /* We are running. */
1588 dbs
[my_number
].head
->timestamp
= now
;
1590 struct timespec prune_ts
;
1591 if (__builtin_expect (clock_gettime (timeout_clock
, &prune_ts
) == -1, 0))
1592 /* Should never happen. */
1595 /* Compute the initial timeout time. Prevent all the timers from going
1596 off at the same time by adding a db-based value. */
1597 prune_ts
.tv_sec
+= CACHE_PRUNE_INTERVAL
+ my_number
;
1598 dbs
[my_number
].wakeup_time
= now
+ CACHE_PRUNE_INTERVAL
+ my_number
;
1600 pthread_mutex_t
*prune_lock
= &dbs
[my_number
].prune_lock
;
1601 pthread_mutex_t
*prune_run_lock
= &dbs
[my_number
].prune_run_lock
;
1602 pthread_cond_t
*prune_cond
= &dbs
[my_number
].prune_cond
;
1604 pthread_mutex_lock (prune_lock
);
1607 /* Wait, but not forever. */
1609 if (! dbs
[my_number
].clear_cache
)
1610 e
= pthread_cond_timedwait (prune_cond
, prune_lock
, &prune_ts
);
1611 assert (__builtin_expect (e
== 0 || e
== ETIMEDOUT
, 1));
1615 if (e
== ETIMEDOUT
|| now
>= dbs
[my_number
].wakeup_time
1616 || dbs
[my_number
].clear_cache
)
1618 /* We will determine the new timeout values based on the
1619 cache content. Should there be concurrent additions to
1620 the cache which are not accounted for in the cache
1621 pruning we want to know about it. Therefore set the
1622 timeout to the maximum. It will be decreased when adding
1623 new entries to the cache, if necessary. */
1624 dbs
[my_number
].wakeup_time
= MAX_TIMEOUT_VALUE
;
1626 /* Unconditionally reset the flag. */
1627 time_t prune_now
= dbs
[my_number
].clear_cache
? LONG_MAX
: now
;
1628 dbs
[my_number
].clear_cache
= 0;
1630 pthread_mutex_unlock (prune_lock
);
1632 /* We use a separate lock for running the prune function (instead
1633 of keeping prune_lock locked) because this enables concurrent
1634 invocations of cache_add which might modify the timeout value. */
1635 pthread_mutex_lock (prune_run_lock
);
1636 next_wait
= prune_cache (&dbs
[my_number
], prune_now
, -1);
1637 pthread_mutex_unlock (prune_run_lock
);
1639 next_wait
= MAX (next_wait
, CACHE_PRUNE_INTERVAL
);
1640 /* If clients cannot determine for sure whether nscd is running
1641 we need to wake up occasionally to update the timestamp.
1642 Wait 90% of the update period. */
1643 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1644 if (__builtin_expect (! dont_need_update
, 0))
1646 next_wait
= MIN (UPDATE_MAPPING_TIMEOUT
, next_wait
);
1647 dbs
[my_number
].head
->timestamp
= now
;
1650 pthread_mutex_lock (prune_lock
);
1652 /* Make it known when we will wake up again. */
1653 if (now
+ next_wait
< dbs
[my_number
].wakeup_time
)
1654 dbs
[my_number
].wakeup_time
= now
+ next_wait
;
1656 next_wait
= dbs
[my_number
].wakeup_time
- now
;
1659 /* The cache was just pruned. Do not do it again now. Just
1660 use the new timeout value. */
1661 next_wait
= dbs
[my_number
].wakeup_time
- now
;
1663 if (clock_gettime (timeout_clock
, &prune_ts
) == -1)
1664 /* Should never happen. */
1667 /* Compute next timeout time. */
1668 prune_ts
.tv_sec
+= next_wait
;
1673 /* This is the main loop. It is replicated in different threads but
1674 the use of the ready list makes sure only one thread handles an
1675 incoming connection. */
1677 __attribute__ ((__noreturn__
))
1678 nscd_run_worker (void *p
)
1682 /* Initial locking. */
1683 pthread_mutex_lock (&readylist_lock
);
1685 /* One more thread available. */
1690 while (readylist
== NULL
)
1691 pthread_cond_wait (&readylist_cond
, &readylist_lock
);
1693 struct fdlist
*it
= readylist
->next
;
1694 if (readylist
->next
== readylist
)
1695 /* Just one entry on the list. */
1698 readylist
->next
= it
->next
;
1700 /* Extract the information and mark the record ready to be used. */
1705 /* One more thread available. */
1708 /* We are done with the list. */
1709 pthread_mutex_unlock (&readylist_lock
);
1711 #ifndef __ASSUME_ACCEPT4
1712 if (have_accept4
< 0)
1714 /* We do not want to block on a short read or so. */
1715 int fl
= fcntl (fd
, F_GETFL
);
1716 if (fl
== -1 || fcntl (fd
, F_SETFL
, fl
| O_NONBLOCK
) == -1)
1721 /* Now read the request. */
1723 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd
, &req
, sizeof (req
)))
1724 != sizeof (req
), 0))
1726 /* We failed to read data. Note that this also might mean we
1727 failed because we would have blocked. */
1728 if (debug_level
> 0)
1729 dbg_log (_("short read while reading request: %s"),
1730 strerror_r (errno
, buf
, sizeof (buf
)));
1734 /* Check whether this is a valid request type. */
1735 if (req
.type
< GETPWBYNAME
|| req
.type
>= LASTREQ
)
1738 /* Some systems have no SO_PEERCRED implementation. They don't
1739 care about security so we don't either. */
1744 if (__builtin_expect (debug_level
> 0, 0))
1746 struct ucred caller
;
1747 socklen_t optlen
= sizeof (caller
);
1749 if (getsockopt (fd
, SOL_SOCKET
, SO_PEERCRED
, &caller
, &optlen
) == 0)
1753 const pid_t pid
= 0;
1756 /* It should not be possible to crash the nscd with a silly
1757 request (i.e., a terribly large key). We limit the size to 1kb. */
1758 if (__builtin_expect (req
.key_len
, 1) < 0
1759 || __builtin_expect (req
.key_len
, 1) > MAXKEYLEN
)
1761 if (debug_level
> 0)
1762 dbg_log (_("key length in request too long: %d"), req
.key_len
);
1767 char keybuf
[MAXKEYLEN
];
1769 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd
, keybuf
,
1773 /* Again, this can also mean we would have blocked. */
1774 if (debug_level
> 0)
1775 dbg_log (_("short read while reading request key: %s"),
1776 strerror_r (errno
, buf
, sizeof (buf
)));
1780 if (__builtin_expect (debug_level
, 0) > 0)
1785 handle_request: request received (Version = %d) from PID %ld"),
1786 req
.version
, (long int) pid
);
1790 handle_request: request received (Version = %d)"), req
.version
);
1793 /* Phew, we got all the data, now process it. */
1794 handle_request (fd
, &req
, keybuf
, uid
, pid
);
1802 pthread_mutex_lock (&readylist_lock
);
1804 /* One more thread available. */
1811 static unsigned int nconns
;
1816 pthread_mutex_lock (&readylist_lock
);
1818 /* Find an empty entry in FDLIST. */
1820 for (inner
= 0; inner
< nconns
; ++inner
)
1821 if (fdlist
[inner
].next
== NULL
)
1823 assert (inner
< nconns
);
1825 fdlist
[inner
].fd
= fd
;
1827 if (readylist
== NULL
)
1828 readylist
= fdlist
[inner
].next
= &fdlist
[inner
];
1831 fdlist
[inner
].next
= readylist
->next
;
1832 readylist
= readylist
->next
= &fdlist
[inner
];
1835 bool do_signal
= true;
1836 if (__builtin_expect (nready
== 0, 0))
1841 /* Try to start another thread to help out. */
1843 if (nthreads
< max_nthreads
1844 && pthread_create (&th
, &attr
, nscd_run_worker
,
1845 (void *) (long int) nthreads
) == 0)
1847 /* We got another thread. */
1849 /* The new thread might need a kick. */
1855 pthread_mutex_unlock (&readylist_lock
);
1857 /* Tell one of the worker threads there is work to do. */
1859 pthread_cond_signal (&readylist_cond
);
1863 /* Check whether restarting should happen. */
1865 restart_p (time_t now
)
1867 return (paranoia
&& readylist
== NULL
&& nready
== nthreads
1868 && now
>= restart_time
);
1872 /* Array for times a connection was accepted. */
1873 static time_t *starttime
;
1877 __attribute__ ((__noreturn__
))
1878 main_loop_poll (void)
1880 struct pollfd
*conns
= (struct pollfd
*) xmalloc (nconns
1881 * sizeof (conns
[0]));
1884 conns
[0].events
= POLLRDNORM
;
1886 size_t firstfree
= 1;
1889 if (inotify_fd
!= -1)
1891 conns
[1].fd
= inotify_fd
;
1892 conns
[1].events
= POLLRDNORM
;
1899 size_t idx_nl_status_fd
= 0;
1900 if (nl_status_fd
!= -1)
1902 idx_nl_status_fd
= nused
;
1903 conns
[nused
].fd
= nl_status_fd
;
1904 conns
[nused
].events
= POLLRDNORM
;
1912 /* Wait for any event. We wait at most a couple of seconds so
1913 that we can check whether we should close any of the accepted
1914 connections since we have not received a request. */
1915 #define MAX_ACCEPT_TIMEOUT 30
1916 #define MIN_ACCEPT_TIMEOUT 5
1917 #define MAIN_THREAD_TIMEOUT \
1918 (MAX_ACCEPT_TIMEOUT * 1000 \
1919 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1921 int n
= poll (conns
, nused
, MAIN_THREAD_TIMEOUT
);
1923 time_t now
= time (NULL
);
1925 /* If there is a descriptor ready for reading or there is a new
1926 connection, process this now. */
1929 if (conns
[0].revents
!= 0)
1931 /* We have a new incoming connection. Accept the connection. */
1934 #ifndef __ASSUME_ACCEPT4
1936 if (have_accept4
>= 0)
1939 fd
= TEMP_FAILURE_RETRY (accept4 (sock
, NULL
, NULL
,
1941 #ifndef __ASSUME_ACCEPT4
1942 if (have_accept4
== 0)
1943 have_accept4
= fd
!= -1 || errno
!= ENOSYS
? 1 : -1;
1946 #ifndef __ASSUME_ACCEPT4
1947 if (have_accept4
< 0)
1948 fd
= TEMP_FAILURE_RETRY (accept (sock
, NULL
, NULL
));
1951 /* Use the descriptor if we have not reached the limit. */
1954 if (firstfree
< nconns
)
1956 conns
[firstfree
].fd
= fd
;
1957 conns
[firstfree
].events
= POLLRDNORM
;
1958 starttime
[firstfree
] = now
;
1959 if (firstfree
>= nused
)
1960 nused
= firstfree
+ 1;
1964 while (firstfree
< nused
&& conns
[firstfree
].fd
!= -1);
1967 /* We cannot use the connection so close it. */
1976 if (inotify_fd
!= -1 && conns
[1].fd
== inotify_fd
)
1978 if (conns
[1].revents
!= 0)
1980 bool to_clear
[lastdb
] = { false, };
1984 # define PATH_MAX 1024
1986 struct inotify_event i
;
1987 char buf
[sizeof (struct inotify_event
) + PATH_MAX
];
1992 ssize_t nb
= TEMP_FAILURE_RETRY (read (inotify_fd
, &inev
,
1994 if (nb
< (ssize_t
) sizeof (struct inotify_event
))
1996 if (__builtin_expect (nb
== -1 && errno
!= EAGAIN
,
1999 /* Something went wrong when reading the inotify
2000 data. Better disable inotify. */
2002 disabled inotify after read error %d"),
2014 /* Check which of the files changed. */
2015 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
2017 struct traced_file
*finfo
= dbs
[dbcnt
].traced_files
;
2019 while (finfo
!= NULL
)
2021 if (finfo
->inotify_descr
== inev
.i
.wd
)
2023 to_clear
[dbcnt
] = true;
2024 if (finfo
->call_res_init
)
2029 finfo
= finfo
->next
;
2035 /* Actually perform the cache clearing. */
2036 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
2037 if (to_clear
[dbcnt
])
2039 pthread_mutex_lock (&dbs
[dbcnt
].prune_lock
);
2040 dbs
[dbcnt
].clear_cache
= 1;
2041 pthread_mutex_unlock (&dbs
[dbcnt
].prune_lock
);
2042 pthread_cond_signal (&dbs
[dbcnt
].prune_cond
);
2053 if (idx_nl_status_fd
!= 0 && conns
[idx_nl_status_fd
].revents
!= 0)
2056 /* Read all the data. We do not interpret it here. */
2057 while (TEMP_FAILURE_RETRY (read (nl_status_fd
, buf
,
2058 sizeof (buf
))) != -1)
2061 dbs
[hstdb
].head
->extra_data
[NSCD_HST_IDX_CONF_TIMESTAMP
]
2062 = __bump_nl_timestamp ();
2066 for (size_t cnt
= first
; cnt
< nused
&& n
> 0; ++cnt
)
2067 if (conns
[cnt
].revents
!= 0)
2069 fd_ready (conns
[cnt
].fd
);
2071 /* Clean up the CONNS array. */
2073 if (cnt
< firstfree
)
2075 if (cnt
== nused
- 1)
2078 while (conns
[nused
- 1].fd
== -1);
2084 /* Now find entries which have timed out. */
2087 /* We make the timeout length depend on the number of file
2088 descriptors currently used. */
2089 #define ACCEPT_TIMEOUT \
2090 (MAX_ACCEPT_TIMEOUT \
2091 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
2092 time_t laststart
= now
- ACCEPT_TIMEOUT
;
2094 for (size_t cnt
= nused
- 1; cnt
> 0; --cnt
)
2096 if (conns
[cnt
].fd
!= -1 && starttime
[cnt
] < laststart
)
2098 /* Remove the entry, it timed out. */
2099 (void) close (conns
[cnt
].fd
);
2102 if (cnt
< firstfree
)
2104 if (cnt
== nused
- 1)
2107 while (conns
[nused
- 1].fd
== -1);
2111 if (restart_p (now
))
2119 main_loop_epoll (int efd
)
2121 struct epoll_event ev
= { 0, };
2125 /* Add the socket. */
2126 ev
.events
= EPOLLRDNORM
;
2128 if (epoll_ctl (efd
, EPOLL_CTL_ADD
, sock
, &ev
) == -1)
2129 /* We cannot use epoll. */
2132 # ifdef HAVE_INOTIFY
2133 if (inotify_fd
!= -1)
2135 ev
.events
= EPOLLRDNORM
;
2136 ev
.data
.fd
= inotify_fd
;
2137 if (epoll_ctl (efd
, EPOLL_CTL_ADD
, inotify_fd
, &ev
) == -1)
2138 /* We cannot use epoll. */
2144 # ifdef HAVE_NETLINK
2145 if (nl_status_fd
!= -1)
2147 ev
.events
= EPOLLRDNORM
;
2148 ev
.data
.fd
= nl_status_fd
;
2149 if (epoll_ctl (efd
, EPOLL_CTL_ADD
, nl_status_fd
, &ev
) == -1)
2150 /* We cannot use epoll. */
2157 struct epoll_event revs
[100];
2158 # define nrevs (sizeof (revs) / sizeof (revs[0]))
2160 int n
= epoll_wait (efd
, revs
, nrevs
, MAIN_THREAD_TIMEOUT
);
2162 time_t now
= time (NULL
);
2164 for (int cnt
= 0; cnt
< n
; ++cnt
)
2165 if (revs
[cnt
].data
.fd
== sock
)
2167 /* A new connection. */
2170 # ifndef __ASSUME_ACCEPT4
2172 if (have_accept4
>= 0)
2175 fd
= TEMP_FAILURE_RETRY (accept4 (sock
, NULL
, NULL
,
2177 # ifndef __ASSUME_ACCEPT4
2178 if (have_accept4
== 0)
2179 have_accept4
= fd
!= -1 || errno
!= ENOSYS
? 1 : -1;
2182 # ifndef __ASSUME_ACCEPT4
2183 if (have_accept4
< 0)
2184 fd
= TEMP_FAILURE_RETRY (accept (sock
, NULL
, NULL
));
2187 /* Use the descriptor if we have not reached the limit. */
2190 /* Try to add the new descriptor. */
2193 || epoll_ctl (efd
, EPOLL_CTL_ADD
, fd
, &ev
) == -1)
2194 /* The descriptor is too large or something went
2195 wrong. Close the descriptor. */
2199 /* Remember when we accepted the connection. */
2200 starttime
[fd
] = now
;
2209 # ifdef HAVE_INOTIFY
2210 else if (revs
[cnt
].data
.fd
== inotify_fd
)
2212 bool to_clear
[lastdb
] = { false, };
2215 struct inotify_event i
;
2216 char buf
[sizeof (struct inotify_event
) + PATH_MAX
];
2221 ssize_t nb
= TEMP_FAILURE_RETRY (read (inotify_fd
, &inev
,
2223 if (nb
< (ssize_t
) sizeof (struct inotify_event
))
2225 if (__builtin_expect (nb
== -1 && errno
!= EAGAIN
, 0))
2227 /* Something went wrong when reading the inotify
2228 data. Better disable inotify. */
2229 dbg_log (_("disabled inotify after read error %d"),
2231 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, inotify_fd
,
2239 /* Check which of the files changed. */
2240 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
2242 struct traced_file
*finfo
= dbs
[dbcnt
].traced_files
;
2244 while (finfo
!= NULL
)
2246 if (finfo
->inotify_descr
== inev
.i
.wd
)
2248 to_clear
[dbcnt
] = true;
2249 if (finfo
->call_res_init
)
2254 finfo
= finfo
->next
;
2260 /* Actually perform the cache clearing. */
2261 for (size_t dbcnt
= 0; dbcnt
< lastdb
; ++dbcnt
)
2262 if (to_clear
[dbcnt
])
2264 pthread_mutex_lock (&dbs
[dbcnt
].prune_lock
);
2265 dbs
[dbcnt
].clear_cache
= 1;
2266 pthread_mutex_unlock (&dbs
[dbcnt
].prune_lock
);
2267 pthread_cond_signal (&dbs
[dbcnt
].prune_cond
);
2271 # ifdef HAVE_NETLINK
2272 else if (revs
[cnt
].data
.fd
== nl_status_fd
)
2275 /* Read all the data. We do not interpret it here. */
2276 while (TEMP_FAILURE_RETRY (read (nl_status_fd
, buf
,
2277 sizeof (buf
))) != -1)
2280 __bump_nl_timestamp ();
2285 /* Remove the descriptor from the epoll descriptor. */
2286 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, revs
[cnt
].data
.fd
, NULL
);
2288 /* Get a worker to handle the request. */
2289 fd_ready (revs
[cnt
].data
.fd
);
2291 /* Reset the time. */
2292 starttime
[revs
[cnt
].data
.fd
] = 0;
2293 if (revs
[cnt
].data
.fd
== highest
)
2296 while (highest
> 0 && starttime
[highest
] == 0);
2301 /* Now look for descriptors of accepted connections on which no
2302 request has arrived for too long. */
2303 time_t laststart
= now
- ACCEPT_TIMEOUT
;
2304 assert (starttime
[sock
] == 0);
2305 assert (inotify_fd
== -1 || starttime
[inotify_fd
] == 0);
2306 assert (nl_status_fd
== -1 || starttime
[nl_status_fd
] == 0);
2307 for (int cnt
= highest
; cnt
> STDERR_FILENO
; --cnt
)
2308 if (starttime
[cnt
] != 0 && starttime
[cnt
] < laststart
)
2310 /* We are waiting for this one for too long. Close it. */
2311 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, cnt
, NULL
);
2319 else if (cnt
!= sock
&& starttime
[cnt
] == 0 && cnt
== highest
)
2322 if (restart_p (now
))
2329 /* Start all the threads we want. The initial process is thread no. 1. */
2331 start_threads (void)
2333 /* Initialize the condition variable we will use. The only
2334 non-standard attribute we might use is the clock selection. */
2335 pthread_condattr_t condattr
;
2336 pthread_condattr_init (&condattr
);
2338 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2339 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2340 /* Determine whether the monotonic clock is available. */
2341 struct timespec dummy
;
2342 # if _POSIX_MONOTONIC_CLOCK == 0
2343 if (sysconf (_SC_MONOTONIC_CLOCK
) > 0)
2345 # if _POSIX_CLOCK_SELECTION == 0
2346 if (sysconf (_SC_CLOCK_SELECTION
) > 0)
2348 if (clock_getres (CLOCK_MONOTONIC
, &dummy
) == 0
2349 && pthread_condattr_setclock (&condattr
, CLOCK_MONOTONIC
) == 0)
2350 timeout_clock
= CLOCK_MONOTONIC
;
2353 /* Create the attribute for the threads. They are all created detached. */
2355 pthread_attr_init (&attr
);
2356 pthread_attr_setdetachstate (&attr
, PTHREAD_CREATE_DETACHED
);
2357 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2358 pthread_attr_setstacksize (&attr
, NSCD_THREAD_STACKSIZE
);
2360 /* We allow less than LASTDB threads only for debugging. */
2361 if (debug_level
== 0)
2362 nthreads
= MAX (nthreads
, lastdb
);
2364 /* Create the threads which prune the databases. */
2365 // XXX Ideally this work would be done by some of the worker threads.
2366 // XXX But this is problematic since we would need to be able to wake
2367 // XXX them up explicitly as well as part of the group handling the
2368 // XXX ready-list. This requires an operation where we can wait on
2369 // XXX two conditional variables at the same time. This operation
2370 // XXX does not exist (yet).
2371 for (long int i
= 0; i
< lastdb
; ++i
)
2373 /* Initialize the condition variable. */
2374 if (pthread_cond_init (&dbs
[i
].prune_cond
, &condattr
) != 0)
2376 dbg_log (_("could not initialize conditional variable"));
2382 && pthread_create (&th
, &attr
, nscd_run_prune
, (void *) i
) != 0)
2384 dbg_log (_("could not start clean-up thread; terminating"));
2389 pthread_condattr_destroy (&condattr
);
2391 for (long int i
= 0; i
< nthreads
; ++i
)
2394 if (pthread_create (&th
, &attr
, nscd_run_worker
, NULL
) != 0)
2398 dbg_log (_("could not start any worker thread; terminating"));
2406 /* Determine how much room for descriptors we should initially
2407 allocate. This might need to change later if we cap the number with MAXCONN. */
2409 const long int nfds
= sysconf (_SC_OPEN_MAX
);
2411 #define MAXCONN 16384
2412 if (nfds
== -1 || nfds
> MAXCONN
)
2414 else if (nfds
< MINCONN
)
2419 /* We need memory to pass descriptors on to the worker threads. */
2420 fdlist
= (struct fdlist
*) xcalloc (nconns
, sizeof (fdlist
[0]));
2421 /* Array to keep track when connection was accepted. */
2422 starttime
= (time_t *) xcalloc (nconns
, sizeof (starttime
[0]));
2424 /* In the main thread we execute the loop which handles incoming connections. */
2427 int efd
= epoll_create (100);
2430 main_loop_epoll (efd
);
2439 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2440 this function is called, we are not listening on the nscd socket yet so
2441 we can just use the ordinary lookup functions without causing a lockup */
2443 begin_drop_privileges (void)
2445 struct passwd
*pwd
= getpwnam (server_user
);
2449 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2450 error (EXIT_FAILURE
, 0, _("Failed to run nscd as user '%s'"),
2454 server_uid
= pwd
->pw_uid
;
2455 server_gid
= pwd
->pw_gid
;
2457 /* Save the old UID/GID if we have to change back. */
2460 old_uid
= getuid ();
2461 old_gid
= getgid ();
2464 if (getgrouplist (server_user
, server_gid
, NULL
, &server_ngroups
) == 0)
2466 /* This really must never happen. */
2467 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2468 error (EXIT_FAILURE
, errno
, _("initial getgrouplist failed"));
2471 server_groups
= (gid_t
*) xmalloc (server_ngroups
* sizeof (gid_t
));
2473 if (getgrouplist (server_user
, server_gid
, server_groups
, &server_ngroups
)
2476 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2477 error (EXIT_FAILURE
, errno
, _("getgrouplist failed"));
2482 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2483 run nscd as the user specified in the configuration file. */
2485 finish_drop_privileges (void)
2487 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2488 /* We need to preserve the capabilities to connect to the audit daemon. */
2489 cap_t new_caps
= preserve_capabilities ();
2492 if (setgroups (server_ngroups
, server_groups
) == -1)
2494 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2495 error (EXIT_FAILURE
, errno
, _("setgroups failed"));
2500 res
= setresgid (server_gid
, server_gid
, old_gid
);
2502 res
= setgid (server_gid
);
2505 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2511 res
= setresuid (server_uid
, server_uid
, old_uid
);
2513 res
= setuid (server_uid
);
2516 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2521 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2522 /* Remove the temporary capabilities. */
2523 install_real_capabilities (new_caps
);