1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2003, 2004, 2005 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
35 #include <arpa/inet.h>
37 # include <sys/epoll.h>
40 #include <sys/param.h>
42 #include <sys/socket.h>
51 /* Number of bytes of data we initially reserve for each hash table bucket. */
52 #define DEFAULT_DATASIZE_PER_BUCKET 1024
55 /* Wrapper functions with error checking for standard functions. */
56 extern void *xmalloc (size_t n
);
57 extern void *xcalloc (size_t n
, size_t s
);
58 extern void *xrealloc (void *o
, size_t n
);
60 /* Support to run nscd as an unprivileged user */
61 const char *server_user
;
62 static uid_t server_uid
;
63 static gid_t server_gid
;
64 const char *stat_user
;
66 static gid_t
*server_groups
;
70 static int server_ngroups
;
72 static pthread_attr_t attr
;
74 static void begin_drop_privileges (void);
75 static void finish_drop_privileges (void);
77 /* Map request type to a string. */
78 const char *serv2str
[LASTREQ
] =
80 [GETPWBYNAME
] = "GETPWBYNAME",
81 [GETPWBYUID
] = "GETPWBYUID",
82 [GETGRBYNAME
] = "GETGRBYNAME",
83 [GETGRBYGID
] = "GETGRBYGID",
84 [GETHOSTBYNAME
] = "GETHOSTBYNAME",
85 [GETHOSTBYNAMEv6
] = "GETHOSTBYNAMEv6",
86 [GETHOSTBYADDR
] = "GETHOSTBYADDR",
87 [GETHOSTBYADDRv6
] = "GETHOSTBYADDRv6",
88 [SHUTDOWN
] = "SHUTDOWN",
89 [GETSTAT
] = "GETSTAT",
90 [INVALIDATE
] = "INVALIDATE",
91 [GETFDPW
] = "GETFDPW",
92 [GETFDGR
] = "GETFDGR",
93 [GETFDHST
] = "GETFDHST",
95 [INITGROUPS
] = "INITGROUPS"
98 /* The control data structures for the services. */
99 struct database_dyn dbs
[lastdb
] =
102 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
107 .filename
= "/etc/passwd",
108 .db_filename
= _PATH_NSCD_PASSWD_DB
,
109 .disabled_iov
= &pwd_iov_disabled
,
117 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
122 .filename
= "/etc/group",
123 .db_filename
= _PATH_NSCD_GROUP_DB
,
124 .disabled_iov
= &grp_iov_disabled
,
132 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
137 .filename
= "/etc/hosts",
138 .db_filename
= _PATH_NSCD_HOSTS_DB
,
139 .disabled_iov
= &hst_iov_disabled
,
149 /* Mapping of request type to database. */
150 static struct database_dyn
*const serv2db
[LASTREQ
] =
152 [GETPWBYNAME
] = &dbs
[pwddb
],
153 [GETPWBYUID
] = &dbs
[pwddb
],
154 [GETGRBYNAME
] = &dbs
[grpdb
],
155 [GETGRBYGID
] = &dbs
[grpdb
],
156 [GETHOSTBYNAME
] = &dbs
[hstdb
],
157 [GETHOSTBYNAMEv6
] = &dbs
[hstdb
],
158 [GETHOSTBYADDR
] = &dbs
[hstdb
],
159 [GETHOSTBYADDRv6
] = &dbs
[hstdb
],
160 [GETFDPW
] = &dbs
[pwddb
],
161 [GETFDGR
] = &dbs
[grpdb
],
162 [GETFDHST
] = &dbs
[hstdb
],
163 [GETAI
] = &dbs
[hstdb
],
164 [INITGROUPS
] = &dbs
[grpdb
]
168 /* Number of seconds between two cache pruning runs. */
169 #define CACHE_PRUNE_INTERVAL 15
172 /* Initial number of threads to use. */
174 /* Maximum number of threads to use. */
175 int max_nthreads
= 32;
177 /* Socket for incoming connections. */
180 /* Number of times clients had to wait. */
181 unsigned long int client_queued
;
184 /* Initialize database information structures. */
188 struct sockaddr_un sock_addr
;
191 /* Secure mode and unprivileged mode are incompatible */
192 if (server_user
!= NULL
&& secure_in_use
)
194 dbg_log (_("Cannot run nscd in secure mode as unprivileged user"));
198 /* Look up unprivileged uid/gid/groups before we start listening on the
200 if (server_user
!= NULL
)
201 begin_drop_privileges ();
204 /* No configuration for this value, assume a default. */
205 nthreads
= 2 * lastdb
;
207 for (cnt
= 0; cnt
< lastdb
; ++cnt
)
208 if (dbs
[cnt
].enabled
)
210 pthread_rwlock_init (&dbs
[cnt
].lock
, NULL
);
211 pthread_mutex_init (&dbs
[cnt
].memlock
, NULL
);
213 if (dbs
[cnt
].persistent
)
215 /* Try to open the appropriate file on disk. */
216 int fd
= open (dbs
[cnt
].db_filename
, O_RDWR
);
222 struct database_pers_head head
;
223 ssize_t n
= TEMP_FAILURE_RETRY (read (fd
, &head
,
225 if (n
!= sizeof (head
) || fstat64 (fd
, &st
) != 0)
228 dbg_log (_("invalid persistent database file \"%s\": %s"),
229 dbs
[cnt
].db_filename
, strerror (errno
));
230 dbs
[cnt
].persistent
= 0;
232 else if (head
.module
== 0 && head
.data_size
== 0)
234 /* The file has been created, but the head has not been
235 initialized yet. Remove the old file. */
236 unlink (dbs
[cnt
].db_filename
);
238 else if (head
.header_size
!= (int) sizeof (head
))
240 dbg_log (_("invalid persistent database file \"%s\": %s"),
241 dbs
[cnt
].db_filename
,
242 _("header size does not match"));
243 dbs
[cnt
].persistent
= 0;
245 else if ((total
= (sizeof (head
)
246 + roundup (head
.module
* sizeof (ref_t
),
251 dbg_log (_("invalid persistent database file \"%s\": %s"),
252 dbs
[cnt
].db_filename
,
253 _("file size does not match"));
254 dbs
[cnt
].persistent
= 0;
256 else if ((mem
= mmap (NULL
, total
, PROT_READ
| PROT_WRITE
,
257 MAP_SHARED
, fd
, 0)) == MAP_FAILED
)
261 /* Success. We have the database. */
263 dbs
[cnt
].memsize
= total
;
264 dbs
[cnt
].data
= (char *)
265 &dbs
[cnt
].head
->array
[roundup (dbs
[cnt
].head
->module
,
266 ALIGN
/ sizeof (ref_t
))];
267 dbs
[cnt
].mmap_used
= true;
269 if (dbs
[cnt
].suggested_module
> head
.module
)
270 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
275 /* We also need a read-only descriptor. */
278 dbs
[cnt
].ro_fd
= open (dbs
[cnt
].db_filename
, O_RDONLY
);
279 if (dbs
[cnt
].ro_fd
== -1)
281 cannot create read-only descriptor for \"%s\"; no mmap"),
282 dbs
[cnt
].db_filename
);
285 // XXX Shall we test whether the descriptors actually
286 // XXX point to the same file?
289 /* Close the file descriptors in case something went
290 wrong in which case the variables have not been
297 if (dbs
[cnt
].head
== NULL
)
299 /* No database loaded. Allocate the data structure,
301 struct database_pers_head head
;
302 size_t total
= (sizeof (head
)
303 + roundup (dbs
[cnt
].suggested_module
304 * sizeof (ref_t
), ALIGN
)
305 + (dbs
[cnt
].suggested_module
306 * DEFAULT_DATASIZE_PER_BUCKET
));
308 /* Try to create the database. If we do not need a
309 persistent database create a temporary file. */
312 if (dbs
[cnt
].persistent
)
314 fd
= open (dbs
[cnt
].db_filename
,
315 O_RDWR
| O_CREAT
| O_EXCL
| O_TRUNC
,
317 if (fd
!= -1 && dbs
[cnt
].shared
)
318 ro_fd
= open (dbs
[cnt
].db_filename
, O_RDONLY
);
322 char fname
[] = _PATH_NSCD_XYZ_DB_TMP
;
323 fd
= mkstemp (fname
);
325 /* We do not need the file name anymore after we
326 opened another file descriptor in read-only mode. */
330 ro_fd
= open (fname
, O_RDONLY
);
340 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
341 dbnames
[cnt
], dbs
[cnt
].db_filename
);
342 // XXX Correct way to terminate?
346 if (dbs
[cnt
].persistent
)
347 dbg_log (_("cannot create %s; no persistent database used"),
348 dbs
[cnt
].db_filename
);
350 dbg_log (_("cannot create %s; no sharing possible"),
351 dbs
[cnt
].db_filename
);
353 dbs
[cnt
].persistent
= 0;
354 // XXX remember: no mmap
358 /* Tell the user if we could not create the read-only
360 if (ro_fd
== -1 && dbs
[cnt
].shared
)
362 cannot create read-only descriptor for \"%s\"; no mmap"),
363 dbs
[cnt
].db_filename
);
365 /* Before we create the header, initialize the hash
366 table. So that if we get interrupted if writing
367 the header we can recognize a partially initialized
369 size_t ps
= sysconf (_SC_PAGESIZE
);
371 assert (~ENDREF
== 0);
372 memset (tmpbuf
, '\xff', ps
);
374 size_t remaining
= dbs
[cnt
].suggested_module
* sizeof (ref_t
);
375 off_t offset
= sizeof (head
);
378 if (offset
% ps
!= 0)
380 towrite
= MIN (remaining
, ps
- (offset
% ps
));
381 pwrite (fd
, tmpbuf
, towrite
, offset
);
383 remaining
-= towrite
;
386 while (remaining
> ps
)
388 pwrite (fd
, tmpbuf
, ps
, offset
);
394 pwrite (fd
, tmpbuf
, remaining
, offset
);
396 /* Create the header of the file. */
397 struct database_pers_head head
=
399 .version
= DB_VERSION
,
400 .header_size
= sizeof (head
),
401 .module
= dbs
[cnt
].suggested_module
,
402 .data_size
= (dbs
[cnt
].suggested_module
403 * DEFAULT_DATASIZE_PER_BUCKET
),
408 if ((TEMP_FAILURE_RETRY (write (fd
, &head
, sizeof (head
)))
410 || ftruncate (fd
, total
) != 0
411 || (mem
= mmap (NULL
, total
, PROT_READ
| PROT_WRITE
,
412 MAP_SHARED
, fd
, 0)) == MAP_FAILED
)
414 unlink (dbs
[cnt
].db_filename
);
415 dbg_log (_("cannot write to database file %s: %s"),
416 dbs
[cnt
].db_filename
, strerror (errno
));
417 dbs
[cnt
].persistent
= 0;
423 dbs
[cnt
].data
= (char *)
424 &dbs
[cnt
].head
->array
[roundup (dbs
[cnt
].head
->module
,
425 ALIGN
/ sizeof (ref_t
))];
426 dbs
[cnt
].memsize
= total
;
427 dbs
[cnt
].mmap_used
= true;
429 /* Remember the descriptors. */
431 dbs
[cnt
].ro_fd
= ro_fd
;
444 && ((dbs
[cnt
].wr_fd
!= -1
445 && fcntl (dbs
[cnt
].wr_fd
, F_SETFD
, FD_CLOEXEC
) == -1)
446 || (dbs
[cnt
].ro_fd
!= -1
447 && fcntl (dbs
[cnt
].ro_fd
, F_SETFD
, FD_CLOEXEC
) == -1)))
450 cannot set socket to close on exec: %s; disabling paranoia mode"),
455 if (dbs
[cnt
].head
== NULL
)
457 /* We do not use the persistent database. Just
458 create an in-memory data structure. */
459 assert (! dbs
[cnt
].persistent
);
461 dbs
[cnt
].head
= xmalloc (sizeof (struct database_pers_head
)
462 + (dbs
[cnt
].suggested_module
464 memset (dbs
[cnt
].head
, '\0', sizeof (dbs
[cnt
].head
));
465 assert (~ENDREF
== 0);
466 memset (dbs
[cnt
].head
->array
, '\xff',
467 dbs
[cnt
].suggested_module
* sizeof (ref_t
));
468 dbs
[cnt
].head
->module
= dbs
[cnt
].suggested_module
;
469 dbs
[cnt
].head
->data_size
= (DEFAULT_DATASIZE_PER_BUCKET
470 * dbs
[cnt
].head
->module
);
471 dbs
[cnt
].data
= xmalloc (dbs
[cnt
].head
->data_size
);
472 dbs
[cnt
].head
->first_free
= 0;
475 assert (dbs
[cnt
].ro_fd
== -1);
478 if (dbs
[cnt
].check_file
)
480 /* We need the modification date of the file. */
483 if (stat (dbs
[cnt
].filename
, &st
) < 0)
485 /* We cannot stat() the file, disable file checking. */
486 dbg_log (_("cannot stat() file `%s': %s"),
487 dbs
[cnt
].filename
, strerror (errno
));
488 dbs
[cnt
].check_file
= 0;
491 dbs
[cnt
].file_mtime
= st
.st_mtime
;
495 /* Create the socket. */
496 sock
= socket (AF_UNIX
, SOCK_STREAM
, 0);
499 dbg_log (_("cannot open socket: %s"), strerror (errno
));
502 /* Bind a name to the socket. */
503 sock_addr
.sun_family
= AF_UNIX
;
504 strcpy (sock_addr
.sun_path
, _PATH_NSCDSOCKET
);
505 if (bind (sock
, (struct sockaddr
*) &sock_addr
, sizeof (sock_addr
)) < 0)
507 dbg_log ("%s: %s", _PATH_NSCDSOCKET
, strerror (errno
));
511 /* We don't want to get stuck on accept. */
512 int fl
= fcntl (sock
, F_GETFL
);
513 if (fl
== -1 || fcntl (sock
, F_SETFL
, fl
| O_NONBLOCK
) == -1)
515 dbg_log (_("cannot change socket to nonblocking mode: %s"),
520 /* The descriptor needs to be closed on exec. */
521 if (paranoia
&& fcntl (sock
, F_SETFD
, FD_CLOEXEC
) == -1)
523 dbg_log (_("cannot set socket to close on exec: %s"),
528 /* Set permissions for the socket. */
529 chmod (_PATH_NSCDSOCKET
, DEFFILEMODE
);
531 /* Set the socket up to accept connections. */
532 if (listen (sock
, SOMAXCONN
) < 0)
534 dbg_log (_("cannot enable socket to accept connections: %s"),
539 /* Change to unprivileged uid/gid/groups if specified in config file */
540 if (server_user
!= NULL
)
541 finish_drop_privileges ();
545 /* Close the connections. */
554 invalidate_cache (char *key
)
558 if (strcmp (key
, "passwd") == 0)
560 else if (strcmp (key
, "group") == 0)
562 else if (__builtin_expect (strcmp (key
, "hosts"), 0) == 0)
566 /* Re-initialize the resolver. resolv.conf might have changed. */
572 if (dbs
[number
].enabled
)
573 prune_cache (&dbs
[number
], LONG_MAX
);
579 send_ro_fd (struct database_dyn
*db
, char *key
, int fd
)
581 /* If we do not have a read-only file descriptor do nothing. */
585 /* We need to send some data along with the descriptor. */
587 iov
[0].iov_base
= key
;
588 iov
[0].iov_len
= strlen (key
) + 1;
590 /* Prepare the control message to transfer the descriptor. */
591 char buf
[CMSG_SPACE (sizeof (int))];
592 struct msghdr msg
= { .msg_iov
= iov
, .msg_iovlen
= 1,
593 .msg_control
= buf
, .msg_controllen
= sizeof (buf
) };
594 struct cmsghdr
*cmsg
= CMSG_FIRSTHDR (&msg
);
596 cmsg
->cmsg_level
= SOL_SOCKET
;
597 cmsg
->cmsg_type
= SCM_RIGHTS
;
598 cmsg
->cmsg_len
= CMSG_LEN (sizeof (int));
600 *(int *) CMSG_DATA (cmsg
) = db
->ro_fd
;
602 msg
.msg_controllen
= cmsg
->cmsg_len
;
604 /* Send the control message. We repeat when we are interrupted but
605 everything else is ignored. */
607 # define MSG_NOSIGNAL 0
609 (void) TEMP_FAILURE_RETRY (sendmsg (fd
, &msg
, MSG_NOSIGNAL
));
611 if (__builtin_expect (debug_level
> 0, 0))
612 dbg_log (_("provide access to FD %d, for %s"), db
->ro_fd
, key
);
614 #endif /* SCM_RIGHTS */
617 /* Handle new request. */
619 handle_request (int fd
, request_header
*req
, void *key
, uid_t uid
)
621 if (__builtin_expect (req
->version
, NSCD_VERSION
) != NSCD_VERSION
)
625 cannot handle old request version %d; current version is %d"),
626 req
->version
, NSCD_VERSION
);
630 /* Make the SELinux check before we go on to the standard checks. We
631 need to verify that the request type is valid, since it has not
632 yet been checked at this point. */
634 && __builtin_expect (req
->type
, GETPWBYNAME
) >= GETPWBYNAME
635 && __builtin_expect (req
->type
, LASTREQ
) < LASTREQ
636 && nscd_request_avc_has_perm (fd
, req
->type
) != 0)
639 struct database_dyn
*db
= serv2db
[req
->type
];
641 // XXX Clean up so that each new command need not introduce a
642 // XXX new conditional.
643 if ((__builtin_expect (req
->type
, GETPWBYNAME
) >= GETPWBYNAME
644 && __builtin_expect (req
->type
, LASTDBREQ
) <= LASTDBREQ
)
645 || req
->type
== GETAI
|| req
->type
== INITGROUPS
)
647 if (__builtin_expect (debug_level
, 0) > 0)
649 if (req
->type
== GETHOSTBYADDR
|| req
->type
== GETHOSTBYADDRv6
)
651 char buf
[INET6_ADDRSTRLEN
];
653 dbg_log ("\t%s (%s)", serv2str
[req
->type
],
654 inet_ntop (req
->type
== GETHOSTBYADDR
655 ? AF_INET
: AF_INET6
,
656 key
, buf
, sizeof (buf
)));
659 dbg_log ("\t%s (%s)", serv2str
[req
->type
], (char *) key
);
662 /* Is this service enabled? */
665 /* No, send the prepared record. */
666 if (TEMP_FAILURE_RETRY (write (fd
, db
->disabled_iov
->iov_base
,
667 db
->disabled_iov
->iov_len
))
668 != (ssize_t
) db
->disabled_iov
->iov_len
669 && __builtin_expect (debug_level
, 0) > 0)
671 /* We have problems sending the result. */
673 dbg_log (_("cannot write result: %s"),
674 strerror_r (errno
, buf
, sizeof (buf
)));
680 /* Be sure we can read the data. */
681 if (__builtin_expect (pthread_rwlock_tryrdlock (&db
->lock
) != 0, 0))
683 ++db
->head
->rdlockdelayed
;
684 pthread_rwlock_rdlock (&db
->lock
);
687 /* See whether we can handle it from the cache. */
688 struct datahead
*cached
;
689 cached
= (struct datahead
*) cache_search (req
->type
, key
, req
->key_len
,
693 /* Hurray it's in the cache. */
694 if (TEMP_FAILURE_RETRY (write (fd
, cached
->data
, cached
->recsize
))
696 && __builtin_expect (debug_level
, 0) > 0)
698 /* We have problems sending the result. */
700 dbg_log (_("cannot write result: %s"),
701 strerror_r (errno
, buf
, sizeof (buf
)));
704 pthread_rwlock_unlock (&db
->lock
);
709 pthread_rwlock_unlock (&db
->lock
);
711 else if (__builtin_expect (debug_level
, 0) > 0)
713 if (req
->type
== INVALIDATE
)
714 dbg_log ("\t%s (%s)", serv2str
[req
->type
], (char *) key
);
716 dbg_log ("\t%s", serv2str
[req
->type
]);
719 /* Handle the request. */
723 addpwbyname (db
, fd
, req
, key
, uid
);
727 addpwbyuid (db
, fd
, req
, key
, uid
);
731 addgrbyname (db
, fd
, req
, key
, uid
);
735 addgrbygid (db
, fd
, req
, key
, uid
);
739 addhstbyname (db
, fd
, req
, key
, uid
);
742 case GETHOSTBYNAMEv6
:
743 addhstbynamev6 (db
, fd
, req
, key
, uid
);
747 addhstbyaddr (db
, fd
, req
, key
, uid
);
750 case GETHOSTBYADDRv6
:
751 addhstbyaddrv6 (db
, fd
, req
, key
, uid
);
755 addhstai (db
, fd
, req
, key
, uid
);
759 addinitgroups (db
, fd
, req
, key
, uid
);
767 /* Get the caller's credentials. */
770 socklen_t optlen
= sizeof (caller
);
772 if (getsockopt (fd
, SOL_SOCKET
, SO_PEERCRED
, &caller
, &optlen
) < 0)
776 dbg_log (_("error getting callers id: %s"),
777 strerror_r (errno
, buf
, sizeof (buf
)));
783 /* Some systems have no SO_PEERCRED implementation. They don't
784 care about security so we don't either. */
789 /* Accept shutdown, getstat and invalidate only from root. For
790 the stat call also allow the user specified in the config file. */
791 if (req
->type
== GETSTAT
)
793 if (uid
== 0 || uid
== stat_uid
)
794 send_stats (fd
, dbs
);
798 if (req
->type
== INVALIDATE
)
799 invalidate_cache (key
);
801 termination_handler (0);
809 send_ro_fd (serv2db
[req
->type
], key
, fd
);
814 /* Ignore the command, it's nothing we know. */
820 /* Restart the process. */
824 /* First determine the parameters. We do not use the parameters
825 passed to main() since in case nscd is started by running the
826 dynamic linker this will not work. Yes, this is not the usual
827 case but nscd is part of glibc and we occasionally do this. */
828 size_t buflen
= 1024;
829 char *buf
= alloca (buflen
);
831 int fd
= open ("/proc/self/cmdline", O_RDONLY
);
835 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
844 ssize_t n
= TEMP_FAILURE_RETRY (read (fd
, buf
+ readlen
,
849 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
859 if (readlen
< buflen
)
862 /* We might have to extend the buffer. */
863 size_t old_buflen
= buflen
;
864 char *newp
= extend_alloca (buf
, buflen
, 2 * buflen
);
865 buf
= memmove (newp
, buf
, old_buflen
);
870 /* Parse the command line. Worst case scenario: every two
871 characters form one parameter (one character plus NUL). */
872 char **argv
= alloca ((readlen
/ 2 + 1) * sizeof (argv
[0]));
876 while (cp
< buf
+ readlen
)
879 cp
= (char *) rawmemchr (cp
, '\0') + 1;
883 /* Second, change back to the old user if we changed it. */
884 if (server_user
!= NULL
)
886 if (setuid (old_uid
) != 0)
889 cannot change to old UID: %s; disabling paranoia mode"),
896 if (setgid (old_gid
) != 0)
899 cannot change to old GID: %s; disabling paranoia mode"),
908 /* Next change back to the old working directory. */
909 if (chdir (oldcwd
) == -1)
912 cannot change to old working directory: %s; disabling paranoia mode"),
915 if (server_user
!= NULL
)
924 /* Synchronize memory. */
925 for (int cnt
= 0; cnt
< lastdb
; ++cnt
)
927 /* Make sure nobody keeps using the database. */
928 dbs
[cnt
].head
->timestamp
= 0;
930 if (dbs
[cnt
].persistent
)
932 msync (dbs
[cnt
].head
, dbs
[cnt
].memsize
, MS_ASYNC
);
935 /* The preparations are done. */
936 execv ("/proc/self/exe", argv
);
938 /* If we come here, we will never be able to re-exec. */
939 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
942 if (server_user
!= NULL
)
952 /* List of file descriptors. */
958 /* Memory allocated for the list. */
959 static struct fdlist
*fdlist
;
960 /* List of currently ready-to-read file descriptors. */
961 static struct fdlist
*readylist
;
963 /* Conditional variable and mutex to signal availability of entries in
964 READYLIST. The condvar is initialized dynamically since we might
965 use a different clock depending on availability. */
966 static pthread_cond_t readylist_cond
;
967 static pthread_mutex_t readylist_lock
= PTHREAD_MUTEX_INITIALIZER
;
969 /* The clock to use with the condvar. */
970 static clockid_t timeout_clock
= CLOCK_REALTIME
;
972 /* Number of threads ready to handle the READYLIST. */
973 static unsigned long int nready
;
976 /* This is the main loop. It is replicated in different threads but the
977 `poll' call makes sure only one thread handles an incoming connection. */
979 __attribute__ ((__noreturn__
))
982 const long int my_number
= (long int) p
;
983 const int run_prune
= my_number
< lastdb
&& dbs
[my_number
].enabled
;
984 struct timespec prune_ts
;
990 setup_thread (&dbs
[my_number
]);
992 /* We are running. */
993 dbs
[my_number
].head
->timestamp
= time (NULL
);
995 if (clock_gettime (timeout_clock
, &prune_ts
) == -1)
996 /* Should never happen. */
999 /* Compute timeout time. */
1000 prune_ts
.tv_sec
+= CACHE_PRUNE_INTERVAL
;
1003 /* Initial locking. */
1004 pthread_mutex_lock (&readylist_lock
);
1006 /* One more thread available. */
1011 while (readylist
== NULL
)
1015 /* Wait, but not forever. */
1016 to
= pthread_cond_timedwait (&readylist_cond
, &readylist_lock
,
1019 /* If we were woken and there is no work to be done,
1020 just start pruning. */
1021 if (readylist
== NULL
&& to
== ETIMEDOUT
)
1024 pthread_mutex_unlock (&readylist_lock
);
1029 /* No need to timeout. */
1030 pthread_cond_wait (&readylist_cond
, &readylist_lock
);
1033 struct fdlist
*it
= readylist
->next
;
1034 if (readylist
->next
== readylist
)
1035 /* Just one entry on the list. */
1038 readylist
->next
= it
->next
;
1040 /* Extract the information and mark the record ready to be used
1045 /* One more thread available. */
1048 /* We are done with the list. */
1049 pthread_mutex_unlock (&readylist_lock
);
1051 /* We do not want to block on a short read or so. */
1052 int fl
= fcntl (fd
, F_GETFL
);
1053 if (fl
== -1 || fcntl (fd
, F_SETFL
, fl
| O_NONBLOCK
) == -1)
1056 /* Now read the request. */
1058 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd
, &req
, sizeof (req
)))
1059 != sizeof (req
), 0))
1061 /* We failed to read data. Note that this also might mean we
1062 failed because we would have blocked. */
1063 if (debug_level
> 0)
1064 dbg_log (_("short read while reading request: %s"),
1065 strerror_r (errno
, buf
, sizeof (buf
)));
1069 /* Check whether this is a valid request type. */
1070 if (req
.type
< GETPWBYNAME
|| req
.type
>= LASTREQ
)
1073 /* Some systems have no SO_PEERCRED implementation. They don't
1074 care about security so we don't either. */
1081 struct ucred caller
;
1082 socklen_t optlen
= sizeof (caller
);
1084 if (getsockopt (fd
, SOL_SOCKET
, SO_PEERCRED
, &caller
, &optlen
) < 0)
1086 dbg_log (_("error getting callers id: %s"),
1087 strerror_r (errno
, buf
, sizeof (buf
)));
1091 if (req
.type
< GETPWBYNAME
|| req
.type
> LASTDBREQ
1092 || serv2db
[req
.type
]->secure
)
1097 else if (__builtin_expect (debug_level
> 0, 0))
1099 struct ucred caller
;
1100 socklen_t optlen
= sizeof (caller
);
1102 if (getsockopt (fd
, SOL_SOCKET
, SO_PEERCRED
, &caller
, &optlen
) == 0)
1107 /* It should not be possible to crash the nscd with a silly
1108 request (i.e., a terribly large key). We limit the size to 1kb. */
1109 #define MAXKEYLEN 1024
1110 if (__builtin_expect (req
.key_len
, 1) < 0
1111 || __builtin_expect (req
.key_len
, 1) > MAXKEYLEN
)
1113 if (debug_level
> 0)
1114 dbg_log (_("key length in request too long: %d"), req
.key_len
);
1119 char keybuf
[MAXKEYLEN
];
1121 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd
, keybuf
,
1125 /* Again, this can also mean we would have blocked. */
1126 if (debug_level
> 0)
1127 dbg_log (_("short read while reading request key: %s"),
1128 strerror_r (errno
, buf
, sizeof (buf
)));
1132 if (__builtin_expect (debug_level
, 0) > 0)
1137 handle_request: request received (Version = %d) from PID %ld"),
1138 req
.version
, (long int) pid
);
1142 handle_request: request received (Version = %d)"), req
.version
);
1145 /* Phew, we got all the data, now process it. */
1146 handle_request (fd
, &req
, keybuf
, uid
);
1153 /* Check whether we should be pruning the cache. */
1154 assert (run_prune
|| to
== 0);
1155 if (to
== ETIMEDOUT
)
1158 /* The pthread_cond_timedwait() call timed out. It is time
1159 to clean up the cache. */
1160 assert (my_number
< lastdb
);
1161 prune_cache (&dbs
[my_number
],
1162 prune_ts
.tv_sec
+ (prune_ts
.tv_nsec
>= 500000000));
1164 if (clock_gettime (timeout_clock
, &prune_ts
) == -1)
1165 /* Should never happen. */
1168 /* Compute next timeout time. */
1169 prune_ts
.tv_sec
+= CACHE_PRUNE_INTERVAL
;
1171 /* In case the list is empty we do not want to run the prune
1172 code right away again. */
1177 pthread_mutex_lock (&readylist_lock
);
1179 /* One more thread available. */
1185 static unsigned int nconns
;
1190 pthread_mutex_lock (&readylist_lock
);
1192 /* Find an empty entry in FDLIST. */
1194 for (inner
= 0; inner
< nconns
; ++inner
)
1195 if (fdlist
[inner
].next
== NULL
)
1197 assert (inner
< nconns
);
1199 fdlist
[inner
].fd
= fd
;
1201 if (readylist
== NULL
)
1202 readylist
= fdlist
[inner
].next
= &fdlist
[inner
];
1205 fdlist
[inner
].next
= readylist
->next
;
1206 readylist
= readylist
->next
= &fdlist
[inner
];
1209 bool do_signal
= true;
1210 if (__builtin_expect (nready
== 0, 0))
1215 /* Try to start another thread to help out. */
1217 if (nthreads
< max_nthreads
1218 && pthread_create (&th
, &attr
, nscd_run
,
1219 (void *) (long int) nthreads
) == 0)
1221 /* We got another thread. */
1223 /* The new thread might need a kick. */
1229 pthread_mutex_unlock (&readylist_lock
);
1231 /* Tell one of the worker threads there is work to do. */
1233 pthread_cond_signal (&readylist_cond
);
1237 /* Check whether restarting should happen. */
1239 restart_p (time_t now
)
1241 return (paranoia
&& readylist
== NULL
&& nready
== nthreads
1242 && now
>= restart_time
);
1246 /* Array for times a connection was accepted. */
1247 static time_t *starttime
;
1251 __attribute__ ((__noreturn__
))
1252 main_loop_poll (void)
1254 struct pollfd
*conns
= (struct pollfd
*) xmalloc (nconns
1255 * sizeof (conns
[0]));
1258 conns
[0].events
= POLLRDNORM
;
1260 size_t firstfree
= 1;
1264 /* Wait for any event. We wait at most a couple of seconds so
1265 that we can check whether we should close any of the accepted
1266 connections since we have not received a request. */
1267 #define MAX_ACCEPT_TIMEOUT 30
1268 #define MIN_ACCEPT_TIMEOUT 5
1269 #define MAIN_THREAD_TIMEOUT \
1270 (MAX_ACCEPT_TIMEOUT * 1000 \
1271 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1273 int n
= poll (conns
, nused
, MAIN_THREAD_TIMEOUT
);
1275 time_t now
= time (NULL
);
1277 /* If there is a descriptor ready for reading or there is a new
1278 connection, process this now. */
1281 if (conns
[0].revents
!= 0)
1283 /* We have a new incoming connection. Accept the connection. */
1284 int fd
= TEMP_FAILURE_RETRY (accept (sock
, NULL
, NULL
));
1286 /* Use the descriptor if we have not reached the limit. */
1287 if (fd
>= 0 && firstfree
< nconns
)
1289 conns
[firstfree
].fd
= fd
;
1290 conns
[firstfree
].events
= POLLRDNORM
;
1291 starttime
[firstfree
] = now
;
1292 if (firstfree
>= nused
)
1293 nused
= firstfree
+ 1;
1297 while (firstfree
< nused
&& conns
[firstfree
].fd
!= -1);
1303 for (size_t cnt
= 1; cnt
< nused
&& n
> 0; ++cnt
)
1304 if (conns
[cnt
].revents
!= 0)
1306 fd_ready (conns
[cnt
].fd
);
1308 /* Clean up the CONNS array. */
1310 if (cnt
< firstfree
)
1312 if (cnt
== nused
- 1)
1315 while (conns
[nused
- 1].fd
== -1);
1321 /* Now find entries which have timed out. */
1324 /* We make the timeout length depend on the number of file
1325 descriptors currently used. */
1326 #define ACCEPT_TIMEOUT \
1327 (MAX_ACCEPT_TIMEOUT \
1328 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
1329 time_t laststart
= now
- ACCEPT_TIMEOUT
;
1331 for (size_t cnt
= nused
- 1; cnt
> 0; --cnt
)
1333 if (conns
[cnt
].fd
!= -1 && starttime
[cnt
] < laststart
)
1335 /* Remove the entry, it timed out. */
1336 (void) close (conns
[cnt
].fd
);
1339 if (cnt
< firstfree
)
1341 if (cnt
== nused
- 1)
1344 while (conns
[nused
- 1].fd
== -1);
1348 if (restart_p (now
))
1356 main_loop_epoll (int efd
)
1358 struct epoll_event ev
= { 0, };
1362 /* Add the socket. */
1363 ev
.events
= EPOLLRDNORM
;
1365 if (epoll_ctl (efd
, EPOLL_CTL_ADD
, sock
, &ev
) == -1)
1366 /* We cannot use epoll. */
1371 struct epoll_event revs
[100];
1372 # define nrevs (sizeof (revs) / sizeof (revs[0]))
1374 int n
= epoll_wait (efd
, revs
, nrevs
, MAIN_THREAD_TIMEOUT
);
1376 time_t now
= time (NULL
);
1378 for (int cnt
= 0; cnt
< n
; ++cnt
)
1379 if (revs
[cnt
].data
.fd
== sock
)
1381 /* A new connection. */
1382 int fd
= TEMP_FAILURE_RETRY (accept (sock
, NULL
, NULL
));
1386 /* Try to add the new descriptor. */
1389 || epoll_ctl (efd
, EPOLL_CTL_ADD
, fd
, &ev
) == -1)
1390 /* The descriptor is too large or something went
1391 wrong. Close the descriptor. */
1395 /* Remember when we accepted the connection. */
1396 starttime
[fd
] = now
;
1407 /* Remove the descriptor from the epoll descriptor. */
1408 struct epoll_event ev
= { 0, };
1409 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, revs
[cnt
].data
.fd
, &ev
);
1411 /* Get a worker to handle the request. */
1412 fd_ready (revs
[cnt
].data
.fd
);
1414 /* Reset the time. */
1415 starttime
[revs
[cnt
].data
.fd
] = 0;
1416 if (revs
[cnt
].data
.fd
== highest
)
1419 while (highest
> 0 && starttime
[highest
] == 0);
1424 /* Now look for descriptors for accepted connections which have
1425 not sent a request for too long. */
1426 time_t laststart
= now
- ACCEPT_TIMEOUT
;
1427 for (int cnt
= highest
; cnt
> STDERR_FILENO
; --cnt
)
1428 if (cnt
!= sock
&& starttime
[cnt
] != 0 && starttime
[cnt
] < laststart
)
1430 /* We are waiting for this one for too long. Close it. */
1431 struct epoll_event ev
= {0, };
1432 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, cnt
, &ev
);
1440 else if (cnt
!= sock
&& starttime
[cnt
] == 0 && cnt
== highest
)
1443 if (restart_p (now
))
1450 /* Start all the threads we want. The initial process is thread no. 1. */
1452 start_threads (void)
1454 /* Initialize the conditional variable we will use. The only
1455 non-standard attribute we might use is the clock selection. */
1456 pthread_condattr_t condattr
;
1457 pthread_condattr_init (&condattr
);
1459 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
1460 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
1461 /* Determine whether the monotonic clock is available. */
1462 struct timespec dummy
;
1463 # if _POSIX_MONOTONIC_CLOCK == 0
1464 if (sysconf (_SC_MONOTONIC_CLOCK
) > 0)
1466 # if _POSIX_CLOCK_SELECTION == 0
1467 if (sysconf (_SC_CLOCK_SELECTION
) > 0)
1469 if (clock_getres (CLOCK_MONOTONIC
, &dummy
) == 0
1470 && pthread_condattr_setclock (&condattr
, CLOCK_MONOTONIC
) == 0)
1471 timeout_clock
= CLOCK_MONOTONIC
;
1474 pthread_cond_init (&readylist_cond
, &condattr
);
1475 pthread_condattr_destroy (&condattr
);
1478 /* Create the attribute for the threads. They are all created
1480 pthread_attr_init (&attr
);
1481 pthread_attr_setdetachstate (&attr
, PTHREAD_CREATE_DETACHED
);
1482 /* Use 1MB stacks, twice as much for 64-bit architectures. */
1483 pthread_attr_setstacksize (&attr
, 1024 * 1024 * (sizeof (void *) / 4));
1485 /* We allow less than LASTDB threads only for debugging. */
1486 if (debug_level
== 0)
1487 nthreads
= MAX (nthreads
, lastdb
);
1490 for (long int i
= 0; i
< nthreads
; ++i
)
1493 if (pthread_create (&th
, &attr
, nscd_run
, (void *) (i
- nfailed
)) != 0)
1496 if (nthreads
- nfailed
< lastdb
)
1498 /* We could not start enough threads. */
1499 dbg_log (_("could only start %d threads; terminating"),
1500 nthreads
- nfailed
);
1504 /* Determine how much room for descriptors we should initially
1505 allocate. This might need to change later if we cap the number
1507 const long int nfds
= sysconf (_SC_OPEN_MAX
);
1509 #define MAXCONN 16384
1510 if (nfds
== -1 || nfds
> MAXCONN
)
1512 else if (nfds
< MINCONN
)
1517 /* We need memory to pass descriptors on to the worker threads. */
1518 fdlist
= (struct fdlist
*) xcalloc (nconns
, sizeof (fdlist
[0]));
1519 /* Array to keep track when connection was accepted. */
1520 starttime
= (time_t *) xcalloc (nconns
, sizeof (starttime
[0]));
1522 /* In the main thread we execute the loop which handles incoming
1525 int efd
= epoll_create (100);
1528 main_loop_epoll (efd
);
1537 /* Look up the uid, gid, and supplementary groups to run nscd as. When
1538 this function is called, we are not listening on the nscd socket yet so
1539 we can just use the ordinary lookup functions without causing a lockup */
1541 begin_drop_privileges (void)
1543 struct passwd
*pwd
= getpwnam (server_user
);
1547 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
1548 error (EXIT_FAILURE
, 0, _("Failed to run nscd as user '%s'"),
1552 server_uid
= pwd
->pw_uid
;
1553 server_gid
= pwd
->pw_gid
;
1555 /* Save the old UID/GID if we have to change back. */
1558 old_uid
= getuid ();
1559 old_gid
= getgid ();
1562 if (getgrouplist (server_user
, server_gid
, NULL
, &server_ngroups
) == 0)
1564 /* This really must never happen. */
1565 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
1566 error (EXIT_FAILURE
, errno
, _("initial getgrouplist failed"));
1569 server_groups
= (gid_t
*) xmalloc (server_ngroups
* sizeof (gid_t
));
1571 if (getgrouplist (server_user
, server_gid
, server_groups
, &server_ngroups
)
1574 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
1575 error (EXIT_FAILURE
, errno
, _("getgrouplist failed"));
1580 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
1581 run nscd as the user specified in the configuration file. */
1583 finish_drop_privileges (void)
1585 if (setgroups (server_ngroups
, server_groups
) == -1)
1587 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
1588 error (EXIT_FAILURE
, errno
, _("setgroups failed"));
1591 if (setgid (server_gid
) == -1)
1593 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
1598 if (setuid (server_uid
) == -1)
1600 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);