1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2007, 2008 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
34 #include <arpa/inet.h>
36 # include <sys/epoll.h>
39 #include <sys/param.h>
42 # include <sys/sendfile.h>
44 #include <sys/socket.h>
52 # include <kernel-features.h>
56 /* Wrapper functions with error checking for standard functions. */
57 extern void *xmalloc (size_t n
);
58 extern void *xcalloc (size_t n
, size_t s
);
59 extern void *xrealloc (void *o
, size_t n
);
61 /* Support to run nscd as an unprivileged user */
62 const char *server_user
;
63 static uid_t server_uid
;
64 static gid_t server_gid
;
65 const char *stat_user
;
67 static gid_t
*server_groups
;
71 static int server_ngroups
;
73 static pthread_attr_t attr
;
75 static void begin_drop_privileges (void);
76 static void finish_drop_privileges (void);
78 /* Map request type to a string. */
79 const char *const serv2str
[LASTREQ
] =
81 [GETPWBYNAME
] = "GETPWBYNAME",
82 [GETPWBYUID
] = "GETPWBYUID",
83 [GETGRBYNAME
] = "GETGRBYNAME",
84 [GETGRBYGID
] = "GETGRBYGID",
85 [GETHOSTBYNAME
] = "GETHOSTBYNAME",
86 [GETHOSTBYNAMEv6
] = "GETHOSTBYNAMEv6",
87 [GETHOSTBYADDR
] = "GETHOSTBYADDR",
88 [GETHOSTBYADDRv6
] = "GETHOSTBYADDRv6",
89 [SHUTDOWN
] = "SHUTDOWN",
90 [GETSTAT
] = "GETSTAT",
91 [INVALIDATE
] = "INVALIDATE",
92 [GETFDPW
] = "GETFDPW",
93 [GETFDGR
] = "GETFDGR",
94 [GETFDHST
] = "GETFDHST",
96 [INITGROUPS
] = "INITGROUPS",
97 [GETSERVBYNAME
] = "GETSERVBYNAME",
98 [GETSERVBYPORT
] = "GETSERVBYPORT",
99 [GETFDSERV
] = "GETFDSERV"
102 /* The control data structures for the services. */
103 struct database_dyn dbs
[lastdb
] =
106 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
107 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
113 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
114 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
116 .filename
= "/etc/passwd",
117 .db_filename
= _PATH_NSCD_PASSWD_DB
,
118 .disabled_iov
= &pwd_iov_disabled
,
126 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
127 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
133 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
134 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
136 .filename
= "/etc/group",
137 .db_filename
= _PATH_NSCD_GROUP_DB
,
138 .disabled_iov
= &grp_iov_disabled
,
146 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
147 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
151 .propagate
= 0, /* Not used. */
153 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
154 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
156 .filename
= "/etc/hosts",
157 .db_filename
= _PATH_NSCD_HOSTS_DB
,
158 .disabled_iov
= &hst_iov_disabled
,
166 .lock
= PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
,
167 .prune_lock
= PTHREAD_MUTEX_INITIALIZER
,
171 .propagate
= 0, /* Not used. */
173 .max_db_size
= DEFAULT_MAX_DB_SIZE
,
174 .suggested_module
= DEFAULT_SUGGESTED_MODULE
,
176 .filename
= "/etc/services",
177 .db_filename
= _PATH_NSCD_SERVICES_DB
,
178 .disabled_iov
= &serv_iov_disabled
,
188 /* Mapping of request type to database. */
192 struct database_dyn
*db
;
193 } const reqinfo
[LASTREQ
] =
195 [GETPWBYNAME
] = { true, &dbs
[pwddb
] },
196 [GETPWBYUID
] = { true, &dbs
[pwddb
] },
197 [GETGRBYNAME
] = { true, &dbs
[grpdb
] },
198 [GETGRBYGID
] = { true, &dbs
[grpdb
] },
199 [GETHOSTBYNAME
] = { true, &dbs
[hstdb
] },
200 [GETHOSTBYNAMEv6
] = { true, &dbs
[hstdb
] },
201 [GETHOSTBYADDR
] = { true, &dbs
[hstdb
] },
202 [GETHOSTBYADDRv6
] = { true, &dbs
[hstdb
] },
203 [SHUTDOWN
] = { false, NULL
},
204 [GETSTAT
] = { false, NULL
},
205 [SHUTDOWN
] = { false, NULL
},
206 [GETFDPW
] = { false, &dbs
[pwddb
] },
207 [GETFDGR
] = { false, &dbs
[grpdb
] },
208 [GETFDHST
] = { false, &dbs
[hstdb
] },
209 [GETAI
] = { true, &dbs
[hstdb
] },
210 [INITGROUPS
] = { true, &dbs
[grpdb
] },
211 [GETSERVBYNAME
] = { true, &dbs
[servdb
] },
212 [GETSERVBYPORT
] = { true, &dbs
[servdb
] },
213 [GETFDSERV
] = { false, &dbs
[servdb
] }
217 /* Initial number of threads to use. */
219 /* Maximum number of threads to use. */
220 int max_nthreads
= 32;
222 /* Socket for incoming connections. */
225 /* Number of times clients had to wait. */
226 unsigned long int client_queued
;
228 /* Data structure for recording in-flight memory allocation. */
229 __thread
struct mem_in_flight mem_in_flight attribute_tls_model_ie
;
230 /* Global list of the mem_in_flight variables of all the threads. */
231 struct mem_in_flight
*mem_in_flight_list
;
/* Send the LEN bytes starting at BUF to the socket FD.

   The data is handed to send() with MSG_NOSIGNAL, wrapped in
   TEMP_FAILURE_RETRY so that an interrupted call is restarted.  After
   each call BUF is advanced by the number of bytes that were accepted.

   Returns the (negative) result of send() if it failed, otherwise
   LEN - N.  N is the length passed to send(); presumably it counts the
   bytes still outstanding, which makes the result the number of bytes
   written -- its initialization and update are not visible in this
   excerpt, TODO confirm.  */
235 writeall (int fd
, const void *buf
, size_t len
)
241 ret
= TEMP_FAILURE_RETRY (send (fd
, buf
, n
, MSG_NOSIGNAL
));
/* Skip over the part which has been sent.  */
244 buf
= (const char *) buf
+ ret
;
/* Pass an error through unchanged, otherwise report LEN - N.  */
248 return ret
< 0 ? ret
: len
- n
;
/* Copy LEN bytes, starting at offset OFF of the file FROMFD, to the
   descriptor TOFD using sendfile().

   The call is wrapped in TEMP_FAILURE_RETRY so that it is restarted
   when interrupted.  The address of OFF is passed to sendfile(), which
   lets the call update the local copy of the offset.

   Returns the (negative) result of sendfile() if it failed, otherwise
   LEN - N.  N is the count passed to sendfile(); presumably it is the
   number of bytes still outstanding -- its initialization and update
   are not visible in this excerpt, TODO confirm.  */
254 sendfileall (int tofd
, int fromfd
, off_t off
, size_t len
)
261 ret
= TEMP_FAILURE_RETRY (sendfile (tofd
, fromfd
, &off
, n
));
/* Pass an error through unchanged, otherwise report LEN - N.  */
267 return ret
< 0 ? ret
: len
- n
;
275 /* The following three are not really used, they are symbolic constants. */
281 use_he_begin
= use_he
| use_begin
,
282 use_he_end
= use_he
| use_end
,
285 use_key_begin
= use_key
| use_begin
,
286 use_key_end
= use_key
| use_end
,
287 use_key_first
= use_key_begin
| use_first
,
290 use_data_begin
= use_data
| use_begin
,
291 use_data_end
= use_data
| use_end
,
292 use_data_first
= use_data_begin
| use_first
/* Record in USEMAP that the LEN bytes at offset START are used for the
   purpose USE, or check an already existing record for that range.

   USEMAP has one byte per byte of the data area; FIRST_FREE is the end
   of the used part of the data area.  A range is only acceptable if
   START and START + LEN do not exceed FIRST_FREE and START has none of
   the BLOCK_ALIGN_M1 bits set.

   Three cases are distinguished by the marker found at START:
   - use_not: the range is unclaimed; it gets a begin marker, and an
     end marker on its last byte.
   - the same kind of begin marker (ignoring use_first): the range was
     claimed before; the existing markers are compared and the
     use_first bit of USE is merged into the begin marker.
   - anything else: START points to a different object or into the
     middle of one.

   The callers visible in this file treat a zero result as failure; the
   return statements themselves are not part of this excerpt.  DATA is
   not referenced in the visible lines.  */
297 check_use (const char *data
, nscd_ssize_t first_free
, uint8_t *usemap
,
298 enum usekey use
, ref_t start
, size_t len
)
/* Range must lie inside the used area and start on a block boundary.  */
302 if (start
> first_free
|| start
+ len
> first_free
303 || (start
& BLOCK_ALIGN_M1
))
/* First reference to this range?  */
306 if (usemap
[start
] == use_not
)
308 /* Add the start marker. */
309 usemap
[start
] = use
| use_begin
;
/* Every following byte of the range must still be unclaimed.  */
313 if (usemap
[++start
] != use_not
)
318 /* Add the end marker. */
319 usemap
[start
] = use
| use_end
;
/* Already claimed with a begin marker of the same kind; the use_first
   bit is masked out on both sides of the comparison.  */
321 else if ((usemap
[start
] & ~use_first
) == ((use
| use_begin
) & ~use_first
))
323 /* Hash entries can't be shared. */
/* Remember if any of the references carries the use_first bit.  */
327 usemap
[start
] |= (use
& use_first
);
/* Interior bytes must carry the plain marker ...  */
331 if (usemap
[++start
] != use
)
/* ... and the last byte the end marker.  */
334 if (usemap
[++start
] != (use
| use_end
))
338 /* Points to a wrong object or somewhere in the middle. */
345 /* Verify data in persistent database. */
347 verify_persistent_db (void *mem
, struct database_pers_head
*readhead
, int dbnr
)
349 assert (dbnr
== pwddb
|| dbnr
== grpdb
|| dbnr
== hstdb
|| dbnr
== servdb
);
351 time_t now
= time (NULL
);
353 struct database_pers_head
*head
= mem
;
354 struct database_pers_head head_copy
= *head
;
356 /* Check that the header that was read matches the head in the database. */
357 if (memcmp (head
, readhead
, sizeof (*head
)) != 0)
360 /* First some easy tests: make sure the database header is sane. */
361 if (head
->version
!= DB_VERSION
362 || head
->header_size
!= sizeof (*head
)
363 /* We allow a timestamp to be one hour ahead of the current time.
364 This should cover daylight saving time changes. */
365 || head
->timestamp
> now
+ 60 * 60 + 60
366 || (head
->gc_cycle
& 1)
368 || (size_t) head
->module
> INT32_MAX
/ sizeof (ref_t
)
369 || (size_t) head
->data_size
> INT32_MAX
- head
->module
* sizeof (ref_t
)
370 || head
->first_free
< 0
371 || head
->first_free
> head
->data_size
372 || (head
->first_free
& BLOCK_ALIGN_M1
) != 0
373 || head
->maxnentries
< 0
374 || head
->maxnsearched
< 0)
377 uint8_t *usemap
= calloc (head
->first_free
, 1);
381 const char *data
= (char *) &head
->array
[roundup (head
->module
,
382 ALIGN
/ sizeof (ref_t
))];
384 nscd_ssize_t he_cnt
= 0;
385 for (nscd_ssize_t cnt
= 0; cnt
< head
->module
; ++cnt
)
387 ref_t trail
= head
->array
[cnt
];
391 while (work
!= ENDREF
)
393 if (! check_use (data
, head
->first_free
, usemap
, use_he
, work
,
394 sizeof (struct hashentry
)))
397 /* Now we know we can dereference the record. */
398 struct hashentry
*here
= (struct hashentry
*) (data
+ work
);
402 /* Make sure the record is for this type of service. */
403 if (here
->type
>= LASTREQ
404 || reqinfo
[here
->type
].db
!= &dbs
[dbnr
])
407 /* Validate boolean field value. */
408 if (here
->first
!= false && here
->first
!= true)
416 || here
->packet
> head
->first_free
417 || here
->packet
+ sizeof (struct datahead
) > head
->first_free
)
420 struct datahead
*dh
= (struct datahead
*) (data
+ here
->packet
);
422 if (! check_use (data
, head
->first_free
, usemap
,
423 use_data
| (here
->first
? use_first
: 0),
424 here
->packet
, dh
->allocsize
))
427 if (dh
->allocsize
< sizeof (struct datahead
)
428 || dh
->recsize
> dh
->allocsize
429 || (dh
->notfound
!= false && dh
->notfound
!= true)
430 || (dh
->usable
!= false && dh
->usable
!= true))
433 if (here
->key
< here
->packet
+ sizeof (struct datahead
)
434 || here
->key
> here
->packet
+ dh
->allocsize
435 || here
->key
+ here
->len
> here
->packet
+ dh
->allocsize
)
438 /* If keys can appear outside of data, this should be done
439 instead. But gc doesn't mark the data in that case. */
440 if (! check_use (data
, head
->first_free
, usemap
,
441 use_key
| (here
->first
? use_first
: 0),
442 here
->key
, here
->len
))
450 /* A circular list, this must not happen. */
453 trail
= ((struct hashentry
*) (data
+ trail
))->next
;
458 if (he_cnt
!= head
->nentries
)
461 /* See if all data and keys had at least one reference from
462 he->first == true hashentry. */
463 for (ref_t idx
= 0; idx
< head
->first_free
; ++idx
)
466 if (usemap
[idx
] == use_key_begin
)
469 if (usemap
[idx
] == use_data_begin
)
473 /* Finally, make sure the database hasn't changed since the first test. */
474 if (memcmp (mem
, &head_copy
, sizeof (*head
)) != 0)
487 # define EXTRA_O_FLAGS O_CLOEXEC
489 # define EXTRA_O_FLAGS 0
493 /* Initialize database information structures. */
497 /* Look up unprivileged uid/gid/groups before we start listening on the
499 if (server_user
!= NULL
)
500 begin_drop_privileges ();
503 /* No configuration for this value, assume a default. */
506 for (size_t cnt
= 0; cnt
< lastdb
; ++cnt
)
507 if (dbs
[cnt
].enabled
)
509 pthread_rwlock_init (&dbs
[cnt
].lock
, NULL
);
510 pthread_mutex_init (&dbs
[cnt
].memlock
, NULL
);
512 if (dbs
[cnt
].persistent
)
514 /* Try to open the appropriate file on disk. */
515 int fd
= open (dbs
[cnt
].db_filename
, O_RDWR
| EXTRA_O_FLAGS
);
522 struct database_pers_head head
;
523 ssize_t n
= TEMP_FAILURE_RETRY (read (fd
, &head
,
525 if (n
!= sizeof (head
) || fstat64 (fd
, &st
) != 0)
528 /* The code is single-threaded at this point so
529 using strerror is just fine. */
530 msg
= strerror (errno
);
532 dbg_log (_("invalid persistent database file \"%s\": %s"),
533 dbs
[cnt
].db_filename
, msg
);
534 unlink (dbs
[cnt
].db_filename
);
536 else if (head
.module
== 0 && head
.data_size
== 0)
538 /* The file has been created, but the head has not
539 been initialized yet. */
540 msg
= _("uninitialized header");
543 else if (head
.header_size
!= (int) sizeof (head
))
545 msg
= _("header size does not match");
548 else if ((total
= (sizeof (head
)
549 + roundup (head
.module
* sizeof (ref_t
),
553 || total
< sizeof (head
))
555 msg
= _("file size does not match");
558 /* Note we map with the maximum size allowed for the
559 database. This is likely much larger than the
560 actual file size. This is OK on most OSes since
561 extensions of the underlying file will
562 automatically translate more pages available for
564 else if ((mem
= mmap (NULL
, dbs
[cnt
].max_db_size
,
565 PROT_READ
| PROT_WRITE
,
569 else if (!verify_persistent_db (mem
, &head
, cnt
))
572 msg
= _("verification failed");
577 /* Success. We have the database. */
579 dbs
[cnt
].memsize
= total
;
580 dbs
[cnt
].data
= (char *)
581 &dbs
[cnt
].head
->array
[roundup (dbs
[cnt
].head
->module
,
582 ALIGN
/ sizeof (ref_t
))];
583 dbs
[cnt
].mmap_used
= true;
585 if (dbs
[cnt
].suggested_module
> head
.module
)
586 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
591 /* We also need a read-only descriptor. */
594 dbs
[cnt
].ro_fd
= open (dbs
[cnt
].db_filename
,
595 O_RDONLY
| EXTRA_O_FLAGS
);
596 if (dbs
[cnt
].ro_fd
== -1)
598 cannot create read-only descriptor for \"%s\"; no mmap"),
599 dbs
[cnt
].db_filename
);
602 // XXX Shall we test whether the descriptors actually
603 // XXX point to the same file?
606 /* Close the file descriptors in case something went
607 wrong in which case the variables have not been
614 if (dbs
[cnt
].head
== NULL
)
616 /* No database loaded. Allocate the data structure,
618 struct database_pers_head head
;
619 size_t total
= (sizeof (head
)
620 + roundup (dbs
[cnt
].suggested_module
621 * sizeof (ref_t
), ALIGN
)
622 + (dbs
[cnt
].suggested_module
623 * DEFAULT_DATASIZE_PER_BUCKET
));
625 /* Try to create the database. If we do not need a
626 persistent database create a temporary file. */
629 if (dbs
[cnt
].persistent
)
631 fd
= open (dbs
[cnt
].db_filename
,
632 O_RDWR
| O_CREAT
| O_EXCL
| O_TRUNC
| EXTRA_O_FLAGS
,
634 if (fd
!= -1 && dbs
[cnt
].shared
)
635 ro_fd
= open (dbs
[cnt
].db_filename
,
636 O_RDONLY
| EXTRA_O_FLAGS
);
640 char fname
[] = _PATH_NSCD_XYZ_DB_TMP
;
641 fd
= mkostemp (fname
, EXTRA_O_FLAGS
);
643 /* We do not need the file name anymore after we
644 opened another file descriptor in read-only mode. */
648 ro_fd
= open (fname
, O_RDONLY
| EXTRA_O_FLAGS
);
658 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
659 dbnames
[cnt
], dbs
[cnt
].db_filename
);
660 // XXX Correct way to terminate?
664 if (dbs
[cnt
].persistent
)
665 dbg_log (_("cannot create %s; no persistent database used"),
666 dbs
[cnt
].db_filename
);
668 dbg_log (_("cannot create %s; no sharing possible"),
669 dbs
[cnt
].db_filename
);
671 dbs
[cnt
].persistent
= 0;
672 // XXX remember: no mmap
676 /* Tell the user if we could not create the read-only
678 if (ro_fd
== -1 && dbs
[cnt
].shared
)
680 cannot create read-only descriptor for \"%s\"; no mmap"),
681 dbs
[cnt
].db_filename
);
683 /* Before we create the header, initialize the hash
684 table, so that if we get interrupted while writing
685 the header we can recognize a partially initialized
687 size_t ps
= sysconf (_SC_PAGESIZE
);
689 assert (~ENDREF
== 0);
690 memset (tmpbuf
, '\xff', ps
);
692 size_t remaining
= dbs
[cnt
].suggested_module
* sizeof (ref_t
);
693 off_t offset
= sizeof (head
);
696 if (offset
% ps
!= 0)
698 towrite
= MIN (remaining
, ps
- (offset
% ps
));
699 if (pwrite (fd
, tmpbuf
, towrite
, offset
) != towrite
)
702 remaining
-= towrite
;
705 while (remaining
> ps
)
707 if (pwrite (fd
, tmpbuf
, ps
, offset
) == -1)
714 && pwrite (fd
, tmpbuf
, remaining
, offset
) != remaining
)
717 /* Create the header of the file. */
718 struct database_pers_head head
=
720 .version
= DB_VERSION
,
721 .header_size
= sizeof (head
),
722 .module
= dbs
[cnt
].suggested_module
,
723 .data_size
= (dbs
[cnt
].suggested_module
724 * DEFAULT_DATASIZE_PER_BUCKET
),
729 if ((TEMP_FAILURE_RETRY (write (fd
, &head
, sizeof (head
)))
731 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd
, 0, total
))
733 || (mem
= mmap (NULL
, dbs
[cnt
].max_db_size
,
734 PROT_READ
| PROT_WRITE
,
735 MAP_SHARED
, fd
, 0)) == MAP_FAILED
)
738 unlink (dbs
[cnt
].db_filename
);
739 dbg_log (_("cannot write to database file %s: %s"),
740 dbs
[cnt
].db_filename
, strerror (errno
));
741 dbs
[cnt
].persistent
= 0;
747 dbs
[cnt
].data
= (char *)
748 &dbs
[cnt
].head
->array
[roundup (dbs
[cnt
].head
->module
,
749 ALIGN
/ sizeof (ref_t
))];
750 dbs
[cnt
].memsize
= total
;
751 dbs
[cnt
].mmap_used
= true;
753 /* Remember the descriptors. */
755 dbs
[cnt
].ro_fd
= ro_fd
;
767 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
768 /* We do not check here whether the O_CLOEXEC provided to the
769 open call was successful or not. The two fcntl calls are
770 only performed once each per process start-up and therefore
771 is not noticeable at all. */
773 && ((dbs
[cnt
].wr_fd
!= -1
774 && fcntl (dbs
[cnt
].wr_fd
, F_SETFD
, FD_CLOEXEC
) == -1)
775 || (dbs
[cnt
].ro_fd
!= -1
776 && fcntl (dbs
[cnt
].ro_fd
, F_SETFD
, FD_CLOEXEC
) == -1)))
779 cannot set socket to close on exec: %s; disabling paranoia mode"),
785 if (dbs
[cnt
].head
== NULL
)
787 /* We do not use the persistent database. Just
788 create an in-memory data structure. */
789 assert (! dbs
[cnt
].persistent
);
791 dbs
[cnt
].head
= xmalloc (sizeof (struct database_pers_head
)
792 + (dbs
[cnt
].suggested_module
794 memset (dbs
[cnt
].head
, '\0', sizeof (struct database_pers_head
));
795 assert (~ENDREF
== 0);
796 memset (dbs
[cnt
].head
->array
, '\xff',
797 dbs
[cnt
].suggested_module
* sizeof (ref_t
));
798 dbs
[cnt
].head
->module
= dbs
[cnt
].suggested_module
;
799 dbs
[cnt
].head
->data_size
= (DEFAULT_DATASIZE_PER_BUCKET
800 * dbs
[cnt
].head
->module
);
801 dbs
[cnt
].data
= xmalloc (dbs
[cnt
].head
->data_size
);
802 dbs
[cnt
].head
->first_free
= 0;
805 assert (dbs
[cnt
].ro_fd
== -1);
808 if (dbs
[cnt
].check_file
)
810 /* We need the modification date of the file. */
813 if (stat64 (dbs
[cnt
].filename
, &st
) < 0)
815 /* We cannot stat() the file, disable file checking. */
816 dbg_log (_("cannot stat() file `%s': %s"),
817 dbs
[cnt
].filename
, strerror (errno
));
818 dbs
[cnt
].check_file
= 0;
821 dbs
[cnt
].file_mtime
= st
.st_mtime
;
825 /* Create the socket. */
826 sock
= socket (AF_UNIX
, SOCK_STREAM
, 0);
829 dbg_log (_("cannot open socket: %s"), strerror (errno
));
830 exit (errno
== EACCES
? 4 : 1);
832 /* Bind a name to the socket. */
833 struct sockaddr_un sock_addr
;
834 sock_addr
.sun_family
= AF_UNIX
;
835 strcpy (sock_addr
.sun_path
, _PATH_NSCDSOCKET
);
836 if (bind (sock
, (struct sockaddr
*) &sock_addr
, sizeof (sock_addr
)) < 0)
838 dbg_log ("%s: %s", _PATH_NSCDSOCKET
, strerror (errno
));
839 exit (errno
== EACCES
? 4 : 1);
842 /* We don't want to get stuck on accept. */
843 int fl
= fcntl (sock
, F_GETFL
);
844 if (fl
== -1 || fcntl (sock
, F_SETFL
, fl
| O_NONBLOCK
) == -1)
846 dbg_log (_("cannot change socket to nonblocking mode: %s"),
851 /* The descriptor needs to be closed on exec. */
852 if (paranoia
&& fcntl (sock
, F_SETFD
, FD_CLOEXEC
) == -1)
854 dbg_log (_("cannot set socket to close on exec: %s"),
859 /* Set permissions for the socket. */
860 chmod (_PATH_NSCDSOCKET
, DEFFILEMODE
);
862 /* Set the socket up to accept connections. */
863 if (listen (sock
, SOMAXCONN
) < 0)
865 dbg_log (_("cannot enable socket to accept connections: %s"),
870 /* Change to unprivileged uid/gid/groups if specified in config file */
871 if (server_user
!= NULL
)
872 finish_drop_privileges ();
876 /* Close the connections. */
/* Invalidate the cache of the database named by KEY and report the
   outcome to the client connected on FD.

   KEY is compared against the entries of dbnames[] to find the
   database index.  The response RESP is sent with writeall() both when
   no database matches and after the cache has been pruned; how RESP is
   filled in is not visible in this excerpt.  handle_request calls this
   function for INVALIDATE requests.  */
885 invalidate_cache (char *key
, int fd
)
/* Look for the database whose name equals KEY.  */
890 for (number
= pwddb
; number
< lastdb
; ++number
)
891 if (strcmp (key
, dbnames
[number
]) == 0)
/* The database has the reset_res flag set; the action taken for it is
   not visible in this excerpt.  */
893 if (dbs
[number
].reset_res
)
/* NUMBER reaching lastdb presumably means the search above ran to
   completion without a match.  */
899 if (number
== lastdb
)
902 writeall (fd
, &resp
, sizeof (resp
));
/* Only an enabled database has a cache to prune.  */
906 if (dbs
[number
].enabled
)
/* Prune while holding prune_lock.  LONG_MAX is passed where the prune
   thread passes the current time -- presumably so that every entry
   counts as expired; confirm against prune_cache.  */
908 pthread_mutex_lock (&dbs
[number
].prune_lock
);
909 prune_cache (&dbs
[number
], LONG_MAX
, fd
);
910 pthread_mutex_unlock (&dbs
[number
].prune_lock
);
915 writeall (fd
, &resp
, sizeof (resp
));
922 send_ro_fd (struct database_dyn
*db
, char *key
, int fd
)
924 /* If we do not have a read-only file descriptor do nothing. */
928 /* We need to send some data along with the descriptor. */
929 uint64_t mapsize
= (db
->head
->data_size
930 + roundup (db
->head
->module
* sizeof (ref_t
), ALIGN
)
931 + sizeof (struct database_pers_head
));
933 iov
[0].iov_base
= key
;
934 iov
[0].iov_len
= strlen (key
) + 1;
935 iov
[1].iov_base
= &mapsize
;
936 iov
[1].iov_len
= sizeof (mapsize
);
938 /* Prepare the control message to transfer the descriptor. */
942 char bytes
[CMSG_SPACE (sizeof (int))];
944 struct msghdr msg
= { .msg_iov
= iov
, .msg_iovlen
= 2,
945 .msg_control
= buf
.bytes
,
946 .msg_controllen
= sizeof (buf
) };
947 struct cmsghdr
*cmsg
= CMSG_FIRSTHDR (&msg
);
949 cmsg
->cmsg_level
= SOL_SOCKET
;
950 cmsg
->cmsg_type
= SCM_RIGHTS
;
951 cmsg
->cmsg_len
= CMSG_LEN (sizeof (int));
953 *(int *) CMSG_DATA (cmsg
) = db
->ro_fd
;
955 msg
.msg_controllen
= cmsg
->cmsg_len
;
957 /* Send the control message. We repeat when we are interrupted but
958 everything else is ignored. */
960 # define MSG_NOSIGNAL 0
962 (void) TEMP_FAILURE_RETRY (sendmsg (fd
, &msg
, MSG_NOSIGNAL
));
964 if (__builtin_expect (debug_level
> 0, 0))
965 dbg_log (_("provide access to FD %d, for %s"), db
->ro_fd
, key
);
967 #endif /* SCM_RIGHTS */
970 /* Handle new request. */
972 handle_request (int fd
, request_header
*req
, void *key
, uid_t uid
, pid_t pid
)
974 if (__builtin_expect (req
->version
, NSCD_VERSION
) != NSCD_VERSION
)
978 cannot handle old request version %d; current version is %d"),
979 req
->version
, NSCD_VERSION
);
983 /* Perform the SELinux check before we go on to the standard checks. */
984 if (selinux_enabled
&& nscd_request_avc_has_perm (fd
, req
->type
) != 0)
995 snprintf (buf
, sizeof (buf
), "/proc/%ld/exe", (long int) pid
);
996 ssize_t n
= readlink (buf
, buf
, sizeof (buf
) - 1);
1000 request from %ld not handled due to missing permission"), (long int) pid
);
1005 request from '%s' [%ld] not handled due to missing permission"),
1006 buf
, (long int) pid
);
1009 dbg_log (_("request not handled due to missing permission"));
1015 struct database_dyn
*db
= reqinfo
[req
->type
].db
;
1017 /* See whether we can service the request from the cache. */
1018 if (__builtin_expect (reqinfo
[req
->type
].data_request
, true))
1020 if (__builtin_expect (debug_level
, 0) > 0)
1022 if (req
->type
== GETHOSTBYADDR
|| req
->type
== GETHOSTBYADDRv6
)
1024 char buf
[INET6_ADDRSTRLEN
];
1026 dbg_log ("\t%s (%s)", serv2str
[req
->type
],
1027 inet_ntop (req
->type
== GETHOSTBYADDR
1028 ? AF_INET
: AF_INET6
,
1029 key
, buf
, sizeof (buf
)));
1032 dbg_log ("\t%s (%s)", serv2str
[req
->type
], (char *) key
);
1035 /* Is this service enabled? */
1036 if (__builtin_expect (!db
->enabled
, 0))
1038 /* No, send the prepared record. */
1039 if (TEMP_FAILURE_RETRY (send (fd
, db
->disabled_iov
->iov_base
,
1040 db
->disabled_iov
->iov_len
,
1042 != (ssize_t
) db
->disabled_iov
->iov_len
1043 && __builtin_expect (debug_level
, 0) > 0)
1045 /* We have problems sending the result. */
1047 dbg_log (_("cannot write result: %s"),
1048 strerror_r (errno
, buf
, sizeof (buf
)));
1054 /* Be sure we can read the data. */
1055 if (__builtin_expect (pthread_rwlock_tryrdlock (&db
->lock
) != 0, 0))
1057 ++db
->head
->rdlockdelayed
;
1058 pthread_rwlock_rdlock (&db
->lock
);
1061 /* See whether we can handle it from the cache. */
1062 struct datahead
*cached
;
1063 cached
= (struct datahead
*) cache_search (req
->type
, key
, req
->key_len
,
1067 /* Hurray it's in the cache. */
1070 #ifdef HAVE_SENDFILE
1071 if (__builtin_expect (db
->mmap_used
, 1))
1073 assert (db
->wr_fd
!= -1);
1074 assert ((char *) cached
->data
> (char *) db
->data
);
1075 assert ((char *) cached
->data
- (char *) db
->head
1077 <= (sizeof (struct database_pers_head
)
1078 + db
->head
->module
* sizeof (ref_t
)
1079 + db
->head
->data_size
));
1080 nwritten
= sendfileall (fd
, db
->wr_fd
,
1081 (char *) cached
->data
1082 - (char *) db
->head
, cached
->recsize
);
1083 # ifndef __ASSUME_SENDFILE
1084 if (nwritten
== -1 && errno
== ENOSYS
)
1089 # ifndef __ASSUME_SENDFILE
1093 nwritten
= writeall (fd
, cached
->data
, cached
->recsize
);
1095 if (nwritten
!= cached
->recsize
1096 && __builtin_expect (debug_level
, 0) > 0)
1098 /* We have problems sending the result. */
1100 dbg_log (_("cannot write result: %s"),
1101 strerror_r (errno
, buf
, sizeof (buf
)));
1104 pthread_rwlock_unlock (&db
->lock
);
1109 pthread_rwlock_unlock (&db
->lock
);
1111 else if (__builtin_expect (debug_level
, 0) > 0)
1113 if (req
->type
== INVALIDATE
)
1114 dbg_log ("\t%s (%s)", serv2str
[req
->type
], (char *) key
);
1116 dbg_log ("\t%s", serv2str
[req
->type
]);
1119 /* Handle the request. */
1123 addpwbyname (db
, fd
, req
, key
, uid
);
1127 addpwbyuid (db
, fd
, req
, key
, uid
);
1131 addgrbyname (db
, fd
, req
, key
, uid
);
1135 addgrbygid (db
, fd
, req
, key
, uid
);
1139 addhstbyname (db
, fd
, req
, key
, uid
);
1142 case GETHOSTBYNAMEv6
:
1143 addhstbynamev6 (db
, fd
, req
, key
, uid
);
1147 addhstbyaddr (db
, fd
, req
, key
, uid
);
1150 case GETHOSTBYADDRv6
:
1151 addhstbyaddrv6 (db
, fd
, req
, key
, uid
);
1155 addhstai (db
, fd
, req
, key
, uid
);
1159 addinitgroups (db
, fd
, req
, key
, uid
);
1163 addservbyname (db
, fd
, req
, key
, uid
);
1167 addservbyport (db
, fd
, req
, key
, uid
);
1174 /* Get the caller's credentials. */
1176 struct ucred caller
;
1177 socklen_t optlen
= sizeof (caller
);
1179 if (getsockopt (fd
, SOL_SOCKET
, SO_PEERCRED
, &caller
, &optlen
) < 0)
1183 dbg_log (_("error getting caller's id: %s"),
1184 strerror_r (errno
, buf
, sizeof (buf
)));
1190 /* Some systems have no SO_PEERCRED implementation. They don't
1191 care about security so we don't as well. */
1196 /* Accept shutdown, getstat and invalidate only from root. For
1197 the stat call also allow the user specified in the config file. */
1198 if (req
->type
== GETSTAT
)
1200 if (uid
== 0 || uid
== stat_uid
)
1201 send_stats (fd
, dbs
);
1205 if (req
->type
== INVALIDATE
)
1206 invalidate_cache (key
, fd
);
1208 termination_handler (0);
1217 send_ro_fd (reqinfo
[req
->type
].db
, key
, fd
);
1222 /* Ignore the command, it's nothing we know. */
1228 /* Restart the process. */
1232 /* First determine the parameters. We do not use the parameters
1233 passed to main() since in case nscd is started by running the
1234 dynamic linker this will not work. Yes, this is not the usual
1235 case but nscd is part of glibc and we occasionally do this. */
1236 size_t buflen
= 1024;
1237 char *buf
= alloca (buflen
);
1239 int fd
= open ("/proc/self/cmdline", O_RDONLY
);
1243 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1252 ssize_t n
= TEMP_FAILURE_RETRY (read (fd
, buf
+ readlen
,
1257 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1267 if (readlen
< buflen
)
1270 /* We might have to extend the buffer. */
1271 size_t old_buflen
= buflen
;
1272 char *newp
= extend_alloca (buf
, buflen
, 2 * buflen
);
1273 buf
= memmove (newp
, buf
, old_buflen
);
1278 /* Parse the command line. Worst case scenario: every two
1279 characters form one parameter (one character plus NUL). */
1280 char **argv
= alloca ((readlen
/ 2 + 1) * sizeof (argv
[0]));
1284 while (cp
< buf
+ readlen
)
1287 cp
= (char *) rawmemchr (cp
, '\0') + 1;
1291 /* Second, change back to the old user if we changed it. */
1292 if (server_user
!= NULL
)
1294 if (setresuid (old_uid
, old_uid
, old_uid
) != 0)
1297 cannot change to old UID: %s; disabling paranoia mode"),
1304 if (setresgid (old_gid
, old_gid
, old_gid
) != 0)
1307 cannot change to old GID: %s; disabling paranoia mode"),
1310 setuid (server_uid
);
1316 /* Next change back to the old working directory. */
1317 if (chdir (oldcwd
) == -1)
1320 cannot change to old working directory: %s; disabling paranoia mode"),
1323 if (server_user
!= NULL
)
1325 setuid (server_uid
);
1326 setgid (server_gid
);
1332 /* Synchronize memory. */
1333 int32_t certainly
[lastdb
];
1334 for (int cnt
= 0; cnt
< lastdb
; ++cnt
)
1335 if (dbs
[cnt
].enabled
)
1337 /* Make sure nobody keeps using the database. */
1338 dbs
[cnt
].head
->timestamp
= 0;
1339 certainly
[cnt
] = dbs
[cnt
].head
->nscd_certainly_running
;
1340 dbs
[cnt
].head
->nscd_certainly_running
= 0;
1342 if (dbs
[cnt
].persistent
)
1344 msync (dbs
[cnt
].head
, dbs
[cnt
].memsize
, MS_ASYNC
);
1347 /* The preparations are done. */
1348 execv ("/proc/self/exe", argv
);
1350 /* If we come here, we will never be able to re-exec. */
1351 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1354 if (server_user
!= NULL
)
1356 setuid (server_uid
);
1357 setgid (server_gid
);
1359 if (chdir ("/") != 0)
1360 dbg_log (_("cannot change current working directory to \"/\": %s"),
1364 /* Reenable the databases. */
1365 time_t now
= time (NULL
);
1366 for (int cnt
= 0; cnt
< lastdb
; ++cnt
)
1367 if (dbs
[cnt
].enabled
)
1369 dbs
[cnt
].head
->timestamp
= now
;
1370 dbs
[cnt
].head
->nscd_certainly_running
= certainly
[cnt
];
1375 /* List of file descriptors. */
1379 struct fdlist
*next
;
1381 /* Memory allocated for the list. */
1382 static struct fdlist
*fdlist
;
1383 /* List of currently ready-to-read file descriptors. */
1384 static struct fdlist
*readylist
;
1386 /* Conditional variable and mutex to signal availability of entries in
1387 READYLIST. The condvar is initialized dynamically since we might
1388 use a different clock depending on availability. */
1389 static pthread_cond_t readylist_cond
= PTHREAD_COND_INITIALIZER
;
1390 static pthread_mutex_t readylist_lock
= PTHREAD_MUTEX_INITIALIZER
;
1392 /* The clock to use with the condvar. */
1393 static clockid_t timeout_clock
= CLOCK_REALTIME
;
1395 /* Number of threads ready to handle the READYLIST. */
1396 static unsigned long int nready
;
1399 /* Function for the clean-up threads. */
1401 __attribute__ ((__noreturn__
))
1402 nscd_run_prune (void *p
)
1404 const long int my_number
= (long int) p
;
1405 assert (dbs
[my_number
].enabled
);
1407 int dont_need_update
= setup_thread (&dbs
[my_number
]);
1409 time_t now
= time (NULL
);
1411 /* We are running. */
1412 dbs
[my_number
].head
->timestamp
= now
;
1414 struct timespec prune_ts
;
1415 if (__builtin_expect (clock_gettime (timeout_clock
, &prune_ts
) == -1, 0))
1416 /* Should never happen. */
1419 /* Compute the initial timeout time. Prevent all the timers to go
1420 off at the same time by adding a db-based value. */
1421 prune_ts
.tv_sec
+= CACHE_PRUNE_INTERVAL
+ my_number
;
1422 dbs
[my_number
].wakeup_time
= now
+ CACHE_PRUNE_INTERVAL
+ my_number
;
1424 pthread_mutex_t
*prune_lock
= &dbs
[my_number
].prune_lock
;
1425 pthread_cond_t
*prune_cond
= &dbs
[my_number
].prune_cond
;
1427 pthread_mutex_lock (prune_lock
);
1430 /* Wait, but not forever. */
1431 int e
= pthread_cond_timedwait (prune_cond
, prune_lock
, &prune_ts
);
1432 assert (__builtin_expect (e
== 0 || e
== ETIMEDOUT
, 1));
1436 if (e
== ETIMEDOUT
|| now
>= dbs
[my_number
].wakeup_time
)
1438 /* We will determine the new timeout values based on the
1439 cache content. Should there be concurrent additions to
1440 the cache which are not accounted for in the cache
1441 pruning we want to know about it. Therefore set the
1442 timeout to the maximum. It will be decreased when adding
1443 new entries to the cache, if necessary. */
1444 if (sizeof (time_t) == sizeof (long int))
1445 dbs
[my_number
].wakeup_time
= LONG_MAX
;
1447 dbs
[my_number
].wakeup_time
= INT_MAX
;
1449 pthread_mutex_unlock (prune_lock
);
1451 next_wait
= prune_cache (&dbs
[my_number
], now
, -1);
1453 next_wait
= MAX (next_wait
, CACHE_PRUNE_INTERVAL
);
1454 /* If clients cannot determine for sure whether nscd is running
1455 we need to wake up occasionally to update the timestamp.
1456 Wait 90% of the update period. */
1457 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1458 if (__builtin_expect (! dont_need_update
, 0))
1460 next_wait
= MIN (UPDATE_MAPPING_TIMEOUT
, next_wait
);
1461 dbs
[my_number
].head
->timestamp
= now
;
1464 pthread_mutex_lock (prune_lock
);
1466 /* Make it known when we will wake up again. */
1467 if (now
+ next_wait
< dbs
[my_number
].wakeup_time
)
1468 dbs
[my_number
].wakeup_time
= now
+ next_wait
;
1470 next_wait
= dbs
[my_number
].wakeup_time
- now
;
1473 /* The cache was just pruned. Do not do it again now. Just
1474 use the new timeout value. */
1475 next_wait
= dbs
[my_number
].wakeup_time
- now
;
1477 if (clock_gettime (timeout_clock
, &prune_ts
) == -1)
1478 /* Should never happen. */
1481 /* Compute next timeout time. */
1482 prune_ts
.tv_sec
+= next_wait
;
1487 /* This is the main loop. It is replicated in different threads but
1488 the use of the ready list makes sure only one thread handles an
1489 incoming connection.

   P is the thread start argument; none of the lines visible here read it.
   Each pass takes one entry off READYLIST under readylist_lock, switches
   the descriptor to non-blocking mode, reads the request header and the
   key, validates the request type and key length, and hands everything
   to handle_request.  The function is declared __noreturn__. */
1491 __attribute__ ((__noreturn__
))
1492 nscd_run_worker (void *p
)
1496 /* Initialize the memory-in-flight list. */
1497 for (enum in_flight idx
= 0; idx
< IDX_last
; ++idx
)
1498 mem_in_flight
.block
[idx
].dbidx
= -1;
1499 /* And queue the structure of this thread. */
1501 mem_in_flight
.next
= mem_in_flight_list
;
/* Loop condition: repeat until the compare-and-exchange on
   mem_in_flight_list returns 0. */
1502 while (atomic_compare_and_exchange_bool_acq (&mem_in_flight_list
,
1504 mem_in_flight
.next
) != 0);
1506 /* Initial locking. */
1507 pthread_mutex_lock (&readylist_lock
);
1509 /* One more thread available. */
/* Block on readylist_cond, with readylist_lock held, until the ready
   list is non-empty. */
1514 while (readylist
== NULL
)
1515 pthread_cond_wait (&readylist_cond
, &readylist_lock
);
/* The list is circular: a lone entry points at itself.  IT is the entry
   following READYLIST and is the one unlinked below. */
1517 struct fdlist
*it
= readylist
->next
;
1518 if (readylist
->next
== readylist
)
1519 /* Just one entry on the list. */
1522 readylist
->next
= it
->next
;
1524 /* Extract the information and mark the record ready to be used
1529 /* One more thread available. */
1532 /* We are done with the list. */
1533 pthread_mutex_unlock (&readylist_lock
);
1535 /* We do not want to block on a short read or so. */
1536 int fl
= fcntl (fd
, F_GETFL
);
1537 if (fl
== -1 || fcntl (fd
, F_SETFL
, fl
| O_NONBLOCK
) == -1)
1540 /* Now read the request. */
1542 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd
, &req
, sizeof (req
)))
1543 != sizeof (req
), 0))
1545 /* We failed to read data. Note that this also might mean we
1546 failed because we would have blocked. */
1547 if (debug_level
> 0)
1548 dbg_log (_("short read while reading request: %s"),
1549 strerror_r (errno
, buf
, sizeof (buf
)));
1553 /* Check whether this is a valid request type. */
1554 if (req
.type
< GETPWBYNAME
|| req
.type
>= LASTREQ
)
1557 /* Some systems have no SO_PEERCRED implementation. They do not
1558 care about security so neither do we. */
/* With debugging enabled, query the peer credentials of FD through
   SO_PEERCRED. */
1563 if (__builtin_expect (debug_level
> 0, 0))
1565 struct ucred caller
;
1566 socklen_t optlen
= sizeof (caller
);
1568 if (getsockopt (fd
, SOL_SOCKET
, SO_PEERCRED
, &caller
, &optlen
) == 0)
/* PID is fixed at 0 here.  NOTE(review): presumably this is the variant
   used when peer credentials are unavailable -- confirm against the
   preprocessor conditionals, which are not visible here. */
1572 const pid_t pid
= 0;
1575 /* It should not be possible to crash the nscd with a silly
1576 request (i.e., a terribly large key). We limit the size to 1kb. */
/* Reject negative key lengths as well as lengths above MAXKEYLEN, the
   size of KEYBUF below. */
1577 if (__builtin_expect (req
.key_len
, 1) < 0
1578 || __builtin_expect (req
.key_len
, 1) > MAXKEYLEN
)
1580 if (debug_level
> 0)
1581 dbg_log (_("key length in request too long: %d"), req
.key_len
);
1586 char keybuf
[MAXKEYLEN
];
1588 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd
, keybuf
,
1592 /* Again, this can also mean we would have blocked. */
1593 if (debug_level
> 0)
1594 dbg_log (_("short read while reading request key: %s"),
1595 strerror_r (errno
, buf
, sizeof (buf
)));
1599 if (__builtin_expect (debug_level
, 0) > 0)
1604 handle_request: request received (Version = %d) from PID %ld"),
1605 req
.version
, (long int) pid
);
1609 handle_request: request received (Version = %d)"), req
.version
);
1612 /* Phew, we got all the data, now process it. */
1613 handle_request (fd
, &req
, keybuf
, uid
, pid
);
/* Take readylist_lock again before the next pass over the ready list. */
1621 pthread_mutex_lock (&readylist_lock
);
1623 /* One more thread available. */
/* Number of slots in FDLIST; it bounds the free-slot search below and is
   the element count used when FDLIST and STARTTIME are allocated. */
1629 static unsigned int nconns
;
/* Queue descriptor FD on the ready list and wake a worker thread.
   NOTE(review): the signature line is not visible here; the call sites
   in the main loops suggest this is the body of fd_ready (int fd) --
   confirm.  The list is modified with readylist_lock held. */
1634 pthread_mutex_lock (&readylist_lock
);
1636 /* Find an empty entry in FDLIST. */
/* A slot counts as empty when its next pointer is NULL. */
1638 for (inner
= 0; inner
< nconns
; ++inner
)
1639 if (fdlist
[inner
].next
== NULL
)
1641 assert (inner
< nconns
);
1643 fdlist
[inner
].fd
= fd
;
/* Link the slot into the circular ready list.  In both cases READYLIST
   ends up pointing at the newly added entry; in an empty list the entry
   points at itself. */
1645 if (readylist
== NULL
)
1646 readylist
= fdlist
[inner
].next
= &fdlist
[inner
];
1649 fdlist
[inner
].next
= readylist
->next
;
1650 readylist
= readylist
->next
= &fdlist
[inner
];
1653 bool do_signal
= true;
/* NOTE(review): nready == 0 presumably means that no worker is currently
   waiting for work -- confirm against the places that update nready. */
1654 if (__builtin_expect (nready
== 0, 0))
1659 /* Try to start another thread to help out. */
/* A new worker is only created while nthreads is below max_nthreads;
   the current value of nthreads is passed as its start argument. */
1661 if (nthreads
< max_nthreads
1662 && pthread_create (&th
, &attr
, nscd_run_worker
,
1663 (void *) (long int) nthreads
) == 0)
1665 /* We got another thread. */
1667 /* The new thread might need a kick. */
1673 pthread_mutex_unlock (&readylist_lock
);
1675 /* Tell one of the worker threads there is work to do. */
1677 pthread_cond_signal (&readylist_cond
);
1681 /* Check whether restarting should happen.
   NOW is the current time as obtained by the callers from time.  The
   result is true only if all of the following hold: paranoia is set,
   READYLIST is empty, nready equals nthreads (presumably: every worker
   thread is idle -- confirm), and NOW has reached restart_time. */
1683 restart_p (time_t now
)
1685 return (paranoia
&& readylist
== NULL
&& nready
== nthreads
1686 && now
>= restart_time
);
1690 /* Array for times a connection was accepted. */
1691 static time_t *starttime
;
/* Main-thread loop built on poll.  Slot 0 of CONNS is polled for
   POLLRDNORM; when it reports an event a connection is accepted on SOCK.
   The remaining slots hold accepted connections, with STARTTIME indexed
   by the same slot number.  Descriptors that report an event are passed
   to fd_ready, and connections accepted more than ACCEPT_TIMEOUT seconds
   ago are closed.  The function is declared __noreturn__. */
1695 __attribute__ ((__noreturn__
))
1696 main_loop_poll (void)
1698 struct pollfd
*conns
= (struct pollfd
*) xmalloc (nconns
1699 * sizeof (conns
[0]));
1702 conns
[0].events
= POLLRDNORM
;
/* Index of the lowest slot considered free; slot 0 is never handed out. */
1704 size_t firstfree
= 1;
1708 /* Wait for any event. We wait at most a couple of seconds so
1709 that we can check whether we should close any of the accepted
1710 connections since we have not received a request. */
1711 #define MAX_ACCEPT_TIMEOUT 30
1712 #define MIN_ACCEPT_TIMEOUT 5
1713 #define MAIN_THREAD_TIMEOUT \
1714 (MAX_ACCEPT_TIMEOUT * 1000 \
1715 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
/* The timeout is passed to poll, which takes milliseconds, hence the
   factors of 1000.  With the constants above it is close to 30 s for
   small nused and drops to 17.5 s when nused equals nconns. */
1717 int n
= poll (conns
, nused
, MAIN_THREAD_TIMEOUT
);
1719 time_t now
= time (NULL
);
1721 /* If there is a descriptor ready for reading or there is a new
1722 connection, process this now. */
1725 if (conns
[0].revents
!= 0)
1727 /* We have a new incoming connection. Accept the connection. */
1728 int fd
= TEMP_FAILURE_RETRY (accept (sock
, NULL
, NULL
));
1730 /* Use the descriptor if we have not reached the limit. */
1733 if (firstfree
< nconns
)
1735 conns
[firstfree
].fd
= fd
;
1736 conns
[firstfree
].events
= POLLRDNORM
;
1737 starttime
[firstfree
] = now
;
/* Grow the used range when the new slot lies beyond it. */
1738 if (firstfree
>= nused
)
1739 nused
= firstfree
+ 1;
/* Loop condition: keep going while FIRSTFREE is inside the used range
   and the slot it indexes is occupied (fd other than -1). */
1743 while (firstfree
< nused
&& conns
[firstfree
].fd
!= -1);
1746 /* We cannot use the connection so close it. */
/* Scan the accepted connections; slot 0 is skipped and the scan only
   continues while N is positive. */
1753 for (size_t cnt
= 1; cnt
< nused
&& n
> 0; ++cnt
)
1754 if (conns
[cnt
].revents
!= 0)
1756 fd_ready (conns
[cnt
].fd
);
1758 /* Clean up the CONNS array. */
1760 if (cnt
< firstfree
)
1762 if (cnt
== nused
- 1)
/* Loop condition: continue while the last slot of the used range is
   free (fd equal to -1). */
1765 while (conns
[nused
- 1].fd
== -1);
1771 /* Now find entries which have timed out. */
1774 /* We make the timeout length depend on the number of file
1775 descriptors currently used. */
1776 #define ACCEPT_TIMEOUT \
1777 (MAX_ACCEPT_TIMEOUT \
1778 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
/* LASTSTART is the oldest accept time still tolerated.  With the
   constants above ACCEPT_TIMEOUT shrinks from 30 towards 5 (seconds,
   since NOW comes from time) as nused approaches nconns. */
1779 time_t laststart
= now
- ACCEPT_TIMEOUT
;
/* Walk the accepted connections from the top of the used range down to
   slot 1. */
1781 for (size_t cnt
= nused
- 1; cnt
> 0; --cnt
)
1783 if (conns
[cnt
].fd
!= -1 && starttime
[cnt
] < laststart
)
1785 /* Remove the entry, it timed out. */
1786 (void) close (conns
[cnt
].fd
);
1789 if (cnt
< firstfree
)
1791 if (cnt
== nused
- 1)
1794 while (conns
[nused
- 1].fd
== -1);
1798 if (restart_p (now
))
/* Main-thread loop built on epoll, using the epoll descriptor EFD.
   Unlike the poll variant, STARTTIME is indexed directly by descriptor
   number here, and an entry of 0 means that no accepted connection is
   waiting on that descriptor.  HIGHEST is used as the upper bound when
   scanning STARTTIME for timed-out connections. */
1806 main_loop_epoll (int efd
)
1808 struct epoll_event ev
= { 0, };
1812 /* Add the socket. */
1813 ev
.events
= EPOLLRDNORM
;
1815 if (epoll_ctl (efd
, EPOLL_CTL_ADD
, sock
, &ev
) == -1)
1816 /* We cannot use epoll. */
/* At most 100 events are fetched per epoll_wait call. */
1821 struct epoll_event revs
[100];
1822 # define nrevs (sizeof (revs) / sizeof (revs[0]))
1824 int n
= epoll_wait (efd
, revs
, nrevs
, MAIN_THREAD_TIMEOUT
);
1826 time_t now
= time (NULL
);
1828 for (int cnt
= 0; cnt
< n
; ++cnt
)
1829 if (revs
[cnt
].data
.fd
== sock
)
1831 /* A new connection. */
1832 int fd
= TEMP_FAILURE_RETRY (accept (sock
, NULL
, NULL
));
1836 /* Try to add the new descriptor. */
1839 || epoll_ctl (efd
, EPOLL_CTL_ADD
, fd
, &ev
) == -1)
1840 /* The descriptor is too large or something went
1841 wrong. Close the descriptor. */
1845 /* Remember when we accepted the connection. */
1846 starttime
[fd
] = now
;
1857 /* Remove the descriptor from the epoll descriptor. */
1858 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, revs
[cnt
].data
.fd
, NULL
);
1860 /* Get a worker to handle the request. */
1861 fd_ready (revs
[cnt
].data
.fd
);
1863 /* Reset the time. */
1864 starttime
[revs
[cnt
].data
.fd
] = 0;
1865 if (revs
[cnt
].data
.fd
== highest
)
/* Loop condition: continue while HIGHEST is positive and its start time
   is 0, i.e. no connection is waiting on it. */
1868 while (highest
> 0 && starttime
[highest
] == 0);
1873 /* Now look for descriptors of accepted connections which have
1874 sent nothing for too long. */
1875 time_t laststart
= now
- ACCEPT_TIMEOUT
;
/* Scan downwards from HIGHEST and stop above STDERR_FILENO; SOCK itself
   is always skipped. */
1876 for (int cnt
= highest
; cnt
> STDERR_FILENO
; --cnt
)
1877 if (cnt
!= sock
&& starttime
[cnt
] != 0 && starttime
[cnt
] < laststart
)
1879 /* We are waiting for this one for too long. Close it. */
1880 (void) epoll_ctl (efd
, EPOLL_CTL_DEL
, cnt
, NULL
);
1888 else if (cnt
!= sock
&& starttime
[cnt
] == 0 && cnt
== highest
)
1891 if (restart_p (now
))
1898 /* Start all the threads we want. The initial process is thread no. 1.
   Steps visible here: select CLOCK_MONOTONIC as timeout_clock when it
   is usable for condition variables, set up the shared thread attribute
   ATTR (detached, NSCD_THREAD_STACKSIZE stack), initialize the prune
   condition variable of every database and start nscd_run_prune threads,
   start NTHREADS nscd_run_worker threads, allocate FDLIST and STARTTIME
   with NCONNS entries each, and finally enter main_loop_epoll. */
1900 start_threads (void)
1902 /* Initialize the condition variable we will use. The only
1903 non-standard attribute we might use is the clock selection. */
1904 pthread_condattr_t condattr
;
1905 pthread_condattr_init (&condattr
);
1907 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
1908 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
1909 /* Determine whether the monotonic clock is available. */
1910 struct timespec dummy
;
1911 # if _POSIX_MONOTONIC_CLOCK == 0
1912 if (sysconf (_SC_MONOTONIC_CLOCK
) > 0)
1914 # if _POSIX_CLOCK_SELECTION == 0
1915 if (sysconf (_SC_CLOCK_SELECTION
) > 0)
/* Only switch timeout_clock when the clock can be queried and the
   condition attribute accepts it. */
1917 if (clock_getres (CLOCK_MONOTONIC
, &dummy
) == 0
1918 && pthread_condattr_setclock (&condattr
, CLOCK_MONOTONIC
) == 0)
1919 timeout_clock
= CLOCK_MONOTONIC
;
1922 /* Create the attribute for the threads. They are all created
1924 pthread_attr_init (&attr
);
1925 pthread_attr_setdetachstate (&attr
, PTHREAD_CREATE_DETACHED
);
1926 /* Use 1MB stacks, twice as much for 64-bit architectures. */
1927 pthread_attr_setstacksize (&attr
, NSCD_THREAD_STACKSIZE
);
1929 /* We allow less than LASTDB threads only for debugging. */
1930 if (debug_level
== 0)
1931 nthreads
= MAX (nthreads
, lastdb
);
1933 /* Create the threads which prune the databases. */
1934 // XXX Ideally this work would be done by some of the worker threads.
1935 // XXX But this is problematic since we would need to be able to wake
1936 // XXX them up explicitly as well as part of the group handling the
1937 // XXX ready-list. This requires an operation where we can wait on
1938 // XXX two condition variables at the same time. This operation
1939 // XXX does not exist (yet).
1940 for (long int i
= 0; i
< lastdb
; ++i
)
1942 /* Initialize the condition variable. */
1943 if (pthread_cond_init (&dbs
[i
].prune_cond
, &condattr
) != 0)
1945 dbg_log (_("could not initialize conditional variable"));
/* The database index I is passed to the prune thread as its start
   argument.  The first half of this condition is not visible here. */
1951 && pthread_create (&th
, &attr
, nscd_run_prune
, (void *) i
) != 0)
1953 dbg_log (_("could not start clean-up thread; terminating"));
1958 pthread_condattr_destroy (&condattr
);
/* Worker threads are started with a NULL start argument. */
1960 for (long int i
= 0; i
< nthreads
; ++i
)
1963 if (pthread_create (&th
, &attr
, nscd_run_worker
, NULL
) != 0)
1967 dbg_log (_("could not start any worker thread; terminating"));
1975 /* Determine how much room for descriptors we should initially
1976 allocate. This might need to change later if we cap the number
1978 const long int nfds
= sysconf (_SC_OPEN_MAX
);
1980 #define MAXCONN 16384
1981 if (nfds
== -1 || nfds
> MAXCONN
)
1983 else if (nfds
< MINCONN
)
1988 /* We need memory to pass descriptors on to the worker threads. */
1989 fdlist
= (struct fdlist
*) xcalloc (nconns
, sizeof (fdlist
[0]));
1990 /* Array to keep track when connection was accepted. */
1991 starttime
= (time_t *) xcalloc (nconns
, sizeof (starttime
[0]));
1993 /* In the main thread we execute the loop which handles incoming
1996 int efd
= epoll_create (100);
1999 main_loop_epoll (efd
);
2008 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2009 this function is called, we are not listening on the nscd socket yet so
2010 we can just use the ordinary lookup functions without causing a lockup.

   The results are stored in server_uid, server_gid, server_groups and
   server_ngroups; the ids of the calling process are saved in old_uid
   and old_gid.  Failures are reported through dbg_log and then through
   error with EXIT_FAILURE as the status argument. */
2012 begin_drop_privileges (void)
2014 struct passwd
*pwd
= getpwnam (server_user
);
2018 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2019 error (EXIT_FAILURE
, 0, _("Failed to run nscd as user '%s'"),
2023 server_uid
= pwd
->pw_uid
;
2024 server_gid
= pwd
->pw_gid
;
2026 /* Save the old UID/GID if we have to change back. */
2029 old_uid
= getuid ();
2030 old_gid
= getgid ();
/* First call: no buffer is supplied.  Presumably this makes getgrouplist
   store the required number of groups in server_ngroups -- see the
   getgrouplist documentation.  A return value of 0 is treated as an
   error here. */
2033 if (getgrouplist (server_user
, server_gid
, NULL
, &server_ngroups
) == 0)
2035 /* This really must never happen. */
2036 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2037 error (EXIT_FAILURE
, errno
, _("initial getgrouplist failed"));
/* Allocate room for server_ngroups group ids, then repeat the call to
   fill the array. */
2040 server_groups
= (gid_t
*) xmalloc (server_ngroups
* sizeof (gid_t
));
2042 if (getgrouplist (server_user
, server_gid
, server_groups
, &server_ngroups
)
2045 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2046 error (EXIT_FAILURE
, errno
, _("getgrouplist failed"));
2051 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2052 run nscd as the user specified in the configuration file. */
2054 finish_drop_privileges (void)
2056 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2057 /* We need to preserve the capabilities to connect to the audit daemon. */
2058 cap_t new_caps
= preserve_capabilities ();
2061 if (setgroups (server_ngroups
, server_groups
) == -1)
2063 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2064 error (EXIT_FAILURE
, errno
, _("setgroups failed"));
2069 res
= setresgid (server_gid
, server_gid
, old_gid
);
2071 res
= setgid (server_gid
);
2074 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2080 res
= setresuid (server_uid
, server_uid
, old_uid
);
2082 res
= setuid (server_uid
);
2085 dbg_log (_("Failed to run nscd as user '%s'"), server_user
);
2090 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2091 /* Remove the temporary capabilities. */
2092 install_real_capabilities (new_caps
);