* nscd/nscd_helper.c (open_socket): Use SOCK_CLOEXEC and
[glibc.git] / nscd / nscd_helper.c
blob7db5d098456bd6b3e84cf18623a4c1d62173419b
1 /* Copyright (C) 1998-2007, 2008 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307 USA. */
20 #include <assert.h>
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <stdbool.h>
24 #include <string.h>
25 #include <time.h>
26 #include <unistd.h>
27 #include <sys/mman.h>
28 #include <sys/poll.h>
29 #include <sys/socket.h>
30 #include <sys/stat.h>
31 #include <sys/time.h>
32 #include <sys/uio.h>
33 #include <sys/un.h>
34 #include <not-cancel.h>
35 #include <nis/rpcsvc/nis.h>
36 #include <kernel-features.h>
38 #include "nscd-client.h"
/* Additional grace period, in milliseconds, granted when a socket is
   still in the middle of delivering data.  The peer is nscd running on
   the local machine and it has already started transmitting, so a
   short wait is sufficient.  */
#define EXTRA_RECEIVE_TIME 200
/* Wait until SOCK is readable, has an error pending, or was hung up.

   USECTMO is handed to poll() unchanged, i.e. despite its name it is
   interpreted as a number of milliseconds; a negative value keeps its
   poll() meaning of "no timeout".

   Returns whatever the last poll() call returned: a positive count if
   an event is pending, 0 on timeout, -1 on error (errno set).  */
static int
wait_on_socket (int sock, long int usectmo)
{
  struct pollfd fds[1];
  fds[0].fd = sock;
  fds[0].events = POLLIN | POLLERR | POLLHUP;

  int n = __poll (fds, 1, usectmo);
  if (n == -1 && __builtin_expect (errno == EINTR, 0))
    {
      /* The poll() call was interrupted by a signal.  We cannot just
         use TEMP_FAILURE_RETRY since it might lead to infinite loops,
         so track how much of the timeout has been consumed.  */
      struct timeval start;
      (void) __gettimeofday (&start, NULL);
      long int timeout = usectmo;

      while (1)
        {
          n = __poll (fds, 1, timeout);
          if (n != -1 || errno != EINTR)
            break;

          /* Recompute the remaining time.  Working with the difference
             of the two time stamps avoids multiplying an absolute
             tv_sec value by 1000, which overflows a 32-bit long.  */
          struct timeval now;
          (void) __gettimeofday (&now, NULL);
          long int elapsed = ((now.tv_sec - start.tv_sec) * 1000
                              + ((now.tv_usec + 500) / 1000
                                 - (start.tv_usec + 500) / 1000));
          timeout = usectmo - elapsed;

          if (usectmo >= 0)
            {
              /* A negative value would make poll() wait forever once
                 the deadline has passed; a value above USECTMO can
                 only result from the clock being set back.  */
              if (timeout < 0)
                timeout = 0;
              else if (timeout > usectmo)
                timeout = usectmo;
            }
        }
    }

  return n;
}
80 ssize_t
81 __readall (int fd, void *buf, size_t len)
83 size_t n = len;
84 ssize_t ret;
87 again:
88 ret = TEMP_FAILURE_RETRY (__read (fd, buf, n));
89 if (ret <= 0)
91 if (__builtin_expect (ret < 0 && errno == EAGAIN, 0)
92 /* The socket is still receiving data. Wait a bit more. */
93 && wait_on_socket (fd, EXTRA_RECEIVE_TIME) > 0)
94 goto again;
96 break;
98 buf = (char *) buf + ret;
99 n -= ret;
101 while (n > 0);
102 return ret < 0 ? ret : len - n;
106 ssize_t
107 __readvall (int fd, const struct iovec *iov, int iovcnt)
109 ssize_t ret = TEMP_FAILURE_RETRY (__readv (fd, iov, iovcnt));
110 if (ret <= 0)
112 if (__builtin_expect (ret == 0 || errno != EAGAIN, 1))
113 /* A genuine error or no data to read. */
114 return ret;
116 /* The data has not all yet been received. Do as if we have not
117 read anything yet. */
118 ret = 0;
121 size_t total = 0;
122 for (int i = 0; i < iovcnt; ++i)
123 total += iov[i].iov_len;
125 if (ret < total)
127 struct iovec iov_buf[iovcnt];
128 ssize_t r = ret;
130 struct iovec *iovp = memcpy (iov_buf, iov, iovcnt * sizeof (*iov));
133 while (iovp->iov_len <= r)
135 r -= iovp->iov_len;
136 --iovcnt;
137 ++iovp;
139 iovp->iov_base = (char *) iovp->iov_base + r;
140 iovp->iov_len -= r;
141 again:
142 r = TEMP_FAILURE_RETRY (__readv (fd, iovp, iovcnt));
143 if (r <= 0)
145 if (__builtin_expect (r < 0 && errno == EAGAIN, 0)
146 /* The socket is still receiving data. Wait a bit more. */
147 && wait_on_socket (fd, EXTRA_RECEIVE_TIME) > 0)
148 goto again;
150 break;
152 ret += r;
154 while (ret < total);
155 if (r < 0)
156 ret = r;
158 return ret;
162 static int
163 open_socket (request_type type, const char *key, size_t keylen)
165 int sock;
167 #ifdef SOCK_CLOEXEC
168 # ifndef __ASSUME_SOCK_CLOEXEC
169 if (__have_sock_cloexec >= 0)
170 # endif
172 sock = __socket (PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
173 # ifndef __ASSUME_SOCK_CLOEXEC
174 if (__have_sock_cloexec == 0)
175 __have_sock_cloexec = sock != -1 || errno != EINVAL ? 1 : -1;
176 # endif
178 #endif
179 #ifndef __ASSUME_SOCK_CLOEXEC
180 # ifdef SOCK_CLOEXEC
181 if (__have_sock_cloexec < 0)
182 # endif
183 sock = __socket (PF_UNIX, SOCK_STREAM, 0);
184 #endif
185 if (sock < 0)
186 return -1;
188 struct
190 request_header req;
191 char key[keylen];
192 } reqdata;
193 size_t real_sizeof_reqdata = sizeof (request_header) + keylen;
195 #ifndef __ASSUME_SOCK_CLOEXEC
196 # ifdef SOCK_NONBLOCK
197 if (__have_sock_cloexec < 0)
198 # endif
199 /* Make socket non-blocking. */
200 __fcntl (sock, F_SETFL, O_RDWR | O_NONBLOCK);
201 #endif
203 struct sockaddr_un sun;
204 sun.sun_family = AF_UNIX;
205 strcpy (sun.sun_path, _PATH_NSCDSOCKET);
206 if (__connect (sock, (struct sockaddr *) &sun, sizeof (sun)) < 0
207 && errno != EINPROGRESS)
208 goto out;
210 reqdata.req.version = NSCD_VERSION;
211 reqdata.req.type = type;
212 reqdata.req.key_len = keylen;
214 memcpy (reqdata.key, key, keylen);
216 bool first_try = true;
217 struct timeval tvend;
218 /* Fake initializing tvend. */
219 asm ("" : "=m" (tvend));
220 while (1)
222 #ifndef MSG_NOSIGNAL
223 # define MSG_NOSIGNAL 0
224 #endif
225 ssize_t wres = TEMP_FAILURE_RETRY (__send (sock, &reqdata,
226 real_sizeof_reqdata,
227 MSG_NOSIGNAL));
228 if (__builtin_expect (wres == (ssize_t) real_sizeof_reqdata, 1))
229 /* We managed to send the request. */
230 return sock;
232 if (wres != -1 || errno != EAGAIN)
233 /* Something is really wrong, no chance to continue. */
234 break;
236 /* The daemon is busy wait for it. */
237 int to;
238 struct timeval now;
239 (void) __gettimeofday (&now, NULL);
240 if (first_try)
242 tvend.tv_usec = now.tv_usec;
243 tvend.tv_sec = now.tv_sec + 5;
244 to = 5 * 1000;
245 first_try = false;
247 else
248 to = ((tvend.tv_sec - now.tv_sec) * 1000
249 + (tvend.tv_usec - now.tv_usec) / 1000);
251 struct pollfd fds[1];
252 fds[0].fd = sock;
253 fds[0].events = POLLOUT | POLLERR | POLLHUP;
254 if (__poll (fds, 1, to) <= 0)
255 /* The connection timed out or broke down. */
256 break;
258 /* We try to write again. */
261 out:
262 close_not_cancel_no_status (sock);
264 return -1;
268 void
269 __nscd_unmap (struct mapped_database *mapped)
271 assert (mapped->counter == 0);
272 __munmap ((void *) mapped->head, mapped->mapsize);
273 free (mapped);
277 /* Try to get a file descriptor for the shared meory segment
278 containing the database. */
279 static struct mapped_database *
280 get_mapping (request_type type, const char *key,
281 struct mapped_database **mappedp)
283 struct mapped_database *result = NO_MAPPING;
284 #ifdef SCM_RIGHTS
285 const size_t keylen = strlen (key) + 1;
286 int saved_errno = errno;
288 int mapfd = -1;
289 char resdata[keylen];
291 /* Open a socket and send the request. */
292 int sock = open_socket (type, key, keylen);
293 if (sock < 0)
294 goto out;
296 /* Room for the data sent along with the file descriptor. We expect
297 the key name back. */
298 uint64_t mapsize;
299 struct iovec iov[2];
300 iov[0].iov_base = resdata;
301 iov[0].iov_len = keylen;
302 iov[1].iov_base = &mapsize;
303 iov[1].iov_len = sizeof (mapsize);
305 union
307 struct cmsghdr hdr;
308 char bytes[CMSG_SPACE (sizeof (int))];
309 } buf;
310 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
311 .msg_control = buf.bytes,
312 .msg_controllen = sizeof (buf) };
313 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
315 cmsg->cmsg_level = SOL_SOCKET;
316 cmsg->cmsg_type = SCM_RIGHTS;
317 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
319 /* This access is well-aligned since BUF is correctly aligned for an
320 int and CMSG_DATA preserves this alignment. */
321 *(int *) CMSG_DATA (cmsg) = -1;
323 msg.msg_controllen = cmsg->cmsg_len;
325 if (wait_on_socket (sock, 5 * 1000) <= 0)
326 goto out_close2;
328 # ifndef MSG_CMSG_CLOEXEC
329 # define MSG_CMSG_CLOEXEC 0
330 # endif
331 ssize_t n = TEMP_FAILURE_RETRY (__recvmsg (sock, &msg, MSG_CMSG_CLOEXEC));
333 if (__builtin_expect (CMSG_FIRSTHDR (&msg) == NULL
334 || (CMSG_FIRSTHDR (&msg)->cmsg_len
335 != CMSG_LEN (sizeof (int))), 0))
336 goto out_close2;
338 mapfd = *(int *) CMSG_DATA (cmsg);
340 if (__builtin_expect (n != keylen && n != keylen + sizeof (mapsize), 0))
341 goto out_close;
343 if (__builtin_expect (strcmp (resdata, key) != 0, 0))
344 goto out_close;
346 if (__builtin_expect (n == keylen, 0))
348 struct stat64 st;
349 if (__builtin_expect (fstat64 (mapfd, &st) != 0, 0)
350 || __builtin_expect (st.st_size < sizeof (struct database_pers_head),
352 goto out_close;
354 mapsize = st.st_size;
357 /* The file is large enough, map it now. */
358 void *mapping = __mmap (NULL, mapsize, PROT_READ, MAP_SHARED, mapfd, 0);
359 if (__builtin_expect (mapping != MAP_FAILED, 1))
361 /* Check whether the database is correct and up-to-date. */
362 struct database_pers_head *head = mapping;
364 if (__builtin_expect (head->version != DB_VERSION, 0)
365 || __builtin_expect (head->header_size != sizeof (*head), 0)
366 /* Catch some misconfiguration. The server should catch
367 them now but some older versions did not. */
368 || __builtin_expect (head->module == 0, 0)
369 /* This really should not happen but who knows, maybe the update
370 thread got stuck. */
371 || __builtin_expect (! head->nscd_certainly_running
372 && (head->timestamp + MAPPING_TIMEOUT
373 < time (NULL)), 0))
375 out_unmap:
376 __munmap (mapping, mapsize);
377 goto out_close;
380 size_t size = (sizeof (*head) + roundup (head->module * sizeof (ref_t),
381 ALIGN)
382 + head->data_size);
384 if (__builtin_expect (mapsize < size, 0))
385 goto out_unmap;
387 /* Allocate a record for the mapping. */
388 struct mapped_database *newp = malloc (sizeof (*newp));
389 if (newp == NULL)
390 /* Ugh, after all we went through the memory allocation failed. */
391 goto out_unmap;
393 newp->head = mapping;
394 newp->data = ((char *) mapping + head->header_size
395 + roundup (head->module * sizeof (ref_t), ALIGN));
396 newp->mapsize = size;
397 newp->datasize = head->data_size;
398 /* Set counter to 1 to show it is usable. */
399 newp->counter = 1;
401 result = newp;
404 out_close:
405 __close (mapfd);
406 out_close2:
407 __close (sock);
408 out:
409 __set_errno (saved_errno);
410 #endif /* SCM_RIGHTS */
412 struct mapped_database *oldval = *mappedp;
413 *mappedp = result;
415 if (oldval != NULL && atomic_decrement_val (&oldval->counter) == 0)
416 __nscd_unmap (oldval);
418 return result;
422 struct mapped_database *
423 __nscd_get_map_ref (request_type type, const char *name,
424 volatile struct locked_map_ptr *mapptr, int *gc_cyclep)
426 struct mapped_database *cur = mapptr->mapped;
427 if (cur == NO_MAPPING)
428 return cur;
430 int cnt = 0;
431 while (__builtin_expect (atomic_compare_and_exchange_val_acq (&mapptr->lock,
432 1, 0) != 0, 0))
434 // XXX Best number of rounds?
435 if (__builtin_expect (++cnt > 5, 0))
436 return NO_MAPPING;
438 atomic_delay ();
441 cur = mapptr->mapped;
443 if (__builtin_expect (cur != NO_MAPPING, 1))
445 /* If not mapped or timestamp not updated, request new map. */
446 if (cur == NULL
447 || (cur->head->nscd_certainly_running == 0
448 && cur->head->timestamp + MAPPING_TIMEOUT < time (NULL))
449 || cur->head->data_size > cur->datasize)
450 cur = get_mapping (type, name,
451 (struct mapped_database **) &mapptr->mapped);
453 if (__builtin_expect (cur != NO_MAPPING, 1))
455 if (__builtin_expect (((*gc_cyclep = cur->head->gc_cycle) & 1) != 0,
457 cur = NO_MAPPING;
458 else
459 atomic_increment (&cur->counter);
463 mapptr->lock = 0;
465 return cur;
469 /* Don't return const struct datahead *, as eventhough the record
470 is normally constant, it can change arbitrarily during nscd
471 garbage collection. */
472 struct datahead *
473 __nscd_cache_search (request_type type, const char *key, size_t keylen,
474 const struct mapped_database *mapped)
476 unsigned long int hash = __nis_hash (key, keylen) % mapped->head->module;
477 size_t datasize = mapped->datasize;
479 ref_t trail = mapped->head->array[hash];
480 ref_t work = trail;
481 int tick = 0;
483 while (work != ENDREF && work + sizeof (struct hashentry) <= datasize)
485 struct hashentry *here = (struct hashentry *) (mapped->data + work);
487 #ifndef _STRING_ARCH_unaligned
488 /* Although during garbage collection when moving struct hashentry
489 records around we first copy from old to new location and then
490 adjust pointer from previous hashentry to it, there is no barrier
491 between those memory writes. It is very unlikely to hit it,
492 so check alignment only if a misaligned load can crash the
493 application. */
494 if ((uintptr_t) here & (__alignof__ (*here) - 1))
495 return NULL;
496 #endif
498 if (type == here->type
499 && keylen == here->len
500 && here->key + keylen <= datasize
501 && memcmp (key, mapped->data + here->key, keylen) == 0
502 && here->packet + sizeof (struct datahead) <= datasize)
504 /* We found the entry. Increment the appropriate counter. */
505 struct datahead *dh
506 = (struct datahead *) (mapped->data + here->packet);
508 #ifndef _STRING_ARCH_unaligned
509 if ((uintptr_t) dh & (__alignof__ (*dh) - 1))
510 return NULL;
511 #endif
513 /* See whether we must ignore the entry or whether something
514 is wrong because garbage collection is in progress. */
515 if (dh->usable && here->packet + dh->allocsize <= datasize)
516 return dh;
519 work = here->next;
520 /* Prevent endless loops. This should never happen but perhaps
521 the database got corrupted, accidentally or deliberately. */
522 if (work == trail)
523 break;
524 if (tick)
526 struct hashentry *trailelem;
527 trailelem = (struct hashentry *) (mapped->data + trail);
529 #ifndef _STRING_ARCH_unaligned
530 /* We have to redo the checks. Maybe the data changed. */
531 if ((uintptr_t) trailelem & (__alignof__ (*trailelem) - 1))
532 return NULL;
533 #endif
534 trail = trailelem->next;
536 tick = 1 - tick;
539 return NULL;
543 /* Create a socket connected to a name. */
545 __nscd_open_socket (const char *key, size_t keylen, request_type type,
546 void *response, size_t responselen)
548 /* This should never happen and it is something the nscd daemon
549 enforces, too. He it helps to limit the amount of stack
550 used. */
551 if (keylen > MAXKEYLEN)
552 return -1;
554 int saved_errno = errno;
556 int sock = open_socket (type, key, keylen);
557 if (sock >= 0)
559 /* Wait for data. */
560 if (wait_on_socket (sock, 5 * 1000) > 0)
562 ssize_t nbytes = TEMP_FAILURE_RETRY (__read (sock, response,
563 responselen));
564 if (nbytes == (ssize_t) responselen)
565 return sock;
568 close_not_cancel_no_status (sock);
571 __set_errno (saved_errno);
573 return -1;