hurd: Fix aligning signal stack pointer
[glibc.git] / hurd / hurdselect.c
blob9630cae47403bfa0a647c4c10d14f53a771d5ed8
1 /* Guts of both `select' and `poll' for Hurd.
2 Copyright (C) 1991-2023 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
19 #include <sys/time.h>
20 #include <sys/types.h>
21 #include <sys/poll.h>
22 #include <hurd.h>
23 #include <hurd/fd.h>
24 #include <hurd/io_request.h>
25 #include <mach_rpc.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <assert.h>
29 #include <stdint.h>
30 #include <limits.h>
31 #include <time.h>
32 #include <sysdep-cancel.h>
/* All user select types: the union of the read, write and urgent bits.  */
#define SELECT_ALL (SELECT_READ | SELECT_WRITE | SELECT_URG)

/* Used to record that a particular select rpc returned.  Must be distinct
   from SELECT_ALL (which better not have the high bit set, since this value
   is derived by shifting SELECT_ALL left by one).  */
#define SELECT_RETURNED ((SELECT_ALL << 1) & ~SELECT_ALL)

/* Used to record that an error was stored for a descriptor; this is the
   bit directly above SELECT_RETURNED.  */
#define SELECT_ERROR (SELECT_RETURNED << 1)
42 /* Check the first NFDS descriptors either in POLLFDS (if nonnnull) or in
43 each of READFDS, WRITEFDS, EXCEPTFDS that is nonnull. If TIMEOUT is not
44 NULL, time out after waiting the interval specified therein. Returns
45 the number of ready descriptors, or -1 for errors. */
46 int
47 _hurd_select (int nfds,
48 struct pollfd *pollfds,
49 fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
50 const struct timespec *timeout, const sigset_t *sigmask)
52 int i;
53 mach_port_t portset, sigport;
54 int got, ready;
55 error_t err;
56 fd_set rfds, wfds, xfds;
57 int firstfd, lastfd;
58 mach_msg_id_t reply_msgid;
59 mach_msg_timeout_t to;
60 struct timespec ts;
61 struct
63 struct hurd_userlink ulink;
64 struct hurd_fd *cell;
65 mach_port_t io_port;
66 int type;
67 mach_port_t reply_port;
68 int error;
69 } d[nfds];
70 sigset_t oset;
71 struct hurd_sigstate *ss = NULL;
73 if (nfds < 0 || (pollfds == NULL && nfds > FD_SETSIZE))
75 errno = EINVAL;
76 return -1;
79 #define IO_SELECT_REPLY_MSGID (21012 + 100) /* XXX */
80 #define IO_SELECT_TIMEOUT_REPLY_MSGID (21031 + 100) /* XXX */
82 if (timeout == NULL)
83 reply_msgid = IO_SELECT_REPLY_MSGID;
84 else
86 struct timespec now;
88 if (timeout->tv_sec < 0 || ! valid_nanoseconds (timeout->tv_nsec))
90 errno = EINVAL;
91 return -1;
94 err = __clock_gettime (CLOCK_REALTIME, &now);
95 if (err)
96 return -1;
98 ts.tv_sec = now.tv_sec + timeout->tv_sec;
99 ts.tv_nsec = now.tv_nsec + timeout->tv_nsec;
101 if (ts.tv_nsec >= 1000000000)
103 ts.tv_sec++;
104 ts.tv_nsec -= 1000000000;
107 if (ts.tv_sec < 0)
108 ts.tv_sec = LONG_MAX; /* XXX */
110 reply_msgid = IO_SELECT_TIMEOUT_REPLY_MSGID;
113 if (sigmask)
115 /* Add a port to the portset for the case when we get the signal even
116 before calling __mach_msg. */
118 sigport = __mach_reply_port ();
120 ss = _hurd_self_sigstate ();
121 _hurd_sigstate_lock (ss);
122 /* And tell the signal thread to message us when a signal arrives. */
123 ss->suspended = sigport;
124 _hurd_sigstate_unlock (ss);
126 if (__sigprocmask (SIG_SETMASK, sigmask, &oset))
128 _hurd_sigstate_lock (ss);
129 ss->suspended = MACH_PORT_NULL;
130 _hurd_sigstate_unlock (ss);
131 __mach_port_destroy (__mach_task_self (), sigport);
132 return -1;
135 else
136 sigport = MACH_PORT_NULL;
138 if (pollfds)
140 int error = 0;
141 /* Collect interesting descriptors from the user's `pollfd' array.
142 We do a first pass that reads the user's array before taking
143 any locks. The second pass then only touches our own stack,
144 and gets the port references. */
146 for (i = 0; i < nfds; ++i)
147 if (pollfds[i].fd >= 0)
149 int type = 0;
150 if (pollfds[i].events & POLLIN)
151 type |= SELECT_READ;
152 if (pollfds[i].events & POLLOUT)
153 type |= SELECT_WRITE;
154 if (pollfds[i].events & POLLPRI)
155 type |= SELECT_URG;
157 d[i].io_port = pollfds[i].fd;
158 d[i].type = type;
160 else
161 d[i].type = 0;
163 HURD_CRITICAL_BEGIN;
164 __mutex_lock (&_hurd_dtable_lock);
166 for (i = 0; i < nfds; ++i)
167 if (d[i].type != 0)
169 const int fd = (int) d[i].io_port;
171 if (fd < _hurd_dtablesize)
173 d[i].cell = _hurd_dtable[fd];
174 if (d[i].cell != NULL)
176 d[i].io_port = _hurd_port_get (&d[i].cell->port,
177 &d[i].ulink);
178 if (d[i].io_port != MACH_PORT_NULL)
179 continue;
183 /* Bogus descriptor, make it EBADF already. */
184 d[i].error = EBADF;
185 d[i].type = SELECT_ERROR;
186 error = 1;
189 __mutex_unlock (&_hurd_dtable_lock);
190 HURD_CRITICAL_END;
192 if (error)
194 /* Set timeout to 0. */
195 err = __clock_gettime (CLOCK_REALTIME, &ts);
196 if (err)
198 /* Really bad luck. */
199 err = errno;
200 HURD_CRITICAL_BEGIN;
201 __mutex_lock (&_hurd_dtable_lock);
202 while (i-- > 0)
203 if (d[i].type & ~SELECT_ERROR != 0)
204 _hurd_port_free (&d[i].cell->port, &d[i].ulink,
205 d[i].io_port);
206 __mutex_unlock (&_hurd_dtable_lock);
207 HURD_CRITICAL_END;
208 if (sigmask)
209 __sigprocmask (SIG_SETMASK, &oset, NULL);
210 errno = err;
211 return -1;
213 reply_msgid = IO_SELECT_TIMEOUT_REPLY_MSGID;
216 lastfd = i - 1;
217 firstfd = i == 0 ? lastfd : 0;
219 else
221 /* Collect interested descriptors from the user's fd_set arguments.
222 Use local copies so we can't crash from user bogosity. */
224 if (readfds == NULL)
225 FD_ZERO (&rfds);
226 else
227 rfds = *readfds;
228 if (writefds == NULL)
229 FD_ZERO (&wfds);
230 else
231 wfds = *writefds;
232 if (exceptfds == NULL)
233 FD_ZERO (&xfds);
234 else
235 xfds = *exceptfds;
237 HURD_CRITICAL_BEGIN;
238 __mutex_lock (&_hurd_dtable_lock);
240 /* Collect the ports for interesting FDs. */
241 firstfd = lastfd = -1;
242 for (i = 0; i < nfds; ++i)
244 int type = 0;
245 if (readfds != NULL && FD_ISSET (i, &rfds))
246 type |= SELECT_READ;
247 if (writefds != NULL && FD_ISSET (i, &wfds))
248 type |= SELECT_WRITE;
249 if (exceptfds != NULL && FD_ISSET (i, &xfds))
250 type |= SELECT_URG;
251 d[i].type = type;
252 if (type)
254 if (i < _hurd_dtablesize)
256 d[i].cell = _hurd_dtable[i];
257 if (d[i].cell != NULL)
258 d[i].io_port = _hurd_port_get (&d[i].cell->port,
259 &d[i].ulink);
261 if (i >= _hurd_dtablesize || d[i].cell == NULL ||
262 d[i].io_port == MACH_PORT_NULL)
264 /* If one descriptor is bogus, we fail completely. */
265 while (i-- > 0)
266 if (d[i].type != 0)
267 _hurd_port_free (&d[i].cell->port, &d[i].ulink,
268 d[i].io_port);
269 break;
271 lastfd = i;
272 if (firstfd == -1)
273 firstfd = i;
277 __mutex_unlock (&_hurd_dtable_lock);
278 HURD_CRITICAL_END;
280 if (i < nfds)
282 if (sigmask)
283 __sigprocmask (SIG_SETMASK, &oset, NULL);
284 errno = EBADF;
285 return -1;
288 if (nfds > _hurd_dtablesize)
289 nfds = _hurd_dtablesize;
293 err = 0;
294 got = 0;
296 /* Send them all io_select request messages. */
298 if (firstfd == -1)
300 if (sigport == MACH_PORT_NULL)
301 /* But not if there were no ports to deal with at all.
302 We are just a pure timeout. */
303 portset = __mach_reply_port ();
304 else
305 portset = sigport;
307 else
309 portset = MACH_PORT_NULL;
311 for (i = firstfd; i <= lastfd; ++i)
312 if (!(d[i].type & ~SELECT_ERROR))
313 d[i].reply_port = MACH_PORT_NULL;
314 else
316 int type = d[i].type;
317 d[i].reply_port = __mach_reply_port ();
318 if (timeout == NULL)
319 err = __io_select_request (d[i].io_port, d[i].reply_port, type);
320 else
321 err = __io_select_timeout_request (d[i].io_port, d[i].reply_port,
322 ts, type);
323 if (!err)
325 if (firstfd == lastfd && sigport == MACH_PORT_NULL)
326 /* When there's a single descriptor, we don't need a
327 portset, so just pretend we have one, but really
328 use the single reply port. */
329 portset = d[i].reply_port;
330 else if (got == 0)
331 /* We've got multiple reply ports, so we need a port set to
332 multiplex them. */
334 /* We will wait again for a reply later. */
335 if (portset == MACH_PORT_NULL)
336 /* Create the portset to receive all the replies on. */
337 err = __mach_port_allocate (__mach_task_self (),
338 MACH_PORT_RIGHT_PORT_SET,
339 &portset);
340 if (! err)
341 /* Put this reply port in the port set. */
342 __mach_port_move_member (__mach_task_self (),
343 d[i].reply_port, portset);
346 else
348 /* No error should happen, but record it for later
349 processing. */
350 d[i].error = err;
351 d[i].type |= SELECT_ERROR;
352 ++got;
354 _hurd_port_free (&d[i].cell->port, &d[i].ulink, d[i].io_port);
357 if (got == 0 && sigport != MACH_PORT_NULL)
359 if (portset == MACH_PORT_NULL)
360 /* Create the portset to receive the signal message on. */
361 __mach_port_allocate (__mach_task_self (), MACH_PORT_RIGHT_PORT_SET,
362 &portset);
363 /* Put the signal reply port in the port set. */
364 __mach_port_move_member (__mach_task_self (), sigport, portset);
368 /* GOT is the number of replies (or errors), while READY is the number of
369 replies with at least one type bit set. */
370 ready = 0;
372 /* Now wait for reply messages. */
373 if (!err && got == 0)
375 /* Now wait for io_select_reply messages on PORT,
376 timing out as appropriate. */
378 union
380 mach_msg_header_t head;
381 #ifdef MACH_MSG_TRAILER_MINIMUM_SIZE
382 struct
384 mach_msg_header_t head;
385 NDR_record_t ndr;
386 error_t err;
387 } error;
388 struct
390 mach_msg_header_t head;
391 NDR_record_t ndr;
392 error_t err;
393 int result;
394 mach_msg_trailer_t trailer;
395 } success;
396 #else
397 struct
399 mach_msg_header_t head;
400 mach_msg_type_t err_type;
401 error_t err;
402 } error;
403 struct
405 mach_msg_header_t head;
406 mach_msg_type_t err_type;
407 error_t err;
408 mach_msg_type_t result_type;
409 int result;
410 } success;
411 #endif
412 } msg;
413 mach_msg_option_t options;
414 error_t msgerr;
416 /* We rely on servers to implement the timeout, but when there are none,
417 do it on the client side. */
418 if (timeout != NULL && firstfd == -1)
420 options = MACH_RCV_TIMEOUT;
421 to = timeout->tv_sec * 1000 + (timeout->tv_nsec + 999999) / 1000000;
423 else
425 options = 0;
426 to = MACH_MSG_TIMEOUT_NONE;
429 int cancel_oldtype = LIBC_CANCEL_ASYNC();
430 while ((msgerr = __mach_msg (&msg.head,
431 MACH_RCV_MSG | MACH_RCV_INTERRUPT | options,
432 0, sizeof msg, portset, to,
433 MACH_PORT_NULL)) == MACH_MSG_SUCCESS)
435 LIBC_CANCEL_RESET (cancel_oldtype);
437 /* We got a message. Decode it. */
438 #ifdef MACH_MSG_TYPE_BIT
439 static const mach_msg_type_t inttype = {
440 .msgt_name = MACH_MSG_TYPE_INTEGER_T,
441 .msgt_size = sizeof (integer_t) * 8,
442 .msgt_number = 1,
443 .msgt_inline = TRUE,
444 .msgt_longform = FALSE,
445 .msgt_deallocate = FALSE,
446 .msgt_unused = 0
448 #endif
450 if (sigport != MACH_PORT_NULL && sigport == msg.head.msgh_local_port)
452 /* We actually got interrupted by a signal before
453 __mach_msg; poll for further responses and then
454 return quickly. */
455 err = EINTR;
456 goto poll;
459 if (msg.head.msgh_id == reply_msgid
460 && msg.head.msgh_size >= sizeof msg.error
461 && !(msg.head.msgh_bits & MACH_MSGH_BITS_COMPLEX)
462 #ifdef MACH_MSG_TYPE_BIT
463 && !BAD_TYPECHECK (&msg.error.err_type, &inttype)
464 #endif
467 /* This is a properly formatted message so far.
468 See if it is a success or a failure. */
469 if (msg.error.err == EINTR
470 && msg.head.msgh_size == sizeof msg.error)
472 /* EINTR response; poll for further responses
473 and then return quickly. */
474 err = EINTR;
475 goto poll;
477 /* Keep in mind msg.success.result can be 0 if a timeout
478 occurred. */
479 if (msg.error.err
480 #ifdef MACH_MSG_TYPE_BIT
481 || BAD_TYPECHECK (&msg.success.result_type, &inttype)
482 #endif
483 || msg.head.msgh_size != sizeof msg.success)
485 /* Error or bogus reply. */
486 if (!msg.error.err)
487 msg.error.err = EIO;
488 __mach_msg_destroy (&msg.head);
491 /* Look up the respondent's reply port and record its
492 readiness. */
494 int had = got;
495 if (firstfd != -1)
496 for (i = firstfd; i <= lastfd; ++i)
497 if (d[i].type
498 && d[i].reply_port == msg.head.msgh_local_port)
500 if (msg.error.err)
502 d[i].error = msg.error.err;
503 d[i].type = SELECT_ERROR;
504 ++ready;
506 else
508 d[i].type &= msg.success.result;
509 if (d[i].type)
510 ++ready;
513 d[i].type |= SELECT_RETURNED;
514 ++got;
516 assert (got > had);
520 if (msg.head.msgh_remote_port != MACH_PORT_NULL)
521 __mach_port_deallocate (__mach_task_self (),
522 msg.head.msgh_remote_port);
524 if (got)
525 poll:
527 /* Poll for another message. */
528 to = 0;
529 options |= MACH_RCV_TIMEOUT;
532 LIBC_CANCEL_RESET (cancel_oldtype);
534 if (msgerr == MACH_RCV_INTERRUPTED)
535 /* Interruption on our side (e.g. signal reception). */
536 err = EINTR;
538 if (ready)
539 /* At least one descriptor is known to be ready now, so we will
540 return success. */
541 err = 0;
544 if (firstfd != -1)
545 for (i = firstfd; i <= lastfd; ++i)
546 if (d[i].reply_port != MACH_PORT_NULL)
547 __mach_port_destroy (__mach_task_self (), d[i].reply_port);
549 if (sigport != MACH_PORT_NULL)
551 _hurd_sigstate_lock (ss);
552 ss->suspended = MACH_PORT_NULL;
553 _hurd_sigstate_unlock (ss);
554 __mach_port_destroy (__mach_task_self (), sigport);
557 if ((firstfd == -1 && sigport == MACH_PORT_NULL)
558 || ((firstfd != lastfd || sigport != MACH_PORT_NULL) && portset != MACH_PORT_NULL))
559 /* Destroy PORTSET, but only if it's not actually the reply port for a
560 single descriptor (in which case it's destroyed in the previous loop;
561 not doing it here is just a bit more efficient). */
562 __mach_port_destroy (__mach_task_self (), portset);
564 if (err)
566 if (sigmask)
567 __sigprocmask (SIG_SETMASK, &oset, NULL);
568 return __hurd_fail (err);
571 if (pollfds)
572 /* Fill in the `revents' members of the user's array. */
573 for (i = 0; i < nfds; ++i)
575 int type = d[i].type;
576 int revents = 0;
578 if (type & SELECT_ERROR)
579 switch (d[i].error)
581 case EPIPE:
582 revents = POLLHUP;
583 break;
584 case EBADF:
585 revents = POLLNVAL;
586 break;
587 default:
588 revents = POLLERR;
589 break;
591 else
592 if (type & SELECT_RETURNED)
594 if (type & SELECT_READ)
595 revents |= POLLIN;
596 if (type & SELECT_WRITE)
597 revents |= POLLOUT;
598 if (type & SELECT_URG)
599 revents |= POLLPRI;
602 pollfds[i].revents = revents;
604 else
606 /* Below we recalculate READY to include an increment for each operation
607 allowed on each fd. */
608 ready = 0;
610 /* Set the user bitarrays. We only ever have to clear bits, as all
611 desired ones are initially set. */
612 if (firstfd != -1)
613 for (i = firstfd; i <= lastfd; ++i)
615 int type = d[i].type;
617 if ((type & SELECT_RETURNED) == 0)
618 type = 0;
620 /* Callers of select don't expect to see errors, so we simulate
621 readiness of the erring object and the next call hopefully
622 will get the error again. */
623 if (type & SELECT_ERROR)
625 type = 0;
626 if (readfds != NULL && FD_ISSET (i, readfds))
627 type |= SELECT_READ;
628 if (writefds != NULL && FD_ISSET (i, writefds))
629 type |= SELECT_WRITE;
630 if (exceptfds != NULL && FD_ISSET (i, exceptfds))
631 type |= SELECT_URG;
634 if (type & SELECT_READ)
635 ready++;
636 else if (readfds)
637 FD_CLR (i, readfds);
638 if (type & SELECT_WRITE)
639 ready++;
640 else if (writefds)
641 FD_CLR (i, writefds);
642 if (type & SELECT_URG)
643 ready++;
644 else if (exceptfds)
645 FD_CLR (i, exceptfds);
649 if (sigmask && __sigprocmask (SIG_SETMASK, &oset, NULL))
650 return -1;
652 return ready;