/* sem_waitcommon -- wait on a semaphore, shared code.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Paul Mackerras <paulus@au.ibm.com>, 2003.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
#include <errno.h>
#include <sysdep.h>
#include <lowlevellock.h>
#include <internaltypes.h>
#include <semaphore.h>
#include <sys/time.h>

#include <pthreadP.h>
#include <shlib-compat.h>
#include <atomic.h>

/* Wrapper for lll_futex_wait with absolute timeout and error checking.
   TODO Remove when cleaning up the futex API throughout glibc.  */
static __always_inline int
futex_abstimed_wait (unsigned int *futex, unsigned int expected,
                     const struct timespec *abstime, int private, bool cancel)
{
  int err, oldtype;
  if (abstime == NULL)
    {
      if (cancel)
        oldtype = __pthread_enable_asynccancel ();
      err = lll_futex_wait (futex, expected, private);
      if (cancel)
        __pthread_disable_asynccancel (oldtype);
    }
  else
    {
      struct timeval tv;
      struct timespec rt;
      int sec, nsec;

      /* Get the current time.  */
      __gettimeofday (&tv, NULL);

      /* Compute relative timeout.  */
      sec = abstime->tv_sec - tv.tv_sec;
      nsec = abstime->tv_nsec - tv.tv_usec * 1000;
      if (nsec < 0)
        {
          nsec += 1000000000;
          --sec;
        }

      /* Already timed out?  */
      if (sec < 0)
        return ETIMEDOUT;

      /* Do wait.  */
      rt.tv_sec = sec;
      rt.tv_nsec = nsec;
      if (cancel)
        oldtype = __pthread_enable_asynccancel ();
      err = lll_futex_timed_wait (futex, expected, &rt, private);
      if (cancel)
        __pthread_disable_asynccancel (oldtype);
    }
  switch (err)
    {
    case 0:
    case -EAGAIN:
    case -EINTR:
    case -ETIMEDOUT:
      return -err;

    case -EFAULT: /* Must have been caused by a glibc or application bug.  */
    case -EINVAL: /* Either due to wrong alignment or due to the timeout not
                     being normalized.  Must have been caused by a glibc or
                     application bug.  */
    case -ENOSYS: /* Must have been caused by a glibc bug.  */
    /* No other errors are documented at this time.  */
    default:
      abort ();
    }
}

/* Wrapper for lll_futex_wake, with error checking.
   TODO Remove when cleaning up the futex API throughout glibc.  */
static __always_inline void
futex_wake (unsigned int *futex, int processes_to_wake, int private)
{
  int res = lll_futex_wake (futex, processes_to_wake, private);
  /* No error.  Ignore the number of woken processes.  */
  if (res >= 0)
    return;
  switch (res)
    {
    case -EFAULT: /* Could have happened due to memory reuse.  */
    case -EINVAL: /* Could be either due to incorrect alignment (a bug in
                     glibc or in the application) or due to memory being
                     reused for a PI futex.  We cannot distinguish between the
                     two causes, and one of them is correct use, so we do not
                     act in this case.  */
      return;
    case -ENOSYS: /* Must have been caused by a glibc bug.  */
    /* No other errors are documented at this time.  */
    default:
      abort ();
    }
}

/* The semaphore provides two main operations: sem_post adds a token to the
   semaphore; sem_wait grabs a token from the semaphore, potentially waiting
   until there is a token available.  A sem_wait needs to synchronize with
   the sem_post that provided the token, so that whatever led to the sem_post
   happens before the code after sem_wait.

   Conceptually, available tokens can simply be counted; let's call that the
   value of the semaphore.  However, we also want to know whether there might
   be a sem_wait that is blocked on the value because it was zero (using a
   futex with the value being the futex variable); if there is no blocked
   sem_wait, sem_post does not need to execute a futex_wake call.  Therefore,
   we also need to count the number of potentially blocked sem_wait calls
   (which we call nwaiters).

   What makes this tricky is that POSIX requires that a semaphore can be
   destroyed as soon as the last remaining sem_wait has returned, and no
   other sem_wait or sem_post calls are executing concurrently.  However, the
   sem_post call whose token was consumed by the last sem_wait is considered
   to have finished once it provided the token to the sem_wait.
   Thus, sem_post must not access the semaphore struct anymore after it has
   made a token available; IOW, it needs to be able to atomically provide
   a token and check whether any blocked sem_wait calls might exist.

   This is straightforward to do if the architecture provides 64b atomics
   because we can just put both the value and nwaiters into one variable that
   we access atomically: this is the data field, with the value in the
   least-significant 32 bits and nwaiters in the remaining bits.  When
   sem_post makes a token available, it can atomically check nwaiters.

   If we have only 32b atomics available, we cannot put both nwaiters and
   value into one 32b value because then we might have too few bits for both
   of those counters.  Therefore, we need to use two distinct fields.

   To allow sem_post to atomically make a token available and check for
   blocked sem_wait calls, we use one bit in value to indicate whether
   nwaiters is nonzero.  That allows sem_post to use basically the same
   algorithm as with 64b atomics, but requires sem_wait to update the bit;
   sem_wait can't do this atomically with another access to nwaiters, but it
   can compute a conservative value for the bit because it's benign if the
   bit is set even though nwaiters is zero (all we get is an unnecessary
   futex_wake call by sem_post).
   Specifically, sem_wait will unset the bit speculatively if it believes that
   there is no other concurrently executing sem_wait.  If it misspeculated,
   it will have to clean up by waking any other sem_wait call (i.e., doing
   what sem_post would do otherwise).  This does not conflict with the
   destruction requirement because the semaphore must not be destroyed while
   any sem_wait is still executing.  */
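
/* For illustration, a minimal sketch of the posting side under the two
   layouts described above.  The real sem_post lives in sem_post.c and
   additionally checks for SEM_VALUE_MAX overflow and reports errors, so
   treat this only as a sketch of how a token can be provided and nwaiters
   checked in a single atomic step; the helper name below is hypothetical.  */
#if 0 /* Illustrative sketch; not compiled.  */
static void
sem_post_sketch (struct new_sem *sem)
{
#if __HAVE_64B_ATOMICS
  /* Provide one token; release MO so the waiter that grabs it synchronizes
     with everything that happened before this post.  The same RMW tells us
     atomically whether any waiter might be blocked.  */
  uint64_t d = atomic_fetch_add_release (&sem->data, 1);
  if ((d >> SEM_NWAITERS_SHIFT) > 0)
    futex_wake ((unsigned int *) &sem->data + SEM_VALUE_OFFSET, 1,
                sem->private);
#else
  /* Provide one token in the value field; the nwaiters bit in the same word
     tells us atomically whether a wake-up might be needed.  */
  unsigned int v = atomic_fetch_add_release (&sem->value,
                                             1 << SEM_VALUE_SHIFT);
  if ((v & SEM_NWAITERS_MASK) != 0)
    futex_wake (&sem->value, 1, sem->private);
#endif
}
#endif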

/* Set this to true if you assume that, in contrast to current Linux futex
   documentation, lll_futex_wait can return -EINTR only if interrupted by a
   signal, not spuriously due to some other reason.
   TODO Discuss EINTR conditions with the Linux kernel community.  For
   now, we set this to true to not change behavior of semaphores compared
   to previous glibc builds.  */
static const int sem_assume_only_signals_cause_futex_EINTR = 1;

#if !__HAVE_64B_ATOMICS
static void
__sem_wait_32_finish (struct new_sem *sem);
#endif

static void
__sem_wait_cleanup (void *arg)
{
  struct new_sem *sem = (struct new_sem *) arg;

#if __HAVE_64B_ATOMICS
  /* Stop being registered as a waiter.  See below for MO.  */
  atomic_fetch_add_relaxed (&sem->data, -((uint64_t) 1 << SEM_NWAITERS_SHIFT));
#else
  __sem_wait_32_finish (sem);
#endif
}

/* Wait until at least one token is available, possibly with a timeout.
   This is in a separate function in order to make sure gcc
   puts the call site into an exception region, and thus the
   cleanups get properly run.  TODO still necessary?  Other futex_wait
   users don't seem to need it.  */
static int
__attribute__ ((noinline))
do_futex_wait (struct new_sem *sem, const struct timespec *abstime)
{
  int err;

#if __HAVE_64B_ATOMICS
  err = futex_abstimed_wait ((unsigned int *) &sem->data + SEM_VALUE_OFFSET, 0,
                             abstime, sem->private, true);
#else
  err = futex_abstimed_wait (&sem->value, SEM_NWAITERS_MASK, abstime,
                             sem->private, true);
#endif

  return err;
}

/* Fast path: Try to grab a token without blocking.  */
static int
__new_sem_wait_fast (struct new_sem *sem, int definitive_result)
{
  /* We need acquire MO if we actually grab a token, so that this
     synchronizes with all token providers (i.e., the RMW operation we read
     from or all those before it in modification order; also see sem_post).
     We do not need to guarantee any ordering if we observed that there is
     no token (POSIX leaves it unspecified whether functions that fail
     synchronize memory); thus, relaxed MO is sufficient for the initial load
     and the failure path of the CAS.  If the weak CAS fails and we need a
     definitive result, retry.  */
#if __HAVE_64B_ATOMICS
  uint64_t d = atomic_load_relaxed (&sem->data);
  do
    {
      if ((d & SEM_VALUE_MASK) == 0)
        break;
      if (atomic_compare_exchange_weak_acquire (&sem->data, &d, d - 1))
        return 0;
    }
  while (definitive_result);
  return -1;
#else
  unsigned int v = atomic_load_relaxed (&sem->value);
  do
    {
      if ((v >> SEM_VALUE_SHIFT) == 0)
        break;
      if (atomic_compare_exchange_weak_acquire (&sem->value,
          &v, v - (1 << SEM_VALUE_SHIFT)))
        return 0;
    }
  while (definitive_result);
  return -1;
#endif
}

/* Slow path that blocks.  */
static int
__attribute__ ((noinline))
__new_sem_wait_slow (struct new_sem *sem, const struct timespec *abstime)
{
  int err = 0;

#if __HAVE_64B_ATOMICS
  /* Add a waiter.  Relaxed MO is sufficient because we can rely on the
     ordering provided by the RMW operations we use.  */
  uint64_t d = atomic_fetch_add_relaxed (&sem->data,
      (uint64_t) 1 << SEM_NWAITERS_SHIFT);

  pthread_cleanup_push (__sem_wait_cleanup, sem);

  /* Wait for a token to be available.  Retry until we can grab one.  */
  for (;;)
    {
      /* If there is no token available, sleep until there is.  */
      if ((d & SEM_VALUE_MASK) == 0)
        {
          err = do_futex_wait (sem, abstime);
          /* A futex return value of 0 or EAGAIN is due to a real or spurious
             wake-up, or due to a change in the number of tokens.  We retry in
             these cases.
             If we timed out, forward this to the caller.
             EINTR could be either due to being interrupted by a signal, or
             due to a spurious wake-up.  Thus, we cannot distinguish between
             both, and are not allowed to return EINTR to the caller but have
             to retry; this is because we may not have been interrupted by a
             signal.  However, if we assume that only signals cause a futex
             return of EINTR, we forward EINTR to the caller.

             Retrying on EINTR is technically always allowed because to
             reliably interrupt sem_wait with a signal, the signal handler
             must call sem_post (which is AS-Safe).  In executions where the
             signal handler does not do that, the implementation can correctly
             claim that sem_wait hadn't actually started to execute yet, and
             thus the signal never actually interrupted sem_wait.  We make no
             timing guarantees, so the program can never observe that sem_wait
             actually did start to execute.  Thus, in a correct program, we
             can expect a signal that wanted to interrupt the sem_wait to have
             provided a token, and can just try to grab this token if
             futex_wait returns EINTR.  */
          if (err == ETIMEDOUT ||
              (err == EINTR && sem_assume_only_signals_cause_futex_EINTR))
            {
              __set_errno (err);
              err = -1;
              /* Stop being registered as a waiter.  */
              atomic_fetch_add_relaxed (&sem->data,
                  -((uint64_t) 1 << SEM_NWAITERS_SHIFT));
              break;
            }
          /* Relaxed MO is sufficient; see below.  */
          d = atomic_load_relaxed (&sem->data);
        }
      else
        {
          /* Try to grab both a token and stop being a waiter.  We need
             acquire MO so this synchronizes with all token providers (i.e.,
             the RMW operation we read from or all those before it in
             modification order; also see sem_post).  On the failure path,
             relaxed MO is sufficient because we only eventually need the
             up-to-date value; the futex_wait or the CAS perform the real
             work.  */
          if (atomic_compare_exchange_weak_acquire (&sem->data,
              &d, d - 1 - ((uint64_t) 1 << SEM_NWAITERS_SHIFT)))
            {
              err = 0;
              break;
            }
        }
    }

  pthread_cleanup_pop (0);
#else
  /* The main difference to the 64b-atomics implementation is that we need to
     access value and nwaiters in separate steps, and that the nwaiters bit
     in the value can temporarily not be set even if nwaiters is nonzero.
     We work around incorrectly unsetting the nwaiters bit by letting sem_wait
     set the bit again and waking the number of waiters that could grab a
     token.  There are two additional properties we need to ensure:
     (1) We make sure that whenever unsetting the bit, we see the increment of
     nwaiters by the other thread that set the bit.  IOW, we will notice if
     we make a mistake.
     (2) When setting the nwaiters bit, we make sure that we see the unsetting
     of the bit by another waiter that happened before us.  This avoids having
     to blindly set the bit whenever we need to block on it.  We set/unset
     the bit while having incremented nwaiters (i.e., are a registered
     waiter), and the problematic case only happens when one waiter indeed
     followed another (i.e., nwaiters was never larger than 1); thus, this
     works similarly as with a critical section using nwaiters (see the MOs
     and related comments below).

     An alternative approach would be to unset the bit after decrementing
     nwaiters; however, that would result in needing Dekker-like
     synchronization and thus full memory barriers.  We also would not be able
     to prevent misspeculation, so this alternative scheme does not seem
     beneficial.  */
  unsigned int v;

  /* Add a waiter.  We need acquire MO so this synchronizes with the release
     MO we use when decrementing nwaiters below; it ensures that if another
     waiter unset the bit before us, we see that and set it again.  Also see
     property (2) above.  */
  atomic_fetch_add_acquire (&sem->nwaiters, 1);

  pthread_cleanup_push (__sem_wait_cleanup, sem);

  /* Wait for a token to be available.  Retry until we can grab one.  */
  /* We do not need any ordering wrt. this load's reads-from, so relaxed
     MO is sufficient.  The acquire MO above ensures that in the problematic
     case, we do see the unsetting of the bit by another waiter.  */
  v = atomic_load_relaxed (&sem->value);
  do
    {
      do
        {
          /* We are about to block, so make sure that the nwaiters bit is
             set.  We need release MO on the CAS to ensure that when another
             waiter unsets the nwaiters bit, it will also observe that we
             incremented nwaiters in the meantime (also see the unsetting of
             the bit below).  Relaxed MO on CAS failure is sufficient (see
             above).  */
          do
            {
              if ((v & SEM_NWAITERS_MASK) != 0)
                break;
            }
          while (!atomic_compare_exchange_weak_release (&sem->value,
                 &v, v | SEM_NWAITERS_MASK));
          /* If there is no token, wait.  */
          if ((v >> SEM_VALUE_SHIFT) == 0)
            {
              /* See __HAVE_64B_ATOMICS variant.  */
              err = do_futex_wait (sem, abstime);
              if (err == ETIMEDOUT ||
                  (err == EINTR && sem_assume_only_signals_cause_futex_EINTR))
                {
                  __set_errno (err);
                  err = -1;
                  goto error;
                }
              err = 0;
              /* We blocked, so there might be a token now.  Relaxed MO is
                 sufficient (see above).  */
              v = atomic_load_relaxed (&sem->value);
            }
        }
      /* If there is no token, we must not try to grab one.  */
      while ((v >> SEM_VALUE_SHIFT) == 0);
    }
  /* Try to grab a token.  We need acquire MO so this synchronizes with
     all token providers (i.e., the RMW operation we read from or all those
     before it in modification order; also see sem_post).  */
  while (!atomic_compare_exchange_weak_acquire (&sem->value,
         &v, v - (1 << SEM_VALUE_SHIFT)));

error:
  pthread_cleanup_pop (0);

  __sem_wait_32_finish (sem);
#endif

  return err;
}

/* Stop being a registered waiter (non-64b-atomics code only).  */
#if !__HAVE_64B_ATOMICS
static void
__sem_wait_32_finish (struct new_sem *sem)
{
  /* The nwaiters bit is still set, try to unset it now if this seems
     necessary.  We do this before decrementing nwaiters so that the unsetting
     is visible to other waiters entering after us.  Relaxed MO is sufficient
     because we are just speculating here; a stronger MO would not prevent
     misspeculation.  */
  unsigned int wguess = atomic_load_relaxed (&sem->nwaiters);
  if (wguess == 1)
    /* We might be the last waiter, so unset.  This needs acquire MO so that
       it synchronizes with the release MO when setting the bit above; if we
       overwrite someone else that set the bit, we'll read in the following
       decrement of nwaiters at least from that release sequence, so we'll
       see if the other waiter is still active or if another waiter entered
       in the meantime (i.e., using the check below).  */
    atomic_fetch_and_acquire (&sem->value, ~SEM_NWAITERS_MASK);

  /* Now stop being a waiter, and see whether our guess was correct.
     This needs release MO so that it synchronizes with the acquire MO when
     a waiter increments nwaiters; this makes sure that newer waiters see that
     we reset the nwaiters bit.  */
  unsigned int wfinal = atomic_fetch_add_release (&sem->nwaiters, -1);
  if (wfinal > 1 && wguess == 1)
    {
      /* We guessed wrong, and so need to clean up after the mistake and
         unblock any waiters that could have not been woken.  There is no
         additional ordering that we need to set up, so relaxed MO is
         sufficient.  */
      unsigned int v = atomic_fetch_or_relaxed (&sem->value,
                                                SEM_NWAITERS_MASK);
      /* If there are available tokens, then wake as many waiters.  If there
         aren't any, then there is no need to wake anyone because there is
         no token to grab for another waiter.  If tokens become available
         subsequently, then the respective sem_post calls will do the wake-up
         due to us having set the nwaiters bit again.  */
      v >>= SEM_VALUE_SHIFT;
      if (v > 0)
        futex_wake (&sem->value, v, sem->private);
    }
}
#endif