/* sem_waitcommon -- wait on a semaphore, shared code.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Paul Mackerras <paulus@au.ibm.com>, 2003.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
#include <kernel-features.h>
#include <errno.h>
#include <sysdep.h>
#include <lowlevellock.h>
#include <internaltypes.h>
#include <semaphore.h>
#include <sys/time.h>

#include <pthreadP.h>
#include <shlib-compat.h>
#include <atomic.h>
/* Wrapper for lll_futex_wait with absolute timeout and error checking.
   TODO Remove when cleaning up the futex API throughout glibc.  */
static __always_inline int
futex_abstimed_wait (unsigned int* futex, unsigned int expected,
                     const struct timespec* abstime, int private, bool cancel)
{
  int err, oldtype;
  if (abstime == NULL)
    {
      if (cancel)
        oldtype = __pthread_enable_asynccancel ();
      err = lll_futex_wait (futex, expected, private);
      if (cancel)
        __pthread_disable_asynccancel (oldtype);
    }
  else
    {
#if (defined __ASSUME_FUTEX_CLOCK_REALTIME \
     && defined lll_futex_timed_wait_bitset)
      /* The Linux kernel returns EINVAL for this, but in userspace
         such a value is valid.  */
      if (abstime->tv_sec < 0)
        return ETIMEDOUT;
#else
      struct timeval tv;
      struct timespec rt;
      int sec, nsec;

      /* Get the current time.  */
      __gettimeofday (&tv, NULL);

      /* Compute relative timeout.  */
      sec = abstime->tv_sec - tv.tv_sec;
      nsec = abstime->tv_nsec - tv.tv_usec * 1000;
      if (nsec < 0)
        {
          nsec += 1000000000;
          --sec;
        }

      /* Already timed out?  */
      if (sec < 0)
        return ETIMEDOUT;

      /* Do wait.  */
      rt.tv_sec = sec;
      rt.tv_nsec = nsec;
#endif
      if (cancel)
        oldtype = __pthread_enable_asynccancel ();
#if (defined __ASSUME_FUTEX_CLOCK_REALTIME \
     && defined lll_futex_timed_wait_bitset)
      err = lll_futex_timed_wait_bitset (futex, expected, abstime,
                                         FUTEX_CLOCK_REALTIME, private);
#else
      err = lll_futex_timed_wait (futex, expected, &rt, private);
#endif
      if (cancel)
        __pthread_disable_asynccancel (oldtype);
    }
  switch (err)
    {
    case 0:
    case -EAGAIN:
    case -EINTR:
    case -ETIMEDOUT:
      return -err;

    case -EFAULT: /* Must have been caused by a glibc or application bug.  */
    case -EINVAL: /* Either due to wrong alignment or due to the timeout not
                     being normalized.  Must have been caused by a glibc or
                     application bug.  */
    case -ENOSYS: /* Must have been caused by a glibc bug.  */
    /* No other errors are documented at this time.  */
    default:
      abort ();
    }
}
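/* For illustration (an assumption about the callers, not code in this file):
   the abstime argument is an absolute CLOCK_REALTIME deadline, as POSIX
   specifies for sem_timedwait.  An application would typically build it
   roughly like this:

     struct timespec abstime;
     clock_gettime (CLOCK_REALTIME, &abstime);  // current absolute time
     abstime.tv_sec += 2;                       // deadline 2s from now
     sem_timedwait (&sem, &abstime);

   The wrapper above then either hands the absolute deadline to the kernel
   (FUTEX_CLOCK_REALTIME) or converts it to a relative timeout itself.  */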
/* Wrapper for lll_futex_wake, with error checking.
   TODO Remove when cleaning up the futex API throughout glibc.  */
static __always_inline void
futex_wake (unsigned int* futex, int processes_to_wake, int private)
{
  int res = lll_futex_wake (futex, processes_to_wake, private);
  /* No error.  Ignore the number of woken processes.  */
  if (res >= 0)
    return;
  switch (res)
    {
    case -EFAULT: /* Could have happened due to memory reuse.  */
    case -EINVAL: /* Could be either due to incorrect alignment (a bug in
                     glibc or in the application) or due to memory being
                     reused for a PI futex.  We cannot distinguish between the
                     two causes, and one of them is correct use, so we do not
                     act in this case.  */
      return;
    case -ENOSYS: /* Must have been caused by a glibc bug.  */
    /* No other errors are documented at this time.  */
    default:
      abort ();
    }
}
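/* For illustration: the token-provider side that the comments below keep
   referring to lives in sem_post.c, not in this file.  A simplified sketch
   of the 64b variant (an approximation, not the exact glibc code; the
   EOVERFLOW check on the value is omitted):

     uint64_t d = atomic_load_relaxed (&sem->data);
     do
       {
         // Release MO on the CAS makes the token hand-off synchronize with
         // the acquire-MO loads/CASes in the waiters below.
       }
     while (!atomic_compare_exchange_weak_release (&sem->data, &d, d + 1));
     // Only wake if there might be a blocked waiter.
     if ((d >> SEM_NWAITERS_SHIFT) != 0)
       futex_wake ((unsigned int *) &sem->data + SEM_VALUE_OFFSET, 1,
                   sem->private);
*/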
/* The semaphore provides two main operations: sem_post adds a token to the
   semaphore; sem_wait grabs a token from the semaphore, potentially waiting
   until there is a token available.  A sem_wait needs to synchronize with
   the sem_post that provided the token, so that whatever led to the sem_post
   happens before the code after sem_wait.

   Conceptually, available tokens can simply be counted; let's call that the
   value of the semaphore.  However, we also want to know whether there might
   be a sem_wait that is blocked on the value because it was zero (using a
   futex with the value being the futex variable); if there is no blocked
   sem_wait, sem_post does not need to execute a futex_wake call.  Therefore,
   we also need to count the number of potentially blocked sem_wait calls
   (which we call nwaiters).

   What makes this tricky is that POSIX requires that a semaphore can be
   destroyed as soon as the last remaining sem_wait has returned, and no
   other sem_wait or sem_post calls are executing concurrently.  However, the
   sem_post call whose token was consumed by the last sem_wait is considered
   to have finished once it provided the token to the sem_wait.
   Thus, sem_post must not access the semaphore struct anymore after it has
   made a token available; IOW, it needs to be able to atomically provide
   a token and check whether any blocked sem_wait calls might exist.

   This is straightforward to do if the architecture provides 64b atomics
   because we can just put both the value and nwaiters into one variable that
   we access atomically: This is the data field, the value is in the
   least-significant 32 bits, and nwaiters in the other bits.  When sem_post
   makes a value available, it can atomically check nwaiters.

   If we have only 32b atomics available, we cannot put both nwaiters and
   value into one 32b value because then we might have too few bits for both
   of those counters.  Therefore, we need to use two distinct fields.

   To allow sem_post to atomically make a token available and check for
   blocked sem_wait calls, we use one bit in value to indicate whether
   nwaiters is nonzero.  That allows sem_post to use basically the same
   algorithm as with 64b atomics, but requires sem_wait to update the bit; it
   can't do this atomically with another access to nwaiters, but it can
   compute a conservative value for the bit because it's benign if the bit is
   set even if nwaiters is zero (all we get is an unnecessary futex wake call
   by sem_post).
   Specifically, sem_wait will unset the bit speculatively if it believes
   that there is no other concurrently executing sem_wait.  If it
   misspeculated, it will have to clean up by waking any other sem_wait call
   (i.e., what sem_post would do otherwise).  This does not conflict with the
   destruction requirement because the semaphore must not be destroyed while
   any sem_wait is still executing.  */
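/* For reference, the fields sketched above are defined in internaltypes.h
   (struct new_sem).  Roughly, and not authoritatively (see that header for
   the exact masks, shifts, and padding):

     struct new_sem
     {
     #if __HAVE_64B_ATOMICS
       uint64_t data;        // value in the low 32 bits, nwaiters above them
       int private;
       int pad;
     #else
       unsigned int value;   // bit 0: "nwaiters may be nonzero" flag;
                             // bits 1..31: the semaphore value
       int private;
       int pad;
       unsigned int nwaiters;
     #endif
     };
*/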
/* Set this to true if you assume that, in contrast to current Linux futex
   documentation, lll_futex_wake can return -EINTR only if interrupted by a
   signal, not spuriously due to some other reason.
   TODO Discuss EINTR conditions with the Linux kernel community.  For
   now, we set this to true to not change behavior of semaphores compared
   to previous glibc builds.  */
static const int sem_assume_only_signals_cause_futex_EINTR = 1;
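/* For illustration of the EINTR reasoning below: to reliably interrupt a
   sem_wait with a signal, an application's handler must post a token itself
   (sem_post is AS-Safe).  Hypothetical application code, not part of glibc:

     static sem_t sem;

     static void
     handler (int sig)
     {
       sem_post (&sem);  // provide the token the woken sem_wait will grab
     }

   With this pattern, retrying the futex wait on EINTR is harmless: the
   waiter simply grabs the token posted by the handler.  */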
#if !__HAVE_64B_ATOMICS
static void
__sem_wait_32_finish (struct new_sem *sem);
#endif
static void
__sem_wait_cleanup (void *arg)
{
  struct new_sem *sem = (struct new_sem *) arg;

#if __HAVE_64B_ATOMICS
  /* Stop being registered as a waiter.  See below for MO.  */
  atomic_fetch_add_relaxed (&sem->data, -((uint64_t) 1 << SEM_NWAITERS_SHIFT));
#else
  __sem_wait_32_finish (sem);
#endif
}
/* Wait until at least one token is available, possibly with a timeout.
   This is in a separate function in order to make sure gcc
   puts the call site into an exception region, and thus the
   cleanups get properly run.  TODO still necessary?  Other futex_wait
   users don't seem to need it.  */
static int
__attribute__ ((noinline))
do_futex_wait (struct new_sem *sem, const struct timespec *abstime)
{
  int err;

#if __HAVE_64B_ATOMICS
  err = futex_abstimed_wait ((unsigned int *) &sem->data + SEM_VALUE_OFFSET, 0,
                             abstime, sem->private, true);
#else
  err = futex_abstimed_wait (&sem->value, SEM_NWAITERS_MASK, abstime,
                             sem->private, true);
#endif

  return err;
}
/* Fast path: Try to grab a token without blocking.  */
static int
__new_sem_wait_fast (struct new_sem *sem, int definitive_result)
{
  /* We need acquire MO if we actually grab a token, so that this
     synchronizes with all token providers (i.e., the RMW operation we read
     from or all those before it in modification order; also see sem_post).
     We do not need to guarantee any ordering if we observed that there is
     no token (POSIX leaves it unspecified whether functions that fail
     synchronize memory); thus, relaxed MO is sufficient for the initial load
     and the failure path of the CAS.  If the weak CAS fails and we need a
     definitive result, retry.  */
#if __HAVE_64B_ATOMICS
  uint64_t d = atomic_load_relaxed (&sem->data);
  do
    {
      if ((d & SEM_VALUE_MASK) == 0)
        break;
      if (atomic_compare_exchange_weak_acquire (&sem->data, &d, d - 1))
        return 0;
    }
  while (definitive_result);
  return -1;
#else
  unsigned int v = atomic_load_relaxed (&sem->value);
  do
    {
      if ((v >> SEM_VALUE_SHIFT) == 0)
        break;
      if (atomic_compare_exchange_weak_acquire (&sem->value,
          &v, v - (1 << SEM_VALUE_SHIFT)))
        return 0;
    }
  while (definitive_result);
  return -1;
#endif
}
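/* For illustration: the exported entry points (in sem_wait.c and
   sem_trywait.c) are expected to drive the fast path roughly like this
   (a sketch under that assumption, not the exact glibc code):

     int
     sem_wait_sketch (sem_t *sem)
     {
       // A spurious CAS failure just sends us to the slow path, so no
       // definitive result is required.
       if (__new_sem_wait_fast ((struct new_sem *) sem, 0) == 0)
         return 0;
       return __new_sem_wait_slow ((struct new_sem *) sem, NULL);
     }

     int
     sem_trywait_sketch (sem_t *sem)
     {
       // sem_trywait must not fail spuriously, so it requires a
       // definitive result.
       if (__new_sem_wait_fast ((struct new_sem *) sem, 1) == 0)
         return 0;
       __set_errno (EAGAIN);
       return -1;
     }
*/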
/* Slow path that blocks.  */
static int
__attribute__ ((noinline))
__new_sem_wait_slow (struct new_sem *sem, const struct timespec *abstime)
{
  int err = 0;

#if __HAVE_64B_ATOMICS
  /* Add a waiter.  Relaxed MO is sufficient because we can rely on the
     ordering provided by the RMW operations we use.  */
  uint64_t d = atomic_fetch_add_relaxed (&sem->data,
                                         (uint64_t) 1 << SEM_NWAITERS_SHIFT);

  pthread_cleanup_push (__sem_wait_cleanup, sem);

  /* Wait for a token to be available.  Retry until we can grab one.  */
  for (;;)
    {
      /* If there is no token available, sleep until there is.  */
      if ((d & SEM_VALUE_MASK) == 0)
        {
          err = do_futex_wait (sem, abstime);
          /* A futex return value of 0 or EAGAIN is due to a real or spurious
             wake-up, or due to a change in the number of tokens.  We retry in
             these cases.
             If we timed out, forward this to the caller.
             EINTR could be either due to being interrupted by a signal, or
             due to a spurious wake-up.  Thus, we cannot distinguish between
             both, and are not allowed to return EINTR to the caller but have
             to retry; this is because we may not have been interrupted by a
             signal.  However, if we assume that only signals cause a futex
             return of EINTR, we forward EINTR to the caller.

             Retrying on EINTR is technically always allowed because to
             reliably interrupt sem_wait with a signal, the signal handler
             must call sem_post (which is AS-Safe).  In executions where the
             signal handler does not do that, the implementation can correctly
             claim that sem_wait hadn't actually started to execute yet, and
             thus the signal never actually interrupted sem_wait.  We make no
             timing guarantees, so the program can never observe that sem_wait
             actually did start to execute.  Thus, in a correct program, we
             can expect a signal that wanted to interrupt the sem_wait to have
             provided a token, and can just try to grab this token if
             futex_wait returns EINTR.  */
          if (err == ETIMEDOUT ||
              (err == EINTR && sem_assume_only_signals_cause_futex_EINTR))
            {
              __set_errno (err);
              err = -1;
              /* Stop being registered as a waiter.  */
              atomic_fetch_add_relaxed (&sem->data,
                                        -((uint64_t) 1 << SEM_NWAITERS_SHIFT));
              break;
            }
          /* Relaxed MO is sufficient; see below.  */
          d = atomic_load_relaxed (&sem->data);
        }
      else
        {
          /* Try to grab both a token and stop being a waiter.  We need
             acquire MO so this synchronizes with all token providers (i.e.,
             the RMW operation we read from or all those before it in
             modification order; also see sem_post).  On the failure path,
             relaxed MO is sufficient because we only eventually need the
             up-to-date value; the futex_wait or the CAS perform the real
             work.  */
          if (atomic_compare_exchange_weak_acquire (&sem->data,
              &d, d - 1 - ((uint64_t) 1 << SEM_NWAITERS_SHIFT)))
            {
              err = 0;
              break;
            }
        }
    }

  pthread_cleanup_pop (0);
#else
  /* The main difference to the 64b-atomics implementation is that we need to
     access value and nwaiters in separate steps, and that the nwaiters bit
     in the value can temporarily not be set even if nwaiters is nonzero.
     We work around incorrectly unsetting the nwaiters bit by letting sem_wait
     set the bit again and waking the number of waiters that could grab a
     token.  There are two additional properties we need to ensure:
     (1) We make sure that whenever unsetting the bit, we see the increment of
     nwaiters by the other thread that set the bit.  IOW, we will notice if
     we make a mistake.
     (2) When setting the nwaiters bit, we make sure that we see the unsetting
     of the bit by another waiter that happened before us.  This avoids having
     to blindly set the bit whenever we need to block on it.  We set/unset
     the bit while having incremented nwaiters (i.e., are a registered
     waiter), and the problematic case only happens when one waiter indeed
     followed another (i.e., nwaiters was never larger than 1); thus, this
     works similarly as with a critical section using nwaiters (see the MOs
     and related comments below).

     An alternative approach would be to unset the bit after decrementing
     nwaiters; however, that would result in needing Dekker-like
     synchronization and thus full memory barriers.  We also would not be able
     to prevent misspeculation, so this alternative scheme does not seem
     beneficial.  */
  unsigned int v;

  /* Add a waiter.  We need acquire MO so this synchronizes with the release
     MO we use when decrementing nwaiters below; it ensures that if another
     waiter unset the bit before us, we see that and set it again.  Also see
     property (2) above.  */
  atomic_fetch_add_acquire (&sem->nwaiters, 1);

  pthread_cleanup_push (__sem_wait_cleanup, sem);

  /* Wait for a token to be available.  Retry until we can grab one.  */
  /* We do not need any ordering wrt. to this load's reads-from, so relaxed
     MO is sufficient.  The acquire MO above ensures that in the problematic
     case, we do see the unsetting of the bit by another waiter.  */
  v = atomic_load_relaxed (&sem->value);
  do
    {
      do
        {
          /* We are about to block, so make sure that the nwaiters bit is
             set.  We need release MO on the CAS to ensure that when another
             waiter unsets the nwaiters bit, it will also observe that we
             incremented nwaiters in the meantime (also see the unsetting of
             the bit below).  Relaxed MO on CAS failure is sufficient (see
             above).  */
          do
            {
              if ((v & SEM_NWAITERS_MASK) != 0)
                break;
            }
          while (!atomic_compare_exchange_weak_release (&sem->value,
              &v, v | SEM_NWAITERS_MASK));
          /* If there is no token, wait.  */
          if ((v >> SEM_VALUE_SHIFT) == 0)
            {
              /* See __HAVE_64B_ATOMICS variant.  */
              err = do_futex_wait (sem, abstime);
              if (err == ETIMEDOUT ||
                  (err == EINTR && sem_assume_only_signals_cause_futex_EINTR))
                {
                  __set_errno (err);
                  err = -1;
                  goto error;
                }
              err = 0;
              /* We blocked, so there might be a token now.  Relaxed MO is
                 sufficient (see above).  */
              v = atomic_load_relaxed (&sem->value);
            }
        }
      /* If there is no token, we must not try to grab one.  */
      while ((v >> SEM_VALUE_SHIFT) == 0);
    }
  /* Try to grab a token.  We need acquire MO so this synchronizes with
     all token providers (i.e., the RMW operation we read from or all those
     before it in modification order; also see sem_post).  */
  while (!atomic_compare_exchange_weak_acquire (&sem->value,
      &v, v - (1 << SEM_VALUE_SHIFT)));

error:
  pthread_cleanup_pop (0);

  __sem_wait_32_finish (sem);
#endif

  return err;
}
/* Stop being a registered waiter (non-64b-atomics code only).  */
#if !__HAVE_64B_ATOMICS
static void
__sem_wait_32_finish (struct new_sem *sem)
{
  /* The nwaiters bit is still set, try to unset it now if this seems
     necessary.  We do this before decrementing nwaiters so that the unsetting
     is visible to other waiters entering after us.  Relaxed MO is sufficient
     because we are just speculating here; a stronger MO would not prevent
     misspeculation.  */
  unsigned int wguess = atomic_load_relaxed (&sem->nwaiters);
  if (wguess == 1)
    /* We might be the last waiter, so unset.  This needs acquire MO so that
       it synchronizes with the release MO when setting the bit above; if we
       overwrite someone else that set the bit, we'll read in the following
       decrement of nwaiters at least from that release sequence, so we'll
       see if the other waiter is still active or if another writer entered
       in the meantime (i.e., using the check below).  */
    atomic_fetch_and_acquire (&sem->value, ~SEM_NWAITERS_MASK);

  /* Now stop being a waiter, and see whether our guess was correct.
     This needs release MO so that it synchronizes with the acquire MO when
     a waiter increments nwaiters; this makes sure that newer writers see that
     we reset the waiters_present bit.  */
  unsigned int wfinal = atomic_fetch_add_release (&sem->nwaiters, -1);
  if (wfinal > 1 && wguess == 1)
    {
      /* We guessed wrong, and so need to clean up after the mistake and
         unblock any waiters that could have not been woken.  There is no
         additional ordering that we need to set up, so relaxed MO is
         sufficient.  */
      unsigned int v = atomic_fetch_or_relaxed (&sem->value,
                                                SEM_NWAITERS_MASK);
      /* If there are available tokens, then wake as many waiters.  If there
         aren't any, then there is no need to wake anyone because there is
         none to grab for another waiter.  If tokens become available
         subsequently, then the respective sem_post calls will do the wake-up
         due to us having set the nwaiters bit again.  */
      v >>= SEM_VALUE_SHIFT;
      if (v > 0)
        futex_wake (&sem->value, v, sem->private);
    }
}
#endif