/* rwsem.c: R/W semaphores: contention handling functions
 *
 * Written by David Howells (dhowells@redhat.com).
 * Derived from arch/i386/kernel/semaphore.c
 */
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/module.h>

/*
 * Initialize an rwsem:
 */
void __init_rwsem(struct rw_semaphore *sem, const char *name,
                  struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
        /*
         * Make sure we are not reinitializing a held semaphore:
         */
        debug_check_no_locks_freed((void *)sem, sizeof(*sem));
        lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
        sem->count = RWSEM_UNLOCKED_VALUE;
        spin_lock_init(&sem->wait_lock);
        INIT_LIST_HEAD(&sem->wait_list);
}

EXPORT_SYMBOL(__init_rwsem);

struct rwsem_waiter {
        struct list_head list;
        struct task_struct *task;
        unsigned int flags;
#define RWSEM_WAITING_FOR_READ  0x00000001
#define RWSEM_WAITING_FOR_WRITE 0x00000002
};

/* Wake types for __rwsem_do_wake().  Note that RWSEM_WAKE_NO_ACTIVE and
 * RWSEM_WAKE_READ_OWNED imply that the spinlock must have been kept held
 * since the rwsem value was observed.
 */
#define RWSEM_WAKE_ANY        0 /* Wake whatever's at head of wait list */
#define RWSEM_WAKE_NO_ACTIVE  1 /* rwsem was observed with no active thread */
#define RWSEM_WAKE_READ_OWNED 2 /* rwsem was observed to be read owned */

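/* The count constants used below (RWSEM_ACTIVE_MASK, RWSEM_WAITING_BIAS,
 * RWSEM_ACTIVE_READ_BIAS, RWSEM_ACTIVE_WRITE_BIAS) are supplied by the
 * architecture's <asm/rwsem.h>, not defined here.  As a reference point, the
 * typical 32-bit layout is:
 *
 *      RWSEM_UNLOCKED_VALUE      0x00000000
 *      RWSEM_ACTIVE_BIAS         0x00000001
 *      RWSEM_ACTIVE_MASK         0x0000ffff
 *      RWSEM_WAITING_BIAS        (-0x00010000)
 *      RWSEM_ACTIVE_READ_BIAS    RWSEM_ACTIVE_BIAS
 *      RWSEM_ACTIVE_WRITE_BIAS   (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
 *
 * i.e. the low 16 bits count active lockers and the upper bits go negative
 * while anything is queued on the wait list.
 */
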
/*
 * handle the lock release when processes blocked on it that can now run
 * - if we come here from up_xxxx(), then:
 *   - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
 *   - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
 *   - there must be someone on the queue
 * - the spinlock must be held by the caller
 * - woken process blocks are discarded from the list after having task zeroed
 * - writers are only woken if downgrading is false
 */
static struct rw_semaphore *
__rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
{
        struct rwsem_waiter *waiter;
        struct task_struct *tsk;
        struct list_head *next;
        signed long oldcount, woken, loop, adjustment;

        waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
        if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
                goto readers_only;

        if (wake_type == RWSEM_WAKE_READ_OWNED)
                /* Another active reader was observed, so wakeup is not
                 * likely to succeed. Save the atomic op.
                 */
                goto out;

        /* There's a writer at the front of the queue - try to grant it the
         * write lock.  However, we only wake this writer if we can transition
         * the active part of the count from 0 -> 1
         */
        adjustment = RWSEM_ACTIVE_WRITE_BIAS;
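        /* If this writer is also the last waiter on the list, fold the
         * removal of the waiting bias into the same update, so a successful
         * grant leaves the count looking like an uncontended write lock. */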
        if (waiter->list.next == &sem->wait_list)
                adjustment -= RWSEM_WAITING_BIAS;

 try_again_write:
        oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
        if (oldcount & RWSEM_ACTIVE_MASK)
                /* Someone grabbed the sem already */
                goto undo_write;

        /* We must be careful not to touch 'waiter' after we set ->task = NULL.
         * It is allocated on the waiter's stack and may become invalid at
         * any time after that point (due to a wakeup from another source).
         */
        list_del(&waiter->list);
        tsk = waiter->task;
        smp_mb();
        waiter->task = NULL;
        wake_up_process(tsk);
        put_task_struct(tsk);
        goto out;

 readers_only:
        /* If we come here from up_xxxx(), another thread might have reached
         * rwsem_down_failed_common() before we acquired the spinlock and
         * woken up a waiter, making it now active.  We prefer to check for
         * this first in order to not spend too much time with the spinlock
         * held if we're not going to be able to wake up readers in the end.
         *
         * Note that we do not need to update the rwsem count: any writer
         * trying to acquire rwsem will run rwsem_down_write_failed() due
         * to the waiting threads and block trying to acquire the spinlock.
         *
         * We use a dummy atomic update in order to acquire the cache line
         * exclusively since we expect to succeed and run the final rwsem
         * count adjustment pretty soon.
         */
        if (wake_type == RWSEM_WAKE_ANY &&
            rwsem_atomic_update(0, sem) < RWSEM_WAITING_BIAS)
                /* Someone grabbed the sem for write already */
                goto out;

        /* Grant an infinite number of read locks to the readers at the front
         * of the queue.  Note we increment the 'active part' of the count by
         * the number of readers before waking any processes up.
         */
        woken = 0;
        do {
                woken++;

                if (waiter->list.next == &sem->wait_list)
                        break;

                waiter = list_entry(waiter->list.next,
                                        struct rwsem_waiter, list);

        } while (waiter->flags & RWSEM_WAITING_FOR_READ);

        adjustment = woken * RWSEM_ACTIVE_READ_BIAS;
        if (waiter->flags & RWSEM_WAITING_FOR_READ)
                /* hit end of list above */
                adjustment -= RWSEM_WAITING_BIAS;

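        /* One atomic update publishes both the newly active readers and,
         * when every queued waiter is about to be woken, the removal of the
         * waiting bias. */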
        rwsem_atomic_add(adjustment, sem);

        next = sem->wait_list.next;
        for (loop = woken; loop > 0; loop--) {
                waiter = list_entry(next, struct rwsem_waiter, list);
                next = waiter->list.next;
                tsk = waiter->task;
                smp_mb();
                waiter->task = NULL;
                wake_up_process(tsk);
                put_task_struct(tsk);
        }

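        /* Splice the whole batch of woken waiters off the list in one go
         * rather than list_del()ing them one at a time. */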
        sem->wait_list.next = next;
        next->prev = &sem->wait_list;

 out:
        return sem;

        /* undo the change to the active count, but check for a transition
         * 1->0 */
 undo_write:
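        /* Back out the bias we optimistically added.  If the active part is
         * still non-zero afterwards, another thread holds the lock and its
         * release will handle the wakeup; if our back-out saw it drop to
         * zero, the intervening release may have skipped rwsem_wake()
         * because of our transient bias, so retry the grant ourselves. */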
        if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK)
                goto out;
        goto try_again_write;
}

/*
 * wait for a lock to be granted
 */
static struct rw_semaphore __sched *
rwsem_down_failed_common(struct rw_semaphore *sem,
                         unsigned int flags, signed long adjustment)
{
        struct rwsem_waiter waiter;
        struct task_struct *tsk = current;
        signed long count;

        set_task_state(tsk, TASK_UNINTERRUPTIBLE);

        /* set up my own style of waitqueue */
        spin_lock_irq(&sem->wait_lock);
        waiter.task = tsk;
        waiter.flags = flags;
        get_task_struct(tsk);

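        /* Only the first task to queue on an empty list contributes
         * RWSEM_WAITING_BIAS; it stays in the count until the queue drains
         * again. */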
        if (list_empty(&sem->wait_list))
                adjustment += RWSEM_WAITING_BIAS;
        list_add_tail(&waiter.list, &sem->wait_list);

        /* we're now waiting on the lock, but no longer actively locking */
        count = rwsem_atomic_update(adjustment, sem);

        /* If there are no active locks, wake the front queued process(es) up.
         *
         * Alternatively, if we're called from a failed down_write(), there
         * were already threads queued before us and there are no active
         * writers, the lock must be read owned; so we try to wake any read
         * locks that were queued ahead of us. */
        if (count == RWSEM_WAITING_BIAS)
                sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE);
        else if (count > RWSEM_WAITING_BIAS &&
                 adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
                sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);

        spin_unlock_irq(&sem->wait_lock);

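        /* Sleep until the waker clears waiter.task.  Reading it without the
         * lock is fine: the waker stores NULL only after a full barrier and
         * never touches this stack-allocated waiter again afterwards. */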
        /* wait to be given the lock */
        for (;;) {
                if (!waiter.task)
                        break;
                schedule();
                set_task_state(tsk, TASK_UNINTERRUPTIBLE);
        }

        tsk->state = TASK_RUNNING;

        return sem;
}

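/* Note on the adjustment argument below: the arch fast path has already added
 * RWSEM_ACTIVE_READ_BIAS (or RWSEM_ACTIVE_WRITE_BIAS) before falling into
 * these slow paths, so passing its negation lets queueing and the un-biasing
 * of that optimistic attempt happen in a single atomic update.
 */
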
/*
 * wait for the read lock to be granted
 */
asmregparm struct rw_semaphore __sched *
rwsem_down_read_failed(struct rw_semaphore *sem)
{
        return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ,
                                        -RWSEM_ACTIVE_READ_BIAS);
}

/*
 * wait for the write lock to be granted
 */
asmregparm struct rw_semaphore __sched *
rwsem_down_write_failed(struct rw_semaphore *sem)
{
        return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE,
                                        -RWSEM_ACTIVE_WRITE_BIAS);
}

/*
 * handle waking up a waiter on the semaphore
 * - up_read/up_write has decremented the active part of count if we come here
 */
asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
{
        unsigned long flags;

        spin_lock_irqsave(&sem->wait_lock, flags);

        /* do nothing if list empty */
        if (!list_empty(&sem->wait_list))
                sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);

        spin_unlock_irqrestore(&sem->wait_lock, flags);

        return sem;
}

/*
 * downgrade a write lock into a read lock
 * - caller incremented waiting part of count and discovered it still negative
 * - just wake up any readers at the front of the queue
 */
asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
        unsigned long flags;

        spin_lock_irqsave(&sem->wait_lock, flags);

        /* do nothing if list empty */
        if (!list_empty(&sem->wait_list))
                sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);

        spin_unlock_irqrestore(&sem->wait_lock, flags);

        return sem;
}

EXPORT_SYMBOL(rwsem_down_read_failed);
EXPORT_SYMBOL(rwsem_down_write_failed);
EXPORT_SYMBOL(rwsem_wake);
EXPORT_SYMBOL(rwsem_downgrade_wake);