/* rwsem.c: R/W semaphores: contention handling functions
 *
 * Written by David Howells (dhowells@redhat.com).
 * Derived from arch/i386/kernel/semaphore.c
 */
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/module.h>

/*
 * Initialize an rwsem:
 */
void __init_rwsem(struct rw_semaphore *sem, const char *name,
                  struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
        /*
         * Make sure we are not reinitializing a held semaphore:
         */
        debug_check_no_locks_freed((void *)sem, sizeof(*sem));
        lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
        sem->count = RWSEM_UNLOCKED_VALUE;
        spin_lock_init(&sem->wait_lock);
        INIT_LIST_HEAD(&sem->wait_list);
}

EXPORT_SYMBOL(__init_rwsem);

struct rwsem_waiter {
        struct list_head list;
        struct task_struct *task;
        unsigned int flags;
#define RWSEM_WAITING_FOR_READ  0x00000001
#define RWSEM_WAITING_FOR_WRITE 0x00000002
};

/* Wake types for __rwsem_do_wake().  Note that RWSEM_WAKE_NO_ACTIVE and
 * RWSEM_WAKE_READ_OWNED imply that the spinlock must have been kept held
 * since the rwsem value was observed.
 */
#define RWSEM_WAKE_ANY        0 /* Wake whatever's at head of wait list */
#define RWSEM_WAKE_NO_ACTIVE  1 /* rwsem was observed with no active thread */
#define RWSEM_WAKE_READ_OWNED 2 /* rwsem was observed to be read owned */

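/* The count constants used below (RWSEM_ACTIVE_MASK, RWSEM_WAITING_BIAS,
 * RWSEM_ACTIVE_READ_BIAS, RWSEM_ACTIVE_WRITE_BIAS) are supplied by the
 * architecture's <asm/rwsem.h>, not defined here.  As a reference point, the
 * typical 32-bit layout is:
 *
 *      RWSEM_UNLOCKED_VALUE      0x00000000
 *      RWSEM_ACTIVE_BIAS         0x00000001
 *      RWSEM_ACTIVE_MASK         0x0000ffff
 *      RWSEM_WAITING_BIAS        (-0x00010000)
 *      RWSEM_ACTIVE_READ_BIAS    RWSEM_ACTIVE_BIAS
 *      RWSEM_ACTIVE_WRITE_BIAS   (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
 *
 * i.e. the low 16 bits count active lockers and the upper bits go negative
 * while anything is queued on the wait list.
 */
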
/*
 * handle the lock release when processes blocked on it that can now run
 * - if we come here from up_xxxx(), then:
 *   - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
 *   - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
 *   - there must be someone on the queue
 * - the spinlock must be held by the caller
 * - woken process blocks are discarded from the list after having task zeroed
 * - writers are only woken if downgrading is false
 */
static struct rw_semaphore *
__rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
{
        struct rwsem_waiter *waiter;
        struct task_struct *tsk;
        struct list_head *next;
        signed long oldcount, woken, loop, adjustment;

        waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
        if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
                goto readers_only;

        if (wake_type == RWSEM_WAKE_READ_OWNED)
                /* Another active reader was observed, so wakeup is not
                 * likely to succeed. Save the atomic op.
                 */
                goto out;

        /* There's a writer at the front of the queue - try to grant it the
         * write lock.  However, we only wake this writer if we can transition
         * the active part of the count from 0 -> 1
         */
        adjustment = RWSEM_ACTIVE_WRITE_BIAS;
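        /* If this writer is also the last waiter on the list, fold the
         * removal of the waiting bias into the same update, so a successful
         * grant leaves the count looking like an uncontended write lock. */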
        if (waiter->list.next == &sem->wait_list)
                adjustment -= RWSEM_WAITING_BIAS;

 try_again_write:
        oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
        if (oldcount & RWSEM_ACTIVE_MASK)
                /* Someone grabbed the sem already */
                goto undo_write;

        /* We must be careful not to touch 'waiter' after we set ->task = NULL.
         * It is allocated on the waiter's stack and may become invalid at
         * any time after that point (due to a wakeup from another source).
         */
        list_del(&waiter->list);
        tsk = waiter->task;
        smp_mb();
        waiter->task = NULL;
        wake_up_process(tsk);
        put_task_struct(tsk);
        goto out;

 readers_only:
        /* If we come here from up_xxxx(), another thread might have reached
         * rwsem_down_failed_common() before we acquired the spinlock and
         * woken up a waiter, making it now active.  We prefer to check for
         * this first in order to not spend too much time with the spinlock
         * held if we're not going to be able to wake up readers in the end.
         *
         * Note that we do not need to update the rwsem count: any writer
         * trying to acquire rwsem will run rwsem_down_write_failed() due
         * to the waiting threads and block trying to acquire the spinlock.
         *
         * We use a dummy atomic update in order to acquire the cache line
         * exclusively since we expect to succeed and run the final rwsem
         * count adjustment pretty soon.
         */
        if (wake_type == RWSEM_WAKE_ANY &&
            rwsem_atomic_update(0, sem) < RWSEM_WAITING_BIAS)
                /* Someone grabbed the sem for write already */
                goto out;

        /* Grant an infinite number of read locks to the readers at the front
         * of the queue.  Note we increment the 'active part' of the count by
         * the number of readers before waking any processes up.
         */
        woken = 0;
        do {
                woken++;

                if (waiter->list.next == &sem->wait_list)
                        break;

                waiter = list_entry(waiter->list.next,
                                        struct rwsem_waiter, list);

        } while (waiter->flags & RWSEM_WAITING_FOR_READ);

        adjustment = woken * RWSEM_ACTIVE_READ_BIAS;
        if (waiter->flags & RWSEM_WAITING_FOR_READ)
                /* hit end of list above */
                adjustment -= RWSEM_WAITING_BIAS;

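        /* One atomic update publishes both the newly active readers and,
         * when every queued waiter is about to be woken, the removal of the
         * waiting bias. */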
        rwsem_atomic_add(adjustment, sem);

        next = sem->wait_list.next;
        for (loop = woken; loop > 0; loop--) {
                waiter = list_entry(next, struct rwsem_waiter, list);
                next = waiter->list.next;
                tsk = waiter->task;
                smp_mb();
                waiter->task = NULL;
                wake_up_process(tsk);
                put_task_struct(tsk);
        }

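        /* Splice the whole batch of woken waiters off the list in one go
         * rather than list_del()ing them one at a time. */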
        sem->wait_list.next = next;
        next->prev = &sem->wait_list;

 out:
        return sem;

        /* undo the change to the active count, but check for a transition
         * 1->0 */
 undo_write:
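        /* Back out the bias we optimistically added.  If the active part is
         * still non-zero afterwards, another thread holds the lock and its
         * release will handle the wakeup; if our back-out saw it drop to
         * zero, the intervening release may have skipped rwsem_wake()
         * because of our transient bias, so retry the grant ourselves. */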
        if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK)
                goto out;
        goto try_again_write;
}

/*
 * wait for a lock to be granted
 */
static struct rw_semaphore __sched *
rwsem_down_failed_common(struct rw_semaphore *sem,
                         unsigned int flags, signed long adjustment)
{
        struct rwsem_waiter waiter;
        struct task_struct *tsk = current;
        signed long count;

        set_task_state(tsk, TASK_UNINTERRUPTIBLE);

        /* set up my own style of waitqueue */
        spin_lock_irq(&sem->wait_lock);
        waiter.task = tsk;
        waiter.flags = flags;
        get_task_struct(tsk);

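        /* Only the first task to queue on an empty list contributes
         * RWSEM_WAITING_BIAS; it stays in the count until the queue drains
         * again. */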
        if (list_empty(&sem->wait_list))
                adjustment += RWSEM_WAITING_BIAS;
        list_add_tail(&waiter.list, &sem->wait_list);

        /* we're now waiting on the lock, but no longer actively locking */
        count = rwsem_atomic_update(adjustment, sem);

        /* If there are no active locks, wake the front queued process(es) up.
         *
         * Alternatively, if we're called from a failed down_write(), there
         * were already threads queued before us and there are no active
         * writers, the lock must be read owned; so we try to wake any read
         * locks that were queued ahead of us. */
        if (count == RWSEM_WAITING_BIAS)
                sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE);
        else if (count > RWSEM_WAITING_BIAS &&
                 adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
                sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);

        spin_unlock_irq(&sem->wait_lock);

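        /* Sleep until the waker clears waiter.task.  Reading it without the
         * lock is fine: the waker stores NULL only after a full barrier and
         * never touches this stack-allocated waiter again afterwards. */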
        /* wait to be given the lock */
        for (;;) {
                if (!waiter.task)
                        break;
                schedule();
                set_task_state(tsk, TASK_UNINTERRUPTIBLE);
        }

        tsk->state = TASK_RUNNING;

        return sem;
}

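/* Note on the adjustment argument below: the arch fast path has already added
 * RWSEM_ACTIVE_READ_BIAS (or RWSEM_ACTIVE_WRITE_BIAS) before falling into
 * these slow paths, so passing its negation lets queueing and the un-biasing
 * of that optimistic attempt happen in a single atomic update.
 */
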
/*
 * wait for the read lock to be granted
 */
asmregparm struct rw_semaphore __sched *
rwsem_down_read_failed(struct rw_semaphore *sem)
{
        return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ,
                                        -RWSEM_ACTIVE_READ_BIAS);
}

/*
 * wait for the write lock to be granted
 */
asmregparm struct rw_semaphore __sched *
rwsem_down_write_failed(struct rw_semaphore *sem)
{
        return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE,
                                        -RWSEM_ACTIVE_WRITE_BIAS);
}

/*
 * handle waking up a waiter on the semaphore
 * - up_read/up_write has decremented the active part of count if we come here
 */
asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
{
        unsigned long flags;

        spin_lock_irqsave(&sem->wait_lock, flags);

        /* do nothing if list empty */
        if (!list_empty(&sem->wait_list))
                sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);

        spin_unlock_irqrestore(&sem->wait_lock, flags);

        return sem;
}

/*
 * downgrade a write lock into a read lock
 * - caller incremented waiting part of count and discovered it still negative
 * - just wake up any readers at the front of the queue
 */
asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
        unsigned long flags;

        spin_lock_irqsave(&sem->wait_lock, flags);

        /* do nothing if list empty */
        if (!list_empty(&sem->wait_list))
                sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);

        spin_unlock_irqrestore(&sem->wait_lock, flags);

        return sem;
}

EXPORT_SYMBOL(rwsem_down_read_failed);
EXPORT_SYMBOL(rwsem_down_write_failed);
EXPORT_SYMBOL(rwsem_wake);
EXPORT_SYMBOL(rwsem_downgrade_wake);