Add tunable for each_burst.
[dragonfly.git] / sys / kern / kern_spinlock.c
bloba96645e7a8540d88654413dede497af390dbc8a6
1 /*
2 * Copyright (c) 2005 Jeffrey M. Hsu. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Jeffrey M. Hsu. and Matthew Dillon
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of The DragonFly Project nor the names of its
16 * contributors may be used to endorse or promote products derived
17 * from this software without specific, prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
32 * $DragonFly: src/sys/kern/kern_spinlock.c,v 1.11 2007/07/02 16:51:58 dillon Exp $
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/types.h>
38 #include <sys/kernel.h>
39 #include <sys/sysctl.h>
40 #ifdef INVARIANTS
41 #include <sys/proc.h>
42 #endif
43 #include <ddb/ddb.h>
44 #include <machine/atomic.h>
45 #include <machine/cpufunc.h>
46 #include <machine/clock.h>
47 #include <sys/spinlock.h>
48 #include <sys/spinlock2.h>
49 #include <sys/ktr.h>
/*
 * Quick-backoff tuning: exponential_init() starts the busy-wait count at
 * BACKOFF_INITIAL and exponential_backoff() doubles it on every call until
 * it reaches BACKOFF_LIMIT.
 */
#define BACKOFF_INITIAL		1
#define BACKOFF_LIMIT		256
54 #ifdef SMP
57 * Kernel Trace
59 #if !defined(KTR_SPIN_CONTENTION)
60 #define KTR_SPIN_CONTENTION KTR_ALL
61 #endif
62 #define SPIN_STRING "spin=%p type=%c"
63 #define SPIN_ARG_SIZE (sizeof(void *) + sizeof(int))
65 KTR_INFO_MASTER(spin);
66 KTR_INFO(KTR_SPIN_CONTENTION, spin, beg, 0, SPIN_STRING, SPIN_ARG_SIZE);
67 KTR_INFO(KTR_SPIN_CONTENTION, spin, end, 1, SPIN_STRING, SPIN_ARG_SIZE);
69 #define logspin(name, mtx, type) \
70 KTR_LOG(spin_ ## name, mtx, type)
72 #ifdef INVARIANTS
73 static int spin_lock_test_mode;
74 #endif
76 static int64_t spinlocks_contested1;
77 SYSCTL_QUAD(_debug, OID_AUTO, spinlocks_contested1, CTLFLAG_RD, &spinlocks_contested1, 0, "");
78 static int64_t spinlocks_contested2;
79 SYSCTL_QUAD(_debug, OID_AUTO, spinlocks_contested2, CTLFLAG_RD, &spinlocks_contested2, 0, "");
81 struct exponential_backoff {
82 int backoff;
83 int nsec;
84 struct spinlock *mtx;
85 sysclock_t base;
87 static int exponential_backoff(struct exponential_backoff *bo);
89 static __inline
90 void
91 exponential_init(struct exponential_backoff *bo, struct spinlock *mtx)
93 bo->backoff = BACKOFF_INITIAL;
94 bo->nsec = 0;
95 bo->mtx = mtx;
99 * We were either contested due to another exclusive lock holder,
100 * or due to the presence of shared locks. We have to undo the mess
101 * we created by returning the shared locks.
103 * If there was another exclusive lock holder only the exclusive bit
104 * in value will be the only bit set. We don't have to do anything since
105 * restoration does not involve any work.
107 * Otherwise we successfully obtained the exclusive bit. Attempt to
108 * clear the shared bits. If we are able to clear the shared bits
109 * we win. Otherwise we lose and we have to restore the shared bits
110 * we couldn't clear (and also clear our exclusive bit).
113 spin_trylock_wr_contested(struct spinlock *mtx, int value)
115 int bit;
117 ++spinlocks_contested1;
118 if ((value & SPINLOCK_EXCLUSIVE) == 0) {
119 while (value) {
120 bit = bsfl(value);
121 if (globaldata_find(bit)->gd_spinlock_rd != mtx) {
122 atomic_swap_int(&mtx->lock, value);
123 return (FALSE);
125 value &= ~(1 << bit);
127 return (TRUE);
129 return (FALSE);
133 * We were either contested due to another exclusive lock holder,
134 * or due to the presence of shared locks
136 * NOTE: If value indicates an exclusively held mutex, no shared bits
137 * would have been set and we can throw away value.
139 void
140 spin_lock_wr_contested(struct spinlock *mtx, int value)
142 struct exponential_backoff backoff;
143 globaldata_t gd = mycpu;
144 int bit;
145 int mask;
148 * Wait until we can gain exclusive access vs another exclusive
149 * holder.
151 exponential_init(&backoff, mtx);
152 ++spinlocks_contested1;
153 logspin(beg, mtx, 'w');
155 while (value & SPINLOCK_EXCLUSIVE) {
156 value = atomic_swap_int(&mtx->lock, SPINLOCK_EXCLUSIVE);
157 if (exponential_backoff(&backoff)) {
158 value &= ~SPINLOCK_EXCLUSIVE;
159 break;
164 * Kill the cached shared bit for our own cpu. This is the most
165 * common case and there's no sense wasting cpu on it. Since
166 * spinlocks aren't recursive, we can't own a shared ref on the
167 * spinlock while trying to get an exclusive one.
169 * If multiple bits are set do not stall on any single cpu. Check
170 * all cpus that have the cache bit set, then loop and check again,
171 * until we've cleaned all the bits.
173 value &= ~gd->gd_cpumask;
175 while ((mask = value) != 0) {
176 while (mask) {
177 bit = bsfl(value);
178 if (globaldata_find(bit)->gd_spinlock_rd != mtx) {
179 value &= ~(1 << bit);
180 } else if (exponential_backoff(&backoff)) {
181 value = 0;
182 break;
184 mask &= ~(1 << bit);
187 logspin(end, mtx, 'w');
191 * The cache bit wasn't set for our cpu. Loop until we can set the bit.
192 * As with the spin_lock_rd() inline we need a memory fence after setting
193 * gd_spinlock_rd to interlock against exclusive spinlocks waiting for
194 * that field to clear.
196 void
197 spin_lock_rd_contested(struct spinlock *mtx)
199 struct exponential_backoff backoff;
200 globaldata_t gd = mycpu;
201 int value = mtx->lock;
204 * Shortcut the op if we can just set the cache bit. This case
205 * occurs when the last lock was an exclusive lock.
207 while ((value & SPINLOCK_EXCLUSIVE) == 0) {
208 if (atomic_cmpset_int(&mtx->lock, value, value|gd->gd_cpumask))
209 return;
210 value = mtx->lock;
213 exponential_init(&backoff, mtx);
214 ++spinlocks_contested1;
216 logspin(beg, mtx, 'r');
218 while ((value & gd->gd_cpumask) == 0) {
219 if (value & SPINLOCK_EXCLUSIVE) {
220 gd->gd_spinlock_rd = NULL;
221 if (exponential_backoff(&backoff)) {
222 gd->gd_spinlock_rd = mtx;
223 break;
225 gd->gd_spinlock_rd = mtx;
226 cpu_mfence();
227 } else {
228 if (atomic_cmpset_int(&mtx->lock, value, value|gd->gd_cpumask))
229 break;
231 value = mtx->lock;
233 logspin(end, mtx, 'r');
237 * Handle exponential backoff and indefinite waits.
239 * If the system is handling a panic we hand the spinlock over to the caller
240 * after 1 second. After 10 seconds we attempt to print a debugger
241 * backtrace. We also run pending interrupts in order to allow a console
242 * break into DDB.
244 static
246 exponential_backoff(struct exponential_backoff *bo)
248 sysclock_t count;
249 int i;
252 * Quick backoff
254 for (i = 0; i < bo->backoff; ++i)
255 cpu_nop();
256 if (bo->backoff < BACKOFF_LIMIT) {
257 bo->backoff <<= 1;
258 return (FALSE);
262 * Indefinite
264 ++spinlocks_contested2;
265 cpu_spinlock_contested();
266 if (bo->nsec == 0) {
267 bo->base = sys_cputimer->count();
268 bo->nsec = 1;
271 count = sys_cputimer->count();
272 if (count - bo->base > sys_cputimer->freq) {
273 kprintf("spin_lock: %p, indefinite wait!\n", bo->mtx);
274 if (panicstr)
275 return (TRUE);
276 #if defined(INVARIANTS) && defined(DDB)
277 if (spin_lock_test_mode) {
278 db_print_backtrace();
279 return (TRUE);
281 #endif
282 ++bo->nsec;
283 #if defined(INVARIANTS) && defined(DDB)
284 if (bo->nsec == 11)
285 db_print_backtrace();
286 #endif
287 if (bo->nsec == 60)
288 panic("spin_lock: %p, indefinite wait!\n", bo->mtx);
289 splz();
290 bo->base = count;
292 return (FALSE);
296 * If INVARIANTS is enabled various spinlock timing tests can be run
297 * by setting debug.spin_lock_test:
299 * 1 Test the indefinite wait code
300 * 2 Time the best-case exclusive lock overhead (spin_test_count)
301 * 3 Time the best-case shared lock overhead (spin_test_count)
304 #ifdef INVARIANTS
306 static int spin_test_count = 10000000;
307 SYSCTL_INT(_debug, OID_AUTO, spin_test_count, CTLFLAG_RW, &spin_test_count, 0, "");
309 static int
310 sysctl_spin_lock_test(SYSCTL_HANDLER_ARGS)
312 struct spinlock mtx;
313 int error;
314 int value = 0;
315 int i;
317 if ((error = suser(curthread)) != 0)
318 return (error);
319 if ((error = SYSCTL_IN(req, &value, sizeof(value))) != 0)
320 return (error);
323 * Indefinite wait test
325 if (value == 1) {
326 spin_init(&mtx);
327 spin_lock_wr(&mtx); /* force an indefinite wait */
328 spin_lock_test_mode = 1;
329 spin_lock_wr(&mtx);
330 spin_unlock_wr(&mtx); /* Clean up the spinlock count */
331 spin_unlock_wr(&mtx);
332 spin_lock_test_mode = 0;
336 * Time best-case exclusive spinlocks
338 if (value == 2) {
339 globaldata_t gd = mycpu;
341 spin_init(&mtx);
342 for (i = spin_test_count; i > 0; --i) {
343 spin_lock_wr_quick(gd, &mtx);
344 spin_unlock_wr_quick(gd, &mtx);
349 * Time best-case shared spinlocks
351 if (value == 3) {
352 globaldata_t gd = mycpu;
354 spin_init(&mtx);
355 for (i = spin_test_count; i > 0; --i) {
356 spin_lock_rd_quick(gd, &mtx);
357 spin_unlock_rd_quick(gd, &mtx);
360 return (0);
363 SYSCTL_PROC(_debug, KERN_PROC_ALL, spin_lock_test, CTLFLAG_RW|CTLTYPE_INT,
364 0, 0, sysctl_spin_lock_test, "I", "Test spinlock wait code");
366 #endif /* INVARIANTS */
367 #endif /* SMP */