Add missing section number.
[dragonfly.git] / sys / kern / kern_spinlock.c
blob32a358c6d02fdb2702443bddf10cd58fcff29e6d
1 /*
2 * Copyright (c) 2005 Jeffrey M. Hsu. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Jeffrey M. Hsu. and Matthew Dillon
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of The DragonFly Project nor the names of its
16 * contributors may be used to endorse or promote products derived
17 * from this software without specific, prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
32 * $DragonFly: src/sys/kern/kern_spinlock.c,v 1.16 2008/09/11 01:11:42 y0netan1 Exp $
35 #include "opt_ddb.h"
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/types.h>
40 #include <sys/kernel.h>
41 #include <sys/sysctl.h>
42 #ifdef INVARIANTS
43 #include <sys/proc.h>
44 #endif
45 #include <ddb/ddb.h>
46 #include <machine/atomic.h>
47 #include <machine/cpufunc.h>
48 #include <machine/specialreg.h>
49 #include <machine/clock.h>
50 #include <sys/spinlock.h>
51 #include <sys/spinlock2.h>
52 #include <sys/ktr.h>
/*
 * Exponential backoff tuning.  A backoff state starts at BACKOFF_INITIAL
 * (see exponential_init()); BACKOFF_LIMIT is the default value of the
 * spinlocks_backoff_limit tunable.
 */
#define	BACKOFF_INITIAL	1
#define	BACKOFF_LIMIT	256
57 #ifdef SMP
60 * Kernal Trace
62 #if !defined(KTR_SPIN_CONTENTION)
63 #define KTR_SPIN_CONTENTION KTR_ALL
64 #endif
65 #define SPIN_STRING "spin=%p type=%c"
66 #define SPIN_ARG_SIZE (sizeof(void *) + sizeof(int))
68 KTR_INFO_MASTER(spin);
69 KTR_INFO(KTR_SPIN_CONTENTION, spin, beg, 0, SPIN_STRING, SPIN_ARG_SIZE);
70 KTR_INFO(KTR_SPIN_CONTENTION, spin, end, 1, SPIN_STRING, SPIN_ARG_SIZE);
71 KTR_INFO(KTR_SPIN_CONTENTION, spin, backoff, 2,
72 "spin=%p bo1=%d thr=%p bo=%d",
73 ((2 * sizeof(void *)) + (2 * sizeof(int))));
74 KTR_INFO(KTR_SPIN_CONTENTION, spin, bofail, 3, SPIN_STRING, SPIN_ARG_SIZE);
76 #define logspin(name, mtx, type) \
77 KTR_LOG(spin_ ## name, mtx, type)
79 #define logspin_backoff(mtx, bo1, thr, bo) \
80 KTR_LOG(spin_backoff, mtx, bo1, thr, bo)
82 #ifdef INVARIANTS
83 static int spin_lock_test_mode;
84 #endif
86 static int64_t spinlocks_contested1;
87 SYSCTL_QUAD(_debug, OID_AUTO, spinlocks_contested1, CTLFLAG_RD,
88 &spinlocks_contested1, 0, "");
90 static int64_t spinlocks_contested2;
91 SYSCTL_QUAD(_debug, OID_AUTO, spinlocks_contested2, CTLFLAG_RD,
92 &spinlocks_contested2, 0, "");
94 static int spinlocks_backoff_limit = BACKOFF_LIMIT;
95 SYSCTL_INT(_debug, OID_AUTO, spinlocks_bolim, CTLFLAG_RW,
96 &spinlocks_backoff_limit, 0, "");
98 struct exponential_backoff {
99 int backoff;
100 int nsec;
101 struct spinlock *mtx;
102 sysclock_t base;
104 static int exponential_backoff(struct exponential_backoff *bo);
106 static __inline
107 void
108 exponential_init(struct exponential_backoff *bo, struct spinlock *mtx)
110 bo->backoff = BACKOFF_INITIAL;
111 bo->nsec = 0;
112 bo->mtx = mtx;
116 * We were either contested due to another exclusive lock holder,
117 * or due to the presence of shared locks. We have to undo the mess
118 * we created by returning the shared locks.
120 * If there was another exclusive lock holder only the exclusive bit
121 * in value will be the only bit set. We don't have to do anything since
122 * restoration does not involve any work.
124 * Otherwise we successfully obtained the exclusive bit. Attempt to
125 * clear the shared bits. If we are able to clear the shared bits
126 * we win. Otherwise we lose and we have to restore the shared bits
127 * we couldn't clear (and also clear our exclusive bit).
130 spin_trylock_wr_contested(globaldata_t gd, struct spinlock *mtx, int value)
132 int bit;
134 ++spinlocks_contested1;
135 if ((value & SPINLOCK_EXCLUSIVE) == 0) {
136 while (value) {
137 bit = bsfl(value);
138 if (globaldata_find(bit)->gd_spinlock_rd == mtx) {
139 atomic_swap_int(&mtx->lock, value);
140 --gd->gd_spinlocks_wr;
141 return (FALSE);
143 value &= ~(1 << bit);
145 return (TRUE);
147 --gd->gd_spinlocks_wr;
148 return (FALSE);
152 * We were either contested due to another exclusive lock holder,
153 * or due to the presence of shared locks
155 * NOTE: If value indicates an exclusively held mutex, no shared bits
156 * would have been set and we can throw away value.
158 void
159 spin_lock_wr_contested(struct spinlock *mtx, int value)
161 struct exponential_backoff backoff;
162 globaldata_t gd = mycpu;
163 int bit;
164 int mask;
167 * Wait until we can gain exclusive access vs another exclusive
168 * holder.
170 exponential_init(&backoff, mtx);
171 ++spinlocks_contested1;
172 logspin(beg, mtx, 'w');
174 while (value & SPINLOCK_EXCLUSIVE) {
175 value = atomic_swap_int(&mtx->lock, SPINLOCK_EXCLUSIVE);
176 if (exponential_backoff(&backoff)) {
177 value &= ~SPINLOCK_EXCLUSIVE;
178 break;
183 * Kill the cached shared bit for our own cpu. This is the most
184 * common case and there's no sense wasting cpu on it. Since
185 * spinlocks aren't recursive, we can't own a shared ref on the
186 * spinlock while trying to get an exclusive one.
188 * If multiple bits are set do not stall on any single cpu. Check
189 * all cpus that have the cache bit set, then loop and check again,
190 * until we've cleaned all the bits.
192 value &= ~gd->gd_cpumask;
194 while ((mask = value) != 0) {
195 while (mask) {
196 bit = bsfl(value);
197 if (globaldata_find(bit)->gd_spinlock_rd != mtx) {
198 value &= ~(1 << bit);
199 } else if (exponential_backoff(&backoff)) {
200 value = 0;
201 break;
203 mask &= ~(1 << bit);
206 logspin(end, mtx, 'w');
210 * The cache bit wasn't set for our cpu. Loop until we can set the bit.
211 * As with the spin_lock_rd() inline we need a memory fence after setting
212 * gd_spinlock_rd to interlock against exclusive spinlocks waiting for
213 * that field to clear.
215 void
216 spin_lock_rd_contested(struct spinlock *mtx)
218 struct exponential_backoff backoff;
219 globaldata_t gd = mycpu;
220 int value = mtx->lock;
223 * Shortcut the op if we can just set the cache bit. This case
224 * occurs when the last lock was an exclusive lock.
226 while ((value & SPINLOCK_EXCLUSIVE) == 0) {
227 if (atomic_cmpset_int(&mtx->lock, value, value|gd->gd_cpumask))
228 return;
229 value = mtx->lock;
232 exponential_init(&backoff, mtx);
233 ++spinlocks_contested1;
235 logspin(beg, mtx, 'r');
237 while ((value & gd->gd_cpumask) == 0) {
238 if (value & SPINLOCK_EXCLUSIVE) {
239 gd->gd_spinlock_rd = NULL;
240 if (exponential_backoff(&backoff)) {
241 gd->gd_spinlock_rd = mtx;
242 break;
244 gd->gd_spinlock_rd = mtx;
245 cpu_mfence();
246 } else {
247 if (atomic_cmpset_int(&mtx->lock, value, value|gd->gd_cpumask))
248 break;
250 value = mtx->lock;
252 logspin(end, mtx, 'r');
256 * Handle exponential backoff and indefinite waits.
258 * If the system is handling a panic we hand the spinlock over to the caller
259 * after 1 second. After 10 seconds we attempt to print a debugger
260 * backtrace. We also run pending interrupts in order to allow a console
261 * break into DDB.
263 static
265 exponential_backoff(struct exponential_backoff *bo)
267 sysclock_t count;
268 int backoff;
270 #ifdef _RDTSC_SUPPORTED_
271 if (cpu_feature & CPUID_TSC) {
272 backoff =
273 (((u_long)rdtsc() ^ (((u_long)curthread) >> 5)) &
274 (bo->backoff - 1)) + BACKOFF_INITIAL;
275 } else
276 #endif
277 backoff = bo->backoff;
278 logspin_backoff(bo->mtx, bo->backoff, curthread, backoff);
281 * Quick backoff
283 for (; backoff; --backoff)
284 cpu_pause();
285 if (bo->backoff < spinlocks_backoff_limit) {
286 bo->backoff <<= 1;
287 return (FALSE);
288 } else {
289 bo->backoff = BACKOFF_INITIAL;
292 logspin(bofail, bo->mtx, 'u');
295 * Indefinite
297 ++spinlocks_contested2;
298 cpu_spinlock_contested();
299 if (bo->nsec == 0) {
300 bo->base = sys_cputimer->count();
301 bo->nsec = 1;
304 count = sys_cputimer->count();
305 if (count - bo->base > sys_cputimer->freq) {
306 kprintf("spin_lock: %p, indefinite wait!\n", bo->mtx);
307 if (panicstr)
308 return (TRUE);
309 #if defined(INVARIANTS) && defined(DDB)
310 if (spin_lock_test_mode) {
311 db_print_backtrace();
312 return (TRUE);
314 #endif
315 ++bo->nsec;
316 #if defined(INVARIANTS) && defined(DDB)
317 if (bo->nsec == 11)
318 db_print_backtrace();
319 #endif
320 if (bo->nsec == 60)
321 panic("spin_lock: %p, indefinite wait!\n", bo->mtx);
322 splz();
323 bo->base = count;
325 return (FALSE);
/*
 * If INVARIANTS is enabled various spinlock timing tests can be run
 * by setting debug.spin_lock_test:
 *
 *	1	Test the indefinite wait code
 *	2	Time the best-case exclusive lock overhead (spin_test_count)
 *	3	Time the best-case shared lock overhead (spin_test_count)
 */

#ifdef INVARIANTS

/* Number of lock/unlock iterations used by tests 2 and 3. */
static int spin_test_count = 10000000;
SYSCTL_INT(_debug, OID_AUTO, spin_test_count, CTLFLAG_RW, &spin_test_count, 0, "");

/*
 * Sysctl handler for debug.spin_lock_test.  Requires superuser.  The
 * integer written selects which test to run; any other value (including
 * a plain read, which leaves value at 0) does nothing.  Returns 0 or the
 * error from the privilege check or from copying in the new value.
 */
static int
sysctl_spin_lock_test(SYSCTL_HANDLER_ARGS)
{
	struct spinlock mtx;
	int error;
	int value = 0;
	int i;

	if ((error = suser(curthread)) != 0)
		return (error);
	if ((error = SYSCTL_IN(req, &value, sizeof(value))) != 0)
		return (error);

	/*
	 * Indefinite wait test.  The lock is taken twice on purpose; the
	 * second acquisition can only return once exponential_backoff()
	 * honors spin_lock_test_mode and hands the lock over.
	 */
	if (value == 1) {
		spin_init(&mtx);
		spin_lock_wr(&mtx);	/* force an indefinite wait */
		spin_lock_test_mode = 1;
		spin_lock_wr(&mtx);
		spin_unlock_wr(&mtx);	/* Clean up the spinlock count */
		spin_unlock_wr(&mtx);
		spin_lock_test_mode = 0;
	}

	/*
	 * Time best-case exclusive spinlocks
	 */
	if (value == 2) {
		globaldata_t gd = mycpu;

		spin_init(&mtx);
		for (i = spin_test_count; i > 0; --i) {
			spin_lock_wr_quick(gd, &mtx);
			spin_unlock_wr_quick(gd, &mtx);
		}
	}

	/*
	 * Time best-case shared spinlocks
	 */
	if (value == 3) {
		globaldata_t gd = mycpu;

		spin_init(&mtx);
		for (i = spin_test_count; i > 0; --i) {
			spin_lock_rd_quick(gd, &mtx);
			spin_unlock_rd_quick(gd, &mtx);
		}
	}
	return (0);
}

/*
 * Register with OID_AUTO like every other node in this file.  The
 * previous KERN_PROC_ALL argument is a kern.proc selector constant, not
 * an OID number for the debug tree.
 */
SYSCTL_PROC(_debug, OID_AUTO, spin_lock_test, CTLFLAG_RW|CTLTYPE_INT,
	0, 0, sysctl_spin_lock_test, "I", "Test spinlock wait code");

#endif	/* INVARIANTS */
400 #endif /* SMP */