2 * Copyright 1999, 2000 John D. Polstra.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 * $FreeBSD: src/libexec/rtld-elf/i386/lockdflt.c,v 1.5.2.4 2002/07/11 23:52:32 jdp Exp $
26 * $DragonFly: src/libexec/rtld-elf/i386/lockdflt.c,v 1.2 2003/06/17 04:27:08 dillon Exp $
/*
 * Thread locking implementation for the dynamic linker.
 *
 * On 80486 and later CPUs we use the "simple, non-scalable
 * reader-preference lock" from:
 *
 *   J. M. Mellor-Crummey and M. L. Scott. "Scalable Reader-Writer
 *   Synchronization for Shared-Memory Multiprocessors." 3rd ACM Symp. on
 *   Principles and Practice of Parallel Programming, April 1991.
 *
 * In this algorithm the lock is a single word.  Its low-order bit is
 * set when a writer holds the lock.  The remaining high-order bits
 * contain a count of readers desiring the lock.  The algorithm requires
 * atomic "compare_and_store" and "add" operations.
 *
 * The "compare_and_store" operation requires the "cmpxchg" instruction
 * on the x86.  Unfortunately, the 80386 CPU does not support that
 * instruction -- only the 80486 and later models support it.  So on the
 * 80386 we must use simple test-and-set exclusive locks instead.  We
 * determine which kind of lock to use by trying to execute a "cmpxchg"
 * instruction and catching the SIGILL which results on the 80386.
 */
/*
 * Alignment, in bytes, that lock_create() gives each lock so that the
 * lock starts on a cache-line boundary.
 */
#define CACHE_LINE_SIZE	32

/*
 * Layout of the lock word: bit 0 is the writer flag and the remaining
 * high-order bits hold the reader count, so the count is adjusted in
 * steps of RC_INCR to leave the writer flag untouched.
 */
#define WAFLAG		0x1	/* A writer holds the lock */
#define RC_INCR		0x2	/* Adjusts count of readers desiring lock */
65 typedef struct Struct_Lock
{
/*
 * fullsigmask: the set of signals blocked by lock80386_acquire() and
 * wlock_acquire() around their attempt to take the lock; it is filled
 * in by lockdflt_init() as "all signals" minus the trap-type signals.
 *
 * oldsigmask: the signal mask saved by those acquire routines once the
 * lock has been obtained, and restored by lock80386_release() and
 * wlock_release().
 */
70 static sigset_t fullsigmask
, oldsigmask
;
73 cmpxchgl(int old
, int new, volatile int *m
)
77 __asm
__volatile ("lock; cmpxchgl %2, %0"
78 : "+m"(*m
), "=a"(result
)
86 xchgl(int v
, volatile int *m
)
90 __asm
__volatile ("xchgl %0, %1"
91 : "=r"(result
), "+m"(*m
)
98 lock_create(void *context
)
/*
 * Arrange for the lock to occupy its own cache line.  First, we
 * optimistically allocate just a cache line, hoping that malloc
 * will give us a well-aligned block of memory.  If that doesn't
 * work, we allocate a larger block and take a well-aligned cache
 * line from it.
 */
112 base
= xmalloc(CACHE_LINE_SIZE
);
114 if ((uintptr_t)p
% CACHE_LINE_SIZE
!= 0) {
116 base
= xmalloc(2 * CACHE_LINE_SIZE
);
118 if ((r
= (uintptr_t)p
% CACHE_LINE_SIZE
) != 0)
119 p
+= CACHE_LINE_SIZE
- r
;
128 lock_destroy(void *lock
)
130 Lock
*l
= (Lock
*)lock
;
/*
 * Crude exclusive locks for the 80386, which does not support the
 * cmpxchg instruction.
 */
140 lock80386_acquire(void *lock
)
142 Lock
*l
= (Lock
*)lock
;
143 sigset_t tmp_oldsigmask
;
146 sigprocmask(SIG_BLOCK
, &fullsigmask
, &tmp_oldsigmask
);
147 if (xchgl(1, &l
->lock
) == 0)
149 sigprocmask(SIG_SETMASK
, &tmp_oldsigmask
, NULL
);
153 oldsigmask
= tmp_oldsigmask
;
157 lock80386_release(void *lock
)
159 Lock
*l
= (Lock
*)lock
;
162 sigprocmask(SIG_SETMASK
, &oldsigmask
, NULL
);
/*
 * Better reader/writer locks for the 80486 and later CPUs.
 */
169 rlock_acquire(void *lock
)
171 Lock
*l
= (Lock
*)lock
;
173 atomic_add_int(&l
->lock
, RC_INCR
);
174 while (l
->lock
& WAFLAG
)
179 wlock_acquire(void *lock
)
181 Lock
*l
= (Lock
*)lock
;
182 sigset_t tmp_oldsigmask
;
185 sigprocmask(SIG_BLOCK
, &fullsigmask
, &tmp_oldsigmask
);
186 if (cmpxchgl(0, WAFLAG
, &l
->lock
) == 0)
188 sigprocmask(SIG_SETMASK
, &tmp_oldsigmask
, NULL
);
190 oldsigmask
= tmp_oldsigmask
;
194 rlock_release(void *lock
)
196 Lock
*l
= (Lock
*)lock
;
198 atomic_add_int(&l
->lock
, -RC_INCR
);
202 wlock_release(void *lock
)
204 Lock
*l
= (Lock
*)lock
;
206 atomic_add_int(&l
->lock
, -WAFLAG
);
207 sigprocmask(SIG_SETMASK
, &oldsigmask
, NULL
);
/*
 * Code to determine at runtime whether the CPU supports the cmpxchg
 * instruction.  This instruction allows us to use locks that are more
 * efficient, but it didn't exist on the 80386.
 */
/*
 * Saved execution context for the cmpxchg probe.  cpu_supports_cmpxchg()
 * records it with setjmp() before trying the instruction, and the SIGILL
 * path longjmp()s back to it with a non-zero value.
 */
static jmp_buf sigill_env;
220 longjmp(sigill_env
, 1);
224 cpu_supports_cmpxchg(void)
226 struct sigaction act
, oact
;
230 memset(&act
, 0, sizeof act
);
231 act
.sa_handler
= sigill
;
232 sigemptyset(&act
.sa_mask
);
235 sigaction(SIGILL
, &act
, &oact
);
236 if (setjmp(sigill_env
) == 0) {
238 cmpxchgl(0, 1, &lock
);
242 sigaction(SIGILL
, &oact
, NULL
);
247 lockdflt_init(LockInfo
*li
)
250 li
->context_destroy
= NULL
;
251 li
->lock_create
= lock_create
;
252 li
->lock_destroy
= lock_destroy
;
253 if (cpu_supports_cmpxchg()) {
254 /* Use fast locks that require an 80486 or later. */
255 li
->rlock_acquire
= rlock_acquire
;
256 li
->wlock_acquire
= wlock_acquire
;
257 li
->rlock_release
= rlock_release
;
258 li
->wlock_release
= wlock_release
;
260 /* It's a cruddy old 80386. */
261 li
->rlock_acquire
= li
->wlock_acquire
= lock80386_acquire
;
262 li
->rlock_release
= li
->wlock_release
= lock80386_release
;
/*
 * Construct a mask to block all signals except traps which might
 * conceivably be generated within the dynamic linker itself.
 */
268 sigfillset(&fullsigmask
);
269 sigdelset(&fullsigmask
, SIGILL
);
270 sigdelset(&fullsigmask
, SIGTRAP
);
271 sigdelset(&fullsigmask
, SIGABRT
);
272 sigdelset(&fullsigmask
, SIGEMT
);
273 sigdelset(&fullsigmask
, SIGFPE
);
274 sigdelset(&fullsigmask
, SIGBUS
);
275 sigdelset(&fullsigmask
, SIGSEGV
);
276 sigdelset(&fullsigmask
, SIGSYS
);