/*-
 * Copyright 1999, 2000 John D. Polstra.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: src/libexec/rtld-elf/i386/lockdflt.c,v 1.5.2.4 2002/07/11 23:52:32 jdp Exp $
 * $DragonFly: src/libexec/rtld-elf/i386/lockdflt.c,v 1.2 2003/06/17 04:27:08 dillon Exp $
 */

/*
 * Thread locking implementation for the dynamic linker.
 *
 * On 80486 and later CPUs we use the "simple, non-scalable
 * reader-preference lock" from:
 *
 *   J. M. Mellor-Crummey and M. L. Scott. "Scalable Reader-Writer
 *   Synchronization for Shared-Memory Multiprocessors." 3rd ACM Symp. on
 *   Principles and Practice of Parallel Programming, April 1991.
 *
 * In this algorithm the lock is a single word.  Its low-order bit is
 * set when a writer holds the lock.  The remaining high-order bits
 * contain a count of readers desiring the lock.  The algorithm requires
 * atomic "compare_and_store" and "add" operations.
 *
 * The "compare_and_store" operation requires the "cmpxchg" instruction
 * on the x86.  Unfortunately, the 80386 CPU does not support that
 * instruction -- only the 80486 and later models support it.  So on the
 * 80386 we must use simple test-and-set exclusive locks instead.  We
 * determine which kind of lock to use by trying to execute a "cmpxchg"
 * instruction and catching the SIGILL which results on the 80386.
 */

#include <setjmp.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>		/* memset() */
#include <time.h>

#include "debug.h"
#include "rtld.h"

#define CACHE_LINE_SIZE		32

#define WAFLAG		0x1	/* A writer holds the lock */
#define RC_INCR		0x2	/* Adjusts count of readers desiring lock */

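/*
 * For illustration: with two readers spinning for the lock while a
 * writer holds it, the lock word is 2 * RC_INCR + WAFLAG == 0x5; once
 * the writer releases, the word drops to 0x4 and both readers proceed.
 */
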
typedef struct Struct_Lock {
    volatile int lock;
    void *base;
} Lock;

static sigset_t fullsigmask, oldsigmask;

static inline int
cmpxchgl(int old, int new, volatile int *m)
{
    int result;

    __asm __volatile ("lock; cmpxchgl %2, %0"
	: "+m"(*m), "=a"(result)
	: "r"(new), "1"(old)
	: "cc");

    return result;
}

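/*
 * cmpxchgl() returns the value "*m" held before the operation; the
 * store of "new" into "*m" happened if and only if that return value
 * equals "old".  The "lock" prefix keeps the compare-and-store atomic
 * on multiprocessor systems.
 */
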
static inline int
xchgl(int v, volatile int *m)
{
    int result;

    __asm __volatile ("xchgl %0, %1"
	: "=r"(result), "+m"(*m)
	: "0"(v));

    return result;
}

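/*
 * xchgl() atomically stores "v" into "*m" and returns the previous
 * contents.  On the x86 an xchg with a memory operand is locked
 * implicitly, so no "lock" prefix is needed.
 */
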
static void *
lock_create(void *context)
{
    void *base;
    char *p;
    uintptr_t r;
    Lock *l;

    /*
     * Arrange for the lock to occupy its own cache line.  First, we
     * optimistically allocate just a cache line, hoping that malloc
     * will give us a well-aligned block of memory.  If that doesn't
     * work, we allocate a larger block and take a well-aligned cache
     * line from it.
     */
    base = xmalloc(CACHE_LINE_SIZE);
    p = (char *)base;
    if ((uintptr_t)p % CACHE_LINE_SIZE != 0) {
	free(base);
	base = xmalloc(2 * CACHE_LINE_SIZE);
	p = (char *)base;
	if ((r = (uintptr_t)p % CACHE_LINE_SIZE) != 0)
	    p += CACHE_LINE_SIZE - r;
    }
    l = (Lock *)p;
    l->base = base;
    l->lock = 0;
    return l;
}

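/*
 * Alignment example: if the second, double-sized allocation starts at
 * an address ending in 0x08, then r == 8 and p is advanced by
 * CACHE_LINE_SIZE - r == 24 bytes to the next 32-byte boundary.  The
 * original malloc pointer is kept in l->base so lock_destroy() can
 * free it.
 */
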
static void
lock_destroy(void *lock)
{
    Lock *l = (Lock *)lock;

    free(l->base);
}

/*
 * Crude exclusive locks for the 80386, which does not support the
 * cmpxchg instruction.
 */
static void
lock80386_acquire(void *lock)
{
    Lock *l = (Lock *)lock;
    sigset_t tmp_oldsigmask;

    for ( ; ; ) {
	sigprocmask(SIG_BLOCK, &fullsigmask, &tmp_oldsigmask);
	if (xchgl(1, &l->lock) == 0)
	    break;
	sigprocmask(SIG_SETMASK, &tmp_oldsigmask, NULL);
	while (l->lock != 0)
	    ;		/* Spin */
    }
    oldsigmask = tmp_oldsigmask;
}

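/*
 * Signals stay blocked for as long as the lock is held so that a signal
 * handler which re-enters the dynamic linker cannot deadlock against a
 * lock the interrupted code already holds; the mask saved here is
 * restored by lock80386_release().
 */
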
static void
lock80386_release(void *lock)
{
    Lock *l = (Lock *)lock;

    l->lock = 0;
    sigprocmask(SIG_SETMASK, &oldsigmask, NULL);
}

/*
 * Better reader/writer locks for the 80486 and later CPUs.
 */
static void
rlock_acquire(void *lock)
{
    Lock *l = (Lock *)lock;

    atomic_add_int(&l->lock, RC_INCR);
    while (l->lock & WAFLAG)
	;		/* Spin */
}

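/*
 * A reader announces itself by adding RC_INCR, then simply waits for
 * any active writer to clear WAFLAG.  Because a writer can acquire the
 * lock only when the whole word is zero, this is the reader-preference
 * scheme described at the top of the file.
 */
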
static void
wlock_acquire(void *lock)
{
    Lock *l = (Lock *)lock;
    sigset_t tmp_oldsigmask;

    for ( ; ; ) {
	sigprocmask(SIG_BLOCK, &fullsigmask, &tmp_oldsigmask);
	if (cmpxchgl(0, WAFLAG, &l->lock) == 0)
	    break;
	sigprocmask(SIG_SETMASK, &tmp_oldsigmask, NULL);
    }
    oldsigmask = tmp_oldsigmask;
}

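/*
 * The compare-and-store from 0 to WAFLAG succeeds only when no readers
 * are counted and no writer is present, so a writer must wait for the
 * lock word to drain to zero before it gets in.  Signals are blocked
 * only while the write lock is actually held.
 */
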
static void
rlock_release(void *lock)
{
    Lock *l = (Lock *)lock;

    atomic_add_int(&l->lock, -RC_INCR);
}

static void
wlock_release(void *lock)
{
    Lock *l = (Lock *)lock;

    atomic_add_int(&l->lock, -WAFLAG);
    sigprocmask(SIG_SETMASK, &oldsigmask, NULL);
}

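/*
 * Each release subtracts exactly what the matching acquire added.  Only
 * the write-side release restores the signal mask, because only
 * wlock_acquire() (and the 80386 lock) blocks signals while the lock is
 * held.
 */
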
/*
 * Code to determine at runtime whether the CPU supports the cmpxchg
 * instruction.  This instruction allows us to use locks that are more
 * efficient, but it didn't exist on the 80386.
 */
static jmp_buf sigill_env;

static void
sigill(int sig)
{
    longjmp(sigill_env, 1);
}

static int
cpu_supports_cmpxchg(void)
{
    struct sigaction act, oact;
    int result;
    volatile int lock;

    memset(&act, 0, sizeof act);
    act.sa_handler = sigill;
    sigemptyset(&act.sa_mask);
    act.sa_flags = 0;

    sigaction(SIGILL, &act, &oact);
    if (setjmp(sigill_env) == 0) {
	lock = 0;
	cmpxchgl(0, 1, &lock);
	result = 1;
    } else
	result = 0;
    sigaction(SIGILL, &oact, NULL);
    return result;
}

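/*
 * On an 80386 the cmpxchg opcode raises an illegal-instruction trap;
 * the sigill() handler longjmps back here and the probe reports 0.  On
 * an 80486 or later the instruction simply executes and the probe
 * reports 1.  The previous SIGILL disposition is restored either way.
 */
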
void
lockdflt_init(LockInfo *li)
{
    li->context = NULL;
    li->context_destroy = NULL;
    li->lock_create = lock_create;
    li->lock_destroy = lock_destroy;
    if (cpu_supports_cmpxchg()) {
	/* Use fast locks that require an 80486 or later. */
	li->rlock_acquire = rlock_acquire;
	li->wlock_acquire = wlock_acquire;
	li->rlock_release = rlock_release;
	li->wlock_release = wlock_release;
    } else {
	/* It's a cruddy old 80386. */
	li->rlock_acquire = li->wlock_acquire = lock80386_acquire;
	li->rlock_release = li->wlock_release = lock80386_release;
    }
    /*
     * Construct a mask to block all signals except traps which might
     * conceivably be generated within the dynamic linker itself.
     */
    sigfillset(&fullsigmask);
    sigdelset(&fullsigmask, SIGILL);
    sigdelset(&fullsigmask, SIGTRAP);
    sigdelset(&fullsigmask, SIGABRT);
    sigdelset(&fullsigmask, SIGEMT);
    sigdelset(&fullsigmask, SIGFPE);
    sigdelset(&fullsigmask, SIGBUS);
    sigdelset(&fullsigmask, SIGSEGV);
    sigdelset(&fullsigmask, SIGSYS);
}
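
#if 0
/*
 * Minimal usage sketch (illustrative only; example_usage() is not part
 * of this file): the dynamic linker drives these locks purely through
 * the callbacks installed in the LockInfo above.
 */
static void
example_usage(LockInfo *li)
{
    void *l;

    l = li->lock_create(li->context);	/* one lock per protected object */
    li->wlock_acquire(l);		/* exclusive access for updates */
    /* ... modify data protected by the lock ... */
    li->wlock_release(l);
    li->lock_destroy(l);
}
#endif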