/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/prsystm.h>
#include <sys/sobject.h>
#include <sys/fault.h>
#include <sys/procfs.h>
#include <sys/watchpoint.h>
#include <sys/cmn_err.h>
#include <sys/machlock.h>
#include <sys/debug.h>
#include <sys/synch.h>
#include <sys/synch32.h>
#include <sys/class.h>
#include <sys/schedctl.h>
#include <sys/sleepq.h>
#include <sys/policy.h>
#include <sys/tnf_probe.h>
#include <sys/lwpchan_impl.h>
#include <sys/turnstile.h>
#include <sys/atomic.h>
#include <sys/lwp_timer_impl.h>
#include <sys/lwp_upimutex_impl.h>

static kthread_t *lwpsobj_owner(caddr_t);
static void lwp_unsleep(kthread_t *t);
static void lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip);
static void lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg);
static void lwp_mutex_unregister(void *uaddr);
static void set_owner_pid(lwp_mutex_t *, uintptr_t, pid_t);
static int iswanted(kthread_t *, lwpchan_t *);

extern int lwp_cond_signal(lwp_cond_t *cv);

/*
 * Maximum number of user prio inheritance locks that can be held by a thread.
 * Used to limit kmem for each thread. This is a per-thread limit that
 * can be administered on a system-wide basis (using /etc/system).
 *
 * Also, when a limit, say maxlwps, is added for the number of lwps within a
 * process, the per-thread limit automatically becomes a process-wide limit
 * of the maximum number of held upi locks within a process:
 *	maxheldupimx = maxnestupimx * maxlwps;
 */
static uint32_t maxnestupimx = 2000;
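
/*
 * Illustrative only (the lwp count here is hypothetical, not a real
 * tunable in this file): with the default maxnestupimx = 2000 and a
 * process limited to, say, 8 lwps, the formula above would give
 *	maxheldupimx = 2000 * 8 = 16000 held upi locks per process.
 */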

/*
 * The sobj_ops vector exports a set of functions needed when a thread
 * is asleep on a synchronization object of this type.
 */
static sobj_ops_t lwp_sobj_ops = {
	SOBJ_USER, lwpsobj_owner, lwp_unsleep, lwp_change_pri
};

static kthread_t *lwpsobj_pi_owner(upimutex_t *up);

static sobj_ops_t lwp_sobj_pi_ops = {
	SOBJ_USER_PI, lwpsobj_pi_owner, turnstile_unsleep,
	turnstile_change_pri
};

static sleepq_head_t	lwpsleepq[NSLEEPQ];
upib_t			upimutextab[UPIMUTEX_TABSIZE];

#define	LWPCHAN_LOCK_SHIFT	10		/* 1024 locks for each pool */
#define	LWPCHAN_LOCK_SIZE	(1 << LWPCHAN_LOCK_SHIFT)

/*
 * We know that both lc_wchan and lc_wchan0 are addresses that most
 * likely are 8-byte aligned, so we shift off the low-order 3 bits.
 * 'pool' is either 0 or 1.
 */
#define	LWPCHAN_LOCK_HASH(X, pool) \
	(((((X) >> 3) ^ ((X) >> (LWPCHAN_LOCK_SHIFT + 3))) & \
	(LWPCHAN_LOCK_SIZE - 1)) + ((pool)? LWPCHAN_LOCK_SIZE : 0))

static kmutex_t		lwpchanlock[2 * LWPCHAN_LOCK_SIZE];
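
/*
 * A worked example of LWPCHAN_LOCK_HASH (values chosen for illustration):
 * with LWPCHAN_LOCK_SHIFT = 10, the macro XORs (X >> 3) with (X >> 13)
 * and masks the result with LWPCHAN_LOCK_SIZE - 1 (0x3ff), yielding an
 * index in [0, 1023]; pool 1 then adds LWPCHAN_LOCK_SIZE, moving the
 * index into [1024, 2047]. The two pools therefore can never share a
 * lock in lwpchanlock[].
 */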

/*
 * Is this a POSIX threads user-level lock requiring priority inheritance?
 */
#define	UPIMUTEX(type)	((type) & LOCK_PRIO_INHERIT)

static sleepq_head_t *
lwpsqhash(lwpchan_t *lwpchan)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;

	return (&lwpsleepq[SQHASHINDEX(x)]);
}

/*
 * Lock an lwpchan.
 * Keep this in sync with lwpchan_unlock(), below.
 */
static void
lwpchan_lock(lwpchan_t *lwpchan, int pool)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;

	mutex_enter(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]);
}

/*
 * Unlock an lwpchan.
 * Keep this in sync with lwpchan_lock(), above.
 */
static void
lwpchan_unlock(lwpchan_t *lwpchan, int pool)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;

	mutex_exit(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]);
}

/*
 * Delete mappings from the lwpchan cache for pages that are being
 * unmapped by as_unmap(). Given a range of addresses, "start" to "end",
 * all mappings within the range are deleted from the lwpchan cache.
 */
void
lwpchan_delete_mapping(proc_t *p, caddr_t start, caddr_t end)
{
	lwpchan_data_t *lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_entry_t *ent;
	lwpchan_entry_t **prev;
	caddr_t addr;

	mutex_enter(&p->p_lcp_lock);
	lcp = p->p_lcp;
	hashbucket = lcp->lwpchan_cache;
	endbucket = hashbucket + lcp->lwpchan_size;
	for (; hashbucket < endbucket; hashbucket++) {
		if (hashbucket->lwpchan_chain == NULL)
			continue;
		mutex_enter(&hashbucket->lwpchan_lock);
		prev = &hashbucket->lwpchan_chain;
		/* check entire chain */
		while ((ent = *prev) != NULL) {
			addr = ent->lwpchan_addr;
			if (start <= addr && addr < end) {
				*prev = ent->lwpchan_next;
				/*
				 * We do this only for the obsolete type
				 * USYNC_PROCESS_ROBUST. Otherwise robust
				 * locks do not draw ELOCKUNMAPPED or
				 * EOWNERDEAD due to being unmapped.
				 */
				if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
				    (ent->lwpchan_type & USYNC_PROCESS_ROBUST))
					lwp_mutex_cleanup(ent, LOCK_UNMAPPED);
				/*
				 * If there is a user-level robust lock
				 * registration, mark it as invalid.
				 */
				if ((addr = ent->lwpchan_uaddr) != NULL)
					lwp_mutex_unregister(addr);
				kmem_free(ent, sizeof (*ent));
				atomic_dec_32(&lcp->lwpchan_entries);
			} else {
				prev = &ent->lwpchan_next;
			}
		}
		mutex_exit(&hashbucket->lwpchan_lock);
	}
	mutex_exit(&p->p_lcp_lock);
}

/*
 * Given an lwpchan cache pointer and a process virtual address,
 * return a pointer to the corresponding lwpchan hash bucket.
 */
static lwpchan_hashbucket_t *
lwpchan_bucket(lwpchan_data_t *lcp, uintptr_t addr)
{
	uint_t i;

	/*
	 * All user-level sync object addresses are 8-byte aligned.
	 * Ignore the lowest 3 bits of the address and use the
	 * higher-order 2*lwpchan_bits bits for the hash index.
	 */
	addr >>= 3;
	i = (addr ^ (addr >> lcp->lwpchan_bits)) & lcp->lwpchan_mask;
	return (lcp->lwpchan_cache + i);
}

/*
 * (Re)allocate the per-process lwpchan cache.
 */
static void
lwpchan_alloc_cache(proc_t *p, uint_t bits)
{
	lwpchan_data_t *lcp;
	lwpchan_data_t *old_lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_hashbucket_t *newbucket;
	lwpchan_entry_t *ent;
	lwpchan_entry_t *next;
	uint_t count;

	ASSERT(bits >= LWPCHAN_INITIAL_BITS && bits <= LWPCHAN_MAX_BITS);

	lcp = kmem_alloc(sizeof (lwpchan_data_t), KM_SLEEP);
	lcp->lwpchan_bits = bits;
	lcp->lwpchan_size = 1 << lcp->lwpchan_bits;
	lcp->lwpchan_mask = lcp->lwpchan_size - 1;
	lcp->lwpchan_entries = 0;
	lcp->lwpchan_cache = kmem_zalloc(lcp->lwpchan_size *
	    sizeof (lwpchan_hashbucket_t), KM_SLEEP);
	lcp->lwpchan_next_data = NULL;

	mutex_enter(&p->p_lcp_lock);
	if ((old_lcp = p->p_lcp) != NULL) {
		if (old_lcp->lwpchan_bits >= bits) {
			/* someone beat us to it */
			mutex_exit(&p->p_lcp_lock);
			kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
			    sizeof (lwpchan_hashbucket_t));
			kmem_free(lcp, sizeof (lwpchan_data_t));
			return;
		}
		/*
		 * Acquire all of the old hash table locks.
		 */
		hashbucket = old_lcp->lwpchan_cache;
		endbucket = hashbucket + old_lcp->lwpchan_size;
		for (; hashbucket < endbucket; hashbucket++)
			mutex_enter(&hashbucket->lwpchan_lock);
		/*
		 * Move all of the old hash table entries to the
		 * new hash table. The new hash table has not yet
		 * been installed so we don't need any of its locks.
		 */
		count = 0;
		hashbucket = old_lcp->lwpchan_cache;
		for (; hashbucket < endbucket; hashbucket++) {
			ent = hashbucket->lwpchan_chain;
			while (ent != NULL) {
				next = ent->lwpchan_next;
				newbucket = lwpchan_bucket(lcp,
				    (uintptr_t)ent->lwpchan_addr);
				ent->lwpchan_next = newbucket->lwpchan_chain;
				newbucket->lwpchan_chain = ent;
				ent = next;
				count++;
			}
			hashbucket->lwpchan_chain = NULL;
		}
		lcp->lwpchan_entries = count;
	}

	/*
	 * Retire the old hash table. We can't actually kmem_free() it
	 * now because someone may still have a pointer to it. Instead,
	 * we link it onto the new hash table's list of retired hash tables.
	 * The new hash table is double the size of the previous one, so
	 * the total size of all retired hash tables is less than the size
	 * of the new one. exit() and exec() free the retired hash tables
	 * (see lwpchan_destroy_cache(), below).
	 */
	lcp->lwpchan_next_data = old_lcp;

	/*
	 * As soon as we store the new lcp, future locking operations will
	 * use it. Therefore, we must ensure that all the state we've just
	 * established reaches global visibility before the new lcp does.
	 */
	membar_producer();
	p->p_lcp = lcp;

	if (old_lcp != NULL) {
		/*
		 * Release all of the old hash table locks.
		 */
		hashbucket = old_lcp->lwpchan_cache;
		for (; hashbucket < endbucket; hashbucket++)
			mutex_exit(&hashbucket->lwpchan_lock);
	}
	mutex_exit(&p->p_lcp_lock);
}
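
/*
 * A sketch of the size argument in the comment above: table sizes grow
 * by doubling, so after growing from 2^k to 2^n buckets the retired
 * tables hold 2^k + 2^(k+1) + ... + 2^(n-1) < 2^n buckets in total,
 * i.e. all retired tables together stay smaller than the live table.
 */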

/*
 * Deallocate the lwpchan cache, and any dynamically allocated mappings.
 * Called when the process exits or execs. All lwps except one have
 * exited so we need no locks here.
 */
void
lwpchan_destroy_cache(int exec)
{
	proc_t *p = curproc;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_data_t *lcp;
	lwpchan_entry_t *ent;
	lwpchan_entry_t *next;
	uint16_t lockflg;

	lcp = p->p_lcp;
	p->p_lcp = NULL;

	lockflg = exec? LOCK_UNMAPPED : LOCK_OWNERDEAD;
	hashbucket = lcp->lwpchan_cache;
	endbucket = hashbucket + lcp->lwpchan_size;
	for (; hashbucket < endbucket; hashbucket++) {
		ent = hashbucket->lwpchan_chain;
		hashbucket->lwpchan_chain = NULL;
		while (ent != NULL) {
			next = ent->lwpchan_next;
			if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
			    (ent->lwpchan_type & (USYNC_PROCESS | LOCK_ROBUST))
			    == (USYNC_PROCESS | LOCK_ROBUST))
				lwp_mutex_cleanup(ent, lockflg);
			kmem_free(ent, sizeof (*ent));
			ent = next;
		}
	}

	while (lcp != NULL) {
		lwpchan_data_t *next_lcp = lcp->lwpchan_next_data;
		kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
		    sizeof (lwpchan_hashbucket_t));
		kmem_free(lcp, sizeof (lwpchan_data_t));
		lcp = next_lcp;
	}
}

/*
 * Return zero when there is an entry in the lwpchan cache for the
 * given process virtual address and non-zero when there is not.
 * The returned non-zero value is the current length of the
 * hash chain plus one. The caller holds the hash bucket lock.
 */
static int
lwpchan_cache_mapping(caddr_t addr, int type, int pool, lwpchan_t *lwpchan,
	lwpchan_hashbucket_t *hashbucket)
{
	lwpchan_entry_t *ent;
	int count = 1;

	for (ent = hashbucket->lwpchan_chain; ent; ent = ent->lwpchan_next) {
		if (ent->lwpchan_addr == addr) {
			if (ent->lwpchan_type != type ||
			    ent->lwpchan_pool != pool) {
				/*
				 * This shouldn't happen, but might if the
				 * process reuses its memory for different
				 * types of sync objects. We test first
				 * to avoid grabbing the memory cache line.
				 */
				ent->lwpchan_type = (uint16_t)type;
				ent->lwpchan_pool = (uint16_t)pool;
			}
			*lwpchan = ent->lwpchan_lwpchan;
			return (0);
		}
		count++;
	}
	return (count);
}
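
/*
 * Usage sketch for the return value: 0 means the mapping was found and
 * *lwpchan has been filled in; a nonzero return n means no match, with
 * n - 1 entries currently on the chain. lwpchan_get_mapping() compares
 * this value against lwpchan_bits + 2 to decide when a chain is long
 * enough to justify doubling the hash table.
 */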

/*
 * Return the cached lwpchan mapping if cached, otherwise insert
 * a virtual address to lwpchan mapping into the cache.
 */
static int
lwpchan_get_mapping(struct as *as, caddr_t addr, caddr_t uaddr,
	int type, lwpchan_t *lwpchan, int pool)
{
	proc_t *p = curproc;
	lwpchan_data_t *lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_entry_t *ent;
	memid_t memid;
	uint_t count;
	uint_t bits;

top:
	/* initialize the lwpchan cache, if necessary */
	if ((lcp = p->p_lcp) == NULL) {
		lwpchan_alloc_cache(p, LWPCHAN_INITIAL_BITS);
		goto top;
	}
	hashbucket = lwpchan_bucket(lcp, (uintptr_t)addr);
	mutex_enter(&hashbucket->lwpchan_lock);
	if (lcp != p->p_lcp) {
		/* someone resized the lwpchan cache; start over */
		mutex_exit(&hashbucket->lwpchan_lock);
		goto top;
	}
	if (lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket) == 0) {
		/* it's in the cache */
		mutex_exit(&hashbucket->lwpchan_lock);
		return (1);
	}
	mutex_exit(&hashbucket->lwpchan_lock);
	if (as_getmemid(as, addr, &memid) != 0)
		return (0);
	lwpchan->lc_wchan0 = (caddr_t)(uintptr_t)memid.val[0];
	lwpchan->lc_wchan = (caddr_t)(uintptr_t)memid.val[1];
	ent = kmem_alloc(sizeof (lwpchan_entry_t), KM_SLEEP);
	mutex_enter(&hashbucket->lwpchan_lock);
	if (lcp != p->p_lcp) {
		/* someone resized the lwpchan cache; start over */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		goto top;
	}
	count = lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket);
	if (count == 0) {
		/* someone else added this entry to the cache */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		return (1);
	}
	if (count > lcp->lwpchan_bits + 2 && /* larger table, longer chains */
	    (bits = lcp->lwpchan_bits) < LWPCHAN_MAX_BITS) {
		/* hash chain too long; reallocate the hash table */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		lwpchan_alloc_cache(p, bits + 1);
		goto top;
	}
	ent->lwpchan_addr = addr;
	ent->lwpchan_uaddr = uaddr;
	ent->lwpchan_type = (uint16_t)type;
	ent->lwpchan_pool = (uint16_t)pool;
	ent->lwpchan_lwpchan = *lwpchan;
	ent->lwpchan_next = hashbucket->lwpchan_chain;
	hashbucket->lwpchan_chain = ent;
	atomic_inc_32(&lcp->lwpchan_entries);
	mutex_exit(&hashbucket->lwpchan_lock);
	return (1);
}

/*
 * Return a unique pair of identifiers that corresponds to a
 * synchronization object's virtual address. Process-shared
 * sync objects usually get vnode/offset from as_getmemid().
 */
static int
get_lwpchan(struct as *as, caddr_t addr, int type, lwpchan_t *lwpchan, int pool)
{
	/*
	 * If the lwp synch object is defined to be process-private,
	 * we just make the first field of the lwpchan be 'as' and
	 * the second field be the synch object's virtual address.
	 * (segvn_getmemid() does the same for MAP_PRIVATE mappings.)
	 * The lwpchan cache is used only for process-shared objects.
	 */
	if (!(type & USYNC_PROCESS)) {
		lwpchan->lc_wchan0 = (caddr_t)as;
		lwpchan->lc_wchan = addr;
		return (1);
	}

	return (lwpchan_get_mapping(as, addr, NULL, type, lwpchan, pool));
}
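
/*
 * An example of the two lwpchan forms built above, for a sync object at
 * virtual address addr:
 *	private:  lwpchan = { (caddr_t)as, addr }, unique per process;
 *	shared:   lwpchan = { memid.val[0], memid.val[1] } from
 *		  as_getmemid(), usually a vnode/offset pair that is
 *		  identical in every process mapping the object.
 */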

/*
 * Put the calling lwp on an lwpchan sleep queue, marked wakeable.
 * The caller holds the lwpchan lock and actually gives up the CPU
 * in swtch(), after dropping that lock.
 */
static void
lwp_block(lwpchan_t *lwpchan)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	sleepq_head_t *sqh;

	thread_lock(t);
	t->t_flag |= T_WAKEABLE;
	t->t_lwpchan = *lwpchan;
	t->t_sobj_ops = &lwp_sobj_ops;
	sqh = lwpsqhash(lwpchan);
	disp_lock_enter_high(&sqh->sq_lock);
	THREAD_SLEEP(t, &sqh->sq_lock);
	sleepq_insert(&sqh->sq_queue, t);
	thread_unlock(t);
	lwp->lwp_asleep = 1;
	lwp->lwp_sysabort = 0;
	(void) new_mstate(curthread, LMS_SLEEP);
}

static kthread_t *
lwpsobj_pi_owner(upimutex_t *up)
{
	return (up->upi_owner);
}

static struct upimutex *
upi_get(upib_t *upibp, lwpchan_t *lcp)
{
	struct upimutex *upip;

	for (upip = upibp->upib_first; upip != NULL;
	    upip = upip->upi_nextchain) {
		if (upip->upi_lwpchan.lc_wchan0 == lcp->lc_wchan0 &&
		    upip->upi_lwpchan.lc_wchan == lcp->lc_wchan)
			break;
	}
	return (upip);
}

static void
upi_chain_add(upib_t *upibp, struct upimutex *upimutex)
{
	ASSERT(MUTEX_HELD(&upibp->upib_lock));

	/*
	 * Insert upimutex at front of list. Maybe a bit unfair
	 * but assume that not many lwpchans hash to the same
	 * upimutextab bucket, i.e. the list of upimutexes from
	 * upib_first is not too long.
	 */
	upimutex->upi_nextchain = upibp->upib_first;
	upibp->upib_first = upimutex;
}

static void
upi_chain_del(upib_t *upibp, struct upimutex *upimutex)
{
	struct upimutex **prev;

	ASSERT(MUTEX_HELD(&upibp->upib_lock));

	prev = &upibp->upib_first;
	while (*prev != upimutex) {
		prev = &(*prev)->upi_nextchain;
	}
	*prev = upimutex->upi_nextchain;
	upimutex->upi_nextchain = NULL;
}

/*
 * Add upimutex to chain of upimutexes held by curthread.
 * Returns number of upimutexes held by curthread.
 */
static int
upi_mylist_add(struct upimutex *upimutex)
{
	kthread_t *t = curthread;

	/*
	 * Insert upimutex at front of list of upimutexes owned by t. This
	 * would match typical LIFO order in which nested locks are acquired
	 * and released.
	 */
	upimutex->upi_nextowned = t->t_upimutex;
	t->t_upimutex = upimutex;
	t->t_nupinest++;
	ASSERT(t->t_nupinest > 0);
	return (t->t_nupinest);
}

/*
 * Delete upimutex from list of upimutexes owned by curthread.
 */
static void
upi_mylist_del(struct upimutex *upimutex)
{
	kthread_t *t = curthread;
	struct upimutex **prev;

	/*
	 * Since the order in which nested locks are acquired and released
	 * is typically LIFO, and typical nesting levels are not too deep,
	 * the following should not be expensive in the general case.
	 */
	prev = &t->t_upimutex;
	while (*prev != upimutex) {
		prev = &(*prev)->upi_nextowned;
	}
	*prev = upimutex->upi_nextowned;
	upimutex->upi_nextowned = NULL;
	ASSERT(t->t_nupinest > 0);
	t->t_nupinest--;
}

/*
 * Returns true if upimutex is owned. Should be called only when upim points
 * to kmem which cannot disappear from underneath.
 */
static int
upi_owned(upimutex_t *upim)
{
	return (upim->upi_owner == curthread);
}

/*
 * Returns pointer to kernel object (upimutex_t *) if lp is owned.
 */
static struct upimutex *
lwp_upimutex_owned(lwp_mutex_t *lp, uint8_t type)
{
	lwpchan_t lwpchan;
	upib_t *upibp;
	struct upimutex *upimutex;

	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL))
		return (NULL);

	upibp = &UPI_CHAIN(lwpchan);
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	if (upimutex == NULL || upimutex->upi_owner != curthread) {
		mutex_exit(&upibp->upib_lock);
		return (NULL);
	}
	mutex_exit(&upibp->upib_lock);
	return (upimutex);
}

/*
 * Unlocks upimutex, waking up waiters if any. upimutex kmem is freed if
 * no lock hand-off occurs.
 */
static void
upimutex_unlock(struct upimutex *upimutex, uint16_t flag)
{
	turnstile_t *ts;
	upib_t *upibp;
	kthread_t *newowner;

	upi_mylist_del(upimutex);
	upibp = upimutex->upi_upibp;
	mutex_enter(&upibp->upib_lock);
	if (upimutex->upi_waiter != 0) {	/* if waiters */
		ts = turnstile_lookup(upimutex);
		if (ts != NULL && !(flag & LOCK_NOTRECOVERABLE)) {
			/* hand-off lock to highest prio waiter */
			newowner = ts->ts_sleepq[TS_WRITER_Q].sq_first;
			upimutex->upi_owner = newowner;
			if (ts->ts_waiters == 1)
				upimutex->upi_waiter = 0;
			turnstile_wakeup(ts, TS_WRITER_Q, 1, newowner);
			mutex_exit(&upibp->upib_lock);
			return;
		} else if (ts != NULL) {
			/* LOCK_NOTRECOVERABLE: wakeup all */
			turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL);
		} else {
			/*
			 * Misleading w bit. Waiters might have been
			 * interrupted. No need to clear the w bit (upimutex
			 * will soon be freed). Re-calculate PI from existing
			 * waiters.
			 */
			turnstile_exit(upimutex);
			turnstile_pi_recalc();
		}
	}
	/*
	 * no waiters, or LOCK_NOTRECOVERABLE.
	 * remove from the bucket chain of upi mutexes.
	 * de-allocate kernel memory (upimutex).
	 */
	upi_chain_del(upimutex->upi_upibp, upimutex);
	mutex_exit(&upibp->upib_lock);
	kmem_free(upimutex, sizeof (upimutex_t));
}

static int
lwp_upimutex_lock(lwp_mutex_t *lp, uint8_t type, int try, lwp_timer_t *lwptp)
{
	label_t ljb;
	int error = 0;
	lwpchan_t lwpchan;
	uint16_t flag;
	upib_t *upibp;
	volatile struct upimutex *upimutex = NULL;
	turnstile_t *ts;
	uint32_t nupinest;
	volatile int upilocked = 0;

	if (on_fault(&ljb)) {
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		error = EFAULT;
		goto out;
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	upibp = &UPI_CHAIN(lwpchan);
retry:
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	if (upimutex == NULL) {
		/* lock available since lwpchan has no upimutex */
		upimutex = kmem_zalloc(sizeof (upimutex_t), KM_SLEEP);
		upi_chain_add(upibp, (upimutex_t *)upimutex);
		upimutex->upi_owner = curthread;	/* grab lock */
		upimutex->upi_upibp = upibp;
		upimutex->upi_vaddr = lp;
		upimutex->upi_lwpchan = lwpchan;
		mutex_exit(&upibp->upib_lock);
		nupinest = upi_mylist_add((upimutex_t *)upimutex);
		upilocked = 1;
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (nupinest > maxnestupimx &&
		    secpolicy_resource(CRED()) != 0) {
			upimutex_unlock((upimutex_t *)upimutex, flag);
			error = ENOMEM;
			goto out;
		}
		if (flag & LOCK_NOTRECOVERABLE) {
			/*
			 * Since the setting of LOCK_NOTRECOVERABLE
			 * was done under the high-level upi mutex,
			 * in lwp_upimutex_unlock(), this flag needs to
			 * be checked while holding the upi mutex.
			 * If set, this thread should return without
			 * the lock held, and with the right error code.
			 */
			upimutex_unlock((upimutex_t *)upimutex, flag);
			upilocked = 0;
			error = ENOTRECOVERABLE;
		} else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
			if (flag & LOCK_OWNERDEAD)
				error = EOWNERDEAD;
			else if (type & USYNC_PROCESS_ROBUST)
				error = ELOCKUNMAPPED;
			else
				error = EOWNERDEAD;
		}
		goto out;
	}
	/*
	 * If a upimutex object exists, it must have an owner.
	 * This is due to lock hand-off, and release of upimutex when no
	 * waiters are present at unlock time.
	 */
	ASSERT(upimutex->upi_owner != NULL);
	if (upimutex->upi_owner == curthread) {
		/*
		 * The user wrapper can check if the mutex type is
		 * ERRORCHECK: if not, it should stall at user-level.
		 * If so, it should return the error code.
		 */
		mutex_exit(&upibp->upib_lock);
		error = EDEADLK;
		goto out;
	}
	if (try == UPIMUTEX_TRY) {
		mutex_exit(&upibp->upib_lock);
		error = EBUSY;
		goto out;
	}
	/*
	 * Block for the lock.
	 */
	if ((error = lwptp->lwpt_time_error) != 0) {
		/*
		 * The SUSV3 Posix spec is very clear that we
		 * should get no error from validating the
		 * timer until we would actually sleep.
		 */
		mutex_exit(&upibp->upib_lock);
		goto out;
	}
	if (lwptp->lwpt_tsp != NULL) {
		/*
		 * Unlike the protocol for other lwp timedwait operations,
		 * we must drop t_delay_lock before going to sleep in
		 * turnstile_block() for a upi mutex.
		 * See the comments below and in turnstile.c
		 */
		mutex_enter(&curthread->t_delay_lock);
		(void) lwp_timer_enqueue(lwptp);
		mutex_exit(&curthread->t_delay_lock);
	}
	/*
	 * Now, set the waiter bit and block for the lock in turnstile_block().
	 * No need to preserve the previous wbit since a lock try is not
	 * attempted after setting the wait bit. Wait bit is set under
	 * the upib_lock, which is not released until the turnstile lock
	 * is acquired. Say, the upimutex is L:
	 *
	 * 1. upib_lock is held so the waiter does not have to retry L after
	 *    setting the wait bit: since the owner has to grab the upib_lock
	 *    to unlock L, it will certainly see the wait bit set.
	 * 2. upib_lock is not released until the turnstile lock is acquired.
	 *    This is the key to preventing a missed wake-up. Otherwise, the
	 *    owner could acquire the upib_lock, and the tc_lock, to call
	 *    turnstile_wakeup(). All this, before the waiter gets tc_lock
	 *    to sleep in turnstile_block(). turnstile_wakeup() will then not
	 *    find this waiter, resulting in the missed wakeup.
	 * 3. The upib_lock, being a kernel mutex, cannot be released while
	 *    holding the tc_lock (since mutex_exit() could need to acquire
	 *    the same tc_lock)...and so is held when calling turnstile_block().
	 *    The address of upib_lock is passed to turnstile_block() which
	 *    releases it after releasing all turnstile locks, and before going
	 *    to sleep in swtch().
	 * 4. The waiter value cannot be a count of waiters, because a waiter
	 *    can be interrupted. The interrupt occurs under the tc_lock, at
	 *    which point, the upib_lock cannot be locked, to decrement waiter
	 *    count. So, just treat the waiter state as a bit, not a count.
	 */
	ts = turnstile_lookup((upimutex_t *)upimutex);
	upimutex->upi_waiter = 1;
	error = turnstile_block(ts, TS_WRITER_Q, (upimutex_t *)upimutex,
	    &lwp_sobj_pi_ops, &upibp->upib_lock, lwptp);
	/*
	 * Hand-off implies that we wakeup holding the lock, except when:
	 *	- deadlock is detected
	 *	- lock is not recoverable
	 *	- we got an interrupt or timeout
	 * If we wake up due to an interrupt or timeout, we may
	 * or may not be holding the lock due to mutex hand-off.
	 * Use lwp_upimutex_owned() to check if we do hold the lock.
	 */
	if (error != 0) {
		if ((error == EINTR || error == ETIME) &&
		    (upimutex = lwp_upimutex_owned(lp, type))) {
			/*
			 * Unlock and return - the re-startable syscall will
			 * try the lock again if we got EINTR.
			 */
			(void) upi_mylist_add((upimutex_t *)upimutex);
			upimutex_unlock((upimutex_t *)upimutex, 0);
		}
		/*
		 * The only other possible error is EDEADLK. If so, upimutex
		 * is valid, since its owner is deadlocked with curthread.
		 */
		ASSERT(error == EINTR || error == ETIME ||
		    (error == EDEADLK && !upi_owned((upimutex_t *)upimutex)));
		ASSERT(!lwp_upimutex_owned(lp, type));
		goto out;
	}
	if (lwp_upimutex_owned(lp, type)) {
		ASSERT(lwp_upimutex_owned(lp, type) == upimutex);
		nupinest = upi_mylist_add((upimutex_t *)upimutex);
		upilocked = 1;
	}
	/*
	 * Now, need to read the user-level lp->mutex_flag to do the following:
	 *
	 * - if lock is held, check if EOWNERDEAD or ELOCKUNMAPPED
	 *   should be returned.
	 * - if lock isn't held, check if ENOTRECOVERABLE should
	 *   be returned.
	 *
	 * Now, either lp->mutex_flag is readable or it's not. If not
	 * readable, the on_fault path will cause a return with EFAULT
	 * as it should. If it is readable, the state of the flag
	 * encodes the robustness state of the lock:
	 *
	 * If the upimutex is locked here, the flag's LOCK_OWNERDEAD
	 * or LOCK_UNMAPPED setting will influence the return code
	 * appropriately. If the upimutex is not locked here, this
	 * could be due to a spurious wake-up or a NOTRECOVERABLE
	 * event. The flag's setting can be used to distinguish
	 * between these two events.
	 */
	fuword16_noerr(&lp->mutex_flag, &flag);
	if (upilocked) {
		/*
		 * If the thread wakes up from turnstile_block with the lock
		 * held, the flag could not be set to LOCK_NOTRECOVERABLE,
		 * since it would not have been handed-off the lock.
		 * So, no need to check for this case.
		 */
		if (nupinest > maxnestupimx &&
		    secpolicy_resource(CRED()) != 0) {
			upimutex_unlock((upimutex_t *)upimutex, flag);
			upilocked = 0;
			error = ENOMEM;
		} else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
			if (flag & LOCK_OWNERDEAD)
				error = EOWNERDEAD;
			else if (type & USYNC_PROCESS_ROBUST)
				error = ELOCKUNMAPPED;
			else
				error = EOWNERDEAD;
		}
	} else {
		/*
		 * Wake-up without the upimutex held. Either this is a
		 * spurious wake-up (due to signals, forkall(), whatever), or
		 * it is a LOCK_NOTRECOVERABLE robustness event. The setting
		 * of the mutex flag can be used to distinguish between the
		 * two events.
		 */
		if (flag & LOCK_NOTRECOVERABLE) {
			error = ENOTRECOVERABLE;
		} else {
			/*
			 * Here, the flag could be set to LOCK_OWNERDEAD or
			 * not. In both cases, this is a spurious wakeup,
			 * since the upi lock is not held, but the thread
			 * has returned from turnstile_block().
			 *
			 * The user flag could be LOCK_OWNERDEAD if, at the
			 * same time as curthread having been woken up
			 * spuriously, the owner (say Tdead) has died, marked
			 * the mutex flag accordingly, and handed off the lock
			 * to some other waiter (say Tnew). curthread just
			 * happened to read the flag while Tnew has yet to deal
			 * with the owner-dead event.
			 *
			 * In this event, curthread should retry the lock.
			 * If Tnew is able to cleanup the lock, curthread
			 * will eventually get the lock with a zero error code.
			 * If Tnew is unable to cleanup, its eventual call to
			 * unlock the lock will result in the mutex flag being
			 * set to LOCK_NOTRECOVERABLE, and the wake-up of
			 * all waiters, including curthread, which will then
			 * eventually return ENOTRECOVERABLE due to the above
			 * check.
			 *
			 * Of course, if the user-flag is not set with
			 * LOCK_OWNERDEAD, retrying is the thing to do, since
			 * this is definitely a spurious wakeup.
			 */
			goto retry;
		}
	}
out:
	no_fault();
	return (error);
}

static int
lwp_upimutex_unlock(lwp_mutex_t *lp, uint8_t type)
{
	label_t ljb;
	int error = 0;
	lwpchan_t lwpchan;
	uint16_t flag;
	upib_t *upibp;
	volatile struct upimutex *upimutex = NULL;
	volatile int upilocked = 0;

	if (on_fault(&ljb)) {
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		error = EFAULT;
		goto out;
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	upibp = &UPI_CHAIN(lwpchan);
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	/*
	 * If the lock is not held, or the owner is not curthread, return
	 * error. The user-level wrapper can return this error or stall,
	 * depending on whether mutex is of ERRORCHECK type or not.
	 */
	if (upimutex == NULL || upimutex->upi_owner != curthread) {
		mutex_exit(&upibp->upib_lock);
		error = EPERM;
		goto out;
	}
	mutex_exit(&upibp->upib_lock);	/* release for user memory access */
	upilocked = 1;
	fuword16_noerr(&lp->mutex_flag, &flag);
	if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
		/*
		 * transition mutex to the LOCK_NOTRECOVERABLE state.
		 */
		flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
		flag |= LOCK_NOTRECOVERABLE;
		suword16_noerr(&lp->mutex_flag, flag);
	}
	set_owner_pid(lp, 0, 0);
	upimutex_unlock((upimutex_t *)upimutex, flag);
	upilocked = 0;
out:
	no_fault();
	return (error);
}

/*
 * Set the owner and ownerpid fields of a user-level mutex.
 */
static void
set_owner_pid(lwp_mutex_t *lp, uintptr_t owner, pid_t pid)
{
	union {
		uint64_t word64;
		uint32_t word32[2];
	} un;

	un.word64 = (uint64_t)owner;

	suword32_noerr(&lp->mutex_ownerpid, pid);
#if defined(_LP64)
	if (((uintptr_t)lp & (_LONG_LONG_ALIGNMENT - 1)) == 0) { /* aligned */
		suword64_noerr(&lp->mutex_owner, un.word64);
		return;
	}
#endif
	/* mutex is unaligned or we are running on a 32-bit kernel */
	suword32_noerr((uint32_t *)&lp->mutex_owner, un.word32[0]);
	suword32_noerr((uint32_t *)&lp->mutex_owner + 1, un.word32[1]);
}
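
/*
 * Illustration of the stores above: on a 64-bit kernel with an 8-byte
 * aligned lwp_mutex_t, mutex_owner is updated with one 64-bit store;
 * on a 32-bit kernel, or when the mutex is unaligned, the same eight
 * bytes are written as two separate 32-bit stores. mutex_ownerpid is
 * always written separately, so the (owner, pid) pair is not updated
 * atomically with respect to user-level readers.
 */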

/*
 * Clear the contents of a user-level mutex; return the flags.
 * Used only by upi_dead() and lwp_mutex_cleanup(), below.
 */
static uint16_t
lwp_clear_mutex(lwp_mutex_t *lp, uint16_t lockflg)
{
	uint16_t flag;

	fuword16_noerr(&lp->mutex_flag, &flag);
	if ((flag &
	    (LOCK_OWNERDEAD | LOCK_UNMAPPED | LOCK_NOTRECOVERABLE)) == 0) {
		flag |= lockflg;
		suword16_noerr(&lp->mutex_flag, flag);
	}
	set_owner_pid(lp, 0, 0);
	suword8_noerr(&lp->mutex_rcount, 0);

	return (flag);
}

/*
 * Mark user mutex state, corresponding to kernel upimutex,
 * as LOCK_UNMAPPED or LOCK_OWNERDEAD, as appropriate
 */
static int
upi_dead(upimutex_t *upip, uint16_t lockflg)
{
	label_t ljb;
	int error = 0;
	lwp_mutex_t *lp;

	if (on_fault(&ljb)) {
		error = EFAULT;
		goto out;
	}

	lp = upip->upi_vaddr;
	(void) lwp_clear_mutex(lp, lockflg);
	suword8_noerr(&lp->mutex_lockw, 0);
out:
	no_fault();
	return (error);
}

/*
 * Unlock all upimutexes held by curthread, since curthread is dying.
 * For each upimutex, attempt to mark its corresponding user mutex object as
 * dead.
 */
void
upimutex_cleanup()
{
	kthread_t *t = curthread;
	uint16_t lockflg = (ttoproc(t)->p_proc_flag & P_PR_EXEC)?
	    LOCK_UNMAPPED : LOCK_OWNERDEAD;
	struct upimutex *upip;

	while ((upip = t->t_upimutex) != NULL) {
		if (upi_dead(upip, lockflg) != 0) {
			/*
			 * If the user object associated with this upimutex is
			 * unmapped, unlock upimutex with the
			 * LOCK_NOTRECOVERABLE flag, so that all waiters are
			 * woken up. Since user object is unmapped, it could
			 * not be marked as dead or notrecoverable.
			 * The waiters will now all wake up and return
			 * ENOTRECOVERABLE, since they would find that the lock
			 * has not been handed-off to them.
			 * See lwp_upimutex_lock().
			 */
			upimutex_unlock(upip, LOCK_NOTRECOVERABLE);
		} else {
			/*
			 * The user object has been updated as dead.
			 * Unlock the upimutex: if no waiters, upip kmem will
			 * be freed. If there is a waiter, the lock will be
			 * handed off. If exit() is in progress, each existing
			 * waiter will successively get the lock, as owners
			 * die, and each new owner will call this routine as
			 * it dies. The last owner will free kmem, since
			 * it will find the upimutex has no waiters. So,
			 * eventually, the kmem is guaranteed to be freed.
			 */
			upimutex_unlock(upip, 0);
		}
		/*
		 * Note that the call to upimutex_unlock() above will delete
		 * upimutex from the t_upimutexes chain. And so the
		 * while loop will eventually terminate.
		 */
	}
}

int
lwp_mutex_timedlock(lwp_mutex_t *lp, timespec_t *tsp, uintptr_t owner)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	lwp_timer_t lwpt;
	caddr_t timedwait;
	int error = 0;
	int time_error;
	clock_t tim = -1;
	uchar_t waiters;
	volatile int locked = 0;
	volatile int watched = 0;
	label_t ljb;
	volatile uint8_t type = 0;
	lwpchan_t lwpchan;
	sleepq_head_t *sqh;
	uint16_t flag;
	int imm_timeout = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	/*
	 * Put the lwp in an orderly state for debugging,
	 * in case we are stopped while sleeping, below.
	 */
	prstop(PR_REQUESTED, 0);

	timedwait = (caddr_t)tsp;
	if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
	    lwpt.lwpt_imm_timeout) {
		imm_timeout = 1;
		timedwait = NULL;
	}

	/*
	 * Although LMS_USER_LOCK implies "asleep waiting for user-mode lock",
	 * this micro state is really a run state. If the thread indeed blocks,
	 * this state becomes valid. If not, the state is converted back to
	 * LMS_SYSTEM. So, it is OK to set the mstate here, instead of just
	 * when blocking.
	 */
	(void) new_mstate(t, LMS_USER_LOCK);
	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	suword8_noerr(&lp->mutex_type, type);
	if (UPIMUTEX(type)) {
		no_fault();
		error = lwp_upimutex_lock(lp, type, UPIMUTEX_BLOCK, &lwpt);
		if (error == 0 || error == EOWNERDEAD || error == ELOCKUNMAPPED)
			set_owner_pid(lp, owner,
			    (type & USYNC_PROCESS)? p->p_pid : 0);
		if (tsp && !time_error)	/* copyout the residual time left */
			error = lwp_timer_copyout(&lwpt, error);
		if (error)
			return (set_errno(error));
		return (0);
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	if (type & LOCK_ROBUST) {
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (flag & LOCK_NOTRECOVERABLE) {
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
			error = ENOTRECOVERABLE;
			goto out;
		}
	}
	fuword8_noerr(&lp->mutex_waiters, &waiters);
	suword8_noerr(&lp->mutex_waiters, 1);

	/*
	 * If watchpoints are set, they need to be restored, since
	 * atomic accesses of memory such as the call to ulock_try()
	 * below cannot be watched.
	 */

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	while (!ulock_try(&lp->mutex_lockw)) {
		if (time_error) {
			/*
			 * The SUSV3 Posix spec is very clear that we
			 * should get no error from validating the
			 * timer until we would actually sleep.
			 */
			error = time_error;
			break;
		}

		if (watched) {
			watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
			watched = 0;
		}

		if (timedwait) {
			/*
			 * If we successfully queue the timeout,
			 * then don't drop t_delay_lock until
			 * we are on the sleep queue (below).
			 */
			mutex_enter(&t->t_delay_lock);
			if (lwp_timer_enqueue(&lwpt) != 0) {
				mutex_exit(&t->t_delay_lock);
				imm_timeout = 1;
				timedwait = NULL;
			}
		}
		lwp_block(&lwpchan);
		/*
		 * Nothing should happen to cause the lwp to go to
		 * sleep again until after it returns from swtch().
		 */
		if (timedwait)
			mutex_exit(&t->t_delay_lock);
		locked = 0;
		lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout)
			setrun(t);
		swtch();
		t->t_flag &= ~T_WAKEABLE;
		if (timedwait)
			tim = lwp_timer_dequeue(&lwpt);
		setallwatch();
		if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t))
			error = EINTR;
		else if (imm_timeout || (timedwait && tim == -1))
			error = ETIME;
		if (error) {
			lwp->lwp_asleep = 0;
			lwp->lwp_sysabort = 0;
			watched = watch_disable_addr((caddr_t)lp, sizeof (*lp),
			    S_WRITE);

			/*
			 * Need to re-compute waiters bit. The waiters field in
			 * the lock is not reliable. Either of two things could
			 * have occurred: no lwp may have called lwp_release()
			 * for me but I have woken up due to a signal or
			 * timeout. In this case, the waiter bit is incorrect
			 * since it is still set to 1, set above.
			 * OR an lwp_release() did occur for some other lwp on
			 * the same lwpchan. In this case, the waiter bit is
			 * correct. But which event occurred, one can't tell.
			 * So, recompute.
			 */
			lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
			locked = 1;
			sqh = lwpsqhash(&lwpchan);
			disp_lock_enter(&sqh->sq_lock);
			waiters = iswanted(sqh->sq_queue.sq_first, &lwpchan);
			disp_lock_exit(&sqh->sq_lock);
			break;
		}
		lwp->lwp_asleep = 0;
		watched = watch_disable_addr((caddr_t)lp, sizeof (*lp),
		    S_WRITE);
		lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
		locked = 1;
		fuword8_noerr(&lp->mutex_waiters, &waiters);
		suword8_noerr(&lp->mutex_waiters, 1);
		if (type & LOCK_ROBUST) {
			fuword16_noerr(&lp->mutex_flag, &flag);
			if (flag & LOCK_NOTRECOVERABLE) {
				error = ENOTRECOVERABLE;
				break;
			}
		}
	}

	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	if (error == 0) {
		set_owner_pid(lp, owner, (type & USYNC_PROCESS)? p->p_pid : 0);
		if (type & LOCK_ROBUST) {
			fuword16_noerr(&lp->mutex_flag, &flag);
			if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
				if (flag & LOCK_OWNERDEAD)
					error = EOWNERDEAD;
				else if (type & USYNC_PROCESS_ROBUST)
					error = ELOCKUNMAPPED;
				else
					error = EOWNERDEAD;
			}
		}
	}
	suword8_noerr(&lp->mutex_waiters, waiters);
	locked = 0;
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (tsp && !time_error)		/* copyout the residual time left */
		error = lwp_timer_copyout(&lwpt, error);
	if (error)
		return (set_errno(error));
	return (0);
}

static int
iswanted(kthread_t *t, lwpchan_t *lwpchan)
{
	/*
	 * The caller holds the dispatcher lock on the sleep queue.
	 */
	while (t != NULL) {
		if (t->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    t->t_lwpchan.lc_wchan == lwpchan->lc_wchan)
			return (1);
		t = t->t_link;
	}
	return (0);
}

/*
 * Return the highest priority thread sleeping on this lwpchan.
 */
static kthread_t *
lwp_queue_waiter(lwpchan_t *lwpchan)
{
	sleepq_head_t *sqh;
	kthread_t *tp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
	for (tp = sqh->sq_queue.sq_first; tp != NULL; tp = tp->t_link) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan)
			break;
	}
	disp_lock_exit(&sqh->sq_lock);
	return (tp);
}

static int
lwp_release(lwpchan_t *lwpchan, uchar_t *waiters, int sync_type)
{
	sleepq_head_t *sqh;
	kthread_t *tp;
	kthread_t **tpp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
	tpp = &sqh->sq_queue.sq_first;
	while ((tp = *tpp) != NULL) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
			/*
			 * The following is typically false. It could be true
			 * only if lwp_release() is called from
			 * lwp_mutex_wakeup() after reading the waiters field
			 * from memory in which the lwp lock used to be, but has
			 * since been re-used to hold a lwp cv or lwp semaphore.
			 * The thread "tp" found to match the lwp lock's wchan
			 * is actually sleeping for the cv or semaphore which
			 * now has the same wchan. In this case, lwp_release()
			 * should return failure.
			 */
			if (sync_type != (tp->t_flag & T_WAITCVSEM)) {
				ASSERT(sync_type == 0);
				/*
				 * assert that this can happen only for mutexes
				 * i.e. sync_type == 0, for correctly written
				 * user programs.
				 */
				disp_lock_exit(&sqh->sq_lock);
				return (0);
			}
			*waiters = iswanted(tp->t_link, lwpchan);
			sleepq_unlink(tpp, tp);
			DTRACE_SCHED1(wakeup, kthread_t *, tp);
			tp->t_wchan0 = NULL;
			tp->t_wchan = NULL;
			tp->t_sobj_ops = NULL;
			tp->t_release = 1;
			THREAD_TRANSITION(tp);	/* drops sleepq lock */
			CL_WAKEUP(tp);
			thread_unlock(tp);	/* drop run queue lock */
			return (1);
		}
		tpp = &tp->t_link;
	}
	*waiters = 0;
	disp_lock_exit(&sqh->sq_lock);
	return (0);
}
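
/*
 * Usage note (illustrative): callers pass sync_type == 0 when waking a
 * mutex waiter (lwp_mutex_wakeup()) and T_WAITCVSEM when waking a
 * condition-variable or semaphore waiter. The test above rejects a
 * thread whose T_WAITCVSEM setting does not match, which can happen
 * only when the lock memory has been re-used, as described in the
 * comment inside the loop.
 */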

static void
lwp_release_all(lwpchan_t *lwpchan)
{
	sleepq_head_t *sqh;
	kthread_t *tp;
	kthread_t **tpp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock sleep q queue */
	tpp = &sqh->sq_queue.sq_first;
	while ((tp = *tpp) != NULL) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
			sleepq_unlink(tpp, tp);
			DTRACE_SCHED1(wakeup, kthread_t *, tp);
			tp->t_wchan0 = NULL;
			tp->t_wchan = NULL;
			tp->t_sobj_ops = NULL;
			CL_WAKEUP(tp);
			thread_unlock_high(tp);	/* release run queue lock */
		} else {
			tpp = &tp->t_link;
		}
	}
	disp_lock_exit(&sqh->sq_lock);		/* drop sleep q lock */
}

/*
 * unblock a lwp that is trying to acquire this mutex. the blocked
 * lwp resumes and retries to acquire the lock.
 */
int
lwp_mutex_wakeup(lwp_mutex_t *lp, int release_all)
{
	proc_t *p = ttoproc(curthread);
	lwpchan_t lwpchan;
	uchar_t waiters;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile uint8_t type = 0;
	label_t ljb;
	int error = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	suword8_noerr(&lp->mutex_type, type);
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	/*
	 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp will
	 * re-try the lock in lwp_mutex_timedlock(). The call to lwp_release()
	 * may fail. If it fails, do not write into the waiter bit.
	 * The call to lwp_release() might fail due to one of three reasons:
	 *
	 *	1. due to the thread which set the waiter bit not actually
	 *	   sleeping since it got the lock on the re-try. The waiter
	 *	   bit will then be correctly updated by that thread. This
	 *	   window may be closed by reading the wait bit again here
	 *	   and not calling lwp_release() at all if it is zero.
	 *	2. the thread which set the waiter bit and went to sleep
	 *	   was woken up by a signal. This time, the waiter recomputes
	 *	   the wait bit in the return with EINTR code.
	 *	3. the waiter bit read by lwp_mutex_wakeup() was in
	 *	   memory that has been re-used after the lock was dropped.
	 *	   In this case, writing into the waiter bit would cause data
	 *	   corruption.
	 */
	if (release_all)
		lwp_release_all(&lwpchan);
	else if (lwp_release(&lwpchan, &waiters, 0))
		suword8_noerr(&lp->mutex_waiters, waiters);
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}

/*
 * lwp_cond_wait() has four arguments, a pointer to a condition variable,
 * a pointer to a mutex, a pointer to a timespec for a timed wait and
 * a flag telling the kernel whether or not to honor the kernel/user
 * schedctl parking protocol (see schedctl_is_park() in schedctl.c).
 * The kernel puts the lwp to sleep on a unique pair of caddr_t's called an
 * lwpchan, returned by get_lwpchan(). If the timespec pointer is non-NULL,
 * it is used as an in/out parameter. On entry, it contains the relative
 * time until timeout. On exit, we copyout the residual time left to it.
 */
int
lwp_cond_wait(lwp_cond_t *cv, lwp_mutex_t *mp, timespec_t *tsp, int check_park)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	lwp_timer_t lwpt;
	lwpchan_t cv_lwpchan;
	lwpchan_t m_lwpchan;
	caddr_t timedwait;
	volatile uint16_t type = 0;
	volatile uint8_t mtype = 0;
	uchar_t waiters;
	volatile int error;
	clock_t tim = -1;
	volatile int locked = 0;
	volatile int m_locked = 0;
	volatile int cvwatched = 0;
	volatile int mpwatched = 0;
	label_t ljb;
	volatile int no_lwpchan = 1;
	int imm_timeout = 0;
	int imm_unpark = 0;

	if ((caddr_t)cv >= p->p_as->a_userlimit ||
	    (caddr_t)mp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	/*
	 * Put the lwp in an orderly state for debugging,
	 * in case we are stopped while sleeping, below.
	 */
	prstop(PR_REQUESTED, 0);

	timedwait = (caddr_t)tsp;
	if ((error = lwp_timer_copyin(&lwpt, tsp)) != 0)
		return (set_errno(error));
	if (lwpt.lwpt_imm_timeout) {
		imm_timeout = 1;
		timedwait = NULL;
	}

	(void) new_mstate(t, LMS_USER_LOCK);

	if (on_fault(&ljb)) {
		if (no_lwpchan) {
			error = EFAULT;
			goto out;
		}
		if (m_locked) {
			m_locked = 0;
			lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
		}
		if (locked) {
			locked = 0;
			lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
		}
		/*
		 * set up another on_fault() for a possible fault
		 * on the user lock accessed at "efault"
		 */
		if (on_fault(&ljb)) {
			if (m_locked) {
				m_locked = 0;
				lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
			}
			goto out;
		}
		error = EFAULT;
		goto efault;
	}

	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype);
	suword8_noerr(&mp->mutex_type, mtype);
	if (UPIMUTEX(mtype) == 0) {
		/* convert user level mutex, "mp", to a unique lwpchan */
		/* check if mtype is ok to use below, instead of type from cv */
		if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype,
		    &m_lwpchan, LWPCHAN_MPPOOL)) {
			error = EFAULT;
			goto out;
		}
	}
	fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
	suword16_noerr(&cv->cond_type, type);
	/* convert user level condition variable, "cv", to a unique lwpchan */
	if (!get_lwpchan(p->p_as, (caddr_t)cv, type,
	    &cv_lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out;
	}
	no_lwpchan = 0;
	cvwatched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
	if (UPIMUTEX(mtype) == 0)
		mpwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp),
		    S_WRITE);

	/*
	 * lwpchan_lock ensures that the calling lwp is put to sleep atomically
	 * with respect to a possible wakeup which is a result of either
	 * an lwp_cond_signal() or an lwp_cond_broadcast().
	 *
	 * What's misleading, is that the lwp is put to sleep after the
	 * condition variable's mutex is released. This is OK as long as
	 * the release operation is also done while holding lwpchan_lock.
	 * The lwp is then put to sleep when the possibility of pagefaulting
	 * or sleeping is completely eliminated.
	 */
	lwpchan_lock(&cv_lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	if (UPIMUTEX(mtype) == 0) {
		lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
		m_locked = 1;
		suword8_noerr(&cv->cond_waiters_kernel, 1);
		/*
		 * unlock the condition variable's mutex. (pagefaults are
		 * possible here.)
		 */
		set_owner_pid(mp, 0, 0);
		ulock_clear(&mp->mutex_lockw);
		fuword8_noerr(&mp->mutex_waiters, &waiters);
		if (waiters != 0) {
			/*
			 * Given the locking of lwpchan_lock around the release
			 * of the mutex and checking for waiters, the following
			 * call to lwp_release() can fail ONLY if the lock
			 * acquirer is interrupted after setting the waiter bit,
			 * calling lwp_block() and releasing lwpchan_lock.
			 * In this case, it could get pulled off the lwp sleep
			 * q (via setrun()) before the following call to
			 * lwp_release() occurs. In this case, the lock
			 * requestor will update the waiter bit correctly by
			 * re-evaluating it.
			 */
			if (lwp_release(&m_lwpchan, &waiters, 0))
				suword8_noerr(&mp->mutex_waiters, waiters);
		}
		m_locked = 0;
		lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
	} else {
		suword8_noerr(&cv->cond_waiters_kernel, 1);
		error = lwp_upimutex_unlock(mp, mtype);
		if (error) {	/* if the upimutex unlock failed */
			locked = 0;
			lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
			goto out;
		}
	}
	no_fault();

	if (mpwatched) {
		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
		mpwatched = 0;
	}
	if (cvwatched) {
		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
		cvwatched = 0;
	}

	if (check_park && (!schedctl_is_park() || t->t_unpark)) {
		/*
		 * We received a signal at user-level before calling here
		 * or another thread wants us to return immediately
		 * with EINTR. See lwp_unpark().
		 */
		imm_unpark = 1;
		t->t_unpark = 0;
		timedwait = NULL;
	} else if (timedwait) {
		/*
		 * If we successfully queue the timeout,
		 * then don't drop t_delay_lock until
		 * we are on the sleep queue (below).
		 */
		mutex_enter(&t->t_delay_lock);
		if (lwp_timer_enqueue(&lwpt) != 0) {
			mutex_exit(&t->t_delay_lock);
			imm_timeout = 1;
			timedwait = NULL;
		}
	}
	t->t_flag |= T_WAITCVSEM;
	lwp_block(&cv_lwpchan);
	/*
	 * Nothing should happen to cause the lwp to go to sleep
	 * until after it returns from swtch().
	 */
	if (timedwait)
		mutex_exit(&t->t_delay_lock);
	locked = 0;
	lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
	if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) ||
	    (imm_timeout | imm_unpark))
		setrun(t);
	swtch();
	t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
	if (timedwait)
		tim = lwp_timer_dequeue(&lwpt);
	if (ISSIG(t, FORREAL) || lwp->lwp_sysabort ||
	    MUSTRETURN(p, t) || imm_unpark)
		error = EINTR;
	else if (imm_timeout || (timedwait && tim == -1))
		error = ETIME;
	lwp->lwp_asleep = 0;
	lwp->lwp_sysabort = 0;
	setallwatch();

	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	if (tsp && check_park)		/* copyout the residual time left */
		error = lwp_timer_copyout(&lwpt, error);

	/* the mutex is reacquired by the caller on return to user level */
	if (error) {
		/*
		 * If we were concurrently lwp_cond_signal()d and we
		 * received a UNIX signal or got a timeout, then perform
		 * another lwp_cond_signal() to avoid consuming the wakeup.
		 */
		if (t->t_release)
			(void) lwp_cond_signal(cv);
		return (set_errno(error));
	}
	return (0);

efault:
	/*
	 * make sure that the user level lock is dropped before
	 * returning to caller, since the caller always re-acquires it.
	 */
	if (UPIMUTEX(mtype) == 0) {
		lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
		m_locked = 1;
		set_owner_pid(mp, 0, 0);
		ulock_clear(&mp->mutex_lockw);
		fuword8_noerr(&mp->mutex_waiters, &waiters);
		if (waiters != 0) {
			/*
			 * See comment above on lock clearing and lwp_release()
			 * success/failure.
			 */
			if (lwp_release(&m_lwpchan, &waiters, 0))
				suword8_noerr(&mp->mutex_waiters, waiters);
		}
		m_locked = 0;
		lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
	} else {
		(void) lwp_upimutex_unlock(mp, mtype);
	}
out:
	no_fault();
	if (mpwatched)
		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
	if (cvwatched)
		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);
	return (set_errno(error));
}

/*
 * wakeup one lwp that's blocked on this condition variable.
 */
int
lwp_cond_signal(lwp_cond_t *cv)
{
	proc_t *p = ttoproc(curthread);
	lwpchan_t lwpchan;
	uchar_t waiters;
	volatile uint16_t type = 0;
	volatile int locked = 0;
	volatile int watched = 0;
	label_t ljb;
	int error = 0;

	if ((caddr_t)cv >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
	suword16_noerr(&cv->cond_type, type);
	if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type,
	    &lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	fuword8_noerr(&cv->cond_waiters_kernel, &waiters);
	if (waiters != 0) {
		/*
		 * The following call to lwp_release() might fail but it is
		 * OK to write into the waiters bit below, since the memory
		 * could not have been re-used or unmapped (for correctly
		 * written user programs) as in the case of lwp_mutex_wakeup().
		 * For an incorrect program, we should not care about data
		 * corruption since this is just one instance of other places
		 * where corruption can occur for such a program. Of course
		 * if the memory is unmapped, normal fault recovery occurs.
		 */
		(void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
		suword8_noerr(&cv->cond_waiters_kernel, waiters);
	}
	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}

/*
 * wakeup every lwp that's blocked on this condition variable.
 */
int
lwp_cond_broadcast(lwp_cond_t *cv)
{
	proc_t *p = ttoproc(curthread);
	lwpchan_t lwpchan;
	volatile uint16_t type = 0;
	volatile int locked = 0;
	volatile int watched = 0;
	label_t ljb;
	uchar_t waiters;
	int error = 0;

	if ((caddr_t)cv >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
	suword16_noerr(&cv->cond_type, type);
	if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type,
	    &lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	fuword8_noerr(&cv->cond_waiters_kernel, &waiters);
	if (waiters != 0) {
		lwp_release_all(&lwpchan);
		suword8_noerr(&cv->cond_waiters_kernel, 0);
	}
	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}

int
lwp_sema_trywait(lwp_sema_t *sp)
{
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	label_t ljb;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile uint16_t type = 0;
	int count;
	lwpchan_t lwpchan;
	uchar_t waiters;
	int error = 0;

	if ((caddr_t)sp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type);
	suword16_noerr((void *)&sp->sema_type, type);
	if (!get_lwpchan(p->p_as, (caddr_t)sp, type,
	    &lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count);
	if (count == 0)
		error = EBUSY;
	else
		suword32_noerr((void *)&sp->sema_count, --count);
	if (count != 0) {
		fuword8_noerr(&sp->sema_waiters, &waiters);
		if (waiters != 0) {
			(void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
			suword8_noerr(&sp->sema_waiters, waiters);
		}
	}
	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}

/*
 * See lwp_cond_wait(), above, for an explanation of the 'check_park' argument.
 */
int
lwp_sema_timedwait(lwp_sema_t *sp, timespec_t *tsp, int check_park)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	lwp_timer_t lwpt;
	caddr_t timedwait;
	clock_t tim = -1;
	label_t ljb;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile uint16_t type = 0;
	int count;
	lwpchan_t lwpchan;
	uchar_t waiters;
	int error = 0;
	int time_error;
	int imm_timeout = 0;
	int imm_unpark = 0;

	if ((caddr_t)sp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	/*
	 * Put the lwp in an orderly state for debugging,
	 * in case we are stopped while sleeping, below.
	 */
	prstop(PR_REQUESTED, 0);

	timedwait = (caddr_t)tsp;
	if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
	    lwpt.lwpt_imm_timeout) {
		imm_timeout = 1;
		timedwait = NULL;
	}

	watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type);
	suword16_noerr((void *)&sp->sema_type, type);
	if (!get_lwpchan(p->p_as, (caddr_t)sp, type,
	    &lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count);
	while (error == 0 && count == 0) {
		if (time_error) {
			/*
			 * The SUSV3 Posix spec is very clear that we
			 * should get no error from validating the
			 * timer until we would actually sleep.
			 */
			error = time_error;
			break;
		}
		suword8_noerr(&sp->sema_waiters, 1);
		if (watched)
			watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
		if (check_park && (!schedctl_is_park() || t->t_unpark)) {
			/*
			 * We received a signal at user-level before calling
			 * here or another thread wants us to return
			 * immediately with EINTR. See lwp_unpark().
			 */
			imm_unpark = 1;
			t->t_unpark = 0;
			timedwait = NULL;
		} else if (timedwait) {
			/*
			 * If we successfully queue the timeout,
			 * then don't drop t_delay_lock until
			 * we are on the sleep queue (below).
			 */
			mutex_enter(&t->t_delay_lock);
			if (lwp_timer_enqueue(&lwpt) != 0) {
				mutex_exit(&t->t_delay_lock);
				imm_timeout = 1;
				timedwait = NULL;
			}
		}
		t->t_flag |= T_WAITCVSEM;
		lwp_block(&lwpchan);
		/*
		 * Nothing should happen to cause the lwp to sleep
		 * again until after it returns from swtch().
		 */
		if (timedwait)
			mutex_exit(&t->t_delay_lock);
		locked = 0;
		lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) ||
		    (imm_timeout | imm_unpark))
			setrun(t);
		swtch();
		t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
		if (timedwait)
			tim = lwp_timer_dequeue(&lwpt);
		if (ISSIG(t, FORREAL) || lwp->lwp_sysabort ||
		    MUSTRETURN(p, t) || imm_unpark)
			error = EINTR;
		else if (imm_timeout || (timedwait && tim == -1))
			error = ETIME;
		lwp->lwp_asleep = 0;
		lwp->lwp_sysabort = 0;
		watched = watch_disable_addr((caddr_t)sp,
		    sizeof (*sp), S_WRITE);
		lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
		locked = 1;
		fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count);
	}
	if (error == 0)
		suword32_noerr((void *)&sp->sema_count, --count);
	if (count != 0) {
		(void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
		suword8_noerr(&sp->sema_waiters, waiters);
	}
	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
	if (tsp && check_park && !time_error)
		error = lwp_timer_copyout(&lwpt, error);
	if (error)
		return (set_errno(error));
	return (0);
}
int
lwp_sema_post(lwp_sema_t *sp)
{
	proc_t *p = ttoproc(curthread);
	label_t ljb;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile uint16_t type = 0;
	int count;
	lwpchan_t lwpchan;
	uchar_t waiters;
	int error = 0;

	if ((caddr_t)sp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword16_noerr(&sp->sema_type, (uint16_t *)&type);
	suword16_noerr(&sp->sema_type, type);
	if (!get_lwpchan(curproc->p_as, (caddr_t)sp, type,
	    &lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	fuword32_noerr(&sp->sema_count, (uint32_t *)&count);
	if (count == _SEM_VALUE_MAX)
		error = EOVERFLOW;
	else
		suword32_noerr(&sp->sema_count, ++count);
	if (count == 1) {
		fuword8_noerr(&sp->sema_waiters, &waiters);
		if (waiters != 0) {
			(void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
			suword8_noerr(&sp->sema_waiters, waiters);
		}
	}
	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}
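
/*
 * A minimal caller's sketch (assumption, for illustration only):
 * _lwp_sema_post(2) increments sema_count and, when the count was
 * previously zero and sema_waiters is set, wakes exactly one waiter
 * blocked in lwp_sema_wait()/lwp_sema_timedwait() above.  Posting at
 * sema_count == _SEM_VALUE_MAX fails with EOVERFLOW, as checked above.
 */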
#define	TRW_WANT_WRITE		0x1
#define	TRW_LOCK_GRANTED	0x2

#define	READ_LOCK		0
#define	WRITE_LOCK		1
#define	TRY_FLAG		0x10
#define	READ_LOCK_TRY		(READ_LOCK | TRY_FLAG)
#define	WRITE_LOCK_TRY		(WRITE_LOCK | TRY_FLAG)
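
/*
 * The TRY variants just OR TRY_FLAG into the basic lock type, e.g.
 * WRITE_LOCK_TRY == 0x11.  lwp_rwlock_lock() strips the flag with
 * "rd_wr &= ~TRY_FLAG" and then validates that what remains is
 * READ_LOCK or WRITE_LOCK.
 */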
/*
 * Release one writer or one or more readers. Compute the rwstate word to
 * reflect the new state of the queue. For a safe hand-off we copy the new
 * rwstate value back to userland before we wake any of the new lock holders.
 *
 * Note that sleepq_insert() implements a prioritized FIFO (with writers
 * being given precedence over readers of the same priority).
 *
 * If the first thread is a reader we scan the queue releasing all readers
 * until we hit a writer or the end of the queue. If the first thread is a
 * writer we still need to check for another writer.
 */
static void
lwp_rwlock_release(lwpchan_t *lwpchan, lwp_rwlock_t *rw)
{
	sleepq_head_t *sqh;
	kthread_t *tp;
	kthread_t **tpp;
	kthread_t *tpnext;
	kthread_t *wakelist = NULL;
	uint32_t rwstate = 0;
	int wcount = 0;
	int rcount = 0;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);
	tpp = &sqh->sq_queue.sq_first;
	while ((tp = *tpp) != NULL) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
			if (tp->t_writer & TRW_WANT_WRITE) {
				if ((wcount++ == 0) && (rcount == 0)) {
					rwstate |= URW_WRITE_LOCKED;

					/* Just one writer to wake. */
					sleepq_unlink(tpp, tp);
					wakelist = tp;

					/* tpp already set for next thread. */
					continue;
				} else {
					rwstate |= URW_HAS_WAITERS;
					/* We need look no further. */
					break;
				}
			} else {
				rcount++;
				if (wcount == 0) {
					rwstate++;

					/* Add reader to wake list. */
					sleepq_unlink(tpp, tp);
					tp->t_link = wakelist;
					wakelist = tp;

					/* tpp already set for next thread. */
					continue;
				} else {
					rwstate |= URW_HAS_WAITERS;
					/* We need look no further. */
					break;
				}
			}
		}
		tpp = &tp->t_link;
	}

	/* Copy the new rwstate back to userland. */
	suword32_noerr(&rw->rwlock_readers, rwstate);

	/* Wake the new lock holder(s) up. */
	tp = wakelist;
	while (tp != NULL) {
		DTRACE_SCHED1(wakeup, kthread_t *, tp);
		tp->t_wchan0 = NULL;
		tp->t_wchan = NULL;
		tp->t_sobj_ops = NULL;
		tp->t_writer |= TRW_LOCK_GRANTED;
		tpnext = tp->t_link;
		tp->t_link = NULL;
		CL_WAKEUP(tp);
		thread_unlock_high(tp);
		tp = tpnext;
	}

	disp_lock_exit(&sqh->sq_lock);
}
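
/*
 * Hand-off example (illustration only): with a queue of R1, R2, W1, R3
 * in priority-FIFO order, the scan above wakes R1 and R2, stops at W1,
 * and writes back rwstate with a reader count of 2 plus
 * URW_HAS_WAITERS.  Because the new rwstate reaches userland before
 * any waiter runs, a woken reader can never observe a stale word that
 * claims the lock is free.
 */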
/*
 * We enter here holding the user-level mutex, which we must release before
 * returning or blocking. Based on lwp_cond_wait().
 */
static int
lwp_rwlock_lock(lwp_rwlock_t *rw, timespec_t *tsp, int rd_wr)
{
	lwp_mutex_t *mp = NULL;
	kthread_t *t = curthread;
	kthread_t *tp;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	lwp_timer_t lwpt;
	lwpchan_t lwpchan;
	lwpchan_t mlwpchan;
	caddr_t timedwait;
	volatile uint16_t type = 0;
	volatile uint8_t mtype = 0;
	uchar_t mwaiters;
	volatile int error = 0;
	int time_error;
	clock_t tim = -1;
	volatile int locked = 0;
	volatile int mlocked = 0;
	volatile int watched = 0;
	volatile int mwatched = 0;
	label_t ljb;
	volatile int no_lwpchan = 1;
	int imm_timeout = 0;
	int try_flag;
	uint32_t rwstate;
	int acquired = 0;

	/* We only check rw because the mutex is included in it. */
	if ((caddr_t)rw >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	/*
	 * Put the lwp in an orderly state for debugging,
	 * in case we are stopped while sleeping, below.
	 */
	prstop(PR_REQUESTED, 0);

	/* We must only report this error if we are about to sleep (later). */
	timedwait = (caddr_t)tsp;
	if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
	    lwpt.lwpt_imm_timeout) {
		imm_timeout = 1;
		timedwait = NULL;
	}

	(void) new_mstate(t, LMS_USER_LOCK);

	if (on_fault(&ljb)) {
		if (no_lwpchan) {
			error = EFAULT;
			goto out_nodrop;
		}
		if (mlocked) {
			mlocked = 0;
			lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
		}
		if (locked) {
			locked = 0;
			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		}
		/*
		 * Set up another on_fault() for a possible fault
		 * on the user lock accessed at "out_drop".
		 */
		if (on_fault(&ljb)) {
			if (mlocked) {
				mlocked = 0;
				lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
			}
			error = EFAULT;
			goto out_nodrop;
		}
		error = EFAULT;
		goto out_drop;
	}

	/* Process rd_wr (including sanity check). */
	try_flag = (rd_wr & TRY_FLAG);
	rd_wr &= ~TRY_FLAG;
	if ((rd_wr != READ_LOCK) && (rd_wr != WRITE_LOCK)) {
		error = EINVAL;
		goto out_nodrop;
	}

	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	mp = &rw->mutex;
	fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype);
	fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type);
	suword8_noerr(&mp->mutex_type, mtype);
	suword16_noerr(&rw->rwlock_type, type);

	/* We can only continue for simple USYNC_PROCESS locks. */
	if ((mtype != USYNC_PROCESS) || (type != USYNC_PROCESS)) {
		error = EINVAL;
		goto out_nodrop;
	}

	/* Convert user level mutex, "mp", to a unique lwpchan. */
	if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype,
	    &mlwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out_nodrop;
	}

	/* Convert user level rwlock, "rw", to a unique lwpchan. */
	if (!get_lwpchan(p->p_as, (caddr_t)rw, type,
	    &lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out_nodrop;
	}

	no_lwpchan = 0;
	watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
	mwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);

	/*
	 * lwpchan_lock() ensures that the calling LWP is put to sleep
	 * atomically with respect to a possible wakeup which is a result
	 * of lwp_rwlock_unlock().
	 *
	 * What's misleading is that the LWP is put to sleep after the
	 * rwlock's mutex is released. This is OK as long as the release
	 * operation is also done while holding mlwpchan. The LWP is then
	 * put to sleep when the possibility of pagefaulting or sleeping
	 * has been completely eliminated.
	 */
	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL);
	mlocked = 1;

	/*
	 * Fetch the current rwlock state.
	 *
	 * The possibility of spurious wake-ups or killed waiters means
	 * rwstate's URW_HAS_WAITERS bit may indicate false positives.
	 * We only fix these if they are important to us.
	 *
	 * Although various error states can be observed here (e.g. the lock
	 * is not held, but there are waiters) we assume these are application
	 * errors and so we take no corrective action.
	 */
	fuword32_noerr(&rw->rwlock_readers, &rwstate);
	/*
	 * We cannot legitimately get here from user-level
	 * without URW_HAS_WAITERS being set.
	 * Set it now to guard against user-level error.
	 */
	rwstate |= URW_HAS_WAITERS;

	/*
	 * We can try only if the lock isn't held by a writer.
	 */
	if (!(rwstate & URW_WRITE_LOCKED)) {
		tp = lwp_queue_waiter(&lwpchan);
		if (tp == NULL) {
			/*
			 * Hmmm, rwstate indicates waiters but there are
			 * none queued. This could just be the result of a
			 * spurious wakeup, so let's ignore it.
			 *
			 * We now have a chance to acquire the lock
			 * uncontended, but this is the last chance for
			 * a writer to acquire the lock without blocking.
			 */
			if (rd_wr == READ_LOCK) {
				rwstate++;
				acquired = 1;
			} else if ((rwstate & URW_READERS_MASK) == 0) {
				rwstate |= URW_WRITE_LOCKED;
				acquired = 1;
			}
		} else if (rd_wr == READ_LOCK) {
			/*
			 * This is the last chance for a reader to acquire
			 * the lock now, but it can only do so if there is
			 * no writer of equal or greater priority at the
			 * head of the queue.
			 *
			 * It is also just possible that there is a reader
			 * at the head of the queue. This may be the result
			 * of a spurious wakeup or an application failure.
			 * In this case we only acquire the lock if we have
			 * equal or greater priority. It is not our job to
			 * release spurious waiters.
			 */
			pri_t our_pri = DISP_PRIO(t);
			pri_t his_pri = DISP_PRIO(tp);

			if ((our_pri > his_pri) || ((our_pri == his_pri) &&
			    !(tp->t_writer & TRW_WANT_WRITE))) {
				rwstate++;
				acquired = 1;
			}
		}
	}

	if (acquired || try_flag || time_error) {
		/*
		 * We're not going to block this time.
		 */
		suword32_noerr(&rw->rwlock_readers, rwstate);
		lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		locked = 0;

		if (acquired) {
			/*
			 * Got the lock!
			 */
			error = 0;
		} else if (try_flag) {
			/*
			 * We didn't get the lock and we're about to block.
			 * If we're doing a trylock, return EBUSY instead.
			 */
			error = EBUSY;
		} else if (time_error) {
			/*
			 * The SUSV3 POSIX spec is very clear that we should
			 * get no error from validating the timer (above)
			 * until we would actually sleep.
			 */
			error = time_error;
		}

		goto out_drop;
	}

	/*
	 * We're about to block, so indicate what kind of waiter we are.
	 */
	t->t_writer = 0;
	if (rd_wr == WRITE_LOCK)
		t->t_writer = TRW_WANT_WRITE;
	suword32_noerr(&rw->rwlock_readers, rwstate);

	/*
	 * Unlock the rwlock's mutex (pagefaults are possible here).
	 */
	set_owner_pid(mp, 0, 0);
	ulock_clear(&mp->mutex_lockw);
	fuword8_noerr(&mp->mutex_waiters, &mwaiters);
	if (mwaiters != 0) {
		/*
		 * Given the locking of mlwpchan around the release of
		 * the mutex and checking for waiters, the following
		 * call to lwp_release() can fail ONLY if the lock
		 * acquirer is interrupted after setting the waiter bit,
		 * calling lwp_block() and releasing mlwpchan.
		 * In this case, it could get pulled off the LWP sleep
		 * queue (via setrun()) before the following call to
		 * lwp_release() occurs, and the lock requestor will
		 * update the waiter bit correctly by re-evaluating it.
		 */
		if (lwp_release(&mlwpchan, &mwaiters, 0))
			suword8_noerr(&mp->mutex_waiters, mwaiters);
	}
	lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
	mlocked = 0;

	if (mwatched) {
		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
		mwatched = 0;
	}
	if (watched) {
		watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
		watched = 0;
	}

	if (timedwait) {
		/*
		 * If we successfully queue the timeout,
		 * then don't drop t_delay_lock until
		 * we are on the sleep queue (below).
		 */
		mutex_enter(&t->t_delay_lock);
		if (lwp_timer_enqueue(&lwpt) != 0) {
			mutex_exit(&t->t_delay_lock);
			imm_timeout = 1;
			timedwait = NULL;
		}
	}
	t->t_flag |= T_WAITCVSEM;
	lwp_block(&lwpchan);

	/*
	 * Nothing should happen to cause the LWP to go to sleep until after
	 * it returns from swtch().
	 */
	if (timedwait)
		mutex_exit(&t->t_delay_lock);
	locked = 0;
	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
	if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout)
		setrun(t);
	swtch();

	/*
	 * We're back, but we need to work out why. Were we interrupted? Did
	 * we timeout? Were we granted the lock?
	 */
	error = EAGAIN;
	acquired = (t->t_writer & TRW_LOCK_GRANTED);
	t->t_writer = 0;
	t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
	if (timedwait)
		tim = lwp_timer_dequeue(&lwpt);
	if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t))
		error = EINTR;
	else if (imm_timeout || (timedwait && tim == -1))
		error = ETIME;
	lwp->lwp_asleep = 0;
	lwp->lwp_sysabort = 0;
	setallwatch();

	/*
	 * If we were granted the lock we don't care about EINTR or ETIME.
	 */
	if (acquired)
		error = 0;

	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	if (error)
		return (set_errno(error));
	return (0);

out_drop:
	/*
	 * Make sure that the user level lock is dropped before returning
	 * to the caller.
	 */
	if (!mlocked) {
		lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL);
		mlocked = 1;
	}
	set_owner_pid(mp, 0, 0);
	ulock_clear(&mp->mutex_lockw);
	fuword8_noerr(&mp->mutex_waiters, &mwaiters);
	if (mwaiters != 0) {
		/*
		 * See comment above on lock clearing and lwp_release()
		 * success/failure.
		 */
		if (lwp_release(&mlwpchan, &mwaiters, 0))
			suword8_noerr(&mp->mutex_waiters, mwaiters);
	}
	lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
	mlocked = 0;

out_nodrop:
	no_fault();
	if (mwatched)
		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
	if (watched)
		watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);
	if (error)
		return (set_errno(error));
	return (0);
}
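
/*
 * Caller's-eye summary of the above: a READ_LOCK_TRY/WRITE_LOCK_TRY
 * request never sleeps; if the lock cannot be taken immediately it
 * fails with EBUSY.  A timed request whose timespec fails validation
 * reports that error only at the point where it would otherwise have
 * slept, per SUSV3.  And a waiter that lwp_rwlock_release() granted
 * the lock to ignores any pending EINTR/ETIME.
 */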
/*
 * We enter here holding the user-level mutex but, unlike lwp_rwlock_lock(),
 * we never drop the lock.
 */
static int
lwp_rwlock_unlock(lwp_rwlock_t *rw)
{
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	lwpchan_t lwpchan;
	volatile uint16_t type = 0;
	volatile int error = 0;
	volatile int locked = 0;
	volatile int watched = 0;
	label_t ljb;
	volatile int no_lwpchan = 1;
	uint32_t rwstate;

	/* We only check rw because the mutex is included in it. */
	if ((caddr_t)rw >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	if (on_fault(&ljb)) {
		if (no_lwpchan) {
			error = EFAULT;
			goto out_nodrop;
		}
		if (locked) {
			locked = 0;
			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		}
		error = EFAULT;
		goto out_nodrop;
	}

	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type);
	suword16_noerr(&rw->rwlock_type, type);

	/* We can only continue for simple USYNC_PROCESS locks. */
	if (type != USYNC_PROCESS) {
		error = EINVAL;
		goto out_nodrop;
	}

	/* Convert user level rwlock, "rw", to a unique lwpchan. */
	if (!get_lwpchan(p->p_as, (caddr_t)rw, type,
	    &lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out_nodrop;
	}

	no_lwpchan = 0;
	watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);

	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 1;

	/*
	 * We can resolve multiple readers (except the last reader) here.
	 * For the last reader or a writer we need lwp_rwlock_release(),
	 * to which we also delegate the task of copying the new rwstate
	 * back to userland (see the comment there).
	 */
	fuword32_noerr(&rw->rwlock_readers, &rwstate);
	if (rwstate & URW_WRITE_LOCKED)
		lwp_rwlock_release(&lwpchan, rw);
	else if ((rwstate & URW_READERS_MASK) > 0) {
		rwstate--;
		if ((rwstate & URW_READERS_MASK) == 0)
			lwp_rwlock_release(&lwpchan, rw);
		else
			suword32_noerr(&rw->rwlock_readers, rwstate);
	}

	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 0;

out_nodrop:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}
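
/*
 * For example (illustration only): with three readers holding the
 * lock, the first two unlock calls simply decrement the reader count
 * here; the third sees the count reach zero and calls
 * lwp_rwlock_release() to hand the lock to the queued waiters, if any.
 */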
int
lwp_rwlock_sys(int subcode, lwp_rwlock_t *rwlp, timespec_t *tsp)
{
	switch (subcode) {
	case 0:
		return (lwp_rwlock_lock(rwlp, tsp, READ_LOCK));
	case 1:
		return (lwp_rwlock_lock(rwlp, tsp, WRITE_LOCK));
	case 2:
		return (lwp_rwlock_lock(rwlp, NULL, READ_LOCK_TRY));
	case 3:
		return (lwp_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY));
	case 4:
		return (lwp_rwlock_unlock(rwlp));
	}
	return (set_errno(EINVAL));
}
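
/*
 * The subcode dispatch above is assumed to map one-for-one onto libc's
 * rwlock primitives: the blocking reader/writer cases take an optional
 * timeout, the try variants pass a NULL timespec since they never
 * sleep, and unlock ignores the timespec entirely.
 */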
/*
 * Return the owner of the user-level s-object.
 * Since we can't really do this, return NULL.
 */
/* ARGSUSED */
static kthread_t *
lwpsobj_owner(caddr_t sobj)
{
	return ((kthread_t *)NULL);
}
/*
 * Wake up a thread asleep on a user-level synchronization
 * object.
 */
static void
lwp_unsleep(kthread_t *t)
{
	ASSERT(THREAD_LOCK_HELD(t));
	if (t->t_wchan0 != NULL) {
		sleepq_head_t *sqh;
		sleepq_t *sqp = t->t_sleepq;

		if (sqp != NULL) {
			sqh = lwpsqhash(&t->t_lwpchan);
			ASSERT(&sqh->sq_queue == sqp);
			sleepq_unsleep(t);
			disp_lock_exit_high(&sqh->sq_lock);
			CL_SETRUN(t);
			return;
		}
	}
	panic("lwp_unsleep: thread %p not on sleepq", (void *)t);
}
/*
 * Change the priority of a thread asleep on a user-level
 * synchronization object. To maintain proper priority order,
 * we:
 *	o dequeue the thread.
 *	o change its priority.
 *	o re-enqueue the thread.
 * Assumption: the thread is locked on entry.
 */
static void
lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip)
{
	ASSERT(THREAD_LOCK_HELD(t));
	if (t->t_wchan0 != NULL) {
		sleepq_t *sqp = t->t_sleepq;

		sleepq_dequeue(t);
		*t_prip = pri;
		sleepq_insert(sqp, t);
	} else
		panic("lwp_change_pri: %p not on a sleep queue", (void *)t);
}
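
/*
 * Re-inserting the thread is what maintains the invariant relied upon
 * by lwp_rwlock_release(): sleepq_insert() places the thread according
 * to its new priority, keeping the sleep queue in priority-FIFO order
 * after the change.
 */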
/*
 * Clean up a left-over process-shared robust mutex
 */
static void
lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg)
{
	uint16_t flag;
	uchar_t waiters;
	label_t ljb;
	pid_t owner_pid;
	lwp_mutex_t *lp;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile struct upimutex *upimutex = NULL;
	volatile int upilocked = 0;

	if ((ent->lwpchan_type & (USYNC_PROCESS | LOCK_ROBUST))
	    != (USYNC_PROCESS | LOCK_ROBUST))
		return;

	lp = (lwp_mutex_t *)ent->lwpchan_addr;
	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL);
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		goto out;
	}

	fuword32_noerr(&lp->mutex_ownerpid, (uint32_t *)&owner_pid);

	if (UPIMUTEX(ent->lwpchan_type)) {
		lwpchan_t lwpchan = ent->lwpchan_lwpchan;
		upib_t *upibp = &UPI_CHAIN(lwpchan);

		if (owner_pid != curproc->p_pid)
			goto out;
		mutex_enter(&upibp->upib_lock);
		upimutex = upi_get(upibp, &lwpchan);
		if (upimutex == NULL || upimutex->upi_owner != curthread) {
			mutex_exit(&upibp->upib_lock);
			goto out;
		}
		mutex_exit(&upibp->upib_lock);
		upilocked = 1;
		flag = lwp_clear_mutex(lp, lockflg);
		suword8_noerr(&lp->mutex_lockw, 0);
		upimutex_unlock((upimutex_t *)upimutex, flag);
	} else {
		lwpchan_lock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL);
		locked = 1;
		/*
		 * Clear the spinners count because one of our
		 * threads could have been spinning for this lock
		 * at user level when the process was suddenly killed.
		 * There is no harm in this since user-level libc code
		 * will adapt to the sudden change in the spinner count.
		 */
		suword8_noerr(&lp->mutex_spinners, 0);
		if (owner_pid != curproc->p_pid) {
			/*
			 * We are not the owner. There may or may not be one.
			 * If there are waiters, we wake up one or all of them.
			 * It doesn't hurt to wake them up in error since
			 * they will just retry the lock and go to sleep
			 * again if necessary.
			 */
			fuword8_noerr(&lp->mutex_waiters, &waiters);
			if (waiters != 0) {	/* there are waiters */
				fuword16_noerr(&lp->mutex_flag, &flag);
				if (flag & LOCK_NOTRECOVERABLE) {
					lwp_release_all(&ent->lwpchan_lwpchan);
					suword8_noerr(&lp->mutex_waiters, 0);
				} else if (lwp_release(&ent->lwpchan_lwpchan,
				    &waiters, 0)) {
					suword8_noerr(&lp->mutex_waiters,
					    waiters);
				}
			}
		} else {
			/*
			 * We are the owner. Release it.
			 */
			(void) lwp_clear_mutex(lp, lockflg);
			ulock_clear(&lp->mutex_lockw);
			fuword8_noerr(&lp->mutex_waiters, &waiters);
			if (waiters &&
			    lwp_release(&ent->lwpchan_lwpchan, &waiters, 0))
				suword8_noerr(&lp->mutex_waiters, waiters);
		}
		lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL);
	}
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
}
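
/*
 * Recovery sketch (assumptions noted): when a process exits while
 * holding a process-shared robust mutex, this cleanup marks the lock
 * via lwp_clear_mutex() and wakes a survivor, whose pending lock
 * acquisition then returns EOWNERDEAD or ELOCKUNMAPPED.  The survivor
 * is expected to repair the protected data and mark the lock usable
 * again from user level -- e.g. libc's mutex_consistent(3C), which is
 * an assumption here, not something this file enforces.
 */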
/*
 * Register a process-shared robust mutex in the lwpchan cache.
 */
int
lwp_mutex_register(lwp_mutex_t *lp, caddr_t uaddr)
{
	int error = 0;
	volatile int watched;
	label_t ljb;
	uint8_t type;
	lwpchan_t lwpchan;

	if ((caddr_t)lp >= (caddr_t)USERLIMIT)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	if (on_fault(&ljb)) {
		error = EFAULT;
	} else {
		/*
		 * Force Copy-on-write if necessary and ensure that the
		 * synchronization object resides in read/write memory.
		 * Cause an EFAULT return now if this is not so.
		 */
		fuword8_noerr(&lp->mutex_type, &type);
		suword8_noerr(&lp->mutex_type, type);
		if ((type & (USYNC_PROCESS|LOCK_ROBUST))
		    != (USYNC_PROCESS|LOCK_ROBUST)) {
			error = EINVAL;
		} else if (!lwpchan_get_mapping(curproc->p_as, (caddr_t)lp,
		    uaddr, type, &lwpchan, LWPCHAN_MPPOOL)) {
			error = EFAULT;
		}
	}
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}
/*
 * There is a user-level robust lock registration in libc.
 * Mark it as invalid by storing -1 into the location of the pointer.
 */
static void
lwp_mutex_unregister(void *uaddr)
{
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		(void) sulword(uaddr, (ulong_t)-1);
#ifdef _SYSCALL32_IMPL
	} else {
		(void) suword32(uaddr, (uint32_t)-1);
#endif
	}
}
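
/*
 * The -1 store above writes a data-model-sized sentinel: (ulong_t)-1
 * for a native process, (uint32_t)-1 for a 32-bit process under
 * _SYSCALL32_IMPL.  Libc is assumed to treat that value as
 * "registration invalid" rather than as a usable pointer.
 */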
int
lwp_mutex_trylock(lwp_mutex_t *lp, uintptr_t owner)
{
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	int error = 0;
	volatile int locked = 0;
	volatile int watched = 0;
	label_t ljb;
	volatile uint8_t type = 0;
	uint16_t flag;
	lwpchan_t lwpchan;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	(void) new_mstate(t, LMS_USER_LOCK);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	suword8_noerr(&lp->mutex_type, type);
	if (UPIMUTEX(type)) {
		no_fault();
		error = lwp_upimutex_lock(lp, type, UPIMUTEX_TRY, NULL);
		if (error == 0 || error == EOWNERDEAD ||
		    error == ELOCKUNMAPPED)
			set_owner_pid(lp, owner,
			    (type & USYNC_PROCESS)? p->p_pid : 0);
		if (error)
			return (set_errno(error));
		return (0);
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	if (type & LOCK_ROBUST) {
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (flag & LOCK_NOTRECOVERABLE) {
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
			error = ENOTRECOVERABLE;
			goto out;
		}
	}

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	if (!ulock_try(&lp->mutex_lockw))
		error = EBUSY;
	else {
		set_owner_pid(lp, owner, (type & USYNC_PROCESS)? p->p_pid : 0);
		if (type & LOCK_ROBUST) {
			fuword16_noerr(&lp->mutex_flag, &flag);
			if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
				if (flag & LOCK_OWNERDEAD)
					error = EOWNERDEAD;
				else if (type & USYNC_PROCESS_ROBUST)
					error = ELOCKUNMAPPED;
				else
					error = EOWNERDEAD;
			}
		}
	}
	locked = 0;
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}
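
/*
 * Usage sketch (assumption, for illustration): a user-level trylock
 * lands here; EBUSY means the lock word was already held.  Note that
 * on EOWNERDEAD/ELOCKUNMAPPED the caller does hold the lock -- the
 * error only reports that the previous owner died or unmapped the
 * memory, so the caller must still make the state consistent and
 * eventually unlock.
 */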
/*
 * Unlock the mutex and unblock any lwps that are trying to acquire it.
 * A blocked lwp resumes and retries acquiring the lock.
 */
int
lwp_mutex_unlock(lwp_mutex_t *lp)
{
	proc_t *p = ttoproc(curthread);
	lwpchan_t lwpchan;
	uchar_t waiters;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile uint8_t type = 0;
	label_t ljb;
	uint16_t flag;
	int error = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}

	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	suword8_noerr(&lp->mutex_type, type);

	if (UPIMUTEX(type)) {
		no_fault();
		error = lwp_upimutex_unlock(lp, type);
		if (error)
			return (set_errno(error));
		return (0);
	}

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	if (type & LOCK_ROBUST) {
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
			flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
			flag |= LOCK_NOTRECOVERABLE;
			suword16_noerr(&lp->mutex_flag, flag);
		}
	}
	set_owner_pid(lp, 0, 0);
	ulock_clear(&lp->mutex_lockw);
	/*
	 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp
	 * will re-try the lock in lwp_mutex_timedlock(). The call to
	 * lwp_release() may fail.  If it fails, do not write into the
	 * waiter bit.
	 * The call to lwp_release() might fail for one of three reasons:
	 *
	 *	1. due to the thread which set the waiter bit not actually
	 *	   sleeping since it got the lock on the re-try. The waiter
	 *	   bit will then be correctly updated by that thread. This
	 *	   window may be closed by reading the wait bit again here
	 *	   and not calling lwp_release() at all if it is zero.
	 *	2. the thread which set the waiter bit and went to sleep
	 *	   was woken up by a signal. This time, the waiter recomputes
	 *	   the wait bit in the return with EINTR code.
	 *	3. the waiter bit read by lwp_mutex_wakeup() was in
	 *	   memory that has been re-used after the lock was dropped.
	 *	   In this case, writing into the waiter bit would cause data
	 *	   corruption.
	 */
	fuword8_noerr(&lp->mutex_waiters, &waiters);
	if (waiters) {
		if ((type & LOCK_ROBUST) &&
		    (flag & LOCK_NOTRECOVERABLE)) {
			lwp_release_all(&lwpchan);
			suword8_noerr(&lp->mutex_waiters, 0);
		} else if (lwp_release(&lwpchan, &waiters, 0)) {
			suword8_noerr(&lp->mutex_waiters, waiters);
		}
	}

	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}