/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2015 Joyent, Inc.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	All Rights Reserved	*/
#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/prsystm.h>
#include <sys/sobject.h>
#include <sys/fault.h>
#include <sys/procfs.h>
#include <sys/watchpoint.h>
#include <sys/cmn_err.h>
#include <sys/machlock.h>
#include <sys/debug.h>
#include <sys/synch.h>
#include <sys/synch32.h>
#include <sys/class.h>
#include <sys/schedctl.h>
#include <sys/sleepq.h>
#include <sys/policy.h>
#include <sys/tnf_probe.h>
#include <sys/lwpchan_impl.h>
#include <sys/turnstile.h>
#include <sys/atomic.h>
#include <sys/lwp_timer_impl.h>
#include <sys/lwp_upimutex_impl.h>
static kthread_t *lwpsobj_owner(caddr_t);
static void lwp_unsleep(kthread_t *t);
static void lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip);
static void lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg);
static void lwp_mutex_unregister(void *uaddr);
static void set_owner_pid(lwp_mutex_t *, uintptr_t, pid_t);
static int iswanted(kthread_t *, lwpchan_t *);

extern int lwp_cond_signal(lwp_cond_t *cv);
/*
 * Maximum number of user prio inheritance locks that can be held by a thread.
 * Used to limit kmem for each thread. This is a per-thread limit that
 * can be administered on a system wide basis (using /etc/system).
 *
 * Also, when a limit, say maxlwps is added for numbers of lwps within a
 * process, the per-thread limit automatically becomes a process-wide limit
 * of maximum number of held upi locks within a process:
 *	maxheldupimx = maxnestupimx * maxlwps;
 */
static uint32_t maxnestupimx = 2000;
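/*
 * Illustrative note (added for this edit, not in the original source):
 * because maxnestupimx is an ordinary global, an administrator can raise
 * the per-thread nesting limit the usual /etc/system way, for example:
 *
 *	set maxnestupimx = 4000
 *
 * The limit is only enforced for threads that lack the resource-control
 * privilege; see the secpolicy_resource() checks in lwp_upimutex_lock()
 * below.
 */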
/*
 * The sobj_ops vector exports a set of functions needed when a thread
 * is asleep on a synchronization object of this type.
 */
static sobj_ops_t lwp_sobj_ops = {
	SOBJ_USER, lwpsobj_owner, lwp_unsleep, lwp_change_pri
};

static kthread_t *lwpsobj_pi_owner(upimutex_t *up);

static sobj_ops_t lwp_sobj_pi_ops = {
	SOBJ_USER_PI, lwpsobj_pi_owner, turnstile_unsleep,
	turnstile_change_pri
};

static sleepq_head_t lwpsleepq[NSLEEPQ];
upib_t upimutextab[UPIMUTEX_TABSIZE];
#define	LWPCHAN_LOCK_SHIFT	10		/* 1024 locks for each pool */
#define	LWPCHAN_LOCK_SIZE	(1 << LWPCHAN_LOCK_SHIFT)

/*
 * We know that both lc_wchan and lc_wchan0 are addresses that most
 * likely are 8-byte aligned, so we shift off the low-order 3 bits.
 * 'pool' is either 0 or 1.
 */
#define	LWPCHAN_LOCK_HASH(X, pool) \
	(((((X) >> 3) ^ ((X) >> (LWPCHAN_LOCK_SHIFT + 3))) & \
	(LWPCHAN_LOCK_SIZE - 1)) + ((pool)? LWPCHAN_LOCK_SIZE : 0))

static kmutex_t lwpchanlock[2 * LWPCHAN_LOCK_SIZE];

/*
 * Is this a POSIX threads user-level lock requiring priority inheritance?
 */
#define	UPIMUTEX(type)	((type) & LOCK_PRIO_INHERIT)
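/*
 * Worked example (added for this edit, not in the original source): with
 * LWPCHAN_LOCK_SHIFT == 10, LWPCHAN_LOCK_SIZE is 1024 and the hash folds
 * bits <3..12> of the xor'd lwpchan value with bits <13..22>.  For
 * X == 0x2468 and pool == 0:
 *
 *	(X >> 3)		== 0x48d
 *	(X >> (10 + 3))		== 0x1
 *	(0x48d ^ 0x1) & 0x3ff	== 0x8c
 *
 * so the lock used is lwpchanlock[0x8c]; the same X with pool == 1 would
 * use lwpchanlock[0x8c + 1024].  Both pools share one array, which is why
 * lwpchanlock has 2 * LWPCHAN_LOCK_SIZE entries.
 */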
static sleepq_head_t *
lwpsqhash(lwpchan_t *lwpchan)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;

	return (&lwpsleepq[SQHASHINDEX(x)]);
}
/*
 * Keep this in sync with lwpchan_unlock(), below.
 */
static void
lwpchan_lock(lwpchan_t *lwpchan, int pool)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;

	mutex_enter(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]);
}
/*
 * Keep this in sync with lwpchan_lock(), above.
 */
static void
lwpchan_unlock(lwpchan_t *lwpchan, int pool)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;

	mutex_exit(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]);
}
/*
 * Delete mappings from the lwpchan cache for pages that are being
 * unmapped by as_unmap().  Given a range of addresses, "start" to "end",
 * all mappings within the range are deleted from the lwpchan cache.
 */
void
lwpchan_delete_mapping(proc_t *p, caddr_t start, caddr_t end)
{
	lwpchan_data_t *lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_entry_t *ent;
	lwpchan_entry_t **prev;
	caddr_t addr;

	mutex_enter(&p->p_lcp_lock);
	lcp = p->p_lcp;
	hashbucket = lcp->lwpchan_cache;
	endbucket = hashbucket + lcp->lwpchan_size;
	for (; hashbucket < endbucket; hashbucket++) {
		if (hashbucket->lwpchan_chain == NULL)
			continue;
		mutex_enter(&hashbucket->lwpchan_lock);
		prev = &hashbucket->lwpchan_chain;
		/* check entire chain */
		while ((ent = *prev) != NULL) {
			addr = ent->lwpchan_addr;
			if (start <= addr && addr < end) {
				*prev = ent->lwpchan_next;
				/*
				 * We do this only for the obsolete type
				 * USYNC_PROCESS_ROBUST.  Otherwise robust
				 * locks do not draw ELOCKUNMAPPED or
				 * EOWNERDEAD due to being unmapped.
				 */
				if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
				    (ent->lwpchan_type & USYNC_PROCESS_ROBUST))
					lwp_mutex_cleanup(ent, LOCK_UNMAPPED);
				/*
				 * If there is a user-level robust lock
				 * registration, mark it as invalid.
				 */
				if ((addr = ent->lwpchan_uaddr) != NULL)
					lwp_mutex_unregister(addr);
				kmem_free(ent, sizeof (*ent));
				atomic_dec_32(&lcp->lwpchan_entries);
			} else {
				prev = &ent->lwpchan_next;
			}
		}
		mutex_exit(&hashbucket->lwpchan_lock);
	}
	mutex_exit(&p->p_lcp_lock);
}
/*
 * Given an lwpchan cache pointer and a process virtual address,
 * return a pointer to the corresponding lwpchan hash bucket.
 */
static lwpchan_hashbucket_t *
lwpchan_bucket(lwpchan_data_t *lcp, uintptr_t addr)
{
	uint_t i;

	/*
	 * All user-level sync object addresses are 8-byte aligned.
	 * Ignore the lowest 3 bits of the address and use the
	 * higher-order 2*lwpchan_bits bits for the hash index.
	 */
	addr >>= 3;
	i = (addr ^ (addr >> lcp->lwpchan_bits)) & lcp->lwpchan_mask;
	return (lcp->lwpchan_cache + i);
}
/*
 * (Re)allocate the per-process lwpchan cache.
 */
static void
lwpchan_alloc_cache(proc_t *p, uint_t bits)
{
	lwpchan_data_t *lcp;
	lwpchan_data_t *old_lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_hashbucket_t *newbucket;
	lwpchan_entry_t *ent;
	lwpchan_entry_t *next;
	uint_t count;

	ASSERT(bits >= LWPCHAN_INITIAL_BITS && bits <= LWPCHAN_MAX_BITS);

	lcp = kmem_alloc(sizeof (lwpchan_data_t), KM_SLEEP);
	lcp->lwpchan_bits = bits;
	lcp->lwpchan_size = 1 << lcp->lwpchan_bits;
	lcp->lwpchan_mask = lcp->lwpchan_size - 1;
	lcp->lwpchan_entries = 0;
	lcp->lwpchan_cache = kmem_zalloc(lcp->lwpchan_size *
	    sizeof (lwpchan_hashbucket_t), KM_SLEEP);
	lcp->lwpchan_next_data = NULL;

	mutex_enter(&p->p_lcp_lock);
	if ((old_lcp = p->p_lcp) != NULL) {
		if (old_lcp->lwpchan_bits >= bits) {
			/* someone beat us to it */
			mutex_exit(&p->p_lcp_lock);
			kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
			    sizeof (lwpchan_hashbucket_t));
			kmem_free(lcp, sizeof (lwpchan_data_t));
			return;
		}
		/*
		 * Acquire all of the old hash table locks.
		 */
		hashbucket = old_lcp->lwpchan_cache;
		endbucket = hashbucket + old_lcp->lwpchan_size;
		for (; hashbucket < endbucket; hashbucket++)
			mutex_enter(&hashbucket->lwpchan_lock);
		/*
		 * Move all of the old hash table entries to the
		 * new hash table.  The new hash table has not yet
		 * been installed so we don't need any of its locks.
		 */
		count = 0;
		hashbucket = old_lcp->lwpchan_cache;
		for (; hashbucket < endbucket; hashbucket++) {
			ent = hashbucket->lwpchan_chain;
			while (ent != NULL) {
				next = ent->lwpchan_next;
				newbucket = lwpchan_bucket(lcp,
				    (uintptr_t)ent->lwpchan_addr);
				ent->lwpchan_next = newbucket->lwpchan_chain;
				newbucket->lwpchan_chain = ent;
				ent = next;
				count++;
			}
			hashbucket->lwpchan_chain = NULL;
		}
		lcp->lwpchan_entries = count;
	}

	/*
	 * Retire the old hash table.  We can't actually kmem_free() it
	 * now because someone may still have a pointer to it.  Instead,
	 * we link it onto the new hash table's list of retired hash tables.
	 * The new hash table is double the size of the previous one, so
	 * the total size of all retired hash tables is less than the size
	 * of the new one.  exit() and exec() free the retired hash tables
	 * (see lwpchan_destroy_cache(), below).
	 */
	lcp->lwpchan_next_data = old_lcp;

	/*
	 * As soon as we store the new lcp, future locking operations will
	 * use it.  Therefore, we must ensure that all the state we've just
	 * established reaches global visibility before the new lcp does.
	 */
	membar_producer();
	p->p_lcp = lcp;

	if (old_lcp != NULL) {
		/*
		 * Release all of the old hash table locks.
		 */
		hashbucket = old_lcp->lwpchan_cache;
		for (; hashbucket < endbucket; hashbucket++)
			mutex_exit(&hashbucket->lwpchan_lock);
	}
	mutex_exit(&p->p_lcp_lock);
}
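/*
 * Illustrative arithmetic (added for this edit, not in the original source):
 * each reallocation doubles lwpchan_size, so if the newest table has S
 * buckets the retired tables on lwpchan_next_data hold at most
 * S/2 + S/4 + ... buckets, which is strictly less than S.  Keeping the
 * retired tables around therefore at most doubles the cache footprint
 * until exit()/exec() calls lwpchan_destroy_cache() and frees the chain.
 */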
/*
 * Deallocate the lwpchan cache, and any dynamically allocated mappings.
 * Called when the process exits or execs.  All lwps except one have
 * exited so we need no locks here.
 */
void
lwpchan_destroy_cache(int exec)
{
	proc_t *p = curproc;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_data_t *lcp;
	lwpchan_entry_t *ent;
	lwpchan_entry_t *next;
	uint16_t lockflg;

	lcp = p->p_lcp;
	p->p_lcp = NULL;

	lockflg = exec? LOCK_UNMAPPED : LOCK_OWNERDEAD;
	hashbucket = lcp->lwpchan_cache;
	endbucket = hashbucket + lcp->lwpchan_size;
	for (; hashbucket < endbucket; hashbucket++) {
		ent = hashbucket->lwpchan_chain;
		hashbucket->lwpchan_chain = NULL;
		while (ent != NULL) {
			next = ent->lwpchan_next;
			if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
			    (ent->lwpchan_type & (USYNC_PROCESS | LOCK_ROBUST))
			    == (USYNC_PROCESS | LOCK_ROBUST))
				lwp_mutex_cleanup(ent, lockflg);
			kmem_free(ent, sizeof (*ent));
			ent = next;
		}
	}

	while (lcp != NULL) {
		lwpchan_data_t *next_lcp = lcp->lwpchan_next_data;
		kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
		    sizeof (lwpchan_hashbucket_t));
		kmem_free(lcp, sizeof (lwpchan_data_t));
		lcp = next_lcp;
	}
}
/*
 * Return zero when there is an entry in the lwpchan cache for the
 * given process virtual address and non-zero when there is not.
 * The returned non-zero value is the current length of the
 * hash chain plus one.  The caller holds the hash bucket lock.
 */
static uint_t
lwpchan_cache_mapping(caddr_t addr, int type, int pool, lwpchan_t *lwpchan,
    lwpchan_hashbucket_t *hashbucket)
{
	lwpchan_entry_t *ent;
	uint_t count = 1;

	for (ent = hashbucket->lwpchan_chain; ent; ent = ent->lwpchan_next) {
		if (ent->lwpchan_addr == addr) {
			if (ent->lwpchan_type != type ||
			    ent->lwpchan_pool != pool) {
				/*
				 * This shouldn't happen, but might if the
				 * process reuses its memory for different
				 * types of sync objects.  We test first
				 * to avoid grabbing the memory cache line.
				 */
				ent->lwpchan_type = (uint16_t)type;
				ent->lwpchan_pool = (uint16_t)pool;
			}
			*lwpchan = ent->lwpchan_lwpchan;
			return (0);
		}
		count++;
	}
	return (count);
}
/*
 * Return the cached lwpchan mapping if cached, otherwise insert
 * a virtual address to lwpchan mapping into the cache.
 */
static int
lwpchan_get_mapping(struct as *as, caddr_t addr, caddr_t uaddr,
    int type, lwpchan_t *lwpchan, int pool)
{
	proc_t *p = curproc;
	lwpchan_data_t *lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_entry_t *ent;
	memid_t memid;
	uint_t count;
	uint_t bits;

top:
	/* initialize the lwpchan cache, if necessary */
	if ((lcp = p->p_lcp) == NULL) {
		lwpchan_alloc_cache(p, LWPCHAN_INITIAL_BITS);
		goto top;
	}
	hashbucket = lwpchan_bucket(lcp, (uintptr_t)addr);
	mutex_enter(&hashbucket->lwpchan_lock);
	if (lcp != p->p_lcp) {
		/* someone resized the lwpchan cache; start over */
		mutex_exit(&hashbucket->lwpchan_lock);
		goto top;
	}
	if (lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket) == 0) {
		/* it's in the cache */
		mutex_exit(&hashbucket->lwpchan_lock);
		return (1);
	}
	mutex_exit(&hashbucket->lwpchan_lock);
	if (as_getmemid(as, addr, &memid) != 0)
		return (0);
	lwpchan->lc_wchan0 = (caddr_t)(uintptr_t)memid.val[0];
	lwpchan->lc_wchan = (caddr_t)(uintptr_t)memid.val[1];
	ent = kmem_alloc(sizeof (lwpchan_entry_t), KM_SLEEP);
	mutex_enter(&hashbucket->lwpchan_lock);
	if (lcp != p->p_lcp) {
		/* someone resized the lwpchan cache; start over */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		goto top;
	}
	count = lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket);
	if (count == 0) {
		/* someone else added this entry to the cache */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		return (1);
	}
	if (count > lcp->lwpchan_bits + 2 && /* larger table, longer chains */
	    (bits = lcp->lwpchan_bits) < LWPCHAN_MAX_BITS) {
		/* hash chain too long; reallocate the hash table */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		lwpchan_alloc_cache(p, bits + 1);
		goto top;
	}
	ent->lwpchan_addr = addr;
	ent->lwpchan_uaddr = uaddr;
	ent->lwpchan_type = (uint16_t)type;
	ent->lwpchan_pool = (uint16_t)pool;
	ent->lwpchan_lwpchan = *lwpchan;
	ent->lwpchan_next = hashbucket->lwpchan_chain;
	hashbucket->lwpchan_chain = ent;
	atomic_inc_32(&lcp->lwpchan_entries);
	mutex_exit(&hashbucket->lwpchan_lock);
	return (1);
}
/*
 * Return a unique pair of identifiers that corresponds to a
 * synchronization object's virtual address.  Process-shared
 * sync objects usually get vnode/offset from as_getmemid().
 */
static int
get_lwpchan(struct as *as, caddr_t addr, int type, lwpchan_t *lwpchan, int pool)
{
	/*
	 * If the lwp synch object is defined to be process-private,
	 * we just make the first field of the lwpchan be 'as' and
	 * the second field be the synch object's virtual address.
	 * (segvn_getmemid() does the same for MAP_PRIVATE mappings.)
	 * The lwpchan cache is used only for process-shared objects.
	 */
	if (!(type & USYNC_PROCESS)) {
		lwpchan->lc_wchan0 = (caddr_t)as;
		lwpchan->lc_wchan = addr;
		return (1);
	}

	return (lwpchan_get_mapping(as, addr, NULL, type, lwpchan, pool));
}
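/*
 * Illustrative example (added for this edit, not in the original source):
 * for a USYNC_THREAD (process-private) mutex at user address 0x10000 in a
 * process whose address space is `as', the lwpchan is simply the pair
 * { (caddr_t)as, (caddr_t)0x10000 }, so identical addresses in different
 * processes never collide.  For a USYNC_PROCESS object the pair comes from
 * as_getmemid(), typically identifying the backing object and offset, so
 * every process mapping the same shared page computes the same lwpchan and
 * therefore sleeps on the same queue.
 */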
static void
lwp_block(lwpchan_t *lwpchan)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	sleepq_head_t *sqh;

	thread_lock(t);
	t->t_flag |= T_WAKEABLE;
	t->t_lwpchan = *lwpchan;
	t->t_sobj_ops = &lwp_sobj_ops;
	sqh = lwpsqhash(lwpchan);
	disp_lock_enter_high(&sqh->sq_lock);
	THREAD_SLEEP(t, &sqh->sq_lock);
	sleepq_insert(&sqh->sq_queue, t);
	thread_unlock(t);
	lwp->lwp_asleep = 1;
	lwp->lwp_sysabort = 0;
	(void) new_mstate(curthread, LMS_SLEEP);
}
static kthread_t *
lwpsobj_pi_owner(upimutex_t *up)
{
	return (up->upi_owner);
}

static struct upimutex *
upi_get(upib_t *upibp, lwpchan_t *lcp)
{
	struct upimutex *upip;

	for (upip = upibp->upib_first; upip != NULL;
	    upip = upip->upi_nextchain) {
		if (upip->upi_lwpchan.lc_wchan0 == lcp->lc_wchan0 &&
		    upip->upi_lwpchan.lc_wchan == lcp->lc_wchan)
			break;
	}
	return (upip);
}

static void
upi_chain_add(upib_t *upibp, struct upimutex *upimutex)
{
	ASSERT(MUTEX_HELD(&upibp->upib_lock));

	/*
	 * Insert upimutex at front of list. Maybe a bit unfair
	 * but assume that not many lwpchans hash to the same
	 * upimutextab bucket, i.e. the list of upimutexes from
	 * upib_first is not too long.
	 */
	upimutex->upi_nextchain = upibp->upib_first;
	upibp->upib_first = upimutex;
}

static void
upi_chain_del(upib_t *upibp, struct upimutex *upimutex)
{
	struct upimutex **prev;

	ASSERT(MUTEX_HELD(&upibp->upib_lock));

	prev = &upibp->upib_first;
	while (*prev != upimutex) {
		prev = &(*prev)->upi_nextchain;
	}
	*prev = upimutex->upi_nextchain;
	upimutex->upi_nextchain = NULL;
}
/*
 * Add upimutex to chain of upimutexes held by curthread.
 * Returns number of upimutexes held by curthread.
 */
static int
upi_mylist_add(struct upimutex *upimutex)
{
	kthread_t *t = curthread;

	/*
	 * Insert upimutex at front of list of upimutexes owned by t. This
	 * would match typical LIFO order in which nested locks are acquired
	 * and released.
	 */
	upimutex->upi_nextowned = t->t_upimutex;
	t->t_upimutex = upimutex;
	t->t_nupinest++;
	ASSERT(t->t_nupinest > 0);
	return (t->t_nupinest);
}

/*
 * Delete upimutex from list of upimutexes owned by curthread.
 */
static void
upi_mylist_del(struct upimutex *upimutex)
{
	kthread_t *t = curthread;
	struct upimutex **prev;

	/*
	 * Since the order in which nested locks are acquired and released
	 * is typically LIFO, and typical nesting levels are not too deep, the
	 * following should not be expensive in the general case.
	 */
	prev = &t->t_upimutex;
	while (*prev != upimutex) {
		prev = &(*prev)->upi_nextowned;
	}
	*prev = upimutex->upi_nextowned;
	upimutex->upi_nextowned = NULL;
	ASSERT(t->t_nupinest > 0);
	t->t_nupinest--;
}
/*
 * Returns true if upimutex is owned. Should be called only when upim points
 * to kmem which cannot disappear from underneath.
 */
static int
upi_owned(upimutex_t *upim)
{
	return (upim->upi_owner == curthread);
}
/*
 * Returns pointer to kernel object (upimutex_t *) if lp is owned.
 */
static struct upimutex *
lwp_upimutex_owned(lwp_mutex_t *lp, uint8_t type)
{
	lwpchan_t lwpchan;
	upib_t *upibp;
	struct upimutex *upimutex;

	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL))
		return (NULL);

	upibp = &UPI_CHAIN(lwpchan);
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	if (upimutex == NULL || upimutex->upi_owner != curthread) {
		mutex_exit(&upibp->upib_lock);
		return (NULL);
	}
	mutex_exit(&upibp->upib_lock);
	return (upimutex);
}
/*
 * Unlocks upimutex, waking up waiters if any. upimutex kmem is freed if
 * no lock hand-off occurs.
 */
static void
upimutex_unlock(struct upimutex *upimutex, uint16_t flag)
{
	turnstile_t *ts;
	upib_t *upibp;
	kthread_t *newowner;

	upi_mylist_del(upimutex);
	upibp = upimutex->upi_upibp;
	mutex_enter(&upibp->upib_lock);
	if (upimutex->upi_waiter != 0) {	/* if waiters */
		ts = turnstile_lookup(upimutex);
		if (ts != NULL && !(flag & LOCK_NOTRECOVERABLE)) {
			/* hand-off lock to highest prio waiter */
			newowner = ts->ts_sleepq[TS_WRITER_Q].sq_first;
			upimutex->upi_owner = newowner;
			if (ts->ts_waiters == 1)
				upimutex->upi_waiter = 0;
			turnstile_wakeup(ts, TS_WRITER_Q, 1, newowner);
			mutex_exit(&upibp->upib_lock);
			return;
		} else if (ts != NULL) {
			/* LOCK_NOTRECOVERABLE: wakeup all */
			turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL);
		} else {
			/*
			 * Misleading w bit. Waiters might have been
			 * interrupted. No need to clear the w bit (upimutex
			 * will soon be freed). Re-calculate PI from existing
			 * waiters.
			 */
			turnstile_exit(upimutex);
			turnstile_pi_recalc();
		}
	}
	/*
	 * no waiters, or LOCK_NOTRECOVERABLE.
	 * remove from the bucket chain of upi mutexes.
	 * de-allocate kernel memory (upimutex).
	 */
	upi_chain_del(upimutex->upi_upibp, upimutex);
	mutex_exit(&upibp->upib_lock);
	kmem_free(upimutex, sizeof (upimutex_t));
}
static int
lwp_upimutex_lock(lwp_mutex_t *lp, uint8_t type, int try, lwp_timer_t *lwptp)
{
	label_t ljb;
	int error = 0;
	lwpchan_t lwpchan;
	uint16_t flag;
	upib_t *upibp;
	volatile struct upimutex *upimutex = NULL;
	turnstile_t *ts;
	uint32_t nupinest;
	volatile int upilocked = 0;

	if (on_fault(&ljb)) {
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		error = EFAULT;
		goto out;
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	upibp = &UPI_CHAIN(lwpchan);
retry:
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	if (upimutex == NULL) {
		/* lock available since lwpchan has no upimutex */
		upimutex = kmem_zalloc(sizeof (upimutex_t), KM_SLEEP);
		upi_chain_add(upibp, (upimutex_t *)upimutex);
		upimutex->upi_owner = curthread;	/* grab lock */
		upimutex->upi_upibp = upibp;
		upimutex->upi_vaddr = lp;
		upimutex->upi_lwpchan = lwpchan;
		mutex_exit(&upibp->upib_lock);
		nupinest = upi_mylist_add((upimutex_t *)upimutex);
		upilocked = 1;
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (nupinest > maxnestupimx &&
		    secpolicy_resource(CRED()) != 0) {
			upimutex_unlock((upimutex_t *)upimutex, flag);
			error = ENOMEM;
			goto out;
		}
		if (flag & LOCK_NOTRECOVERABLE) {
			/*
			 * Since the setting of LOCK_NOTRECOVERABLE
			 * was done under the high-level upi mutex,
			 * in lwp_upimutex_unlock(), this flag needs to
			 * be checked while holding the upi mutex.
			 * If set, this thread should return without
			 * the lock held, and with the right error code.
			 */
			upimutex_unlock((upimutex_t *)upimutex, flag);
			upilocked = 0;
			error = ENOTRECOVERABLE;
		} else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
			if (flag & LOCK_OWNERDEAD)
				error = EOWNERDEAD;
			else if (type & USYNC_PROCESS_ROBUST)
				error = ELOCKUNMAPPED;
			else
				error = EOWNERDEAD;
		}
		goto out;
	}
	/*
	 * If a upimutex object exists, it must have an owner.
	 * This is due to lock hand-off, and release of upimutex when no
	 * waiters are present at unlock time.
	 */
	ASSERT(upimutex->upi_owner != NULL);
	if (upimutex->upi_owner == curthread) {
		/*
		 * The user wrapper can check if the mutex type is
		 * ERRORCHECK: if not, it should stall at user-level.
		 * If so, it should return the error code.
		 */
		mutex_exit(&upibp->upib_lock);
		error = EDEADLK;
		goto out;
	}
	if (try == UPIMUTEX_TRY) {
		mutex_exit(&upibp->upib_lock);
		error = EBUSY;
		goto out;
	}
	/*
	 * Block for the lock.
	 */
	if ((error = lwptp->lwpt_time_error) != 0) {
		/*
		 * The SUSV3 Posix spec is very clear that we
		 * should get no error from validating the
		 * timer until we would actually sleep.
		 */
		mutex_exit(&upibp->upib_lock);
		goto out;
	}
	if (lwptp->lwpt_tsp != NULL) {
		/*
		 * Unlike the protocol for other lwp timedwait operations,
		 * we must drop t_delay_lock before going to sleep in
		 * turnstile_block() for a upi mutex.
		 * See the comments below and in turnstile.c
		 */
		mutex_enter(&curthread->t_delay_lock);
		(void) lwp_timer_enqueue(lwptp);
		mutex_exit(&curthread->t_delay_lock);
	}
	/*
	 * Now, set the waiter bit and block for the lock in turnstile_block().
	 * No need to preserve the previous wbit since a lock try is not
	 * attempted after setting the wait bit. Wait bit is set under
	 * the upib_lock, which is not released until the turnstile lock
	 * is acquired. Say, the upimutex is L:
	 *
	 * 1. upib_lock is held so the waiter does not have to retry L after
	 *    setting the wait bit: since the owner has to grab the upib_lock
	 *    to unlock L, it will certainly see the wait bit set.
	 * 2. upib_lock is not released until the turnstile lock is acquired.
	 *    This is the key to preventing a missed wake-up. Otherwise, the
	 *    owner could acquire the upib_lock, and the tc_lock, to call
	 *    turnstile_wakeup(). All this, before the waiter gets tc_lock
	 *    to sleep in turnstile_block(). turnstile_wakeup() will then not
	 *    find this waiter, resulting in the missed wakeup.
	 * 3. The upib_lock, being a kernel mutex, cannot be released while
	 *    holding the tc_lock (since mutex_exit() could need to acquire
	 *    the same tc_lock)...and so is held when calling turnstile_block().
	 *    The address of upib_lock is passed to turnstile_block() which
	 *    releases it after releasing all turnstile locks, and before going
	 *    to sleep in swtch().
	 * 4. The waiter value cannot be a count of waiters, because a waiter
	 *    can be interrupted. The interrupt occurs under the tc_lock, at
	 *    which point, the upib_lock cannot be locked, to decrement waiter
	 *    count. So, just treat the waiter state as a bit, not a count.
	 */
	ts = turnstile_lookup((upimutex_t *)upimutex);
	upimutex->upi_waiter = 1;
	error = turnstile_block(ts, TS_WRITER_Q, (upimutex_t *)upimutex,
	    &lwp_sobj_pi_ops, &upibp->upib_lock, lwptp);
	/*
	 * Hand-off implies that we wakeup holding the lock, except when:
	 *	- deadlock is detected
	 *	- lock is not recoverable
	 *	- we got an interrupt or timeout
	 * If we wake up due to an interrupt or timeout, we may
	 * or may not be holding the lock due to mutex hand-off.
	 * Use lwp_upimutex_owned() to check if we do hold the lock.
	 */
	if (error != 0) {
		if ((error == EINTR || error == ETIME) &&
		    (upimutex = lwp_upimutex_owned(lp, type))) {
			/*
			 * Unlock and return - the re-startable syscall will
			 * try the lock again if we got EINTR.
			 */
			(void) upi_mylist_add((upimutex_t *)upimutex);
			upimutex_unlock((upimutex_t *)upimutex, 0);
		}
		/*
		 * The only other possible error is EDEADLK.  If so, upimutex
		 * is valid, since its owner is deadlocked with curthread.
		 */
		ASSERT(error == EINTR || error == ETIME ||
		    (error == EDEADLK && !upi_owned((upimutex_t *)upimutex)));
		ASSERT(!lwp_upimutex_owned(lp, type));
		goto out;
	}
	if (lwp_upimutex_owned(lp, type)) {
		ASSERT(lwp_upimutex_owned(lp, type) == upimutex);
		nupinest = upi_mylist_add((upimutex_t *)upimutex);
		upilocked = 1;
	}
	/*
	 * Now, need to read the user-level lp->mutex_flag to do the following:
	 *
	 * - if lock is held, check if EOWNERDEAD or ELOCKUNMAPPED
	 *   should be returned.
	 * - if lock isn't held, check if ENOTRECOVERABLE should
	 *   be returned.
	 *
	 * Now, either lp->mutex_flag is readable or it's not. If not
	 * readable, the on_fault path will cause a return with EFAULT
	 * as it should.  If it is readable, the state of the flag
	 * encodes the robustness state of the lock:
	 *
	 * If the upimutex is locked here, the flag's LOCK_OWNERDEAD
	 * or LOCK_UNMAPPED setting will influence the return code
	 * appropriately.  If the upimutex is not locked here, this
	 * could be due to a spurious wake-up or a NOTRECOVERABLE
	 * event.  The flag's setting can be used to distinguish
	 * between these two events.
	 */
	fuword16_noerr(&lp->mutex_flag, &flag);
	if (upilocked) {
		/*
		 * If the thread wakes up from turnstile_block with the lock
		 * held, the flag could not be set to LOCK_NOTRECOVERABLE,
		 * since it would not have been handed-off the lock.
		 * So, no need to check for this case.
		 */
		if (nupinest > maxnestupimx &&
		    secpolicy_resource(CRED()) != 0) {
			upimutex_unlock((upimutex_t *)upimutex, flag);
			upilocked = 0;
			error = ENOMEM;
		} else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
			if (flag & LOCK_OWNERDEAD)
				error = EOWNERDEAD;
			else if (type & USYNC_PROCESS_ROBUST)
				error = ELOCKUNMAPPED;
			else
				error = EOWNERDEAD;
		}
	} else {
		/*
		 * Wake-up without the upimutex held. Either this is a
		 * spurious wake-up (due to signals, forkall(), whatever), or
		 * it is a LOCK_NOTRECOVERABLE robustness event. The setting
		 * of the mutex flag can be used to distinguish between the
		 * two events.
		 */
		if (flag & LOCK_NOTRECOVERABLE) {
			error = ENOTRECOVERABLE;
		} else {
			/*
			 * Here, the flag could be set to LOCK_OWNERDEAD or
			 * not. In both cases, this is a spurious wakeup,
			 * since the upi lock is not held, but the thread
			 * has returned from turnstile_block().
			 *
			 * The user flag could be LOCK_OWNERDEAD if, at the
			 * same time as curthread having been woken up
			 * spuriously, the owner (say Tdead) has died, marked
			 * the mutex flag accordingly, and handed off the lock
			 * to some other waiter (say Tnew). curthread just
			 * happened to read the flag while Tnew has yet to deal
			 * with the owner-dead event.
			 *
			 * In this event, curthread should retry the lock.
			 * If Tnew is able to cleanup the lock, curthread
			 * will eventually get the lock with a zero error code,
			 * If Tnew is unable to cleanup, its eventual call to
			 * unlock the lock will result in the mutex flag being
			 * set to LOCK_NOTRECOVERABLE, and the wake-up of
			 * all waiters, including curthread, which will then
			 * eventually return ENOTRECOVERABLE due to the above
			 * check.
			 *
			 * Of course, if the user-flag is not set with
			 * LOCK_OWNERDEAD, retrying is the thing to do, since
			 * this is definitely a spurious wakeup.
			 */
			goto retry;
		}
	}

out:
	no_fault();
	return (error);
}
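/*
 * Illustrative user-level sketch (added for this edit, not in the original
 * source) of how the EOWNERDEAD/ENOTRECOVERABLE results produced above
 * surface through a robust, priority-inheritance POSIX mutex.  Assumes the
 * usual libc wrappers; error handling is abbreviated.
 *
 *	pthread_mutexattr_t a;
 *	pthread_mutex_t m;
 *	pthread_mutexattr_init(&a);
 *	pthread_mutexattr_setprotocol(&a, PTHREAD_PRIO_INHERIT);
 *	pthread_mutexattr_setrobust(&a, PTHREAD_MUTEX_ROBUST);
 *	pthread_mutex_init(&m, &a);
 *
 *	int err = pthread_mutex_lock(&m);
 *	if (err == EOWNERDEAD) {
 *		repair_shared_state();
 *		pthread_mutex_consistent(&m);	// clears the owner-dead state
 *	} else if (err == ENOTRECOVERABLE) {
 *		abandon_shared_state();		// lock can never be acquired
 *	}
 *	pthread_mutex_unlock(&m);
 */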
static int
lwp_upimutex_unlock(lwp_mutex_t *lp, uint8_t type)
{
	label_t ljb;
	int error = 0;
	lwpchan_t lwpchan;
	uint16_t flag;
	upib_t *upibp;
	volatile struct upimutex *upimutex = NULL;
	volatile int upilocked = 0;

	if (on_fault(&ljb)) {
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		error = EFAULT;
		goto out;
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	upibp = &UPI_CHAIN(lwpchan);
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	/*
	 * If the lock is not held, or the owner is not curthread, return
	 * error. The user-level wrapper can return this error or stall,
	 * depending on whether mutex is of ERRORCHECK type or not.
	 */
	if (upimutex == NULL || upimutex->upi_owner != curthread) {
		mutex_exit(&upibp->upib_lock);
		error = EPERM;
		goto out;
	}
	mutex_exit(&upibp->upib_lock);	/* release for user memory access */
	upilocked = 1;
	fuword16_noerr(&lp->mutex_flag, &flag);
	if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
		/*
		 * transition mutex to the LOCK_NOTRECOVERABLE state.
		 */
		flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
		flag |= LOCK_NOTRECOVERABLE;
		suword16_noerr(&lp->mutex_flag, flag);
	}
	set_owner_pid(lp, 0, 0);
	upimutex_unlock((upimutex_t *)upimutex, flag);
	upilocked = 0;
out:
	no_fault();
	return (error);
}
/*
 * Set the owner and ownerpid fields of a user-level mutex. Note, this function
 * uses the suword*_noerr routines which must be called between
 * on_fault/no_fault. However, this routine itself does not do the
 * on_fault/no_fault and it is assumed all the callers will do so instead!
 */
static void
set_owner_pid(lwp_mutex_t *lp, uintptr_t owner, pid_t pid)
{
	union {
		uint64_t word64;
		uint32_t word32[2];
	} un;

	un.word64 = (uint64_t)owner;

	suword32_noerr(&lp->mutex_ownerpid, pid);
#if defined(_LP64)
	if (((uintptr_t)lp & (_LONG_LONG_ALIGNMENT - 1)) == 0) { /* aligned */
		suword64_noerr(&lp->mutex_owner, un.word64);
		return;
	}
#endif
	/* mutex is unaligned or we are running on a 32-bit kernel */
	suword32_noerr((uint32_t *)&lp->mutex_owner, un.word32[0]);
	suword32_noerr((uint32_t *)&lp->mutex_owner + 1, un.word32[1]);
}
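/*
 * Worked example (added for this edit, not in the original source): on a
 * 64-bit kernel with owner == 0x1122334455667788 and an 8-byte-aligned lp,
 * a single suword64_noerr() stores the whole value.  If lp is only 4-byte
 * aligned, or the kernel is 32-bit, the same value is stored as the two
 * 32-bit halves un.word32[0] and un.word32[1], whose placement follows the
 * machine's endianness; readers of mutex_owner must therefore treat the
 * field as one 64-bit quantity and not assume the update is atomic.
 */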
/*
 * Clear the contents of a user-level mutex; return the flags.
 * Used only by upi_dead() and lwp_mutex_cleanup(), below.
 */
static uint16_t
lwp_clear_mutex(lwp_mutex_t *lp, uint16_t lockflg)
{
	uint16_t flag;

	fuword16_noerr(&lp->mutex_flag, &flag);
	if ((flag &
	    (LOCK_OWNERDEAD | LOCK_UNMAPPED | LOCK_NOTRECOVERABLE)) == 0) {
		flag |= lockflg;
		suword16_noerr(&lp->mutex_flag, flag);
	}
	set_owner_pid(lp, 0, 0);
	suword8_noerr(&lp->mutex_rcount, 0);

	return (flag);
}
/*
 * Mark user mutex state, corresponding to kernel upimutex,
 * as LOCK_UNMAPPED or LOCK_OWNERDEAD, as appropriate
 */
static int
upi_dead(upimutex_t *upip, uint16_t lockflg)
{
	label_t ljb;
	int error = 0;
	lwp_mutex_t *lp;

	if (on_fault(&ljb)) {
		error = EFAULT;
		goto out;
	}

	lp = upip->upi_vaddr;
	(void) lwp_clear_mutex(lp, lockflg);
	suword8_noerr(&lp->mutex_lockw, 0);
out:
	no_fault();
	return (error);
}
/*
 * Unlock all upimutexes held by curthread, since curthread is dying.
 * For each upimutex, attempt to mark its corresponding user mutex object as
 * dead.
 */
void
upimutex_cleanup()
{
	kthread_t *t = curthread;
	uint16_t lockflg = (ttoproc(t)->p_proc_flag & P_PR_EXEC)?
	    LOCK_UNMAPPED : LOCK_OWNERDEAD;
	struct upimutex *upip;

	while ((upip = t->t_upimutex) != NULL) {
		if (upi_dead(upip, lockflg) != 0) {
			/*
			 * If the user object associated with this upimutex is
			 * unmapped, unlock upimutex with the
			 * LOCK_NOTRECOVERABLE flag, so that all waiters are
			 * woken up. Since user object is unmapped, it could
			 * not be marked as dead or notrecoverable.
			 * The waiters will now all wake up and return
			 * ENOTRECOVERABLE, since they would find that the lock
			 * has not been handed-off to them.
			 * See lwp_upimutex_lock().
			 */
			upimutex_unlock(upip, LOCK_NOTRECOVERABLE);
		} else {
			/*
			 * The user object has been updated as dead.
			 * Unlock the upimutex: if no waiters, upip kmem will
			 * be freed. If there is a waiter, the lock will be
			 * handed off. If exit() is in progress, each existing
			 * waiter will successively get the lock, as owners
			 * die, and each new owner will call this routine as
			 * it dies. The last owner will free kmem, since
			 * it will find the upimutex has no waiters. So,
			 * eventually, the kmem is guaranteed to be freed.
			 */
			upimutex_unlock(upip, 0);
		}
		/*
		 * Note that the call to upimutex_unlock() above will delete
		 * upimutex from the t_upimutexes chain. And so the
		 * while loop will eventually terminate.
		 */
	}
}
int
lwp_mutex_timedlock(lwp_mutex_t *lp, timespec_t *tsp, uintptr_t owner)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	lwp_timer_t lwpt;
	caddr_t timedwait;
	int error = 0;
	int time_error;
	clock_t tim = -1;
	uchar_t waiters;
	volatile int locked = 0;
	volatile int watched = 0;
	label_t ljb;
	volatile uint8_t type = 0;
	lwpchan_t lwpchan;
	sleepq_head_t *sqh;
	uint16_t flag;
	int imm_timeout = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	/*
	 * Put the lwp in an orderly state for debugging,
	 * in case we are stopped while sleeping, below.
	 */
	prstop(PR_REQUESTED, 0);

	timedwait = (caddr_t)tsp;
	if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
	    lwpt.lwpt_imm_timeout) {
		imm_timeout = 1;
		timedwait = NULL;
	}

	/*
	 * Although LMS_USER_LOCK implies "asleep waiting for user-mode lock",
	 * this micro state is really a run state. If the thread indeed blocks,
	 * this state becomes valid. If not, the state is converted back to
	 * LMS_SYSTEM. So, it is OK to set the mstate here, instead of just
	 * when blocking.
	 */
	(void) new_mstate(t, LMS_USER_LOCK);
	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	suword8_noerr(&lp->mutex_type, type);
	if (UPIMUTEX(type)) {
		no_fault();
		error = lwp_upimutex_lock(lp, type, UPIMUTEX_BLOCK, &lwpt);
		if (error == 0 || error == EOWNERDEAD ||
		    error == ELOCKUNMAPPED) {
			volatile int locked = error != 0;
			if (on_fault(&ljb)) {
				if (locked != 0)
					error = lwp_upimutex_unlock(lp, type);
				else
					error = EFAULT;
				goto upierr;
			}
			set_owner_pid(lp, owner,
			    (type & USYNC_PROCESS)? p->p_pid : 0);
			no_fault();
		}
upierr:
		if (tsp && !time_error)	/* copyout the residual time left */
			error = lwp_timer_copyout(&lwpt, error);
		if (error)
			return (set_errno(error));
		return (0);
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	if (type & LOCK_ROBUST) {
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (flag & LOCK_NOTRECOVERABLE) {
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
			error = ENOTRECOVERABLE;
			goto out;
		}
	}
	fuword8_noerr(&lp->mutex_waiters, &waiters);
	suword8_noerr(&lp->mutex_waiters, 1);

	/*
	 * If watchpoints are set, they need to be restored, since
	 * atomic accesses of memory such as the call to ulock_try()
	 * below cannot be watched.
	 */
	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	while (!ulock_try(&lp->mutex_lockw)) {
		if (time_error) {
			/*
			 * The SUSV3 Posix spec is very clear that we
			 * should get no error from validating the
			 * timer until we would actually sleep.
			 */
			error = time_error;
			break;
		}

		if (watched) {
			watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
			watched = 0;
		}

		if (timedwait) {
			/*
			 * If we successfully queue the timeout,
			 * then don't drop t_delay_lock until
			 * we are on the sleep queue (below).
			 */
			mutex_enter(&t->t_delay_lock);
			if (lwp_timer_enqueue(&lwpt) != 0) {
				mutex_exit(&t->t_delay_lock);
				imm_timeout = 1;
				timedwait = NULL;
			}
		}
		lwp_block(&lwpchan);
		/*
		 * Nothing should happen to cause the lwp to go to
		 * sleep again until after it returns from swtch().
		 */
		if (timedwait)
			mutex_exit(&t->t_delay_lock);
		locked = 0;
		lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout)
			setrun(t);
		swtch();
		t->t_flag &= ~T_WAKEABLE;
		if (timedwait)
			tim = lwp_timer_dequeue(&lwpt);
		if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t))
			error = EINTR;
		else if (imm_timeout || (timedwait && tim == -1))
			error = ETIME;
		if (error) {
			lwp->lwp_asleep = 0;
			lwp->lwp_sysabort = 0;
			watched = watch_disable_addr((caddr_t)lp, sizeof (*lp),
			    S_WRITE);
			/*
			 * Need to re-compute waiters bit. The waiters field in
			 * the lock is not reliable. Either of two things could
			 * have occurred: no lwp may have called lwp_release()
			 * for me but I have woken up due to a signal or
			 * timeout.  In this case, the waiter bit is incorrect
			 * since it is still set to 1, set above.
			 * OR an lwp_release() did occur for some other lwp on
			 * the same lwpchan.  In this case, the waiter bit is
			 * correct.  But which event occurred, one can't tell.
			 * So, recompute.
			 */
			lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
			locked = 1;
			sqh = lwpsqhash(&lwpchan);
			disp_lock_enter(&sqh->sq_lock);
			waiters = iswanted(sqh->sq_queue.sq_first, &lwpchan);
			disp_lock_exit(&sqh->sq_lock);
			break;
		}
		lwp->lwp_asleep = 0;
		watched = watch_disable_addr((caddr_t)lp, sizeof (*lp),
		    S_WRITE);
		lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
		locked = 1;
		fuword8_noerr(&lp->mutex_waiters, &waiters);
		suword8_noerr(&lp->mutex_waiters, 1);
		if (type & LOCK_ROBUST) {
			fuword16_noerr(&lp->mutex_flag, &flag);
			if (flag & LOCK_NOTRECOVERABLE) {
				error = ENOTRECOVERABLE;
				break;
			}
		}
	}

	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	if (error == 0) {
		set_owner_pid(lp, owner, (type & USYNC_PROCESS)? p->p_pid : 0);
		if (type & LOCK_ROBUST) {
			fuword16_noerr(&lp->mutex_flag, &flag);
			if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
				if (flag & LOCK_OWNERDEAD)
					error = EOWNERDEAD;
				else if (type & USYNC_PROCESS_ROBUST)
					error = ELOCKUNMAPPED;
				else
					error = EOWNERDEAD;
			}
		}
	}
	suword8_noerr(&lp->mutex_waiters, waiters);
	locked = 0;
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (tsp && !time_error)		/* copyout the residual time left */
		error = lwp_timer_copyout(&lwpt, error);
	if (error)
		return (set_errno(error));
	return (0);
}
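/*
 * Illustrative user-level sketch (added for this edit, not in the original
 * source) of the timeout path handled above.  The timer is validated by
 * lwp_timer_copyin() but, per the SUSV3 note in the loop, only reported at
 * the point where the thread would actually sleep; an already-expired
 * timeout shows up as lwpt_imm_timeout and becomes ETIME, which libc maps
 * to ETIMEDOUT for the POSIX wrapper:
 *
 *	struct timespec ts;
 *	clock_gettime(CLOCK_REALTIME, &ts);
 *	ts.tv_sec += 5;				// wait at most 5 seconds
 *	int err = pthread_mutex_timedlock(&m, &ts);
 *	if (err == ETIMEDOUT)
 *		handle_timeout();
 */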
static int
iswanted(kthread_t *t, lwpchan_t *lwpchan)
{
	/*
	 * The caller holds the dispatcher lock on the sleep queue.
	 */
	while (t != NULL) {
		if (t->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    t->t_lwpchan.lc_wchan == lwpchan->lc_wchan)
			return (1);
		t = t->t_link;
	}
	return (0);
}
/*
 * Return the highest priority thread sleeping on this lwpchan.
 */
static kthread_t *
lwp_queue_waiter(lwpchan_t *lwpchan)
{
	sleepq_head_t *sqh;
	kthread_t *tp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
	for (tp = sqh->sq_queue.sq_first; tp != NULL; tp = tp->t_link) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan)
			break;
	}
	disp_lock_exit(&sqh->sq_lock);
	return (tp);
}
static int
lwp_release(lwpchan_t *lwpchan, uchar_t *waiters, int sync_type)
{
	sleepq_head_t *sqh;
	kthread_t *tp;
	kthread_t **tpp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
	tpp = &sqh->sq_queue.sq_first;
	while ((tp = *tpp) != NULL) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
			/*
			 * The following is typically false. It could be true
			 * only if lwp_release() is called from
			 * lwp_mutex_wakeup() after reading the waiters field
			 * from memory in which the lwp lock used to be, but has
			 * since been re-used to hold a lwp cv or lwp semaphore.
			 * The thread "tp" found to match the lwp lock's wchan
			 * is actually sleeping for the cv or semaphore which
			 * now has the same wchan. In this case, lwp_release()
			 * should return failure.
			 */
			if (sync_type != (tp->t_flag & T_WAITCVSEM)) {
				ASSERT(sync_type == 0);
				/*
				 * assert that this can happen only for mutexes
				 * i.e. sync_type == 0, for correctly written
				 * user programs.
				 */
				disp_lock_exit(&sqh->sq_lock);
				return (0);
			}
			*waiters = iswanted(tp->t_link, lwpchan);
			sleepq_unlink(tpp, tp);
			DTRACE_SCHED1(wakeup, kthread_t *, tp);
			tp->t_wchan0 = NULL;
			tp->t_wchan = NULL;
			tp->t_sobj_ops = NULL;
			THREAD_TRANSITION(tp);	/* drops sleepq lock */
			thread_unlock(tp);	/* drop run queue lock */
			return (1);
		}
		tpp = &tp->t_link;
	}
	*waiters = 0;
	disp_lock_exit(&sqh->sq_lock);
	return (0);
}
static void
lwp_release_all(lwpchan_t *lwpchan)
{
	sleepq_head_t *sqh;
	kthread_t *tp;
	kthread_t **tpp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock sleep q queue */
	tpp = &sqh->sq_queue.sq_first;
	while ((tp = *tpp) != NULL) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
			sleepq_unlink(tpp, tp);
			DTRACE_SCHED1(wakeup, kthread_t *, tp);
			tp->t_wchan0 = NULL;
			tp->t_wchan = NULL;
			tp->t_sobj_ops = NULL;
			thread_unlock_high(tp);	/* release run queue lock */
		} else {
			tpp = &tp->t_link;
		}
	}
	disp_lock_exit(&sqh->sq_lock);	/* drop sleep q lock */
}
/*
 * unblock a lwp that is trying to acquire this mutex. the blocked
 * lwp resumes and retries to acquire the lock.
 */
int
lwp_mutex_wakeup(lwp_mutex_t *lp, int release_all)
{
	proc_t *p = ttoproc(curthread);
	lwpchan_t lwpchan;
	uchar_t waiters;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile uint8_t type = 0;
	label_t ljb;
	int error = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	suword8_noerr(&lp->mutex_type, type);
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	/*
	 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp will
	 * re-try the lock in lwp_mutex_timedlock(). The call to lwp_release()
	 * may fail.  If it fails, do not write into the waiter bit.
	 * The call to lwp_release() might fail due to one of three reasons:
	 *
	 *	1. due to the thread which set the waiter bit not actually
	 *	   sleeping since it got the lock on the re-try. The waiter
	 *	   bit will then be correctly updated by that thread. This
	 *	   window may be closed by reading the wait bit again here
	 *	   and not calling lwp_release() at all if it is zero.
	 *	2. the thread which set the waiter bit and went to sleep
	 *	   was woken up by a signal. This time, the waiter recomputes
	 *	   the wait bit in the return with EINTR code.
	 *	3. the waiter bit read by lwp_mutex_wakeup() was in
	 *	   memory that has been re-used after the lock was dropped.
	 *	   In this case, writing into the waiter bit would cause data
	 *	   corruption.
	 */
	if (release_all)
		lwp_release_all(&lwpchan);
	else if (lwp_release(&lwpchan, &waiters, 0))
		suword8_noerr(&lp->mutex_waiters, waiters);
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}
/*
 * lwp_cond_wait() has four arguments, a pointer to a condition variable,
 * a pointer to a mutex, a pointer to a timespec for a timed wait and
 * a flag telling the kernel whether or not to honor the kernel/user
 * schedctl parking protocol (see schedctl_is_park() in schedctl.c).
 * The kernel puts the lwp to sleep on a unique pair of caddr_t's called an
 * lwpchan, returned by get_lwpchan().  If the timespec pointer is non-NULL,
 * it is used as an in/out parameter.  On entry, it contains the relative
 * time until timeout.  On exit, we copyout the residual time left to it.
 */
int
lwp_cond_wait(lwp_cond_t *cv, lwp_mutex_t *mp, timespec_t *tsp, int check_park)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	lwp_timer_t lwpt;
	lwpchan_t cv_lwpchan;
	lwpchan_t m_lwpchan;
	caddr_t timedwait;
	volatile uint16_t type = 0;
	volatile uint8_t mtype = 0;
	uchar_t waiters;
	volatile int error;
	clock_t tim = -1;
	volatile int locked = 0;
	volatile int m_locked = 0;
	volatile int cvwatched = 0;
	volatile int mpwatched = 0;
	label_t ljb;
	volatile int no_lwpchan = 1;
	int imm_timeout = 0;
	int imm_unpark = 0;

	if ((caddr_t)cv >= p->p_as->a_userlimit ||
	    (caddr_t)mp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	/*
	 * Put the lwp in an orderly state for debugging,
	 * in case we are stopped while sleeping, below.
	 */
	prstop(PR_REQUESTED, 0);

	timedwait = (caddr_t)tsp;
	if ((error = lwp_timer_copyin(&lwpt, tsp)) != 0)
		return (set_errno(error));
	if (lwpt.lwpt_imm_timeout) {
		imm_timeout = 1;
		timedwait = NULL;
	}

	(void) new_mstate(t, LMS_USER_LOCK);

	if (on_fault(&ljb)) {
		if (no_lwpchan) {
			error = EFAULT;
			goto out;
		}
		if (m_locked) {
			m_locked = 0;
			lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
		}
		if (locked) {
			locked = 0;
			lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
		}
		/*
		 * set up another on_fault() for a possible fault
		 * on the user lock accessed at "efault"
		 */
		if (on_fault(&ljb)) {
			if (m_locked) {
				m_locked = 0;
				lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
			}
			goto out;
		}
		error = EFAULT;
		goto efault;
	}

	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype);
	suword8_noerr(&mp->mutex_type, mtype);
	if (UPIMUTEX(mtype) == 0) {
		/* convert user level mutex, "mp", to a unique lwpchan */
		/* check if mtype is ok to use below, instead of type from cv */
		if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype,
		    &m_lwpchan, LWPCHAN_MPPOOL)) {
			error = EFAULT;
			goto out;
		}
	}
	fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
	suword16_noerr(&cv->cond_type, type);
	/* convert user level condition variable, "cv", to a unique lwpchan */
	if (!get_lwpchan(p->p_as, (caddr_t)cv, type,
	    &cv_lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out;
	}
	no_lwpchan = 0;
	cvwatched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
	if (UPIMUTEX(mtype) == 0)
		mpwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp),
		    S_WRITE);

	/*
	 * lwpchan_lock ensures that the calling lwp is put to sleep atomically
	 * with respect to a possible wakeup which is a result of either
	 * an lwp_cond_signal() or an lwp_cond_broadcast().
	 *
	 * What's misleading, is that the lwp is put to sleep after the
	 * condition variable's mutex is released.  This is OK as long as
	 * the release operation is also done while holding lwpchan_lock.
	 * The lwp is then put to sleep when the possibility of pagefaulting
	 * or sleeping is completely eliminated.
	 */
	lwpchan_lock(&cv_lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	if (UPIMUTEX(mtype) == 0) {
		lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
		m_locked = 1;
		suword8_noerr(&cv->cond_waiters_kernel, 1);
		/*
		 * unlock the condition variable's mutex. (pagefaults are
		 * possible here.)
		 */
		set_owner_pid(mp, 0, 0);
		ulock_clear(&mp->mutex_lockw);
		fuword8_noerr(&mp->mutex_waiters, &waiters);
		if (waiters != 0) {
			/*
			 * Given the locking of lwpchan_lock around the release
			 * of the mutex and checking for waiters, the following
			 * call to lwp_release() can fail ONLY if the lock
			 * acquirer is interrupted after setting the waiter bit,
			 * calling lwp_block() and releasing lwpchan_lock.
			 * In this case, it could get pulled off the lwp sleep
			 * q (via setrun()) before the following call to
			 * lwp_release() occurs. In this case, the lock
			 * requestor will update the waiter bit correctly by
			 * re-evaluating it.
			 */
			if (lwp_release(&m_lwpchan, &waiters, 0))
				suword8_noerr(&mp->mutex_waiters, waiters);
		}
		m_locked = 0;
		lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
	} else {
		suword8_noerr(&cv->cond_waiters_kernel, 1);
		error = lwp_upimutex_unlock(mp, mtype);
		if (error) {	/* if the upimutex unlock failed */
			locked = 0;
			lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
			goto out;
		}
	}
	no_fault();

	if (mpwatched) {
		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
		mpwatched = 0;
	}
	if (cvwatched) {
		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
		cvwatched = 0;
	}

	if (check_park && (!schedctl_is_park() || t->t_unpark)) {
		/*
		 * We received a signal at user-level before calling here
		 * or another thread wants us to return immediately
		 * with EINTR.  See lwp_unpark().
		 */
		imm_unpark = 1;
		t->t_unpark = 0;
		timedwait = NULL;
	} else if (timedwait) {
		/*
		 * If we successfully queue the timeout,
		 * then don't drop t_delay_lock until
		 * we are on the sleep queue (below).
		 */
		mutex_enter(&t->t_delay_lock);
		if (lwp_timer_enqueue(&lwpt) != 0) {
			mutex_exit(&t->t_delay_lock);
			imm_timeout = 1;
			timedwait = NULL;
		}
	}
	t->t_flag |= T_WAITCVSEM;
	lwp_block(&cv_lwpchan);
	/*
	 * Nothing should happen to cause the lwp to go to sleep
	 * until after it returns from swtch().
	 */
	if (timedwait)
		mutex_exit(&t->t_delay_lock);
	locked = 0;
	lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
	if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) ||
	    (imm_timeout | imm_unpark))
		setrun(t);
	swtch();
	t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
	if (timedwait)
		tim = lwp_timer_dequeue(&lwpt);
	if (ISSIG(t, FORREAL) || lwp->lwp_sysabort ||
	    MUSTRETURN(p, t) || imm_unpark)
		error = EINTR;
	else if (imm_timeout || (timedwait && tim == -1))
		error = ETIME;
	lwp->lwp_asleep = 0;
	lwp->lwp_sysabort = 0;

	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	if (tsp && check_park)		/* copyout the residual time left */
		error = lwp_timer_copyout(&lwpt, error);

	/* the mutex is reacquired by the caller on return to user level */
	if (error) {
		/*
		 * If we were concurrently lwp_cond_signal()d and we
		 * received a UNIX signal or got a timeout, then perform
		 * another lwp_cond_signal() to avoid consuming the wakeup.
		 */
		if (t->t_release)
			(void) lwp_cond_signal(cv);
		return (set_errno(error));
	}
	return (0);

efault:
	/*
	 * make sure that the user level lock is dropped before
	 * returning to caller, since the caller always re-acquires it.
	 */
	if (UPIMUTEX(mtype) == 0) {
		lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
		m_locked = 1;
		set_owner_pid(mp, 0, 0);
		ulock_clear(&mp->mutex_lockw);
		fuword8_noerr(&mp->mutex_waiters, &waiters);
		if (waiters != 0) {
			/*
			 * See comment above on lock clearing and lwp_release()
			 * success/failure.
			 */
			if (lwp_release(&m_lwpchan, &waiters, 0))
				suword8_noerr(&mp->mutex_waiters, waiters);
		}
		m_locked = 0;
		lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
	} else {
		(void) lwp_upimutex_unlock(mp, mtype);
	}
out:
	no_fault();
	if (mpwatched)
		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
	if (cvwatched)
		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);
	return (set_errno(error));
}
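/*
 * Illustrative user-level sketch (added for this edit, not in the original
 * source).  lwp_cond_wait() can return without the predicate being true
 * (spurious wakeup) and always leaves the mutex to be re-acquired by the
 * caller, which is why the conventional user-level pattern is a loop:
 *
 *	pthread_mutex_lock(&m);
 *	while (!condition)
 *		pthread_cond_wait(&cv, &m);	// re-acquires m on return
 *	// ... use the protected state ...
 *	pthread_mutex_unlock(&m);
 *
 * The re-acquisition noted in the "the mutex is reacquired by the caller"
 * comment above is performed by libc, not by this system call.
 */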
/*
 * wakeup one lwp that's blocked on this condition variable.
 */
int
lwp_cond_signal(lwp_cond_t *cv)
{
	proc_t *p = ttoproc(curthread);
	lwpchan_t lwpchan;
	uchar_t waiters;
	volatile uint16_t type = 0;
	volatile int locked = 0;
	volatile int watched = 0;
	label_t ljb;
	int error = 0;

	if ((caddr_t)cv >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
	suword16_noerr(&cv->cond_type, type);
	if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type,
	    &lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	fuword8_noerr(&cv->cond_waiters_kernel, &waiters);
	if (waiters != 0) {
		/*
		 * The following call to lwp_release() might fail but it is
		 * OK to write into the waiters bit below, since the memory
		 * could not have been re-used or unmapped (for correctly
		 * written user programs) as in the case of lwp_mutex_wakeup().
		 * For an incorrect program, we should not care about data
		 * corruption since this is just one instance of other places
		 * where corruption can occur for such a program. Of course
		 * if the memory is unmapped, normal fault recovery occurs.
		 */
		(void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
		suword8_noerr(&cv->cond_waiters_kernel, waiters);
	}
	locked = 0;
	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}
/*
 * wakeup every lwp that's blocked on this condition variable.
 */
int
lwp_cond_broadcast(lwp_cond_t *cv)
{
	proc_t *p = ttoproc(curthread);
	lwpchan_t lwpchan;
	uchar_t waiters;
	volatile uint16_t type = 0;
	volatile int locked = 0;
	volatile int watched = 0;
	label_t ljb;
	int error = 0;

	if ((caddr_t)cv >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
	suword16_noerr(&cv->cond_type, type);
	if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type,
	    &lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	fuword8_noerr(&cv->cond_waiters_kernel, &waiters);
	if (waiters != 0) {
		lwp_release_all(&lwpchan);
		suword8_noerr(&cv->cond_waiters_kernel, 0);
	}
	locked = 0;
	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}
int
lwp_sema_trywait(lwp_sema_t *sp)
{
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	label_t ljb;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile uint16_t type = 0;
	int count;
	lwpchan_t lwpchan;
	uchar_t waiters;
	int error = 0;

	if ((caddr_t)sp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type);
	suword16_noerr((void *)&sp->sema_type, type);
	if (!get_lwpchan(p->p_as, (caddr_t)sp, type,
	    &lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count);
	if (count == 0)
		error = EBUSY;
	else
		suword32_noerr((void *)&sp->sema_count, --count);
	if (count != 0) {
		fuword8_noerr(&sp->sema_waiters, &waiters);
		if (waiters != 0) {
			(void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
			suword8_noerr(&sp->sema_waiters, waiters);
		}
	}
	locked = 0;
	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}
/*
 * See lwp_cond_wait(), above, for an explanation of the 'check_park' argument.
 */
int
lwp_sema_timedwait(lwp_sema_t *sp, timespec_t *tsp, int check_park)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	lwp_timer_t lwpt;
	caddr_t timedwait;
	clock_t tim = -1;
	label_t ljb;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile uint16_t type = 0;
	int count;
	lwpchan_t lwpchan;
	uchar_t waiters;
	int error = 0;
	int time_error;
	int imm_timeout = 0;
	int imm_unpark = 0;

	if ((caddr_t)sp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	/*
	 * Put the lwp in an orderly state for debugging,
	 * in case we are stopped while sleeping, below.
	 */
	prstop(PR_REQUESTED, 0);

	timedwait = (caddr_t)tsp;
	if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
	    lwpt.lwpt_imm_timeout) {
		imm_timeout = 1;
		timedwait = NULL;
	}

	watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type);
	suword16_noerr((void *)&sp->sema_type, type);
	if (!get_lwpchan(p->p_as, (caddr_t)sp, type,
	    &lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count);
	while (error == 0 && count == 0) {
		if (time_error) {
			/*
			 * The SUSV3 Posix spec is very clear that we
			 * should get no error from validating the
			 * timer until we would actually sleep.
			 */
			error = time_error;
			break;
		}
		suword8_noerr(&sp->sema_waiters, 1);
		if (watched)
			watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
		if (check_park && (!schedctl_is_park() || t->t_unpark)) {
			/*
			 * We received a signal at user-level before calling
			 * here or another thread wants us to return
			 * immediately with EINTR. See lwp_unpark().
			 */
			imm_unpark = 1;
			t->t_unpark = 0;
			timedwait = NULL;
		} else if (timedwait) {
			/*
			 * If we successfully queue the timeout,
			 * then don't drop t_delay_lock until
			 * we are on the sleep queue (below).
			 */
			mutex_enter(&t->t_delay_lock);
			if (lwp_timer_enqueue(&lwpt) != 0) {
				mutex_exit(&t->t_delay_lock);
				imm_timeout = 1;
				timedwait = NULL;
			}
		}
		t->t_flag |= T_WAITCVSEM;
		lwp_block(&lwpchan);
		/*
		 * Nothing should happen to cause the lwp to sleep
		 * again until after it returns from swtch().
		 */
		if (timedwait)
			mutex_exit(&t->t_delay_lock);
		locked = 0;
		lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) ||
		    (imm_timeout | imm_unpark))
			setrun(t);
		swtch();
		t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
		if (timedwait)
			tim = lwp_timer_dequeue(&lwpt);
		setallwatch();
		if (ISSIG(t, FORREAL) || lwp->lwp_sysabort ||
		    MUSTRETURN(p, t) || imm_unpark)
			error = EINTR;
		else if (imm_timeout || (timedwait && tim == -1))
			error = ETIME;
		lwp->lwp_asleep = 0;
		lwp->lwp_sysabort = 0;
		watched = watch_disable_addr((caddr_t)sp,
		    sizeof (*sp), S_WRITE);
		lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
		locked = 1;
		fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count);
	}
	if (error == 0)
		suword32_noerr((void *)&sp->sema_count, --count);
	if (count != 0) {
		fuword8_noerr(&sp->sema_waiters, &waiters);
		if (waiters != 0) {
			(void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
			suword8_noerr(&sp->sema_waiters, waiters);
		}
	}
	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
	if (tsp && check_park && !time_error)
		error = lwp_timer_copyout(&lwpt, error);
	if (error)
		return (set_errno(error));
	return (0);
}
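/*
 * A sketch of the timeout plumbing used above, based on how the
 * lwp_timer_* calls are used in this file: lwp_timer_copyin() validates
 * and captures the user-supplied timespec, lwp_timer_enqueue() arms the
 * timer while t_delay_lock is held, lwp_timer_dequeue() disarms it and
 * returns the remaining time (-1 once it has already fired, which is
 * mapped to ETIME above), and lwp_timer_copyout() writes the updated
 * remaining time back out on the check_park path.
 */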
int
lwp_sema_post(lwp_sema_t *sp)
{
	proc_t *p = ttoproc(curthread);
	label_t ljb;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile uint16_t type = 0;
	int count;
	lwpchan_t lwpchan;
	uchar_t waiters;
	int error = 0;

	if ((caddr_t)sp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword16_noerr(&sp->sema_type, (uint16_t *)&type);
	suword16_noerr(&sp->sema_type, type);
	if (!get_lwpchan(curproc->p_as, (caddr_t)sp, type,
	    &lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	fuword32_noerr(&sp->sema_count, (uint32_t *)&count);
	if (count == _SEM_VALUE_MAX)
		error = EOVERFLOW;
	else
		suword32_noerr(&sp->sema_count, ++count);
	if (error == 0) {
		fuword8_noerr(&sp->sema_waiters, &waiters);
		if (waiters != 0) {
			(void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
			suword8_noerr(&sp->sema_waiters, waiters);
		}
	}
	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}
#define	TRW_WANT_WRITE		0x1
#define	TRW_LOCK_GRANTED	0x2

#define	READ_LOCK		0
#define	WRITE_LOCK		1
#define	TRY_FLAG		0x10
#define	READ_LOCK_TRY		(READ_LOCK | TRY_FLAG)
#define	WRITE_LOCK_TRY		(WRITE_LOCK | TRY_FLAG)
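/*
 * The rd_wr argument of lwp_rwlock_lock() below encodes both the kind of
 * lock wanted and whether this is a try operation: WRITE_LOCK_TRY, for
 * example, is simply WRITE_LOCK with TRY_FLAG or'd in.  The function
 * strips TRY_FLAG into try_flag and then treats rd_wr as a plain
 * READ_LOCK or WRITE_LOCK value.
 */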
/*
 * Release one writer or one or more readers. Compute the rwstate word to
 * reflect the new state of the queue. For a safe hand-off we copy the new
 * rwstate value back to userland before we wake any of the new lock holders.
 *
 * Note that sleepq_insert() implements a prioritized FIFO (with writers
 * being given precedence over readers of the same priority).
 *
 * If the first thread is a reader we scan the queue releasing all readers
 * until we hit a writer or the end of the queue. If the first thread is a
 * writer we still need to check for another writer.
 */
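/*
 * A concrete illustration of the rwstate encoding, as inferred from the
 * URW_* tests in this file: the low-order bits covered by
 * URW_READERS_MASK hold the count of read-lock holders, URW_WRITE_LOCKED
 * marks a write-locked lock, and URW_HAS_WAITERS says that some thread
 * may be queued on the rwlock's lwpchan.  A lock held by two readers
 * with a writer blocked would thus read as (2 | URW_HAS_WAITERS), while
 * a held write lock with no waiters is just URW_WRITE_LOCKED.
 */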
static void
lwp_rwlock_release(lwpchan_t *lwpchan, lwp_rwlock_t *rw)
{
	sleepq_head_t *sqh;
	kthread_t *tp;
	kthread_t **tpp;
	kthread_t *tpnext;
	kthread_t *wakelist = NULL;
	uint32_t rwstate = 0;
	int wcount = 0;
	int rcount = 0;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);
	tpp = &sqh->sq_queue.sq_first;
	while ((tp = *tpp) != NULL) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
			if (tp->t_writer & TRW_WANT_WRITE) {
				if ((wcount++ == 0) && (rcount == 0)) {
					rwstate |= URW_WRITE_LOCKED;

					/* Just one writer to wake. */
					sleepq_unlink(tpp, tp);
					wakelist = tp;

					/* tpp already set for next thread. */
					continue;
				} else {
					rwstate |= URW_HAS_WAITERS;
					/* We need look no further. */
					break;
				}
			} else {
				rcount++;
				if (wcount == 0) {
					rwstate++;

					/* Add reader to wake list. */
					sleepq_unlink(tpp, tp);
					tp->t_link = wakelist;
					wakelist = tp;

					/* tpp already set for next thread. */
					continue;
				} else {
					rwstate |= URW_HAS_WAITERS;
					/* We need look no further. */
					break;
				}
			}
		}
		tpp = &tp->t_link;
	}

	/* Copy the new rwstate back to userland. */
	suword32_noerr(&rw->rwlock_readers, rwstate);

	/* Wake the new lock holder(s) up. */
	tp = wakelist;
	while (tp != NULL) {
		DTRACE_SCHED1(wakeup, kthread_t *, tp);
		tp->t_wchan0 = NULL;
		tp->t_wchan = NULL;
		tp->t_sobj_ops = NULL;
		tp->t_writer |= TRW_LOCK_GRANTED;
		tpnext = tp->t_link;
		tp->t_link = NULL;
		CL_WAKEUP(tp);
		thread_unlock_high(tp);
		tp = tpnext;
	}

	disp_lock_exit(&sqh->sq_lock);
}
/*
 * We enter here holding the user-level mutex, which we must release before
 * returning or blocking. Based on lwp_cond_wait().
 */
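/*
 * A sketch of the expected calling sequence from user level (assuming a
 * libc-style rwlock built on these primitives; the user-level half is
 * not part of this file):
 *
 *	acquire rw->mutex in user mode
 *	re-check the rwstate word; if the lock cannot be granted,
 *	    set URW_HAS_WAITERS and call lwp_rwlock_sys(), still
 *	    holding the mutex
 *	the kernel (below) drops the mutex before blocking, so an
 *	    unlocking thread is never held up by a sleeping waiter
 */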
static int
lwp_rwlock_lock(lwp_rwlock_t *rw, timespec_t *tsp, int rd_wr)
{
	lwp_mutex_t *mp = NULL;
	kthread_t *t = curthread;
	kthread_t *tp;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	lwp_timer_t lwpt;
	lwpchan_t lwpchan;
	lwpchan_t mlwpchan;
	caddr_t timedwait;
	volatile uint16_t type = 0;
	volatile uint8_t mtype = 0;
	uchar_t mwaiters;
	volatile int error = 0;
	int time_error;
	clock_t tim = -1;
	volatile int locked = 0;
	volatile int mlocked = 0;
	volatile int watched = 0;
	volatile int mwatched = 0;
	label_t ljb;
	volatile int no_lwpchan = 1;
	int imm_timeout = 0;
	int try_flag;
	uint32_t rwstate;
	int acquired = 0;

	/* We only check rw because the mutex is included in it. */
	if ((caddr_t)rw >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	/*
	 * Put the lwp in an orderly state for debugging,
	 * in case we are stopped while sleeping, below.
	 */
	prstop(PR_REQUESTED, 0);

	/* We must only report this error if we are about to sleep (later). */
	timedwait = (caddr_t)tsp;
	if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
	    lwpt.lwpt_imm_timeout) {
		imm_timeout = 1;
		timedwait = NULL;
	}

	(void) new_mstate(t, LMS_USER_LOCK);

	if (on_fault(&ljb)) {
		if (no_lwpchan) {
			error = EFAULT;
			goto out_nodrop;
		}
		if (mlocked)
			lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		/*
		 * Set up another on_fault() for a possible fault
		 * on the user lock accessed at "out_drop".
		 */
		if (on_fault(&ljb)) {
			if (mlocked)
				lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
			error = EFAULT;
			goto out_nodrop;
		}
		error = EFAULT;
		goto out_drop;
	}

	/* Process rd_wr (including sanity check). */
	try_flag = (rd_wr & TRY_FLAG);
	rd_wr &= ~TRY_FLAG;
	if ((rd_wr != READ_LOCK) && (rd_wr != WRITE_LOCK)) {
		error = EINVAL;
		goto out_nodrop;
	}

	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	mp = &rw->mutex;
	fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype);
	fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type);
	suword8_noerr(&mp->mutex_type, mtype);
	suword16_noerr(&rw->rwlock_type, type);

	/* We can only continue for simple USYNC_PROCESS locks. */
	if ((mtype != USYNC_PROCESS) || (type != USYNC_PROCESS)) {
		error = EINVAL;
		goto out_nodrop;
	}

	/* Convert user level mutex, "mp", to a unique lwpchan. */
	if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype,
	    &mlwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out_nodrop;
	}

	/* Convert user level rwlock, "rw", to a unique lwpchan. */
	if (!get_lwpchan(p->p_as, (caddr_t)rw, type,
	    &lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out_nodrop;
	}

	no_lwpchan = 0;
	watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
	mwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);

	/*
	 * lwpchan_lock() ensures that the calling LWP is put to sleep
	 * atomically with respect to a possible wakeup which is a result
	 * of lwp_rwlock_unlock().
	 *
	 * What's misleading is that the LWP is put to sleep after the
	 * rwlock's mutex is released. This is OK as long as the release
	 * operation is also done while holding mlwpchan. The LWP is then
	 * put to sleep when the possibility of pagefaulting or sleeping
	 * has been completely eliminated.
	 */
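	/*
	 * The two channel locks are taken in a fixed order below: the
	 * rwlock's channel (CV pool) first, then the mutex's channel
	 * (MP pool).  Both are held across the state check that follows,
	 * so neither a wakeup from lwp_rwlock_release() nor a mutex
	 * hand-off can be missed between deciding to sleep and sleeping.
	 */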
	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 1;
	lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL);
	mlocked = 1;

	/*
	 * Fetch the current rwlock state.
	 *
	 * The possibility of spurious wake-ups or killed waiters means
	 * rwstate's URW_HAS_WAITERS bit may indicate false positives.
	 * We only fix these if they are important to us.
	 *
	 * Although various error states can be observed here (e.g. the lock
	 * is not held, but there are waiters) we assume these are application
	 * errors and so we take no corrective action.
	 */
	fuword32_noerr(&rw->rwlock_readers, &rwstate);
	/*
	 * We cannot legitimately get here from user-level
	 * without URW_HAS_WAITERS being set.
	 * Set it now to guard against user-level error.
	 */
	rwstate |= URW_HAS_WAITERS;

	/*
	 * We can try only if the lock isn't held by a writer.
	 */
	if (!(rwstate & URW_WRITE_LOCKED)) {
		tp = lwp_queue_waiter(&lwpchan);
		if (tp == NULL) {
			/*
			 * Hmmm, rwstate indicates waiters but there are
			 * none queued. This could just be the result of a
			 * spurious wakeup, so let's ignore it.
			 *
			 * We now have a chance to acquire the lock
			 * uncontended, but this is the last chance for
			 * a writer to acquire the lock without blocking.
			 */
			if (rd_wr == READ_LOCK) {
				rwstate++;
				acquired = 1;
			} else if ((rwstate & URW_READERS_MASK) == 0) {
				rwstate |= URW_WRITE_LOCKED;
				acquired = 1;
			}
		} else if (rd_wr == READ_LOCK) {
			/*
			 * This is the last chance for a reader to acquire
			 * the lock now, but it can only do so if there is
			 * no writer of equal or greater priority at the
			 * head of the queue.
			 *
			 * It is also just possible that there is a reader
			 * at the head of the queue. This may be the result
			 * of a spurious wakeup or an application failure.
			 * In this case we only acquire the lock if we have
			 * equal or greater priority. It is not our job to
			 * release spurious waiters.
			 */
			pri_t our_pri = DISP_PRIO(t);
			pri_t his_pri = DISP_PRIO(tp);

			if ((our_pri > his_pri) || ((our_pri == his_pri) &&
			    !(tp->t_writer & TRW_WANT_WRITE))) {
				rwstate++;
				acquired = 1;
			}
		}
	}

	if (acquired || try_flag || time_error) {
		/*
		 * We're not going to block this time.
		 */
		suword32_noerr(&rw->rwlock_readers, rwstate);
		lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		locked = 0;

		if (acquired) {
			/*
			 * Got the lock!
			 */
			error = 0;
		} else if (try_flag) {
			/*
			 * We didn't get the lock and we're about to block.
			 * If we're doing a trylock, return EBUSY instead.
			 */
			error = EBUSY;
		} else if (time_error) {
			/*
			 * The SUSV3 POSIX spec is very clear that we should
			 * get no error from validating the timer (above)
			 * until we would actually sleep.
			 */
			error = time_error;
		}

		goto out_drop;
	}

	/*
	 * We're about to block, so indicate what kind of waiter we are.
	 */
	t->t_writer = 0;
	if (rd_wr == WRITE_LOCK)
		t->t_writer = TRW_WANT_WRITE;
	suword32_noerr(&rw->rwlock_readers, rwstate);

	/*
	 * Unlock the rwlock's mutex (pagefaults are possible here).
	 */
	set_owner_pid(mp, 0, 0);
	ulock_clear(&mp->mutex_lockw);
	fuword8_noerr(&mp->mutex_waiters, &mwaiters);
	if (mwaiters != 0) {
		/*
		 * Given the locking of mlwpchan around the release of
		 * the mutex and checking for waiters, the following
		 * call to lwp_release() can fail ONLY if the lock
		 * acquirer is interrupted after setting the waiter bit,
		 * calling lwp_block() and releasing mlwpchan.
		 * In this case, it could get pulled off the LWP sleep
		 * queue (via setrun()) before the following call to
		 * lwp_release() occurs, and the lock requestor will
		 * update the waiter bit correctly by re-evaluating it.
		 */
		if (lwp_release(&mlwpchan, &mwaiters, 0))
			suword8_noerr(&mp->mutex_waiters, mwaiters);
	}
	lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
	mlocked = 0;
	no_fault();

	if (mwatched) {
		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
		mwatched = 0;
	}
	if (watched) {
		watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
		watched = 0;
	}

	if (timedwait) {
		/*
		 * If we successfully queue the timeout,
		 * then don't drop t_delay_lock until
		 * we are on the sleep queue (below).
		 */
		mutex_enter(&t->t_delay_lock);
		if (lwp_timer_enqueue(&lwpt) != 0) {
			mutex_exit(&t->t_delay_lock);
			imm_timeout = 1;
			timedwait = NULL;
		}
	}
	t->t_flag |= T_WAITCVSEM;
	lwp_block(&lwpchan);

	/*
	 * Nothing should happen to cause the LWP to go to sleep until after
	 * it returns from swtch().
	 */
	if (timedwait)
		mutex_exit(&t->t_delay_lock);
	locked = 0;
	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
	if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout)
		setrun(t);
	swtch();

	/*
	 * We're back, but we need to work out why. Were we interrupted? Did
	 * we timeout? Were we granted the lock?
	 */
	error = EAGAIN;
	acquired = (t->t_writer & TRW_LOCK_GRANTED);
	t->t_writer = 0;
	t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
	if (timedwait)
		tim = lwp_timer_dequeue(&lwpt);
	if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t))
		error = EINTR;
	else if (imm_timeout || (timedwait && tim == -1))
		error = ETIME;
	lwp->lwp_asleep = 0;
	lwp->lwp_sysabort = 0;
	setallwatch();

	/*
	 * If we were granted the lock we don't care about EINTR or ETIME.
	 */
	if (acquired)
		error = 0;

	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	if (error)
		return (set_errno(error));
	return (0);

out_drop:
	/*
	 * Make sure that the user level lock is dropped before returning
	 * to the caller.
	 */
	if (!mlocked) {
		lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL);
		mlocked = 1;
	}
	set_owner_pid(mp, 0, 0);
	ulock_clear(&mp->mutex_lockw);
	fuword8_noerr(&mp->mutex_waiters, &mwaiters);
	if (mwaiters != 0) {
		/*
		 * See comment above on lock clearing and lwp_release()
		 * success/failure.
		 */
		if (lwp_release(&mlwpchan, &mwaiters, 0))
			suword8_noerr(&mp->mutex_waiters, mwaiters);
	}
	lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
	mlocked = 0;

out_nodrop:
	no_fault();
	if (mwatched)
		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
	if (watched)
		watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);
	if (error)
		return (set_errno(error));
	return (0);
}
/*
 * We enter here holding the user-level mutex but, unlike lwp_rwlock_lock(),
 * we never drop the lock.
 */
static int
lwp_rwlock_unlock(lwp_rwlock_t *rw)
{
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	lwpchan_t lwpchan;
	volatile uint16_t type = 0;
	volatile int error = 0;
	volatile int locked = 0;
	volatile int watched = 0;
	label_t ljb;
	volatile int no_lwpchan = 1;
	uint32_t rwstate;

	/* We only check rw because the mutex is included in it. */
	if ((caddr_t)rw >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	if (on_fault(&ljb)) {
		if (no_lwpchan) {
			error = EFAULT;
			goto out_nodrop;
		}
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		error = EFAULT;
		goto out_nodrop;
	}

	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type);
	suword16_noerr(&rw->rwlock_type, type);

	/* We can only continue for simple USYNC_PROCESS locks. */
	if (type != USYNC_PROCESS) {
		error = EINVAL;
		goto out_nodrop;
	}

	/* Convert user level rwlock, "rw", to a unique lwpchan. */
	if (!get_lwpchan(p->p_as, (caddr_t)rw, type,
	    &lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out_nodrop;
	}

	no_lwpchan = 0;
	watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);

	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 1;

	/*
	 * We can resolve multiple readers (except the last reader) here.
	 * For the last reader or a writer we need lwp_rwlock_release(),
	 * to which we also delegate the task of copying the new rwstate
	 * back to userland (see the comment there).
	 */
	fuword32_noerr(&rw->rwlock_readers, &rwstate);
	if (rwstate & URW_WRITE_LOCKED)
		lwp_rwlock_release(&lwpchan, rw);
	else if ((rwstate & URW_READERS_MASK) > 0) {
		rwstate--;
		if ((rwstate & URW_READERS_MASK) == 0)
			lwp_rwlock_release(&lwpchan, rw);
		else
			suword32_noerr(&rw->rwlock_readers, rwstate);
	}

	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 0;

out_nodrop:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}
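/*
 * A worked example for the unlock path above: if rwstate was
 * (3 | URW_HAS_WAITERS), three readers hold the lock, so we simply store
 * (2 | URW_HAS_WAITERS) back.  If rwstate was (1 | URW_HAS_WAITERS), the
 * caller is the last reader and lwp_rwlock_release() is asked to choose
 * the next holder(s) and to write the new rwstate itself.
 */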
int
lwp_rwlock_sys(int subcode, lwp_rwlock_t *rwlp, timespec_t *tsp)
{
	switch (subcode) {
	case 0:
		return (lwp_rwlock_lock(rwlp, tsp, READ_LOCK));
	case 1:
		return (lwp_rwlock_lock(rwlp, tsp, WRITE_LOCK));
	case 2:
		return (lwp_rwlock_lock(rwlp, NULL, READ_LOCK_TRY));
	case 3:
		return (lwp_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY));
	case 4:
		return (lwp_rwlock_unlock(rwlp));
	}
	return (set_errno(EINVAL));
}
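/*
 * Usage sketch, assuming the subcode ordering shown above: a user-level
 * rw_tryrdlock() that cannot get the lock ends up in
 * lwp_rwlock_lock(rwlp, NULL, READ_LOCK_TRY), which returns EBUSY from
 * the "not going to block" path rather than sleeping, while the timed
 * variants pass a timespec and can return ETIME once the queued timer
 * fires.
 */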
/*
 * Return the owner of the user-level s-object.
 * Since we can't really do this, return NULL.
 */
static kthread_t *
lwpsobj_owner(caddr_t sobj)
{
	return (NULL);
}
/*
 * Wake up a thread asleep on a user-level synchronization
 * object.
 */
static void
lwp_unsleep(kthread_t *t)
{
	ASSERT(THREAD_LOCK_HELD(t));
	if (t->t_wchan0 != NULL) {
		sleepq_head_t *sqh;
		sleepq_t *sqp = t->t_sleepq;

		if (sqp != NULL) {
			sqh = lwpsqhash(&t->t_lwpchan);
			ASSERT(&sqh->sq_queue == sqp);
			sleepq_unsleep(t);
			disp_lock_exit_high(&sqh->sq_lock);
			CL_SETRUN(t);
			return;
		}
	}
	panic("lwp_unsleep: thread %p not on sleepq", (void *)t);
}
/*
 * Change the priority of a thread asleep on a user-level
 * synchronization object. To maintain proper priority order,
 * we:
 *	o dequeue the thread.
 *	o change its priority.
 *	o re-enqueue the thread.
 * Assumption: the thread is locked on entry.
 */
static void
lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip)
{
	ASSERT(THREAD_LOCK_HELD(t));
	if (t->t_wchan0 != NULL) {
		sleepq_t *sqp = t->t_sleepq;

		sleepq_dequeue(t);
		*t_prip = pri;
		sleepq_insert(sqp, t);
	} else
		panic("lwp_change_pri: %p not on a sleep queue", (void *)t);
}
/*
 * Clean up a left-over process-shared robust mutex.
 */
static void
lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg)
{
	uint16_t flag;
	uchar_t waiters;
	label_t ljb;
	pid_t owner_pid;
	lwp_mutex_t *lp;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile struct upimutex *upimutex = NULL;
	volatile int upilocked = 0;

	if ((ent->lwpchan_type & (USYNC_PROCESS | LOCK_ROBUST))
	    != (USYNC_PROCESS | LOCK_ROBUST))
		return;

	lp = (lwp_mutex_t *)ent->lwpchan_addr;
	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL);
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		goto out;
	}

	fuword32_noerr(&lp->mutex_ownerpid, (uint32_t *)&owner_pid);

	if (UPIMUTEX(ent->lwpchan_type)) {
		lwpchan_t lwpchan = ent->lwpchan_lwpchan;
		upib_t *upibp = &UPI_CHAIN(lwpchan);

		if (owner_pid != curproc->p_pid)
			goto out;
		mutex_enter(&upibp->upib_lock);
		upimutex = upi_get(upibp, &lwpchan);
		if (upimutex == NULL || upimutex->upi_owner != curthread) {
			mutex_exit(&upibp->upib_lock);
			goto out;
		}
		mutex_exit(&upibp->upib_lock);
		upilocked = 1;
		flag = lwp_clear_mutex(lp, lockflg);
		suword8_noerr(&lp->mutex_lockw, 0);
		upimutex_unlock((upimutex_t *)upimutex, flag);
	} else {
		lwpchan_lock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL);
		locked = 1;
		/*
		 * Clear the spinners count because one of our
		 * threads could have been spinning for this lock
		 * at user level when the process was suddenly killed.
		 * There is no harm in this since user-level libc code
		 * will adapt to the sudden change in the spinner count.
		 */
		suword8_noerr(&lp->mutex_spinners, 0);
		if (owner_pid != curproc->p_pid) {
			/*
			 * We are not the owner. There may or may not be one.
			 * If there are waiters, we wake up one or all of them.
			 * It doesn't hurt to wake them up in error since
			 * they will just retry the lock and go to sleep
			 * again if necessary.
			 */
			fuword8_noerr(&lp->mutex_waiters, &waiters);
			if (waiters != 0) {	/* there are waiters */
				fuword16_noerr(&lp->mutex_flag, &flag);
				if (flag & LOCK_NOTRECOVERABLE) {
					lwp_release_all(&ent->lwpchan_lwpchan);
					suword8_noerr(&lp->mutex_waiters, 0);
				} else if (lwp_release(&ent->lwpchan_lwpchan,
				    &waiters, 0)) {
					suword8_noerr(&lp->mutex_waiters,
					    waiters);
				}
			}
		} else {
			/*
			 * We are the owner. Release it.
			 */
			(void) lwp_clear_mutex(lp, lockflg);
			ulock_clear(&lp->mutex_lockw);
			fuword8_noerr(&lp->mutex_waiters, &waiters);
			if (waiters &&
			    lwp_release(&ent->lwpchan_lwpchan, &waiters, 0))
				suword8_noerr(&lp->mutex_waiters, waiters);
		}
		lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL);
	}
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
}
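/*
 * The cleanup above is what backs the robust-mutex guarantees: when a
 * process exits holding a registered USYNC_PROCESS|LOCK_ROBUST mutex,
 * the lock word is cleared and the caller's lockflg (typically
 * LOCK_OWNERDEAD or LOCK_UNMAPPED) is folded into mutex_flag by
 * lwp_clear_mutex(), so the next acquirer sees EOWNERDEAD or
 * ELOCKUNMAPPED instead of hanging; once a lock is marked
 * LOCK_NOTRECOVERABLE, all of its waiters are released at once.
 */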
/*
 * Register a process-shared robust mutex in the lwpchan cache.
 */
int
lwp_mutex_register(lwp_mutex_t *lp, caddr_t uaddr)
{
	int error = 0;
	volatile int watched;
	label_t ljb;
	uint8_t type;
	lwpchan_t lwpchan;

	if ((caddr_t)lp >= (caddr_t)USERLIMIT)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	if (on_fault(&ljb)) {
		error = EFAULT;
	} else {
		/*
		 * Force Copy-on-write if necessary and ensure that the
		 * synchronization object resides in read/write memory.
		 * Cause an EFAULT return now if this is not so.
		 */
		fuword8_noerr(&lp->mutex_type, &type);
		suword8_noerr(&lp->mutex_type, type);
		if ((type & (USYNC_PROCESS|LOCK_ROBUST))
		    != (USYNC_PROCESS|LOCK_ROBUST)) {
			error = EINVAL;
		} else if (!lwpchan_get_mapping(curproc->p_as, (caddr_t)lp,
		    uaddr, type, &lwpchan, LWPCHAN_MPPOOL)) {
			error = EFAULT;
		}
	}
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}
/*
 * There is a user-level robust lock registration in libc.
 * Mark it as invalid by storing -1 into the location of the pointer.
 */
static void
lwp_mutex_unregister(void *uaddr)
{
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		(void) sulword(uaddr, (ulong_t)-1);
#ifdef _SYSCALL32_IMPL
	} else {
		(void) suword32(uaddr, (uint32_t)-1);
#endif
	}
}
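/*
 * Storing -1 above poisons the pointer-sized registration slot that
 * user-level code keeps for a robust mutex; both data models are handled
 * so that a 32-bit process on a 64-bit kernel has its slot overwritten
 * with the matching width.
 */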
int
lwp_mutex_trylock(lwp_mutex_t *lp, uintptr_t owner)
{
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	int error = 0;
	volatile int locked = 0;
	volatile int watched = 0;
	label_t ljb;
	volatile uint8_t type = 0;
	uint16_t flag;
	lwpchan_t lwpchan;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	(void) new_mstate(t, LMS_USER_LOCK);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	suword8_noerr(&lp->mutex_type, type);
	if (UPIMUTEX(type)) {
		no_fault();
		error = lwp_upimutex_lock(lp, type, UPIMUTEX_TRY, NULL);
		if (error == 0 || error == EOWNERDEAD ||
		    error == ELOCKUNMAPPED) {
			volatile int locked = error != 0;
			if (on_fault(&ljb)) {
				if (locked)
					error = lwp_upimutex_unlock(lp, type);
				else
					error = EFAULT;
			} else {
				set_owner_pid(lp, owner,
				    (type & USYNC_PROCESS)? p->p_pid : 0);
			}
			no_fault();
		}
		if (error)
			return (set_errno(error));
		return (0);
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	if (type & LOCK_ROBUST) {
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (flag & LOCK_NOTRECOVERABLE) {
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
			error = ENOTRECOVERABLE;
			goto out;
		}
	}

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	if (!ulock_try(&lp->mutex_lockw))
		error = EBUSY;
	else {
		set_owner_pid(lp, owner, (type & USYNC_PROCESS)? p->p_pid : 0);
		if (type & LOCK_ROBUST) {
			fuword16_noerr(&lp->mutex_flag, &flag);
			if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
				if (flag & LOCK_OWNERDEAD)
					error = EOWNERDEAD;
				else if (type & USYNC_PROCESS_ROBUST)
					error = ELOCKUNMAPPED;
				else
					error = EOWNERDEAD;
			}
		}
	}
	locked = 0;
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}
/*
 * Unlock the mutex and unblock any lwps that are trying to acquire it.
 * A blocked lwp resumes and retries to acquire the lock.
 */
int
lwp_mutex_unlock(lwp_mutex_t *lp)
{
	proc_t *p = ttoproc(curthread);
	lwpchan_t lwpchan;
	uchar_t waiters;
	volatile int locked = 0;
	volatile int watched = 0;
	volatile uint8_t type = 0;
	label_t ljb;
	uint16_t flag;
	int error = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}

	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	suword8_noerr(&lp->mutex_type, type);

	if (UPIMUTEX(type)) {
		no_fault();
		error = lwp_upimutex_unlock(lp, type);
		if (error)
			return (set_errno(error));
		return (0);
	}

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	if (type & LOCK_ROBUST) {
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
			flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
			flag |= LOCK_NOTRECOVERABLE;
			suword16_noerr(&lp->mutex_flag, flag);
		}
	}
	set_owner_pid(lp, 0, 0);
	ulock_clear(&lp->mutex_lockw);
	/*
	 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp will
	 * re-try the lock in lwp_mutex_timedlock(). The call to lwp_release()
	 * may fail. If it fails, do not write into the waiter bit.
	 * The call to lwp_release() might fail due to one of three reasons:
	 *
	 *	1. due to the thread which set the waiter bit not actually
	 *	   sleeping since it got the lock on the re-try. The waiter
	 *	   bit will then be correctly updated by that thread. This
	 *	   window may be closed by reading the wait bit again here
	 *	   and not calling lwp_release() at all if it is zero.
	 *	2. the thread which set the waiter bit and went to sleep
	 *	   was woken up by a signal. This time, the waiter recomputes
	 *	   the wait bit in the return with EINTR code.
	 *	3. the waiter bit read by lwp_mutex_wakeup() was in
	 *	   memory that has been re-used after the lock was dropped.
	 *	   In this case, writing into the waiter bit would cause data
	 *	   corruption.
	 */
	fuword8_noerr(&lp->mutex_waiters, &waiters);
	if ((type & LOCK_ROBUST) &&
	    (flag & LOCK_NOTRECOVERABLE)) {
		lwp_release_all(&lwpchan);
		suword8_noerr(&lp->mutex_waiters, 0);
	} else if (lwp_release(&lwpchan, &waiters, 0)) {
		suword8_noerr(&lp->mutex_waiters, waiters);
	}

	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}
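/*
 * An illustrative scenario for the waiter-bit rules above (a sketch of
 * case 2): thread A holds the mutex; thread B sets mutex_waiters, calls
 * lwp_block() and drops the lwpchan lock, but is hit by a signal and
 * pulled off the sleep queue before A unlocks.  A's lwp_release() then
 * finds nobody to wake and fails, so A leaves mutex_waiters untouched,
 * and B recomputes the bit itself on its EINTR return path.  This is why
 * the suword8 of mutex_waiters is guarded by lwp_release()'s return
 * value.
 */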