4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 * Copyright (c) 2012 by Delphix. All rights reserved.
31 #include <sys/thread.h>
33 #include <sys/debug.h>
34 #include <sys/cmn_err.h>
35 #include <sys/systm.h>
36 #include <sys/sobject.h>
37 #include <sys/sleepq.h>
38 #include <sys/cpuvar.h>
39 #include <sys/condvar.h>
40 #include <sys/condvar_impl.h>
41 #include <sys/schedctl.h>
42 #include <sys/procfs.h>
44 #include <sys/callo.h>
47 * CV_MAX_WAITERS is the maximum number of waiters we track; once
48 * the number becomes higher than that, we look at the sleepq to
49 * see whether there are *really* any waiters.
/*
 * Waiter-count limits for a condition variable.  CV_WAITERS_MASK is
 * CV_MAX_WAITERS - 1, i.e. the bits below the cap; this file tests
 * cv_waiters against it in cv_destroy() and cv_signal().
 */
51 #define CV_MAX_WAITERS 1024 /* must be power of 2 */
52 #define CV_WAITERS_MASK (CV_MAX_WAITERS - 1)
55 * Threads don't "own" condition variables.
65 * Unsleep a thread that's blocked on a condition variable.
/*
 * cv_unsleep(t): sobj_ops callback for a thread blocked on a condvar.
 *
 * The condvar is recovered from t->t_wchan and its sleep queue head from
 * SQHASH().  The caller must hold the thread lock of t (asserted).  A
 * wakeup DTrace sched probe is fired for t, and the dispatcher lock of
 * the sleep queue is released with disp_lock_exit_high() at the end.
 *
 * NOTE(review): this listing has lost lines -- the return type, the
 * braces, the condition guarding the panic and the statement controlled
 * by the cv_waiters != CV_MAX_WAITERS test are not visible.  Confirm
 * against the pristine file before relying on this description.
 */
68 cv_unsleep(kthread_t
*t
)
70 condvar_impl_t
*cvp
= (condvar_impl_t
*)t
->t_wchan
;
71 sleepq_head_t
*sqh
= SQHASH(cvp
);
73 ASSERT(THREAD_LOCK_HELD(t
));
76 panic("cv_unsleep: thread %p not on sleepq %p",
77 (void *)t
, (void *)sqh
);
78 DTRACE_SCHED1(wakeup
, kthread_t
*, t
);
80 if (cvp
->cv_waiters
!= CV_MAX_WAITERS
)
82 disp_lock_exit_high(&sqh
->sq_lock
);
87 * Change the priority of a thread that's blocked on a condition variable.
/*
 * cv_change_pri(t, pri, t_prip): sobj_ops callback invoked for a thread
 * that is asleep on a condvar.
 *
 * The caller must hold the thread lock of t, and t->t_sleepq must be the
 * queue that SQHASH() selects for the condvar in t->t_wchan (both are
 * asserted).  The visible code ends by putting t back on that queue with
 * sleepq_insert().
 *
 * NOTE(review): lines are missing from this listing (return type,
 * braces, the guard of the panic, and whatever happens between the panic
 * and sleepq_insert()).  How pri and t_prip are consumed is therefore
 * not visible here -- confirm against the pristine file.
 */
90 cv_change_pri(kthread_t
*t
, pri_t pri
, pri_t
*t_prip
)
92 condvar_impl_t
*cvp
= (condvar_impl_t
*)t
->t_wchan
;
93 sleepq_t
*sqp
= t
->t_sleepq
;
95 ASSERT(THREAD_LOCK_HELD(t
));
96 ASSERT(&SQHASH(cvp
)->sq_queue
== sqp
);
99 panic("cv_change_pri: %p not on sleep queue", (void *)t
);
102 sleepq_insert(sqp
, t
);
106 * The sobj_ops vector exports a set of functions needed when a thread
107 * is asleep on a synchronization object of this type.
/*
 * Operations vector for condvar sleepers: the SOBJ_CV type tag followed
 * by the owner, unsleep and change-priority callbacks.  cv_block() stores
 * its address in t_sobj_ops of the blocking thread.
 *
 * NOTE(review): the closing brace of the initializer is not visible in
 * this listing.
 */
109 static sobj_ops_t cv_sobj_ops
= {
110 SOBJ_CV
, cv_owner
, cv_unsleep
, cv_change_pri
/*
 * cv_init(cvp, name, type, arg): initialize a condition variable.
 *
 * The only visible effect is resetting the cv_waiters count of the
 * condvar_impl_t behind cvp to 0; name, type and arg are not referenced
 * by the visible code.
 *
 * NOTE(review): return type and braces are missing from this listing.
 */
115 cv_init(kcondvar_t
*cvp
, char *name
, kcv_type_t type
, void *arg
)
117 ((condvar_impl_t
*)cvp
)->cv_waiters
= 0;
121 * cv_destroy is not currently needed, but is part of the DDI.
122 * This is in case cv_init ever needs to allocate something for a cv.
/*
 * cv_destroy(cvp): tear down a condition variable.
 *
 * The visible body only asserts that the masked waiter count
 * (cv_waiters & CV_WAITERS_MASK) is zero; nothing is freed here.
 *
 * NOTE(review): return type and braces are missing from this listing.
 */
126 cv_destroy(kcondvar_t
*cvp
)
128 ASSERT((((condvar_impl_t
*)cvp
)->cv_waiters
& CV_WAITERS_MASK
) == 0);
132 * The cv_block() function blocks a thread on a condition variable
133 * by putting it in a hashed sleep queue associated with the
134 * synchronization object.
136 * Threads are taken off the hashed sleep queues via calls to
137 * cv_signal(), cv_broadcast(), or cv_unsleep().
/*
 * cv_block(cvp): queue the current thread as a sleeper on condvar cvp.
 *
 * Visible steps, in order:
 *   - assert that the caller holds the thread lock, is not the idle
 *     thread of this CPU, that CPU_ON_INTR(CPU) is 0, that both wait
 *     channels are NULL and that the thread state is TS_ONPROC;
 *   - clear TS_SIGNALLED and call CL_SLEEP();
 *   - record cvp in t_wchan and &cv_sobj_ops in t_sobj_ops;
 *   - for a thread that has an lwp and a NULL t_intr, switch its
 *     microstate to LMS_SLEEP;
 *   - take the sleep queue lock, then THREAD_SLEEP() and sleepq_insert().
 *
 * The thread lock is not dropped here: per the trailing comment the
 * caller releases sqh->sq_lock later through thread_unlock().
 *
 * NOTE(review): lines are missing from this listing, including the
 * declaration/initialization of sqh and the statement controlled by the
 * cv_waiters < CV_MAX_WAITERS test -- confirm against the pristine file.
 */
140 cv_block(condvar_impl_t
*cvp
)
142 kthread_t
*t
= curthread
;
143 klwp_t
*lwp
= ttolwp(t
);
/* Preconditions on the calling thread and CPU. */
146 ASSERT(THREAD_LOCK_HELD(t
));
147 ASSERT(t
!= CPU
->cpu_idle_thread
);
148 ASSERT(CPU_ON_INTR(CPU
) == 0);
149 ASSERT(t
->t_wchan0
== NULL
&& t
->t_wchan
== NULL
);
150 ASSERT(t
->t_state
== TS_ONPROC
);
/* Forget any earlier signalled state before going to sleep. */
152 t
->t_schedflag
&= ~TS_SIGNALLED
;
153 CL_SLEEP(t
); /* assign kernel priority */
154 t
->t_wchan
= (caddr_t
)cvp
;
155 t
->t_sobj_ops
= &cv_sobj_ops
;
159 * The check for t_intr is to avoid doing the
160 * account for an interrupt thread on the still-pinned
163 if (lwp
!= NULL
&& t
->t_intr
== NULL
) {
165 (void) new_mstate(t
, LMS_SLEEP
);
169 disp_lock_enter_high(&sqh
->sq_lock
);
170 if (cvp
->cv_waiters
< CV_MAX_WAITERS
)
172 ASSERT(cvp
->cv_waiters
<= CV_MAX_WAITERS
);
173 THREAD_SLEEP(t
, &sqh
->sq_lock
);
174 sleepq_insert(&sqh
->sq_queue
, t
);
176 * THREAD_SLEEP() moves curthread->t_lockp to point to the
177 * lock sqh->sq_lock. This lock is later released by the caller
178 * when it calls thread_unlock() on curthread.
/*
 * cv_block_sig(t, cvp): mark thread t as wakeable (T_WAKEABLE) and then
 * block it on cvp via cv_block().
 *
 * The two statements are wrapped in do { } while (0) so the macro expands
 * to exactly one statement.  A bare brace block followed by the semicolon
 * written by the caller would break an unbraced if/else around the call.
 * Existing call sites, which invoke it as a plain statement terminated by
 * a semicolon, are unaffected.
 */
#define	cv_block_sig(t, cvp)	\
	do { (t)->t_flag |= T_WAKEABLE; cv_block(cvp); } while (0)
186 * Block on the indicated condition variable and release the
187 * associated kmutex while blocked.
/*
 * cv_wait(cvp, mp): block the current thread on cvp.
 *
 * Visible steps: assert that no quiesce is active and that the thread
 * has TS_DONT_SWAP set, take the thread lock, enqueue through cv_block()
 * and drop the lock with thread_unlock_nopreempt().
 *
 * NOTE(review): lines are missing from this listing.  The code that
 * releases mp, switches away and reacquires mp is not visible, so mp is
 * never referenced in the fragment below -- confirm against the pristine
 * file.
 */
190 cv_wait(kcondvar_t
*cvp
, kmutex_t
*mp
)
194 ASSERT(!quiesce_active
);
196 ASSERT(curthread
->t_schedflag
& TS_DONT_SWAP
);
197 thread_lock(curthread
); /* lock the thread */
198 cv_block((condvar_impl_t
*)cvp
);
199 thread_unlock_nopreempt(curthread
); /* unlock the waiters field */
211 * This mutex is acquired and released in order to make sure that
212 * the wakeup does not happen before the block itself happens.
214 mutex_enter(&t
->t_wait_mutex
);
215 mutex_exit(&t
->t_wait_mutex
);
220 * Same as cv_wait except the thread will unblock at 'tim'
221 * (an absolute time) if it hasn't already unblocked.
223 * Returns the amount of time left from the original 'tim' value
224 * when it was unblocked.
/*
 * cv_timedwait(cvp, mp, tim): wait on cvp until the absolute lbolt time
 * tim.
 *
 * The visible code samples ddi_get_lbolt(), converts the remaining ticks
 * (tim - now) to nanoseconds and hands off to cv_timedwait_hires() with a
 * resolution of nsec_per_tick and flags 0, returning its result.
 *
 * NOTE(review): lines are missing from this listing (return type,
 * braces, the declaration of hrtim and whatever sits between the lbolt
 * sample and the conversion) -- confirm against the pristine file.
 */
227 cv_timedwait(kcondvar_t
*cvp
, kmutex_t
*mp
, clock_t tim
)
230 clock_t now
= ddi_get_lbolt();
235 hrtim
= TICK_TO_NSEC(tim
- now
);
236 return (cv_timedwait_hires(cvp
, mp
, hrtim
, nsec_per_tick
, 0));
240 * Same as cv_timedwait() except that the third argument is a relative
241 * timeout value, as opposed to an absolute one. There is also a fourth
242 * argument that specifies how accurately the timeout must be implemented.
/*
 * cv_reltimedwait(cvp, mp, delta, res): wait on cvp for a relative
 * timeout of delta ticks.
 *
 * res must satisfy TIME_RES_VALID() (asserted) and indexes the time_res
 * table to obtain the resolution passed to cv_timedwait_hires().  delta
 * is converted to nanoseconds with TICK_TO_NSEC(); a negative result is
 * special-cased.
 *
 * NOTE(review): the statement executed for a negative conversion result
 * is not visible in this listing, nor are the return type, braces and the
 * declaration of exp -- confirm against the pristine file.
 */
245 cv_reltimedwait(kcondvar_t
*cvp
, kmutex_t
*mp
, clock_t delta
, time_res_t res
)
249 ASSERT(TIME_RES_VALID(res
));
254 if ((exp
= TICK_TO_NSEC(delta
)) < 0)
257 return (cv_timedwait_hires(cvp
, mp
, exp
, time_res
[res
], 0));
/*
 * cv_timedwait_hires(cvp, mp, tim, res, flag): wait on cvp with a
 * high-resolution timeout.
 *
 * Visible steps: assert that no quiesce is active; compute limit as
 * gethrtime() when CALLOUT_FLAG_ABSOLUTE is set in flag, else 0; under
 * t_wait_mutex, arm a CALLOUT_REALTIME callout that runs cv_wakeup on the
 * current thread, then lock the thread, cv_block() and unlock; after the
 * wait, sample TS_SIGNALLED and cancel the callout with
 * untimeout_default(), whose result becomes timeleft.
 *
 * NOTE(review): lines are missing from this listing -- local
 * declarations, the remaining arguments of timeout_generic(), the
 * handling of mp, the use of limit, the statement under the final
 * if (signalled), and the return.  Confirm against the pristine file.
 */
261 cv_timedwait_hires(kcondvar_t
*cvp
, kmutex_t
*mp
, hrtime_t tim
,
262 hrtime_t res
, int flag
)
264 kthread_t
*t
= curthread
;
272 ASSERT(!quiesce_active
);
274 limit
= (flag
& CALLOUT_FLAG_ABSOLUTE
) ? gethrtime() : 0;
277 mutex_enter(&t
->t_wait_mutex
);
278 id
= timeout_generic(CALLOUT_REALTIME
, (void (*)(void *))cv_wakeup
, t
,
280 thread_lock(t
); /* lock the thread */
281 cv_block((condvar_impl_t
*)cvp
);
282 thread_unlock_nopreempt(t
);
283 mutex_exit(&t
->t_wait_mutex
);
286 signalled
= (t
->t_schedflag
& TS_SIGNALLED
);
288 * Get the time left. untimeout() returns -1 if the timeout has
289 * occured or the time remaining. If the time remaining is zero,
290 * the timeout has occured between when we were awoken and
291 * we called untimeout. We will treat this as if the timeout
292 * has occured and set timeleft to -1.
294 timeleft
= untimeout_default(id
, 0);
298 if (signalled
) /* avoid consuming the cv_signal() */
/*
 * cv_wait_sig(cvp, mp): wait on cvp in a way that signals can interrupt.
 *
 * Visible flow:
 *   - a thread with no lwp, or in an SSYS process, takes a separate
 *     early path (its body is not visible here);
 *   - otherwise t_intr must be NULL and TS_DONT_SWAP must be set;
 *   - schedctl_cancel_pending() is sampled and lwp_sysabort cleared;
 *   - the thread blocks through cv_block_sig(), which sets T_WAKEABLE;
 *   - ISSIG(JUSTLOOKING), MUSTRETURN() and cancel_pending are checked
 *     right after the thread lock is dropped;
 *   - after the wait, TS_SIGNALLED is sampled, T_WAKEABLE cleared, and
 *     pending signals, lwp_sysabort, MUSTRETURN() and cancellation are
 *     examined to settle rval;
 *   - when rval is 0 and the thread had been signalled, a final step
 *     avoids consuming the cv_signal().
 *
 * NOTE(review): lines are missing from this listing -- declarations,
 * most branch bodies, the handling of mp and the return statement.
 * Confirm against the pristine file.
 */
305 cv_wait_sig(kcondvar_t
*cvp
, kmutex_t
*mp
)
307 kthread_t
*t
= curthread
;
308 proc_t
*p
= ttoproc(t
);
309 klwp_t
*lwp
= ttolwp(t
);
316 ASSERT(!quiesce_active
);
319 * Threads in system processes don't process signals. This is
320 * true both for standard threads of system processes and for
321 * interrupt threads which have borrowed their pinned thread's LWP.
323 if (lwp
== NULL
|| (p
->p_flag
& SSYS
)) {
327 ASSERT(t
->t_intr
== NULL
);
329 ASSERT(curthread
->t_schedflag
& TS_DONT_SWAP
);
330 cancel_pending
= schedctl_cancel_pending();
332 lwp
->lwp_sysabort
= 0;
334 cv_block_sig(t
, (condvar_impl_t
*)cvp
);
335 thread_unlock_nopreempt(t
);
337 if (ISSIG(t
, JUSTLOOKING
) || MUSTRETURN(p
, t
) || cancel_pending
)
339 /* ASSERT(no locks are held) */
341 signalled
= (t
->t_schedflag
& TS_SIGNALLED
);
342 t
->t_flag
&= ~T_WAKEABLE
;
344 if (ISSIG_PENDING(t
, lwp
, p
)) {
350 if (lwp
->lwp_sysabort
|| MUSTRETURN(p
, t
))
352 if (rval
!= 0 && cancel_pending
) {
353 schedctl_cancel_eintr();
357 lwp
->lwp_sysabort
= 0;
358 if (rval
== 0 && signalled
) /* avoid consuming the cv_signal() */
/*
 * cv_timedwait_sig_hires(cvp, mp, tim, res, flag): signal-interruptible
 * wait on cvp with a high-resolution timeout.
 *
 * Visible flow:
 *   - a thread with no lwp, or in an SSYS process, is delegated to
 *     cv_timedwait_hires() with the same arguments;
 *   - otherwise t_intr must be NULL; limit is gethrtime() when
 *     CALLOUT_FLAG_ABSOLUTE is set in flag, else 0;
 *   - schedctl_cancel_pending() is sampled, and under t_wait_mutex a
 *     CALLOUT_REALTIME callout running cv_wakeup on this thread is armed
 *     before the thread blocks through cv_block_sig();
 *   - after the wait, TS_SIGNALLED is sampled, T_WAKEABLE cleared and the
 *     callout cancelled with untimeout_default(), whose result seeds
 *     rval;
 *   - pending signals, lwp_sysabort, MUSTRETURN() and cancellation are
 *     then examined, and when rval <= 0 and the thread had been signalled
 *     a final step avoids consuming the cv_signal().
 *
 * Presumably the return value follows the 0 / -1 / >0 convention that
 * this file documents for cv_timedwait_sig() -- TODO confirm.
 *
 * NOTE(review): lines are missing from this listing -- declarations, the
 * remaining arguments of timeout_generic(), most branch bodies, the use
 * of limit, the handling of mp and the return statement.  Confirm
 * against the pristine file.
 */
364 cv_timedwait_sig_hires(kcondvar_t
*cvp
, kmutex_t
*mp
, hrtime_t tim
,
365 hrtime_t res
, int flag
)
367 kthread_t
*t
= curthread
;
368 proc_t
*p
= ttoproc(t
);
369 klwp_t
*lwp
= ttolwp(t
);
370 int cancel_pending
= 0;
378 ASSERT(!quiesce_active
);
381 * Threads in system processes don't process signals. This is
382 * true both for standard threads of system processes and for
383 * interrupt threads which have borrowed their pinned thread's LWP.
385 if (lwp
== NULL
|| (p
->p_flag
& SSYS
))
386 return (cv_timedwait_hires(cvp
, mp
, tim
, res
, flag
));
387 ASSERT(t
->t_intr
== NULL
);
390 * If tim is less than or equal to current hrtime, then the timeout
391 * has already occured. So just check to see if there is a signal
392 * pending. If so return 0 indicating that there is a signal pending.
393 * Else return -1 indicating that the timeout occured. No need to
396 limit
= (flag
& CALLOUT_FLAG_ABSOLUTE
) ? gethrtime() : 0;
399 lwp
->lwp_sysabort
= 0;
405 * Set the timeout and wait.
407 cancel_pending
= schedctl_cancel_pending();
408 mutex_enter(&t
->t_wait_mutex
);
409 id
= timeout_generic(CALLOUT_REALTIME
, (void (*)(void *))cv_wakeup
, t
,
412 lwp
->lwp_sysabort
= 0;
414 cv_block_sig(t
, (condvar_impl_t
*)cvp
);
415 thread_unlock_nopreempt(t
);
416 mutex_exit(&t
->t_wait_mutex
);
418 if (ISSIG(t
, JUSTLOOKING
) || MUSTRETURN(p
, t
) || cancel_pending
)
420 /* ASSERT(no locks are held) */
422 signalled
= (t
->t_schedflag
& TS_SIGNALLED
);
423 t
->t_flag
&= ~T_WAKEABLE
;
426 * Untimeout the thread. untimeout() returns -1 if the timeout has
427 * occured or the time remaining. If the time remaining is zero,
428 * the timeout has occured between when we were awoken and
429 * we called untimeout. We will treat this as if the timeout
430 * has occured and set rval to -1.
432 rval
= untimeout_default(id
, 0);
438 * Check to see if a signal is pending. If so, regardless of whether
439 * or not we were awoken due to the signal, the signal is now pending
440 * and a return of 0 has the highest priority.
443 if (ISSIG_PENDING(t
, lwp
, p
)) {
449 if (lwp
->lwp_sysabort
|| MUSTRETURN(p
, t
))
451 if (rval
!= 0 && cancel_pending
) {
452 schedctl_cancel_eintr();
456 lwp
->lwp_sysabort
= 0;
457 if (rval
<= 0 && signalled
) /* avoid consuming the cv_signal() */
464 * Function result in order of precedence:
465 * 0 if a signal was received
466 * -1 if timeout occurred
467 * >0 if awakened via cv_signal() or cv_broadcast().
468 * (returns time remaining)
470 * cv_timedwait_sig() is now part of the DDI.
472 * This function is now just a wrapper for cv_timedwait_sig_hires().
/*
 * cv_timedwait_sig(cvp, mp, tim): signal-interruptible wait on cvp until
 * the absolute lbolt time tim.
 *
 * Thin wrapper: converts the ticks remaining (tim - ddi_get_lbolt()) to
 * nanoseconds and returns cv_timedwait_sig_hires() called with a
 * resolution of nsec_per_tick and flags 0.
 *
 * NOTE(review): return type, braces and the declaration of hrtim are
 * missing from this listing.
 */
475 cv_timedwait_sig(kcondvar_t
*cvp
, kmutex_t
*mp
, clock_t tim
)
479 hrtim
= TICK_TO_NSEC(tim
- ddi_get_lbolt());
480 return (cv_timedwait_sig_hires(cvp
, mp
, hrtim
, nsec_per_tick
, 0));
484 * Wait until the specified time.
485 * If tim == -1, waits without timeout using cv_wait_sig_swap().
/*
 * cv_timedwait_sig_hrtime(cvp, mp, tim): signal-interruptible wait until
 * the hrtime value tim.
 *
 * Two visible outcomes: an untimed wait through cv_wait_sig_swap(), or a
 * timed wait through cv_timedwait_sig_hires() with resolution 1 and the
 * flags CALLOUT_FLAG_ABSOLUTE | CALLOUT_FLAG_ROUNDUP.
 *
 * NOTE(review): the condition selecting the untimed path is not visible
 * in this listing (the preceding comment says tim == -1), nor are the
 * return type and braces -- confirm against the pristine file.
 */
488 cv_timedwait_sig_hrtime(kcondvar_t
*cvp
, kmutex_t
*mp
, hrtime_t tim
)
491 return (cv_wait_sig_swap(cvp
, mp
));
493 return (cv_timedwait_sig_hires(cvp
, mp
, tim
, 1,
494 CALLOUT_FLAG_ABSOLUTE
| CALLOUT_FLAG_ROUNDUP
));
499 * Same as cv_timedwait_sig() except that the third argument is a relative
500 * timeout value, as opposed to an absolute one. There is also a fourth
501 * argument that specifies how accurately the timeout must be implemented.
/*
 * cv_reltimedwait_sig(cvp, mp, delta, ...): signal-interruptible wait on
 * cvp for a relative timeout of delta ticks.
 *
 * res must satisfy TIME_RES_VALID() (asserted) and indexes the time_res
 * table for the resolution passed to cv_timedwait_sig_hires().  delta is
 * converted with TICK_TO_NSEC(); a negative result is special-cased.
 *
 * NOTE(review): the tail of the parameter list (where res is declared),
 * the statement executed for a negative conversion result, the return
 * type and the braces are not visible in this listing -- confirm against
 * the pristine file.
 */
504 cv_reltimedwait_sig(kcondvar_t
*cvp
, kmutex_t
*mp
, clock_t delta
,
509 ASSERT(TIME_RES_VALID(res
));
512 if ((exp
= TICK_TO_NSEC(delta
)) < 0)
516 return (cv_timedwait_sig_hires(cvp
, mp
, exp
, time_res
[res
], 0));
520 * Like cv_wait_sig_swap but allows the caller to indicate (with a
521 * non-NULL sigret) that they will take care of signalling the cv
522 * after wakeup, if necessary. This is a vile hack that should only
523 * be used when no other option is available; almost all callers
524 * should just use cv_wait_sig_swap (which takes care of the cv_signal
525 * stuff automatically) instead.
/*
 * cv_wait_sig_swap_core(cvp, mp, sigret): signal-interruptible wait
 * during which the thread may be swapped out.
 *
 * Visible flow:
 *   - a thread with no lwp, or in an SSYS process, takes a separate
 *     early path (its body is not visible here);
 *   - otherwise t_intr must be NULL; schedctl_cancel_pending() is
 *     sampled and lwp_sysabort cleared;
 *   - t_kpri_req is zeroed, the thread blocks through cv_block_sig(),
 *     and TS_DONT_SWAP is cleared before the thread lock is dropped;
 *   - after the wait, TS_SIGNALLED is sampled, T_WAKEABLE cleared and
 *     TS_DONT_SWAP asserted to be set again;
 *   - pending signals, lwp_sysabort, MUSTRETURN() and cancellation are
 *     examined to settle rval;
 *   - finally either the signalled state is stored through sigret or
 *     cv_signal(cvp) is called to avoid consuming the cv_signal().
 *
 * NOTE(review): lines are missing from this listing -- declarations,
 * most branch bodies, the conditions choosing between the sigret store
 * and cv_signal(), the handling of mp and the return statement.  Confirm
 * against the pristine file.
 */
528 cv_wait_sig_swap_core(kcondvar_t
*cvp
, kmutex_t
*mp
, int *sigret
)
530 kthread_t
*t
= curthread
;
531 proc_t
*p
= ttoproc(t
);
532 klwp_t
*lwp
= ttolwp(t
);
541 * Threads in system processes don't process signals. This is
542 * true both for standard threads of system processes and for
543 * interrupt threads which have borrowed their pinned thread's LWP.
545 if (lwp
== NULL
|| (p
->p_flag
& SSYS
)) {
549 ASSERT(t
->t_intr
== NULL
);
551 cancel_pending
= schedctl_cancel_pending();
553 lwp
->lwp_sysabort
= 0;
555 t
->t_kpri_req
= 0; /* don't need kernel priority */
556 cv_block_sig(t
, (condvar_impl_t
*)cvp
);
557 /* I can be swapped now */
558 curthread
->t_schedflag
&= ~TS_DONT_SWAP
;
559 thread_unlock_nopreempt(t
);
561 if (ISSIG(t
, JUSTLOOKING
) || MUSTRETURN(p
, t
) || cancel_pending
)
563 /* ASSERT(no locks are held) */
565 signalled
= (t
->t_schedflag
& TS_SIGNALLED
);
566 t
->t_flag
&= ~T_WAKEABLE
;
567 /* TS_DONT_SWAP set by disp() */
568 ASSERT(curthread
->t_schedflag
& TS_DONT_SWAP
);
570 if (ISSIG_PENDING(t
, lwp
, p
)) {
576 if (lwp
->lwp_sysabort
|| MUSTRETURN(p
, t
))
578 if (rval
!= 0 && cancel_pending
) {
579 schedctl_cancel_eintr();
583 lwp
->lwp_sysabort
= 0;
586 *sigret
= signalled
; /* just tell the caller */
588 cv_signal(cvp
); /* avoid consuming the cv_signal() */
594 * Same as cv_wait_sig but the thread can be swapped out while waiting.
595 * This should only be used when we know we aren't holding any locks.
/*
 * cv_wait_sig_swap(cvp, mp): convenience wrapper that returns
 * cv_wait_sig_swap_core(cvp, mp, NULL), i.e. with no sigret pointer.
 *
 * NOTE(review): return type and braces are missing from this listing.
 */
598 cv_wait_sig_swap(kcondvar_t
*cvp
, kmutex_t
*mp
)
600 return (cv_wait_sig_swap_core(cvp
, mp
, NULL
));
/*
 * cv_signal(cvp): wake one thread waiting on cvp, if any.
 *
 * Nothing is done unless cv_waiters is positive.  Otherwise the sleep
 * queue selected by SQHASH() is locked with disp_lock_enter() and one
 * of two paths is taken:
 *   - masked count non-zero (cv_waiters & CV_WAITERS_MASK): wake one
 *     sleeper with sleepq_wakeone_chan() and keep the returned thread
 *     in t;
 *   - masked count zero: still call sleepq_wakeone_chan(), and treat a
 *     NULL result specially.
 * The queue lock is dropped with disp_lock_exit() before returning.
 *
 * NOTE(review): lines are missing from this listing -- the declaration
 * of t, what is done with t, the body of the NULL-result branch, any
 * update of cv_waiters, the return type and braces.  Confirm against the
 * pristine file.
 */
604 cv_signal(kcondvar_t
*cvp
)
606 condvar_impl_t
*cp
= (condvar_impl_t
*)cvp
;
608 /* make sure the cv_waiters field looks sane */
609 ASSERT(cp
->cv_waiters
<= CV_MAX_WAITERS
);
610 if (cp
->cv_waiters
> 0) {
611 sleepq_head_t
*sqh
= SQHASH(cp
);
612 disp_lock_enter(&sqh
->sq_lock
);
613 ASSERT(CPU_ON_INTR(CPU
) == 0);
614 if (cp
->cv_waiters
& CV_WAITERS_MASK
) {
617 t
= sleepq_wakeone_chan(&sqh
->sq_queue
, cp
);
619 * If cv_waiters is non-zero (and less than
620 * CV_MAX_WAITERS) there should be a thread
624 } else if (sleepq_wakeone_chan(&sqh
->sq_queue
, cp
) == NULL
) {
627 disp_lock_exit(&sqh
->sq_lock
);
/*
 * cv_broadcast(cvp): wake every thread waiting on cvp.
 *
 * Nothing is done unless cv_waiters is positive.  Otherwise the sleep
 * queue selected by SQHASH() is locked with disp_lock_enter(), all
 * sleepers on this channel are woken with sleepq_wakeall_chan(), and the
 * lock is released with disp_lock_exit().
 *
 * NOTE(review): lines are missing from this listing (any update of
 * cv_waiters between the wakeup and the unlock, the return type and
 * braces) -- confirm against the pristine file.
 */
632 cv_broadcast(kcondvar_t
*cvp
)
634 condvar_impl_t
*cp
= (condvar_impl_t
*)cvp
;
636 /* make sure the cv_waiters field looks sane */
637 ASSERT(cp
->cv_waiters
<= CV_MAX_WAITERS
);
638 if (cp
->cv_waiters
> 0) {
639 sleepq_head_t
*sqh
= SQHASH(cp
);
640 disp_lock_enter(&sqh
->sq_lock
);
641 ASSERT(CPU_ON_INTR(CPU
) == 0);
642 sleepq_wakeall_chan(&sqh
->sq_queue
, cp
);
644 disp_lock_exit(&sqh
->sq_lock
);
649 * Same as cv_wait(), but wakes up (after wakeup_time milliseconds) to check
650 * for requests to stop, like cv_wait_sig() but without dealing with signals.
651 * This is a horrible kludge. It is evil. It is vile. It is swill.
652 * If your code has to call this function then your code is the same.
/*
 * cv_wait_stop(cvp, mp, wakeup_time): wait on cvp, waking after
 * wakeup_time milliseconds to honor pending stop requests.
 *
 * Visible flow:
 *   - a thread with no lwp, or in an SSYS process, takes a separate
 *     early path (its body is not visible here);
 *   - otherwise t_intr must be NULL; the deadline tim is the current
 *     lbolt plus MSEC_TO_TICK(wakeup_time);
 *   - under t_wait_mutex, a realtime callout running cv_wakeup on this
 *     thread is armed for the ticks remaining until tim, then the thread
 *     blocks via cv_block();
 *   - after the wait the callout is cancelled and its result ignored;
 *   - with p_lock held, while lwp_nostop is 0 and SEXITLWPS is clear,
 *     the thread checks in turn for TP_PAUSE, TP_CHKPT, SHOLDFORK1 /
 *     SHOLDWATCH / TP_HOLDLWP, TP_PRSTOP and a job-control stop signal
 *     (agent thread excluded), calling stop() with the matching reason;
 *   - p_lock is released at the end.
 *
 * NOTE(review): lines are missing from this listing -- declarations, the
 * handling of mp, how the loop continues or terminates after each
 * stop(), closing braces and the return.  Confirm against the pristine
 * file.
 */
655 cv_wait_stop(kcondvar_t
*cvp
, kmutex_t
*mp
, int wakeup_time
)
657 kthread_t
*t
= curthread
;
658 klwp_t
*lwp
= ttolwp(t
);
659 proc_t
*p
= ttoproc(t
);
667 * Threads in system processes don't process signals. This is
668 * true both for standard threads of system processes and for
669 * interrupt threads which have borrowed their pinned thread's LWP.
671 if (lwp
== NULL
|| (p
->p_flag
& SSYS
)) {
675 ASSERT(t
->t_intr
== NULL
);
678 * Wakeup in wakeup_time milliseconds, i.e., human time.
680 tim
= ddi_get_lbolt() + MSEC_TO_TICK(wakeup_time
);
681 mutex_enter(&t
->t_wait_mutex
);
682 id
= realtime_timeout_default((void (*)(void *))cv_wakeup
, t
,
683 tim
- ddi_get_lbolt());
684 thread_lock(t
); /* lock the thread */
685 cv_block((condvar_impl_t
*)cvp
);
686 thread_unlock_nopreempt(t
);
687 mutex_exit(&t
->t_wait_mutex
);
689 /* ASSERT(no locks are held); */
691 (void) untimeout_default(id
, 0);
694 * Check for reasons to stop, if lwp_nostop is not true.
695 * See issig_forreal() for explanations of the various stops.
697 mutex_enter(&p
->p_lock
);
698 while (lwp
->lwp_nostop
== 0 && !(p
->p_flag
& SEXITLWPS
)) {
700 * Hold the lwp here for watchpoint manipulation.
702 if (t
->t_proc_flag
& TP_PAUSE
) {
703 stop(PR_SUSPENDED
, SUSPEND_PAUSE
);
709 if (t
->t_proc_flag
& TP_CHKPT
) {
710 stop(PR_CHECKPOINT
, 0);
714 * Honor fork1(), watchpoint activity (remapping a page),
715 * and lwp_suspend() requests.
717 if ((p
->p_flag
& (SHOLDFORK1
|SHOLDWATCH
)) ||
718 (t
->t_proc_flag
& TP_HOLDLWP
)) {
719 stop(PR_SUSPENDED
, SUSPEND_NORMAL
);
723 * Honor /proc requested stop.
725 if (t
->t_proc_flag
& TP_PRSTOP
) {
726 stop(PR_REQUESTED
, 0);
729 * If some lwp in the process has already stopped
730 * showing PR_JOBCONTROL, stop in sympathy with it.
732 if (p
->p_stopsig
&& t
!= p
->p_agenttp
) {
733 stop(PR_JOBCONTROL
, p
->p_stopsig
);
738 mutex_exit(&p
->p_lock
);
743 * Like cv_timedwait_sig(), but takes an absolute hires future time
744 * rather than a future time in clock ticks. Will not return showing
745 * that a timeout occurred until the future time is passed.
746 * If 'when' is a NULL pointer, no timeout will occur.
748 * Function result in order of precedence:
749 * 0 if a signal was received
750 * -1 if timeout occurred
751 * >0 if awakened via cv_signal() or cv_broadcast()
752 * or by a spurious wakeup.
753 * (might return time remaining)
754 * As a special test, if someone abruptly resets the system time
755 * (but not through adjtime(2); drifting of the clock is allowed and
756 * expected [see timespectohz_adj()]), then we force a return of -1
757 * so the caller can return a premature timeout to the calling process
758 * so it can reevaluate the situation in light of the new system time.
759 * (The system clock has been reset if timecheck != timechanged.)
761 * Generally, cv_timedwait_sig_hrtime() should be used instead of this
762 * routine. It waits based on hrtime rather than wall-clock time and therefore
763 * does not need to deal with the time changing.
766 cv_waituntil_sig(kcondvar_t
*cvp
, kmutex_t
*mp
,
767 timestruc_t
*when
, int timecheck
)
775 return (cv_wait_sig_swap(cvp
, mp
));
779 timespecsub(&delta
, &now
);
780 if (delta
.tv_sec
< 0 || (delta
.tv_sec
== 0 && delta
.tv_nsec
== 0)) {
782 * We have already reached the absolute future time.
783 * Call cv_timedwait_sig() just to check for signals.
784 * We will return immediately with either 0 or -1.
786 rval
= cv_timedwait_sig_hires(cvp
, mp
, 0, 1, 0);
788 if (timecheck
== timechanged
) {
790 * Make sure that the interval is at least one tick.
791 * This is to prevent a user from flooding the system
792 * with very small, high resolution timers.
794 interval
= ts2hrt(&delta
);
795 if (interval
< nsec_per_tick
)
796 interval
= nsec_per_tick
;
797 rval
= cv_timedwait_sig_hires(cvp
, mp
, interval
, 1,
798 CALLOUT_FLAG_HRESTIME
);
801 * Someone reset the system time;
802 * just force an immediate timeout.
806 if (rval
== -1 && timecheck
== timechanged
) {
808 * Even though cv_timedwait_sig() returned showing a
809 * timeout, the future time may not have passed yet.
810 * If not, change rval to indicate a normal wakeup.
814 timespecsub(&delta
, &now
);
815 if (delta
.tv_sec
> 0 || (delta
.tv_sec
== 0 &&