4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 * Copyright (c) 2012 by Delphix. All rights reserved.
31 #include <sys/thread.h>
33 #include <sys/debug.h>
34 #include <sys/cmn_err.h>
35 #include <sys/systm.h>
36 #include <sys/sobject.h>
37 #include <sys/sleepq.h>
38 #include <sys/cpuvar.h>
39 #include <sys/condvar.h>
40 #include <sys/condvar_impl.h>
41 #include <sys/schedctl.h>
42 #include <sys/procfs.h>
44 #include <sys/callo.h>
47 * CV_MAX_WAITERS is the maximum number of waiters we track; once
48 * the number becomes higher than that, we look at the sleepq to
49 * see whether there are *really* any waiters.
51 #define CV_MAX_WAITERS 1024 /* must be power of 2 */
52 #define CV_WAITERS_MASK (CV_MAX_WAITERS - 1)
55 * Threads don't "own" condition variables.
65 * Unsleep a thread that's blocked on a condition variable.
68 cv_unsleep(kthread_t
*t
)
70 condvar_impl_t
*cvp
= (condvar_impl_t
*)t
->t_wchan
;
71 sleepq_head_t
*sqh
= SQHASH(cvp
);
73 ASSERT(THREAD_LOCK_HELD(t
));
76 panic("cv_unsleep: thread %p not on sleepq %p",
77 (void *)t
, (void *)sqh
);
78 DTRACE_SCHED1(wakeup
, kthread_t
*, t
);
80 if (cvp
->cv_waiters
!= CV_MAX_WAITERS
)
82 disp_lock_exit_high(&sqh
->sq_lock
);
87 * Change the priority of a thread that's blocked on a condition variable.
90 cv_change_pri(kthread_t
*t
, pri_t pri
, pri_t
*t_prip
)
92 condvar_impl_t
*cvp
= (condvar_impl_t
*)t
->t_wchan
;
93 sleepq_t
*sqp
= t
->t_sleepq
;
95 ASSERT(THREAD_LOCK_HELD(t
));
96 ASSERT(&SQHASH(cvp
)->sq_queue
== sqp
);
99 panic("cv_change_pri: %p not on sleep queue", (void *)t
);
102 sleepq_insert(sqp
, t
);
106 * The sobj_ops vector exports a set of functions needed when a thread
107 * is asleep on a synchronization object of this type.
109 static sobj_ops_t cv_sobj_ops
= {
110 SOBJ_CV
, cv_owner
, cv_unsleep
, cv_change_pri
115 cv_init(kcondvar_t
*cvp
, char *name
, kcv_type_t type
, void *arg
)
117 ((condvar_impl_t
*)cvp
)->cv_waiters
= 0;
121 * cv_destroy is not currently needed, but is part of the DDI.
122 * This is in case cv_init ever needs to allocate something for a cv.
126 cv_destroy(kcondvar_t
*cvp
)
128 ASSERT((((condvar_impl_t
*)cvp
)->cv_waiters
& CV_WAITERS_MASK
) == 0);
132 * The cv_block() function blocks a thread on a condition variable
133 * by putting it in a hashed sleep queue associated with the
134 * synchronization object.
136 * Threads are taken off the hashed sleep queues via calls to
137 * cv_signal(), cv_broadcast(), or cv_unsleep().
140 cv_block(condvar_impl_t
*cvp
)
142 kthread_t
*t
= curthread
;
143 klwp_t
*lwp
= ttolwp(t
);
146 ASSERT(THREAD_LOCK_HELD(t
));
147 ASSERT(t
!= CPU
->cpu_idle_thread
);
148 ASSERT(CPU_ON_INTR(CPU
) == 0);
149 ASSERT(t
->t_wchan0
== NULL
&& t
->t_wchan
== NULL
);
150 ASSERT(t
->t_state
== TS_ONPROC
);
152 t
->t_schedflag
&= ~TS_SIGNALLED
;
153 CL_SLEEP(t
); /* assign kernel priority */
154 t
->t_wchan
= (caddr_t
)cvp
;
155 t
->t_sobj_ops
= &cv_sobj_ops
;
159 * The check for t_intr is to avoid doing the
160 * accounting for an interrupt thread on the still-pinned
163 if (lwp
!= NULL
&& t
->t_intr
== NULL
) {
165 (void) new_mstate(t
, LMS_SLEEP
);
169 disp_lock_enter_high(&sqh
->sq_lock
);
170 if (cvp
->cv_waiters
< CV_MAX_WAITERS
)
172 ASSERT(cvp
->cv_waiters
<= CV_MAX_WAITERS
);
173 THREAD_SLEEP(t
, &sqh
->sq_lock
);
174 sleepq_insert(&sqh
->sq_queue
, t
);
176 * THREAD_SLEEP() moves curthread->t_lockp to point to the
177 * lock sqh->sq_lock. This lock is later released by the caller
178 * when it calls thread_unlock() on curthread.
182 #define cv_block_sig(t, cvp) \
183 { (t)->t_flag |= T_WAKEABLE; cv_block(cvp); }
186 * Block on the indicated condition variable and release the
187 * associated kmutex while blocked.
190 cv_wait(kcondvar_t
*cvp
, kmutex_t
*mp
)
194 ASSERT(!quiesce_active
);
196 thread_lock(curthread
); /* lock the thread */
197 cv_block((condvar_impl_t
*)cvp
);
198 thread_unlock_nopreempt(curthread
); /* unlock the waiters field */
210 * This mutex is acquired and released in order to make sure that
211 * the wakeup does not happen before the block itself happens.
213 mutex_enter(&t
->t_wait_mutex
);
214 mutex_exit(&t
->t_wait_mutex
);
219 * Same as cv_wait except the thread will unblock at 'tim'
220 * (an absolute time) if it hasn't already unblocked.
222 * Returns the amount of time left from the original 'tim' value
223 * when it was unblocked.
226 cv_timedwait(kcondvar_t
*cvp
, kmutex_t
*mp
, clock_t tim
)
229 clock_t now
= ddi_get_lbolt();
234 hrtim
= TICK_TO_NSEC(tim
- now
);
235 return (cv_timedwait_hires(cvp
, mp
, hrtim
, nsec_per_tick
, 0));
239 * Same as cv_timedwait() except that the third argument is a relative
240 * timeout value, as opposed to an absolute one. There is also a fourth
241 * argument that specifies how accurately the timeout must be implemented.
244 cv_reltimedwait(kcondvar_t
*cvp
, kmutex_t
*mp
, clock_t delta
, time_res_t res
)
248 ASSERT(TIME_RES_VALID(res
));
253 if ((exp
= TICK_TO_NSEC(delta
)) < 0)
256 return (cv_timedwait_hires(cvp
, mp
, exp
, time_res
[res
], 0));
260 cv_timedwait_hires(kcondvar_t
*cvp
, kmutex_t
*mp
, hrtime_t tim
,
261 hrtime_t res
, int flag
)
263 kthread_t
*t
= curthread
;
271 ASSERT(!quiesce_active
);
273 limit
= (flag
& CALLOUT_FLAG_ABSOLUTE
) ? gethrtime() : 0;
276 mutex_enter(&t
->t_wait_mutex
);
277 id
= timeout_generic(CALLOUT_REALTIME
, (void (*)(void *))cv_wakeup
, t
,
279 thread_lock(t
); /* lock the thread */
280 cv_block((condvar_impl_t
*)cvp
);
281 thread_unlock_nopreempt(t
);
282 mutex_exit(&t
->t_wait_mutex
);
285 signalled
= (t
->t_schedflag
& TS_SIGNALLED
);
287 * Get the time left. untimeout() returns -1 if the timeout has
288 * occurred, or else the time remaining. If the time remaining is zero,
289 * the timeout has occurred between when we were awoken and when
290 * we called untimeout. We will treat this as if the timeout
291 * has occurred and set timeleft to -1.
293 timeleft
= untimeout_default(id
, 0);
297 if (signalled
) /* avoid consuming the cv_signal() */
304 cv_wait_sig(kcondvar_t
*cvp
, kmutex_t
*mp
)
306 kthread_t
*t
= curthread
;
307 proc_t
*p
= ttoproc(t
);
308 klwp_t
*lwp
= ttolwp(t
);
315 ASSERT(!quiesce_active
);
318 * Threads in system processes don't process signals. This is
319 * true both for standard threads of system processes and for
320 * interrupt threads which have borrowed their pinned thread's LWP.
322 if (lwp
== NULL
|| (p
->p_flag
& SSYS
)) {
326 ASSERT(t
->t_intr
== NULL
);
328 cancel_pending
= schedctl_cancel_pending();
330 lwp
->lwp_sysabort
= 0;
332 cv_block_sig(t
, (condvar_impl_t
*)cvp
);
333 thread_unlock_nopreempt(t
);
335 if (ISSIG(t
, JUSTLOOKING
) || MUSTRETURN(p
, t
) || cancel_pending
)
337 /* ASSERT(no locks are held) */
339 signalled
= (t
->t_schedflag
& TS_SIGNALLED
);
340 t
->t_flag
&= ~T_WAKEABLE
;
342 if (ISSIG_PENDING(t
, lwp
, p
)) {
348 if (lwp
->lwp_sysabort
|| MUSTRETURN(p
, t
))
350 if (rval
!= 0 && cancel_pending
) {
351 schedctl_cancel_eintr();
355 lwp
->lwp_sysabort
= 0;
356 if (rval
== 0 && signalled
) /* avoid consuming the cv_signal() */
362 cv_timedwait_sig_hires(kcondvar_t
*cvp
, kmutex_t
*mp
, hrtime_t tim
,
363 hrtime_t res
, int flag
)
365 kthread_t
*t
= curthread
;
366 proc_t
*p
= ttoproc(t
);
367 klwp_t
*lwp
= ttolwp(t
);
368 int cancel_pending
= 0;
376 ASSERT(!quiesce_active
);
379 * Threads in system processes don't process signals. This is
380 * true both for standard threads of system processes and for
381 * interrupt threads which have borrowed their pinned thread's LWP.
383 if (lwp
== NULL
|| (p
->p_flag
& SSYS
))
384 return (cv_timedwait_hires(cvp
, mp
, tim
, res
, flag
));
385 ASSERT(t
->t_intr
== NULL
);
388 * If tim is less than or equal to current hrtime, then the timeout
389 * has already occurred. So just check to see if there is a signal
390 * pending. If so, return 0 indicating that there is a signal pending.
391 * Else return -1 indicating that the timeout occurred. No need to
394 limit
= (flag
& CALLOUT_FLAG_ABSOLUTE
) ? gethrtime() : 0;
397 lwp
->lwp_sysabort
= 0;
403 * Set the timeout and wait.
405 cancel_pending
= schedctl_cancel_pending();
406 mutex_enter(&t
->t_wait_mutex
);
407 id
= timeout_generic(CALLOUT_REALTIME
, (void (*)(void *))cv_wakeup
, t
,
410 lwp
->lwp_sysabort
= 0;
412 cv_block_sig(t
, (condvar_impl_t
*)cvp
);
413 thread_unlock_nopreempt(t
);
414 mutex_exit(&t
->t_wait_mutex
);
416 if (ISSIG(t
, JUSTLOOKING
) || MUSTRETURN(p
, t
) || cancel_pending
)
418 /* ASSERT(no locks are held) */
420 signalled
= (t
->t_schedflag
& TS_SIGNALLED
);
421 t
->t_flag
&= ~T_WAKEABLE
;
424 * Untimeout the thread. untimeout() returns -1 if the timeout has
425 * occurred, or else the time remaining. If the time remaining is zero,
426 * the timeout has occurred between when we were awoken and when
427 * we called untimeout. We will treat this as if the timeout
428 * has occurred and set rval to -1.
430 rval
= untimeout_default(id
, 0);
436 * Check to see if a signal is pending. If so, regardless of whether
437 * or not we were awoken due to the signal, the signal is now pending
438 * and a return of 0 has the highest priority.
441 if (ISSIG_PENDING(t
, lwp
, p
)) {
447 if (lwp
->lwp_sysabort
|| MUSTRETURN(p
, t
))
449 if (rval
!= 0 && cancel_pending
) {
450 schedctl_cancel_eintr();
454 lwp
->lwp_sysabort
= 0;
455 if (rval
<= 0 && signalled
) /* avoid consuming the cv_signal() */
462 * Function result in order of precedence:
463 * 0 if a signal was received
464 * -1 if timeout occurred
465 * >0 if awakened via cv_signal() or cv_broadcast().
466 * (returns time remaining)
468 * cv_timedwait_sig() is now part of the DDI.
470 * This function is now just a wrapper for cv_timedwait_sig_hires().
473 cv_timedwait_sig(kcondvar_t
*cvp
, kmutex_t
*mp
, clock_t tim
)
477 hrtim
= TICK_TO_NSEC(tim
- ddi_get_lbolt());
478 return (cv_timedwait_sig_hires(cvp
, mp
, hrtim
, nsec_per_tick
, 0));
482 * Wait until the specified time.
483 * If tim == -1, waits without timeout using cv_wait_sig_swap().
486 cv_timedwait_sig_hrtime(kcondvar_t
*cvp
, kmutex_t
*mp
, hrtime_t tim
)
489 return (cv_wait_sig_swap(cvp
, mp
));
491 return (cv_timedwait_sig_hires(cvp
, mp
, tim
, 1,
492 CALLOUT_FLAG_ABSOLUTE
| CALLOUT_FLAG_ROUNDUP
));
497 * Same as cv_timedwait_sig() except that the third argument is a relative
498 * timeout value, as opposed to an absolute one. There is also a fourth
499 * argument that specifies how accurately the timeout must be implemented.
502 cv_reltimedwait_sig(kcondvar_t
*cvp
, kmutex_t
*mp
, clock_t delta
,
507 ASSERT(TIME_RES_VALID(res
));
510 if ((exp
= TICK_TO_NSEC(delta
)) < 0)
514 return (cv_timedwait_sig_hires(cvp
, mp
, exp
, time_res
[res
], 0));
518 * Like cv_wait_sig_swap but allows the caller to indicate (with a
519 * non-NULL sigret) that they will take care of signalling the cv
520 * after wakeup, if necessary. This is a vile hack that should only
521 * be used when no other option is available; almost all callers
522 * should just use cv_wait_sig_swap (which takes care of the cv_signal
523 * stuff automatically) instead.
526 cv_wait_sig_swap_core(kcondvar_t
*cvp
, kmutex_t
*mp
, int *sigret
)
528 kthread_t
*t
= curthread
;
529 proc_t
*p
= ttoproc(t
);
530 klwp_t
*lwp
= ttolwp(t
);
539 * Threads in system processes don't process signals. This is
540 * true both for standard threads of system processes and for
541 * interrupt threads which have borrowed their pinned thread's LWP.
543 if (lwp
== NULL
|| (p
->p_flag
& SSYS
)) {
547 ASSERT(t
->t_intr
== NULL
);
549 cancel_pending
= schedctl_cancel_pending();
551 lwp
->lwp_sysabort
= 0;
553 t
->t_kpri_req
= 0; /* don't need kernel priority */
554 cv_block_sig(t
, (condvar_impl_t
*)cvp
);
555 thread_unlock_nopreempt(t
);
557 if (ISSIG(t
, JUSTLOOKING
) || MUSTRETURN(p
, t
) || cancel_pending
)
559 /* ASSERT(no locks are held) */
561 signalled
= (t
->t_schedflag
& TS_SIGNALLED
);
562 t
->t_flag
&= ~T_WAKEABLE
;
564 if (ISSIG_PENDING(t
, lwp
, p
)) {
570 if (lwp
->lwp_sysabort
|| MUSTRETURN(p
, t
))
572 if (rval
!= 0 && cancel_pending
) {
573 schedctl_cancel_eintr();
577 lwp
->lwp_sysabort
= 0;
580 *sigret
= signalled
; /* just tell the caller */
582 cv_signal(cvp
); /* avoid consuming the cv_signal() */
588 * Same as cv_wait_sig but the thread can be swapped out while waiting.
589 * This should only be used when we know we aren't holding any locks.
592 cv_wait_sig_swap(kcondvar_t
*cvp
, kmutex_t
*mp
)
594 return (cv_wait_sig_swap_core(cvp
, mp
, NULL
));
598 cv_signal(kcondvar_t
*cvp
)
600 condvar_impl_t
*cp
= (condvar_impl_t
*)cvp
;
602 /* make sure the cv_waiters field looks sane */
603 ASSERT(cp
->cv_waiters
<= CV_MAX_WAITERS
);
604 if (cp
->cv_waiters
> 0) {
605 sleepq_head_t
*sqh
= SQHASH(cp
);
606 disp_lock_enter(&sqh
->sq_lock
);
607 ASSERT(CPU_ON_INTR(CPU
) == 0);
608 if (cp
->cv_waiters
& CV_WAITERS_MASK
) {
611 t
= sleepq_wakeone_chan(&sqh
->sq_queue
, cp
);
613 * If cv_waiters is non-zero (and less than
614 * CV_MAX_WAITERS) there should be a thread
618 } else if (sleepq_wakeone_chan(&sqh
->sq_queue
, cp
) == NULL
) {
621 disp_lock_exit(&sqh
->sq_lock
);
626 cv_broadcast(kcondvar_t
*cvp
)
628 condvar_impl_t
*cp
= (condvar_impl_t
*)cvp
;
630 /* make sure the cv_waiters field looks sane */
631 ASSERT(cp
->cv_waiters
<= CV_MAX_WAITERS
);
632 if (cp
->cv_waiters
> 0) {
633 sleepq_head_t
*sqh
= SQHASH(cp
);
634 disp_lock_enter(&sqh
->sq_lock
);
635 ASSERT(CPU_ON_INTR(CPU
) == 0);
636 sleepq_wakeall_chan(&sqh
->sq_queue
, cp
);
638 disp_lock_exit(&sqh
->sq_lock
);
643 * Same as cv_wait(), but wakes up (after wakeup_time milliseconds) to check
644 * for requests to stop, like cv_wait_sig() but without dealing with signals.
645 * This is a horrible kludge. It is evil. It is vile. It is swill.
646 * If your code has to call this function then your code is the same.
649 cv_wait_stop(kcondvar_t
*cvp
, kmutex_t
*mp
, int wakeup_time
)
651 kthread_t
*t
= curthread
;
652 klwp_t
*lwp
= ttolwp(t
);
653 proc_t
*p
= ttoproc(t
);
661 * Threads in system processes don't process signals. This is
662 * true both for standard threads of system processes and for
663 * interrupt threads which have borrowed their pinned thread's LWP.
665 if (lwp
== NULL
|| (p
->p_flag
& SSYS
)) {
669 ASSERT(t
->t_intr
== NULL
);
672 * Wakeup in wakeup_time milliseconds, i.e., human time.
674 tim
= ddi_get_lbolt() + MSEC_TO_TICK(wakeup_time
);
675 mutex_enter(&t
->t_wait_mutex
);
676 id
= realtime_timeout_default((void (*)(void *))cv_wakeup
, t
,
677 tim
- ddi_get_lbolt());
678 thread_lock(t
); /* lock the thread */
679 cv_block((condvar_impl_t
*)cvp
);
680 thread_unlock_nopreempt(t
);
681 mutex_exit(&t
->t_wait_mutex
);
683 /* ASSERT(no locks are held); */
685 (void) untimeout_default(id
, 0);
688 * Check for reasons to stop, if lwp_nostop is not true.
689 * See issig_forreal() for explanations of the various stops.
691 mutex_enter(&p
->p_lock
);
692 while (lwp
->lwp_nostop
== 0 && !(p
->p_flag
& SEXITLWPS
)) {
694 * Hold the lwp here for watchpoint manipulation.
696 if (t
->t_proc_flag
& TP_PAUSE
) {
697 stop(PR_SUSPENDED
, SUSPEND_PAUSE
);
703 if (t
->t_proc_flag
& TP_CHKPT
) {
704 stop(PR_CHECKPOINT
, 0);
708 * Honor fork1(), watchpoint activity (remapping a page),
709 * and lwp_suspend() requests.
711 if ((p
->p_flag
& (SHOLDFORK1
|SHOLDWATCH
)) ||
712 (t
->t_proc_flag
& TP_HOLDLWP
)) {
713 stop(PR_SUSPENDED
, SUSPEND_NORMAL
);
717 * Honor /proc requested stop.
719 if (t
->t_proc_flag
& TP_PRSTOP
) {
720 stop(PR_REQUESTED
, 0);
723 * If some lwp in the process has already stopped
724 * showing PR_JOBCONTROL, stop in sympathy with it.
726 if (p
->p_stopsig
&& t
!= p
->p_agenttp
) {
727 stop(PR_JOBCONTROL
, p
->p_stopsig
);
732 mutex_exit(&p
->p_lock
);
737 * Like cv_timedwait_sig(), but takes an absolute hires future time
738 * rather than a future time in clock ticks. Will not return showing
739 * that a timeout occurred until the future time is passed.
740 * If 'when' is a NULL pointer, no timeout will occur.
742 * Function result in order of precedence:
743 * 0 if a signal was received
744 * -1 if timeout occurred
745 * >0 if awakened via cv_signal() or cv_broadcast()
746 * or by a spurious wakeup.
747 * (might return time remaining)
748 * As a special test, if someone abruptly resets the system time
749 * (but not through adjtime(2); drifting of the clock is allowed and
750 * expected [see timespectohz_adj()]), then we force a return of -1
751 * so the caller can return a premature timeout to the calling process
752 * so it can reevaluate the situation in light of the new system time.
753 * (The system clock has been reset if timecheck != timechanged.)
755 * Generally, cv_timedwait_sig_hrtime() should be used instead of this
756 * routine. It waits based on hrtime rather than wall-clock time and therefore
757 * does not need to deal with the time changing.
760 cv_waituntil_sig(kcondvar_t
*cvp
, kmutex_t
*mp
,
761 timestruc_t
*when
, int timecheck
)
769 return (cv_wait_sig_swap(cvp
, mp
));
773 timespecsub(&delta
, &now
);
774 if (delta
.tv_sec
< 0 || (delta
.tv_sec
== 0 && delta
.tv_nsec
== 0)) {
776 * We have already reached the absolute future time.
777 * Call cv_timedwait_sig() just to check for signals.
778 * We will return immediately with either 0 or -1.
780 rval
= cv_timedwait_sig_hires(cvp
, mp
, 0, 1, 0);
782 if (timecheck
== timechanged
) {
784 * Make sure that the interval is at least one tick.
785 * This is to prevent a user from flooding the system
786 * with very small, high resolution timers.
788 interval
= ts2hrt(&delta
);
789 if (interval
< nsec_per_tick
)
790 interval
= nsec_per_tick
;
791 rval
= cv_timedwait_sig_hires(cvp
, mp
, interval
, 1,
792 CALLOUT_FLAG_HRESTIME
);
795 * Someone reset the system time;
796 * just force an immediate timeout.
800 if (rval
== -1 && timecheck
== timechanged
) {
802 * Even though cv_timedwait_sig() returned showing a
803 * timeout, the future time may not have passed yet.
804 * If not, change rval to indicate a normal wakeup.
808 timespecsub(&delta
, &now
);
809 if (delta
.tv_sec
> 0 || (delta
.tv_sec
== 0 &&