4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 * Copyright (c) 1982, 1986 Regents of the University of California.
29 * All rights reserved. The Berkeley software License Agreement
30 * specifies the terms and conditions for redistribution.
33 #include <sys/param.h>
35 #include <sys/vnode.h>
38 #include <sys/systm.h>
40 #include <sys/cmn_err.h>
41 #include <sys/cpuvar.h>
42 #include <sys/timer.h>
43 #include <sys/debug.h>
44 #include <sys/sysmacros.h>
45 #include <sys/cyclic.h>
47 static void realitexpire(void *);
48 static void realprofexpire(void *);
49 static void timeval_advance(struct timeval
*, struct timeval
*);
51 kmutex_t tod_lock
; /* protects time-of-day stuff */
54 * Constant to define the minimum interval value of the ITIMER_REALPROF timer.
55 * Value is in microseconds; defaults to 500 usecs. Setting this value
56 * significantly lower may allow for denial-of-service attacks.
58 int itimer_realprof_minimum
= 500;
61 * macro to compare a timeval to a timestruc
/*
 * TVTSCMP(tvp, tsp, cmp): compare a timeval (tv_sec/tv_usec) with a
 * timestruc (tv_sec/tv_nsec) using the relational operator `cmp'.
 *
 * When the seconds differ they alone decide the result; when they are
 * equal the sub-second fields decide it, with tv_usec scaled by 1000 so
 * that both sides are in nanoseconds.  Selecting between the two
 * comparisons (instead of OR-ing them together) keeps the result correct
 * for ==, <= and >= as well as for <, > and !=.
 *
 * Both pointer arguments are evaluated more than once, so they must not
 * have side effects.
 */
#define	TVTSCMP(tvp, tsp, cmp) \
	(((tvp)->tv_sec == (tsp)->tv_sec) ? \
	((tvp)->tv_usec * 1000 cmp (tsp)->tv_nsec) : \
	((tvp)->tv_sec cmp (tsp)->tv_sec))
72 * Time of day and interval timer support.
74 * These routines provide the kernel entry points to get and set
75 * the time-of-day and per-process interval timers. Subroutines
76 * here provide support for adding and subtracting timeval structures
77 * and decrementing interval timers, optionally reloading the interval
78 * timers when they expire.
82 * SunOS function to generate monotonically increasing time values.
85 uniqtime(struct timeval
*tv
)
87 static struct timeval last
;
88 static int last_timechanged
;
94 * protect modification of last
96 mutex_enter(&tod_lock
);
100 * Fast algorithm to convert nsec to usec -- see hrt2ts()
101 * in kernel/os/timers.c for a full description.
104 usec
= nsec
+ (nsec
>> 2);
105 usec
= nsec
+ (usec
>> 1);
106 usec
= nsec
+ (usec
>> 2);
107 usec
= nsec
+ (usec
>> 4);
108 usec
= nsec
- (usec
>> 3);
109 usec
= nsec
+ (usec
>> 2);
110 usec
= nsec
+ (usec
>> 3);
111 usec
= nsec
+ (usec
>> 4);
112 usec
= nsec
+ (usec
>> 1);
113 usec
= nsec
+ (usec
>> 6);
118 * If the system hres time has been changed since the last time
119 * we were called, then all bets are off; just update our
120 * local copy of timechanged and accept the reported time as is.
122 if (last_timechanged
!= timechanged
) {
123 last_timechanged
= timechanged
;
126 * Try to keep timestamps unique, but don't be obsessive about
127 * it in the face of large differences.
129 else if ((sec
<= last
.tv_sec
) && /* same or lower seconds, and */
130 ((sec
!= last
.tv_sec
) || /* either different second or */
131 (usec
<= last
.tv_usec
)) && /* lower microsecond, and */
132 ((last
.tv_sec
- sec
) <= 5)) { /* not way back in time */
134 usec
= last
.tv_usec
+ 1;
135 if (usec
>= MICROSEC
) {
142 mutex_exit(&tod_lock
);
149 * Timestamps are exported from the kernel in several places.
150 * Such timestamps are commonly used for either uniqueness or for
151 * sequencing - truncation to 32-bits is fine for uniqueness,
152 * but sequencing is going to take more work as we get closer to 2038!
155 uniqtime32(struct timeval32
*tv32p
)
160 TIMEVAL_TO_TIMEVAL32(tv32p
, &tv
);
164 gettimeofday(struct timeval
*tp
)
170 if (get_udatamodel() == DATAMODEL_NATIVE
) {
171 if (copyout(&atv
, tp
, sizeof (atv
)))
172 return (set_errno(EFAULT
));
174 struct timeval32 tv32
;
176 if (TIMEVAL_OVERFLOW(&atv
))
177 return (set_errno(EOVERFLOW
));
178 TIMEVAL_TO_TIMEVAL32(&tv32
, &atv
);
180 if (copyout(&tv32
, tp
, sizeof (tv32
)))
181 return (set_errno(EFAULT
));
188 getitimer(uint_t which
, struct itimerval
*itv
)
192 if (get_udatamodel() == DATAMODEL_NATIVE
)
193 error
= xgetitimer(which
, itv
, 0);
195 struct itimerval kitv
;
197 if ((error
= xgetitimer(which
, &kitv
, 1)) == 0) {
198 if (ITIMERVAL_OVERFLOW(&kitv
)) {
201 struct itimerval32 itv32
;
203 ITIMERVAL_TO_ITIMERVAL32(&itv32
, &kitv
);
204 if (copyout(&itv32
, itv
, sizeof (itv32
)) != 0)
210 return (error
? (set_errno(error
)) : 0);
214 xgetitimer(uint_t which
, struct itimerval
*itv
, int iskaddr
)
216 struct proc
*p
= curproc
;
218 struct itimerval aitv
;
219 hrtime_t ts
, first
, interval
, remain
;
221 mutex_enter(&p
->p_lock
);
226 aitv
= ttolwp(curthread
)->lwp_timer
[which
];
231 aitv
= p
->p_realitimer
;
233 if (timerisset(&aitv
.it_value
)) {
235 if (timercmp(&aitv
.it_value
, &now
, <)) {
236 timerclear(&aitv
.it_value
);
238 timevalsub(&aitv
.it_value
, &now
);
243 case ITIMER_REALPROF
:
244 if (curproc
->p_rprof_cyclic
== CYCLIC_NONE
) {
245 bzero(&aitv
, sizeof (aitv
));
249 aitv
= curproc
->p_rprof_timer
;
251 first
= tv2hrt(&aitv
.it_value
);
252 interval
= tv2hrt(&aitv
.it_interval
);
254 if ((ts
= gethrtime()) < first
) {
256 * We haven't gone off for the first time; the time
257 * remaining is simply the first time we will go
258 * off minus the current time.
264 * This was set as a one-shot, and we've
265 * already gone off; there is no time
271 * We have a non-zero interval; we need to
272 * determine how far we are into the current
273 * interval, and subtract that from the
274 * interval to determine the time remaining.
276 remain
= interval
- ((ts
- first
) % interval
);
280 hrt2tv(remain
, &aitv
.it_value
);
284 mutex_exit(&p
->p_lock
);
288 mutex_exit(&p
->p_lock
);
291 bcopy(&aitv
, itv
, sizeof (*itv
));
293 ASSERT(get_udatamodel() == DATAMODEL_NATIVE
);
294 if (copyout(&aitv
, itv
, sizeof (*itv
)))
303 setitimer(uint_t which
, struct itimerval
*itv
, struct itimerval
*oitv
)
308 if ((error
= getitimer(which
, oitv
)) != 0)
314 if (get_udatamodel() == DATAMODEL_NATIVE
)
315 error
= xsetitimer(which
, itv
, 0);
317 struct itimerval32 itv32
;
318 struct itimerval kitv
;
320 if (copyin(itv
, &itv32
, sizeof (itv32
)))
322 ITIMERVAL32_TO_ITIMERVAL(&kitv
, &itv32
);
323 error
= xsetitimer(which
, &kitv
, 1);
326 return (error
? (set_errno(error
)) : 0);
330 xsetitimer(uint_t which
, struct itimerval
*itv
, int iskaddr
)
332 struct itimerval aitv
;
334 struct proc
*p
= curproc
;
347 bcopy(itv
, &aitv
, sizeof (aitv
));
349 ASSERT(get_udatamodel() == DATAMODEL_NATIVE
);
350 if (copyin(itv
, &aitv
, sizeof (aitv
)))
354 if (which
== ITIMER_REALPROF
) {
355 min
= MAX((int)(cyclic_getres() / (NANOSEC
/ MICROSEC
)),
356 itimer_realprof_minimum
);
361 if (itimerfix(&aitv
.it_value
, min
) ||
362 (itimerfix(&aitv
.it_interval
, min
) && timerisset(&aitv
.it_value
)))
365 mutex_enter(&p
->p_lock
);
369 * The SITBUSY flag prevents conflicts with multiple
370 * threads attempting to perform setitimer(ITIMER_REAL)
371 * at the same time, even when we drop p->p_lock below.
372 * Any blocked thread returns successfully because the
373 * effect is the same as if it got here first, finished,
374 * and the other thread then came through and destroyed
375 * what it did. We are just protecting the system from
376 * malfunctioning due to the race condition.
378 if (p
->p_flag
& SITBUSY
) {
379 mutex_exit(&p
->p_lock
);
382 p
->p_flag
|= SITBUSY
;
383 while ((tmp_id
= p
->p_itimerid
) != 0) {
385 * Avoid deadlock in callout_delete (called from
386 * untimeout) which may go to sleep (while holding
387 * p_lock). Drop p_lock and re-acquire it after
388 * untimeout returns. Need to clear p_itimerid
389 * while holding p_lock.
392 mutex_exit(&p
->p_lock
);
393 (void) untimeout(tmp_id
);
394 mutex_enter(&p
->p_lock
);
396 if (timerisset(&aitv
.it_value
)) {
398 timevaladd(&aitv
.it_value
, &now
);
399 p
->p_itimerid
= realtime_timeout(realitexpire
,
400 p
, hzto(&aitv
.it_value
));
402 p
->p_realitimer
= aitv
;
403 p
->p_flag
&= ~SITBUSY
;
406 case ITIMER_REALPROF
:
407 cyclic
= p
->p_rprof_cyclic
;
408 p
->p_rprof_cyclic
= CYCLIC_NONE
;
410 mutex_exit(&p
->p_lock
);
413 * We're now going to acquire cpu_lock, remove the old cyclic
414 * if necessary, and add our new cyclic.
416 mutex_enter(&cpu_lock
);
418 if (cyclic
!= CYCLIC_NONE
)
419 cyclic_remove(cyclic
);
421 if (!timerisset(&aitv
.it_value
)) {
423 * If we were passed a value of 0, we're done.
425 mutex_exit(&cpu_lock
);
429 hdlr
.cyh_func
= realprofexpire
;
431 hdlr
.cyh_level
= CY_LOW_LEVEL
;
433 when
.cyt_when
= (ts
= gethrtime() + tv2hrt(&aitv
.it_value
));
434 when
.cyt_interval
= tv2hrt(&aitv
.it_interval
);
436 if (when
.cyt_interval
== 0) {
438 * Using the same logic as for CLOCK_HIGHRES timers, we
439 * set the interval to be INT64_MAX - when.cyt_when to
440 * effect a one-shot; see the comment in clock_highres.c
441 * for more details on why this works.
443 when
.cyt_interval
= INT64_MAX
- when
.cyt_when
;
446 cyclic
= cyclic_add(&hdlr
, &when
);
448 mutex_exit(&cpu_lock
);
451 * We have now successfully added the cyclic. Reacquire
452 * p_lock, and see if anyone has snuck in.
454 mutex_enter(&p
->p_lock
);
456 if (p
->p_rprof_cyclic
!= CYCLIC_NONE
) {
458 * We're racing with another thread establishing an
459 * ITIMER_REALPROF interval timer. We'll let the other
460 * thread win (this is a race at the application level,
461 * so letting the other thread win is acceptable).
463 mutex_exit(&p
->p_lock
);
464 mutex_enter(&cpu_lock
);
465 cyclic_remove(cyclic
);
466 mutex_exit(&cpu_lock
);
472 * Success. Set our tracking variables in the proc structure,
473 * cancel any outstanding ITIMER_PROF, and allocate the
474 * per-thread SIGPROF buffers, if possible.
476 hrt2tv(ts
, &aitv
.it_value
);
477 p
->p_rprof_timer
= aitv
;
478 p
->p_rprof_cyclic
= cyclic
;
482 struct itimerval
*itvp
;
484 itvp
= &ttolwp(t
)->lwp_timer
[ITIMER_PROF
];
485 timerclear(&itvp
->it_interval
);
486 timerclear(&itvp
->it_value
);
488 if (t
->t_rprof
!= NULL
)
492 kmem_zalloc(sizeof (struct rprof
), KM_NOSLEEP
);
494 } while ((t
= t
->t_forw
) != p
->p_tlist
);
499 ttolwp(curthread
)->lwp_timer
[ITIMER_VIRTUAL
] = aitv
;
503 if (p
->p_rprof_cyclic
!= CYCLIC_NONE
) {
505 * Silently ignore ITIMER_PROF if ITIMER_REALPROF
511 ttolwp(curthread
)->lwp_timer
[ITIMER_PROF
] = aitv
;
515 mutex_exit(&p
->p_lock
);
518 mutex_exit(&p
->p_lock
);
523 * Delete the ITIMER_REALPROF interval timer.
524 * Called only from exec_args() when exec occurs.
525 * The other ITIMER_* interval timers are specified
526 * to be inherited across exec(), so leave them alone.
529 delete_itimer_realprof(void)
531 kthread_t
*t
= curthread
;
532 struct proc
*p
= ttoproc(t
);
533 klwp_t
*lwp
= ttolwp(t
);
536 mutex_enter(&p
->p_lock
);
538 /* we are performing execve(); assert we are single-threaded */
539 ASSERT(t
== p
->p_tlist
&& t
== t
->t_forw
);
541 if ((cyclic
= p
->p_rprof_cyclic
) == CYCLIC_NONE
) {
542 mutex_exit(&p
->p_lock
);
544 p
->p_rprof_cyclic
= CYCLIC_NONE
;
546 * Delete any current instance of SIGPROF.
548 if (lwp
->lwp_cursig
== SIGPROF
) {
551 if (lwp
->lwp_curinfo
) {
552 siginfofree(lwp
->lwp_curinfo
);
553 lwp
->lwp_curinfo
= NULL
;
557 * Delete any pending instances of SIGPROF.
559 sigdelset(&p
->p_sig
, SIGPROF
);
560 sigdelset(&p
->p_extsig
, SIGPROF
);
561 sigdelq(p
, NULL
, SIGPROF
);
562 sigdelset(&t
->t_sig
, SIGPROF
);
563 sigdelset(&t
->t_extsig
, SIGPROF
);
564 sigdelq(p
, t
, SIGPROF
);
566 mutex_exit(&p
->p_lock
);
569 * Remove the ITIMER_REALPROF cyclic.
571 mutex_enter(&cpu_lock
);
572 cyclic_remove(cyclic
);
573 mutex_exit(&cpu_lock
);
578 * Real interval timer expired:
579 * send process whose timer expired an alarm signal.
580 * If time is not set up to reload, then just return.
581 * Else compute next time timer should go off which is > current time.
582 * This is where delay in processing this timeout causes multiple
583 * SIGALRM calls to be compressed into one.
586 realitexpire(void *arg
)
588 struct proc
*p
= arg
;
589 struct timeval
*valp
= &p
->p_realitimer
.it_value
;
590 struct timeval
*intervalp
= &p
->p_realitimer
.it_interval
;
595 mutex_enter(&p
->p_lock
);
597 if ((ticks
= hzto(valp
)) > 1) {
599 * If we are executing before we were meant to, it must be
600 * because of an overflow in a prior hzto() calculation.
601 * In this case, we want to go to sleep for the recalculated
602 * number of ticks. For the special meaning of the value "1"
603 * see comment in timespectohz().
605 p
->p_itimerid
= realtime_timeout(realitexpire
, p
, ticks
);
606 mutex_exit(&p
->p_lock
);
610 sigtoproc(p
, NULL
, SIGALRM
);
611 if (!timerisset(intervalp
)) {
615 /* advance timer value past current time */
616 timeval_advance(valp
, intervalp
);
617 p
->p_itimerid
= realtime_timeout(realitexpire
, p
, hzto(valp
));
619 mutex_exit(&p
->p_lock
);
623 * Real time profiling interval timer expired:
624 * Increment microstate counters for each lwp in the process
625 * and ensure that running lwps are kicked into the kernel.
626 * If time is not set up to reload, then just return.
627 * Else compute next time timer should go off which is > current time,
631 realprofexpire(void *arg
)
633 struct proc
*p
= arg
;
636 mutex_enter(&p
->p_lock
);
637 if (p
->p_rprof_cyclic
== CYCLIC_NONE
||
638 (t
= p
->p_tlist
) == NULL
) {
639 mutex_exit(&p
->p_lock
);
646 * Attempt to allocate the SIGPROF buffer, but don't sleep.
648 if (t
->t_rprof
== NULL
)
649 t
->t_rprof
= kmem_zalloc(sizeof (struct rprof
),
651 if (t
->t_rprof
== NULL
)
655 switch (t
->t_state
) {
657 switch (mstate
= ttolwp(t
)->lwp_mstate
.ms_prev
) {
670 mstate
= LMS_WAIT_CPU
;
673 switch (mstate
= t
->t_mstate
) {
684 mstate
= t
->t_mstate
;
687 t
->t_rprof
->rp_anystate
= 1;
688 t
->t_rprof
->rp_state
[mstate
]++;
691 * force the thread into the kernel
692 * if it is not already there.
694 if (t
->t_state
== TS_ONPROC
&& t
->t_cpu
!= CPU
)
695 poke_cpu(t
->t_cpu
->cpu_id
);
697 } while ((t
= t
->t_forw
) != p
->p_tlist
);
699 mutex_exit(&p
->p_lock
);
703 * Advances timer value past the current time of day. See the detailed
704 * comment for this logic in realitexpire(), above.
707 timeval_advance(struct timeval
*valp
, struct timeval
*intervalp
)
710 struct timeval interval2nth
;
713 interval2nth
= *intervalp
;
714 for (cnt2nth
= 0; ; cnt2nth
++) {
715 timevaladd(valp
, &interval2nth
);
717 if (TVTSCMP(valp
, &hrestime
, >))
719 timevaladd(&interval2nth
, &interval2nth
);
723 timevalsub(valp
, &interval2nth
);
728 * Check that a proposed value to load into the .it_value or .it_interval
729 * part of an interval timer is acceptable, and set it to at least a
730 * specified minimal value.
733 itimerfix(struct timeval
*tv
, int minimum
)
735 if (tv
->tv_sec
< 0 || tv
->tv_sec
> 100000000 ||
736 tv
->tv_usec
< 0 || tv
->tv_usec
>= MICROSEC
)
738 if (tv
->tv_sec
== 0 && tv
->tv_usec
!= 0 && tv
->tv_usec
< minimum
)
739 tv
->tv_usec
= minimum
;
744 * Same as itimerfix, except a) it takes a timespec instead of a timeval and
745 * b) it doesn't truncate based on timeout granularity; consumers of this
746 * interface (e.g. timer_settime()) depend on the passed timespec not being
747 * modified implicitly.
750 itimerspecfix(timespec_t
*tv
)
752 if (tv
->tv_sec
< 0 || tv
->tv_nsec
< 0 || tv
->tv_nsec
>= NANOSEC
)
758 * Decrement an interval timer by a specified number
759 * of microseconds, which must be less than a second,
760 * i.e. < 1000000. If the timer expires, then reload
761 * it. In this case, carry over (usec - old value) to
762 * reduce the value reloaded into the timer so that
763 * the timer does not drift. This routine assumes
764 * that it is called in a context where the timers
765 * on which it is operating cannot change in value.
768 itimerdecr(struct itimerval
*itp
, int usec
)
770 if (itp
->it_value
.tv_usec
< usec
) {
771 if (itp
->it_value
.tv_sec
== 0) {
772 /* expired, and already in next interval */
773 usec
-= itp
->it_value
.tv_usec
;
776 itp
->it_value
.tv_usec
+= MICROSEC
;
777 itp
->it_value
.tv_sec
--;
779 itp
->it_value
.tv_usec
-= usec
;
781 if (timerisset(&itp
->it_value
))
783 /* expired, exactly at end of interval */
785 if (timerisset(&itp
->it_interval
)) {
786 itp
->it_value
= itp
->it_interval
;
787 itp
->it_value
.tv_usec
-= usec
;
788 if (itp
->it_value
.tv_usec
< 0) {
789 itp
->it_value
.tv_usec
+= MICROSEC
;
790 itp
->it_value
.tv_sec
--;
793 itp
->it_value
.tv_usec
= 0; /* sec is already 0 */
798 * Add and subtract routines for timevals.
799 * N.B.: subtract routine doesn't deal with
800 * results which are before the beginning,
801 * it just gets very confused in this case.
805 timevaladd(struct timeval
*t1
, struct timeval
*t2
)
807 t1
->tv_sec
+= t2
->tv_sec
;
808 t1
->tv_usec
+= t2
->tv_usec
;
813 timevalsub(struct timeval
*t1
, struct timeval
*t2
)
815 t1
->tv_sec
-= t2
->tv_sec
;
816 t1
->tv_usec
-= t2
->tv_usec
;
821 timevalfix(struct timeval
*t1
)
823 if (t1
->tv_usec
< 0) {
825 t1
->tv_usec
+= MICROSEC
;
827 if (t1
->tv_usec
>= MICROSEC
) {
829 t1
->tv_usec
-= MICROSEC
;
834 * Same as the routines above. These routines take a timespec instead
838 timespecadd(timespec_t
*t1
, timespec_t
*t2
)
840 t1
->tv_sec
+= t2
->tv_sec
;
841 t1
->tv_nsec
+= t2
->tv_nsec
;
846 timespecsub(timespec_t
*t1
, timespec_t
*t2
)
848 t1
->tv_sec
-= t2
->tv_sec
;
849 t1
->tv_nsec
-= t2
->tv_nsec
;
854 timespecfix(timespec_t
*t1
)
856 if (t1
->tv_nsec
< 0) {
858 t1
->tv_nsec
+= NANOSEC
;
860 if (t1
->tv_nsec
>= NANOSEC
) {
862 t1
->tv_nsec
-= NANOSEC
;
868 * Compute number of hz until specified time.
869 * Used to compute third argument to timeout() from an absolute time.
872 hzto(struct timeval
*tv
)
876 ts
.tv_sec
= tv
->tv_sec
;
877 ts
.tv_nsec
= tv
->tv_usec
* 1000;
878 gethrestime_lasttick(&now
);
880 return (timespectohz(&ts
, now
));
884 * Compute number of hz until specified time for a given timespec value.
885 * Used to compute third argument to timeout() from an absolute time.
888 timespectohz(timespec_t
*tv
, timespec_t now
)
895 * Compute number of ticks we will see between now and
896 * the target time; returns "1" if the destination time
897 * is before the next tick, so we always get some delay,
898 * and returns LONG_MAX ticks if we would overflow.
900 sec
= tv
->tv_sec
- now
.tv_sec
;
901 nsec
= tv
->tv_nsec
- now
.tv_nsec
+ nsec_per_tick
- 1;
906 } else if (nsec
>= NANOSEC
) {
911 ticks
= NSEC_TO_TICK(nsec
);
914 * Compute ticks, accounting for negative and overflow as above.
915 * Overflow protection kicks in at about 70 weeks for hz=50
916 * and at about 35 weeks for hz=100. (Rather longer for the 64-bit
919 if (sec
< 0 || (sec
== 0 && ticks
< 1))
920 ticks
= 1; /* protect vs nonpositive */
921 else if (sec
> (LONG_MAX
- ticks
) / hz
)
922 ticks
= LONG_MAX
; /* protect vs overflow */
924 ticks
+= sec
* hz
; /* common case */
930 * Compute number of hz with the timespec tv specified.
931 * The return type must be 64 bit integer.
934 timespectohz64(timespec_t
*tv
)
941 nsec
= tv
->tv_nsec
+ nsec_per_tick
- 1;
946 } else if (nsec
>= NANOSEC
) {
951 ticks
= NSEC_TO_TICK(nsec
);
954 * Compute ticks, accounting for negative and overflow as above.
955 * Overflow protection kicks in at about 70 weeks for hz=50
956 * and at about 35 weeks for hz=100. (Rather longer for the 64-bit
959 if (sec
< 0 || (sec
== 0 && ticks
< 1))
960 ticks
= 1; /* protect vs nonpositive */
961 else if (sec
> (((~0ULL) >> 1) - ticks
) / hz
)
962 ticks
= (~0ULL) >> 1; /* protect vs overflow */
964 ticks
+= sec
* hz
; /* common case */
970 * hrt2ts(): convert from hrtime_t to timestruc_t.
972 * All this routine really does is:
974 * tsp->sec = hrt / NANOSEC;
975 * tsp->nsec = hrt % NANOSEC;
977 * The black magic below avoids doing a 64-bit by 32-bit integer divide,
978 * which is quite expensive. There's actually much more going on here than
979 * it might first appear -- don't try this at home.
981 * For the adventuresome, here's an explanation of how it works.
983 * Multiplication by a fixed constant is easy -- you just do the appropriate
984 * shifts and adds. For example, to multiply by 10, we observe that
986 * x * 10 = x * (8 + 2)
987 * = (x * 8) + (x * 2)
988 * = (x << 3) + (x << 1).
990 * In general, you can read the algorithm right off the bits: the number 10
991 * is 1010 in binary; bits 1 and 3 are ones, so x * 10 = (x << 1) + (x << 3).
993 * Sometimes you can do better. For example, 15 is 1111 binary, so the normal
994 * shift/add computation is x * 15 = (x << 0) + (x << 1) + (x << 2) + (x << 3).
995 * But, it's cheaper if you capitalize on the fact that you have a run of ones:
996 * 1111 = 10000 - 1, hence x * 15 = (x << 4) - (x << 0). [You would never
997 * actually perform the operation << 0, since it's a no-op; I'm just writing
998 * it that way for clarity.]
1000 * The other way you can win is if you get lucky with the prime factorization
1001 * of your constant. The number 1,000,000,000, which we have to multiply
1002 * by below, is a good example. One billion is 111011100110101100101000000000
1003 * in binary. If you apply the bit-grouping trick, it doesn't buy you very
1004 * much, because it's only a win for groups of three or more equal bits:
1006 * 111011100110101100101000000000 = 1000000000000000000000000000000
1007 * - 000100011001010011011000000000
1009 * Thus, instead of the 13 shift/add pairs (26 operations) implied by the LHS,
1010 * we have reduced this to 10 shift/add pairs (20 operations) on the RHS.
1011 * This is better, but not great.
1013 * However, we can factor 1,000,000,000 = 2^9 * 5^9 = 2^9 * 125 * 125 * 125,
1014 * and multiply by each factor. Multiplication by 125 is particularly easy,
1015 * since 128 is nearby: x * 125 = (x << 7) - x - x - x, which is just four
1016 * operations. So, to multiply by 1,000,000,000, we perform three multipli-
1017 * cations by 125, then << 9, a total of only 3 * 4 + 1 = 13 operations.
1018 * This is the algorithm we actually use in both hrt2ts() and ts2hrt().
1020 * Division is harder; there is no equivalent of the simple shift-add algorithm
1021 * we used for multiplication. However, we can convert the division problem
1022 * into a multiplication problem by pre-computing the binary representation
1023 * of the reciprocal of the divisor. For the case of interest, we have
1025 * 1 / 1,000,000,000 = 1.0001001011100000101111101000001B-30,
1027 * to 32 bits of precision. (The notation B-30 means "* 2^-30", just like
1028 * E-18 means "* 10^-18".)
1030 * So, to compute x / 1,000,000,000, we just multiply x by the 32-bit
1031 * integer 10001001011100000101111101000001, then normalize (shift) the
1032 * result. This constant has several large bit runs, so the multiply
1033 * is relatively cheap:
1035 * 10001001011100000101111101000001 = 10001001100000000110000001000001
1036 * - 00000000000100000000000100000000
1038 * Again, you can just read the algorithm right off the bits:
1041 * sec += (hrt << 6);
1042 * sec -= (hrt << 8);
1043 * sec += (hrt << 13);
1044 * sec += (hrt << 14);
1045 * sec -= (hrt << 20);
1046 * sec += (hrt << 23);
1047 * sec += (hrt << 24);
1048 * sec += (hrt << 27);
1049 * sec += (hrt << 31);
1050 * sec >>= (32 + 30);
1052 * Voila! The only problem is, since hrt is 64 bits, we need to use 96-bit
1053 * arithmetic to perform this calculation. That's a waste, because ultimately
1054 * we only need the highest 32 bits of the result.
1056 * The first thing we do is to realize that we don't need to use all of hrt
1057 * in the calculation. The lowest 30 bits can contribute at most 1 to the
1058 * quotient (2^30 / 1,000,000,000 = 1.07...), so we'll deal with them later.
1059 * The highest 2 bits have to be zero, or hrt won't fit in a timestruc_t.
1060 * Thus, the only bits of hrt that matter for division are bits 30..61.
1061 * These 32 bits are just the lower-order word of (hrt >> 30). This brings
1062 * us down from 96-bit math to 64-bit math, and our algorithm becomes:
1064 * tmp = (uint32_t) (hrt >> 30);
1066 * sec += (tmp << 6);
1067 * sec -= (tmp << 8);
1068 * sec += (tmp << 13);
1069 * sec += (tmp << 14);
1070 * sec -= (tmp << 20);
1071 * sec += (tmp << 23);
1072 * sec += (tmp << 24);
1073 * sec += (tmp << 27);
1074 * sec += (tmp << 31);
1077 * Next, we're going to reduce this 64-bit computation to a 32-bit
1078 * computation. We begin by rewriting the above algorithm to use relative
1079 * shifts instead of absolute shifts. That is, instead of computing
1080 * tmp << 6, tmp << 8, tmp << 13, etc, we'll just shift incrementally:
1081 * tmp <<= 6, tmp <<= 2 (== 8 - 6), tmp <<= 5 (== 13 - 8), etc:
1083 * tmp = (uint32_t) (hrt >> 30);
1085 * tmp <<= 6; sec += tmp;
1086 * tmp <<= 2; sec -= tmp;
1087 * tmp <<= 5; sec += tmp;
1088 * tmp <<= 1; sec += tmp;
1089 * tmp <<= 6; sec -= tmp;
1090 * tmp <<= 3; sec += tmp;
1091 * tmp <<= 1; sec += tmp;
1092 * tmp <<= 3; sec += tmp;
1093 * tmp <<= 4; sec += tmp;
1096 * Now for the final step. Instead of throwing away the low 32 bits at
1097 * the end, we can throw them away as we go, only keeping the high 32 bits
1098 * of the product at each step. So, for example, where we now have
1100 * tmp <<= 6; sec = sec + tmp;
1101 * we will instead have
1102 * tmp <<= 6; sec = (sec + tmp) >> 6;
1103 * which is equivalent to
1104 * sec = (sec >> 6) + tmp;
1106 * The final shift ("sec >>= 32") goes away.
1108 * All we're really doing here is long multiplication, just like we learned in
1109 * grade school, except that at each step, we only look at the leftmost 32
1110 * columns. The cumulative error is, at most, the sum of all the bits we
1111 * throw away, which is 2^-32 + 2^-31 + ... + 2^-2 + 2^-1 == 1 - 2^-32.
1112 * Thus, the final result ("sec") is correct to +/- 1.
1114 * It turns out to be important to keep "sec" positive at each step, because
1115 * we don't want to have to explicitly extend the sign bit. Therefore,
1116 * starting with the last line of code above, each line that would have read
1117 * "sec = (sec >> n) - tmp" must be changed to "sec = tmp - (sec >> n)", and
1118 * the operators (+ or -) in all previous lines must be toggled accordingly.
1119 * Thus, we end up with:
1121 * tmp = (uint32_t) (hrt >> 30);
1122 * sec = tmp + (sec >> 6);
1123 * sec = tmp - (tmp >> 2);
1124 * sec = tmp - (sec >> 5);
1125 * sec = tmp + (sec >> 1);
1126 * sec = tmp - (sec >> 6);
1127 * sec = tmp - (sec >> 3);
1128 * sec = tmp + (sec >> 1);
1129 * sec = tmp + (sec >> 3);
1130 * sec = tmp + (sec >> 4);
1132 * This yields a value for sec that is accurate to +1/-1, so we have two
1133 * cases to deal with. The mysterious-looking "+ 7" in the code below biases
1134 * the rounding toward zero, so that sec is always less than or equal to
1135 * the correct value. With this modified code, sec is accurate to +0/-2, with
1136 * the -2 case being very rare in practice. With this change, we only have to
1137 * deal with one case (sec too small) in the cleanup code.
1139 * The other modification we make is to delete the second line above
1140 * ("sec = tmp + (sec >> 6);"), since it only has an effect when bit 31 is
1141 * set, and the cleanup code can handle that rare case. This reduces the
1142 * *guaranteed* accuracy of sec to +0/-3, but speeds up the common cases.
1144 * Finally, we compute nsec = hrt - (sec * 1,000,000,000). nsec will always
1145 * be positive (since sec is never too large), and will at most be equal to
1146 * the error in sec (times 1,000,000,000) plus the low-order 30 bits of hrt.
1147 * Thus, nsec < 3 * 1,000,000,000 + 2^30, which is less than 2^32, so we can
1148 * safely assume that nsec fits in 32 bits. Consequently, when we compute
1149 * sec * 1,000,000,000, we only need the low 32 bits, so we can just do 32-bit
1150 * arithmetic and let the high-order bits fall off the end.
1152 * Since nsec < 3 * 1,000,000,000 + 2^30 == 4,073,741,824, the cleanup loop:
1154 * while (nsec >= NANOSEC) {
1159 * is guaranteed to complete in at most 4 iterations. In practice, the loop
1160 * completes in 0 or 1 iteration over 95% of the time.
1162 * On an SS2, this implementation of hrt2ts() takes 1.7 usec, versus about
1163 * 35 usec for software division -- about 20 times faster.
1166 hrt2ts(hrtime_t hrt
, timestruc_t
*tsp
)
1168 uint32_t sec
, nsec
, tmp
;
1170 tmp
= (uint32_t)(hrt
>> 30);
1171 sec
= tmp
- (tmp
>> 2);
1172 sec
= tmp
- (sec
>> 5);
1173 sec
= tmp
+ (sec
>> 1);
1174 sec
= tmp
- (sec
>> 6) + 7;
1175 sec
= tmp
- (sec
>> 3);
1176 sec
= tmp
+ (sec
>> 1);
1177 sec
= tmp
+ (sec
>> 3);
1178 sec
= tmp
+ (sec
>> 4);
1179 tmp
= (sec
<< 7) - sec
- sec
- sec
;
1180 tmp
= (tmp
<< 7) - tmp
- tmp
- tmp
;
1181 tmp
= (tmp
<< 7) - tmp
- tmp
- tmp
;
1182 nsec
= (uint32_t)hrt
- (tmp
<< 9);
1183 while (nsec
>= NANOSEC
) {
1187 tsp
->tv_sec
= (time_t)sec
;
1188 tsp
->tv_nsec
= nsec
;
1192 * Convert from timestruc_t to hrtime_t.
1194 * The code below is equivalent to:
1196 * hrt = tsp->tv_sec * NANOSEC + tsp->tv_nsec;
1198 * but requires no integer multiply.
1201 ts2hrt(const timestruc_t
*tsp
)
1206 hrt
= (hrt
<< 7) - hrt
- hrt
- hrt
;
1207 hrt
= (hrt
<< 7) - hrt
- hrt
- hrt
;
1208 hrt
= (hrt
<< 7) - hrt
- hrt
- hrt
;
1209 hrt
= (hrt
<< 9) + tsp
->tv_nsec
;
1214 * For the various 32-bit "compatibility" paths in the system.
1217 hrt2ts32(hrtime_t hrt
, timestruc32_t
*ts32p
)
1222 TIMESPEC_TO_TIMESPEC32(ts32p
, &ts
);
1226 * If this ever becomes performance critical (ha!), we can borrow the
1227 * code from ts2hrt(), above, to multiply tv_sec by 1,000,000,000 and the
1228 * straightforward (x << 10) - (x << 5) + (x << 3) to multiply tv_usec by
1229 * 1,000. For now, we'll opt for readability (besides, the compiler does
1230 * a passable job of optimizing constant multiplication into shifts and adds).
1233 tv2hrt(struct timeval
*tvp
)
1235 return ((hrtime_t
)tvp
->tv_sec
* NANOSEC
+
1236 (hrtime_t
)tvp
->tv_usec
* (NANOSEC
/ MICROSEC
));
1240 hrt2tv(hrtime_t hrt
, struct timeval
*tvp
)
1242 uint32_t sec
, nsec
, tmp
;
1245 tmp
= (uint32_t)(hrt
>> 30);
1246 sec
= tmp
- (tmp
>> 2);
1247 sec
= tmp
- (sec
>> 5);
1248 sec
= tmp
+ (sec
>> 1);
1249 sec
= tmp
- (sec
>> 6) + 7;
1250 sec
= tmp
- (sec
>> 3);
1251 sec
= tmp
+ (sec
>> 1);
1252 sec
= tmp
+ (sec
>> 3);
1253 sec
= tmp
+ (sec
>> 4);
1254 tmp
= (sec
<< 7) - sec
- sec
- sec
;
1255 tmp
= (tmp
<< 7) - tmp
- tmp
- tmp
;
1256 tmp
= (tmp
<< 7) - tmp
- tmp
- tmp
;
1257 nsec
= (uint32_t)hrt
- (tmp
<< 9);
1258 while (nsec
>= NANOSEC
) {
1262 tvp
->tv_sec
= (time_t)sec
;
1264 * this routine is very similar to hrt2ts, but requires microseconds
1265 * instead of nanoseconds, so an integer divide by 1000 routine
1266 * completes the conversion
1268 t
= (nsec
>> 7) + (nsec
>> 8) + (nsec
>> 12);
1269 q
= (nsec
>> 1) + t
+ (nsec
>> 15) + (t
>> 11) + (t
>> 14);
1272 tvp
->tv_usec
= q
+ ((r
+ 24) >> 10);
1277 nanosleep(timespec_t
*rqtp
, timespec_t
*rmtp
)
1284 model_t datamodel
= get_udatamodel();
1286 timecheck
= timechanged
;
1289 if (datamodel
== DATAMODEL_NATIVE
) {
1290 if (copyin(rqtp
, &rqtime
, sizeof (rqtime
)))
1291 return (set_errno(EFAULT
));
1293 timespec32_t rqtime32
;
1295 if (copyin(rqtp
, &rqtime32
, sizeof (rqtime32
)))
1296 return (set_errno(EFAULT
));
1297 TIMESPEC32_TO_TIMESPEC(&rqtime
, &rqtime32
);
1300 if (rqtime
.tv_sec
< 0 || rqtime
.tv_nsec
< 0 ||
1301 rqtime
.tv_nsec
>= NANOSEC
)
1302 return (set_errno(EINVAL
));
1304 if (timerspecisset(&rqtime
)) {
1305 timespecadd(&rqtime
, &now
);
1306 mutex_enter(&curthread
->t_delay_lock
);
1307 while ((ret
= cv_waituntil_sig(&curthread
->t_delay_cv
,
1308 &curthread
->t_delay_lock
, &rqtime
, timecheck
)) > 0)
1310 mutex_exit(&curthread
->t_delay_lock
);
1315 * If cv_waituntil_sig() returned due to a signal, and
1316 * there is time remaining, then set the time remaining.
1317 * Else set time remaining to zero
1319 rmtime
.tv_sec
= rmtime
.tv_nsec
= 0;
1321 timespec_t delta
= rqtime
;
1324 timespecsub(&delta
, &now
);
1325 if (delta
.tv_sec
> 0 || (delta
.tv_sec
== 0 &&
1330 if (datamodel
== DATAMODEL_NATIVE
) {
1331 if (copyout(&rmtime
, rmtp
, sizeof (rmtime
)))
1332 return (set_errno(EFAULT
));
1334 timespec32_t rmtime32
;
1336 TIMESPEC_TO_TIMESPEC32(&rmtime32
, &rmtime
);
1337 if (copyout(&rmtime32
, rmtp
, sizeof (rmtime32
)))
1338 return (set_errno(EFAULT
));
1343 return (set_errno(EINTR
));
/*
 * Routines to convert standard UNIX time (seconds since Jan 1, 1970)
 * into year/month/day/hour/minute/second format, and back again.
 * Note: these routines require tod_lock held to protect cached state.
 */
/*
 * Cumulative day counts, indexed by month + 16 * (year & 3), with months
 * numbered from 1.  Entry [m] is the number of days in the year that
 * precede month m, so [m + 1] - [m] is the length of month m.  Each group
 * of 16 entries is one row; row 0 ((year & 3) == 0) is the leap-year row.
 * The unused slots in every row are zero.
 */
static const int days_thru_month[64] = {
	0, 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366, 0, 0,
	0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0,
	0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0,
	0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0,
};
1359 todinfo_t saved_tod
;
1360 int saved_utc
= -60;
1363 utc_to_tod(time_t utc
)
1365 long dse
, day
, month
, year
;
1368 ASSERT(MUTEX_HELD(&tod_lock
));
1371 * Note that tod_set_prev() assumes utc will be set to zero in
1372 * the case of it being negative. Consequently, any change made
1373 * to this behavior would have to be reflected in that function
1376 if (utc
< 0) /* should never happen */
1379 saved_tod
.tod_sec
+= utc
- saved_utc
;
1381 if (saved_tod
.tod_sec
>= 0 && saved_tod
.tod_sec
< 60)
1382 return (saved_tod
); /* only the seconds changed */
1384 dse
= utc
/ 86400; /* days since epoch */
1386 tod
.tod_sec
= utc
% 60;
1387 tod
.tod_min
= (utc
% 3600) / 60;
1388 tod
.tod_hour
= (utc
% 86400) / 3600;
1389 tod
.tod_dow
= (dse
+ 4) % 7 + 1; /* epoch was a Thursday */
1391 year
= dse
/ 365 + 72; /* first guess -- always a bit too large */
1394 day
= dse
- 365 * (year
- 70) - ((year
- 69) >> 2);
1397 month
= ((year
& 3) << 4) + 1;
1398 while (day
>= days_thru_month
[month
+ 1])
1401 tod
.tod_day
= day
- days_thru_month
[month
] + 1;
1402 tod
.tod_month
= month
& 15;
1403 tod
.tod_year
= year
;
1410 tod_to_utc(todinfo_t tod
)
1413 int year
= tod
.tod_year
;
1414 int month
= tod
.tod_month
+ ((year
& 3) << 4);
1416 /* only warn once, not each time called */
1417 static int year_warn
= 1;
1418 static int month_warn
= 1;
1419 static int day_warn
= 1;
1420 static int hour_warn
= 1;
1421 static int min_warn
= 1;
1422 static int sec_warn
= 1;
1423 int days_diff
= days_thru_month
[month
+ 1] - days_thru_month
[month
];
1426 ASSERT(MUTEX_HELD(&tod_lock
));
1429 if (year_warn
&& (year
< 70 || year
> 8029)) {
1431 "The hardware real-time clock appears to have the "
1432 "wrong years value %d -- time needs to be reset\n",
1437 if (month_warn
&& (tod
.tod_month
< 1 || tod
.tod_month
> 12)) {
1439 "The hardware real-time clock appears to have the "
1440 "wrong months value %d -- time needs to be reset\n",
1445 if (day_warn
&& (tod
.tod_day
< 1 || tod
.tod_day
> days_diff
)) {
1447 "The hardware real-time clock appears to have the "
1448 "wrong days value %d -- time needs to be reset\n",
1453 if (hour_warn
&& (tod
.tod_hour
< 0 || tod
.tod_hour
> 23)) {
1455 "The hardware real-time clock appears to have the "
1456 "wrong hours value %d -- time needs to be reset\n",
1461 if (min_warn
&& (tod
.tod_min
< 0 || tod
.tod_min
> 59)) {
1463 "The hardware real-time clock appears to have the "
1464 "wrong minutes value %d -- time needs to be reset\n",
1469 if (sec_warn
&& (tod
.tod_sec
< 0 || tod
.tod_sec
> 59)) {
1471 "The hardware real-time clock appears to have the "
1472 "wrong seconds value %d -- time needs to be reset\n",
1478 utc
= (year
- 70); /* next 3 lines: utc = 365y + y/4 */
1479 utc
+= (utc
<< 3) + (utc
<< 6);
1480 utc
+= (utc
<< 2) + ((year
- 69) >> 2);
1481 utc
+= days_thru_month
[month
] + tod
.tod_day
- 1;
1482 utc
= (utc
<< 3) + (utc
<< 4) + tod
.tod_hour
; /* 24 * day + hour */
1483 utc
= (utc
<< 6) - (utc
<< 2) + tod
.tod_min
; /* 60 * hour + min */
1484 utc
= (utc
<< 6) - (utc
<< 2) + tod
.tod_sec
; /* 60 * min + sec */