kernel - Fix excessive call stack depth on stuck interrupt
[dragonfly.git] / sys / kern / kern_time.c
/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_time.c	8.1 (Berkeley) 6/10/93
 * $FreeBSD: src/sys/kern/kern_time.c,v 1.68.2.1 2002/10/01 08:00:41 bde Exp $
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/sysproto.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/kernel.h>
#include <sys/sysent.h>
#include <sys/sysunion.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/sysctl.h>
#include <sys/kern_syscall.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>

#include <sys/msgport2.h>
#include <sys/spinlock2.h>
#include <sys/thread2.h>
extern struct spinlock ntp_spin;

struct timezone tz;

/*
 * Time of day and interval timer support.
 *
 * These routines provide the kernel entry points to get and set
 * the time-of-day and per-process interval timers.  Subroutines
 * here provide support for adding and subtracting timeval structures
 * and decrementing interval timers, optionally reloading the interval
 * timers when they expire.
 */

static int	settime(struct timeval *);
static void	timevalfix(struct timeval *);
static void	realitexpire(void *arg);

/*
 * Nanosleep tries very hard to sleep for a precisely requested time
 * interval, down to 1uS.  The administrator can impose a minimum delay
 * and a delay below which we hard-loop instead of initiating a timer
 * interrupt and sleeping.
 *
 * For machines under high loads it might be beneficial to increase min_us
 * to e.g. 1000uS (1ms) so spinning processes sleep meaningfully.
 */
static int     nanosleep_min_us = 10;
static int     nanosleep_hard_us = 100;
static int     gettimeofday_quick = 0;
SYSCTL_INT(_kern, OID_AUTO, nanosleep_min_us, CTLFLAG_RW,
           &nanosleep_min_us, 0, "");
SYSCTL_INT(_kern, OID_AUTO, nanosleep_hard_us, CTLFLAG_RW,
           &nanosleep_hard_us, 0, "");
SYSCTL_INT(_kern, OID_AUTO, gettimeofday_quick, CTLFLAG_RW,
           &gettimeofday_quick, 0, "");
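
/*
 * Example (illustrative addition, not from the original source): since
 * the knobs above are exported with CTLFLAG_RW, they can be raised at
 * runtime on a heavily loaded machine, e.g.
 *
 *	sysctl kern.nanosleep_min_us=1000
 *	sysctl kern.nanosleep_hard_us=5000
 */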
static struct lock masterclock_lock = LOCK_INITIALIZER("mstrclk", 0, 0);

static int
settime(struct timeval *tv)
{
        struct timeval delta, tv1, tv2;
        static struct timeval maxtime, laststep;
        struct timespec ts;
        int origcpu;

        if ((origcpu = mycpu->gd_cpuid) != 0)
                lwkt_setcpu_self(globaldata_find(0));

        crit_enter();
        microtime(&tv1);
        delta = *tv;
        timevalsub(&delta, &tv1);

        /*
         * If the system is secure, we do not allow the time to be
         * set to a value earlier than 1 second less than the highest
         * time we have yet seen.  The worst a miscreant can do in
         * this circumstance is "freeze" time.  He couldn't go
         * back to the past.
         *
         * We similarly do not allow the clock to be stepped more
         * than one second, nor more than once per second.  This allows
         * a miscreant to make the clock march double-time, but no worse.
         */
        if (securelevel > 1) {
                if (delta.tv_sec < 0 || delta.tv_usec < 0) {
                        /*
                         * Update maxtime to latest time we've seen.
                         */
                        if (tv1.tv_sec > maxtime.tv_sec)
                                maxtime = tv1;
                        tv2 = *tv;
                        timevalsub(&tv2, &maxtime);
                        if (tv2.tv_sec < -1) {
                                tv->tv_sec = maxtime.tv_sec - 1;
                                kprintf("Time adjustment clamped to -1 second\n");
                        }
                } else {
                        if (tv1.tv_sec == laststep.tv_sec) {
                                crit_exit();
                                return (EPERM);
                        }
                        if (delta.tv_sec > 1) {
                                tv->tv_sec = tv1.tv_sec + 1;
                                kprintf("Time adjustment clamped to +1 second\n");
                        }
                        laststep = *tv;
                }
        }

        ts.tv_sec = tv->tv_sec;
        ts.tv_nsec = tv->tv_usec * 1000;
        set_timeofday(&ts);
        crit_exit();

        if (origcpu != 0)
                lwkt_setcpu_self(globaldata_find(origcpu));

        resettodr();
        return (0);
}
static void
get_process_cputime(struct proc *p, struct timespec *ats)
{
        struct rusage ru;

        lwkt_gettoken(&p->p_token);
        calcru_proc(p, &ru);
        lwkt_reltoken(&p->p_token);
        timevaladd(&ru.ru_utime, &ru.ru_stime);
        TIMEVAL_TO_TIMESPEC(&ru.ru_utime, ats);
}

static void
get_process_usertime(struct proc *p, struct timespec *ats)
{
        struct rusage ru;

        lwkt_gettoken(&p->p_token);
        calcru_proc(p, &ru);
        lwkt_reltoken(&p->p_token);
        TIMEVAL_TO_TIMESPEC(&ru.ru_utime, ats);
}

static void
get_curthread_cputime(struct timespec *ats)
{
        struct thread *td = curthread;
        struct timeval sys, user;

        calcru(td->td_lwp, &user, &sys);
        timevaladd(&user, &sys);
        TIMEVAL_TO_TIMESPEC(&user, ats);
}
/*
 * MPSAFE
 */
int
kern_clock_gettime(clockid_t clock_id, struct timespec *ats)
{
        struct proc *p;

        p = curproc;
        switch(clock_id) {
        case CLOCK_REALTIME:
        case CLOCK_REALTIME_PRECISE:
                nanotime(ats);
                break;
        case CLOCK_REALTIME_FAST:
                getnanotime(ats);
                break;
        case CLOCK_MONOTONIC:
        case CLOCK_MONOTONIC_PRECISE:
        case CLOCK_UPTIME:
        case CLOCK_UPTIME_PRECISE:
                nanouptime(ats);
                break;
        case CLOCK_MONOTONIC_FAST:
        case CLOCK_UPTIME_FAST:
                getnanouptime(ats);
                break;
        case CLOCK_VIRTUAL:
                get_process_usertime(p, ats);
                break;
        case CLOCK_PROF:
        case CLOCK_PROCESS_CPUTIME_ID:
                get_process_cputime(p, ats);
                break;
        case CLOCK_SECOND:
                ats->tv_sec = time_second;
                ats->tv_nsec = 0;
                break;
        case CLOCK_THREAD_CPUTIME_ID:
                get_curthread_cputime(ats);
                break;
        default:
                return (EINVAL);
        }
        return (0);
}

/*
 * MPSAFE
 */
int
sys_clock_gettime(struct clock_gettime_args *uap)
{
        struct timespec ats;
        int error;

        error = kern_clock_gettime(uap->clock_id, &ats);
        if (error == 0)
                error = copyout(&ats, uap->tp, sizeof(ats));

        return (error);
}
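
/*
 * Illustrative userland sketch (an addition for exposition; assumes the
 * extended clock ids above are visible to userland via <time.h>): the
 * *_FAST ids return the pre-computed getnano*() timestamp instead of
 * querying the hardware timer, trading precision for speed.
 *
 *	#include <stdio.h>
 *	#include <time.h>
 *
 *	struct timespec ts;
 *	if (clock_gettime(CLOCK_MONOTONIC_FAST, &ts) == 0)
 *		printf("%ld.%09ld since boot\n",
 *		    (long)ts.tv_sec, ts.tv_nsec);
 */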
int
kern_clock_settime(clockid_t clock_id, struct timespec *ats)
{
        struct thread *td = curthread;
        struct timeval atv;
        int error;

        if ((error = priv_check(td, PRIV_CLOCK_SETTIME)) != 0)
                return (error);
        if (clock_id != CLOCK_REALTIME)
                return (EINVAL);
        if (ats->tv_nsec < 0 || ats->tv_nsec >= 1000000000)
                return (EINVAL);

        lockmgr(&masterclock_lock, LK_EXCLUSIVE);
        TIMESPEC_TO_TIMEVAL(&atv, ats);
        error = settime(&atv);
        lockmgr(&masterclock_lock, LK_RELEASE);

        return (error);
}

/*
 * MPALMOSTSAFE
 */
int
sys_clock_settime(struct clock_settime_args *uap)
{
        struct timespec ats;
        int error;

        if ((error = copyin(uap->tp, &ats, sizeof(ats))) != 0)
                return (error);

        error = kern_clock_settime(uap->clock_id, &ats);

        return (error);
}
/*
 * MPSAFE
 */
int
kern_clock_getres(clockid_t clock_id, struct timespec *ts)
{
        ts->tv_sec = 0;
        switch(clock_id) {
        case CLOCK_REALTIME:
        case CLOCK_REALTIME_FAST:
        case CLOCK_REALTIME_PRECISE:
        case CLOCK_MONOTONIC:
        case CLOCK_MONOTONIC_FAST:
        case CLOCK_MONOTONIC_PRECISE:
        case CLOCK_UPTIME:
        case CLOCK_UPTIME_FAST:
        case CLOCK_UPTIME_PRECISE:
                /*
                 * Round up the result of the division cheaply
                 * by adding 1.  Rounding up is especially important
                 * if rounding down would give 0.  Perfect rounding
                 * is unimportant.
                 */
                ts->tv_nsec = 1000000000 / sys_cputimer->freq + 1;
                break;
        case CLOCK_VIRTUAL:
        case CLOCK_PROF:
                /* Accurately round up here because we can do so cheaply. */
                ts->tv_nsec = (1000000000 + hz - 1) / hz;
                break;
        case CLOCK_SECOND:
                ts->tv_sec = 1;
                ts->tv_nsec = 0;
                break;
        case CLOCK_THREAD_CPUTIME_ID:
        case CLOCK_PROCESS_CPUTIME_ID:
                ts->tv_nsec = 1000;
                break;
        default:
                return (EINVAL);
        }
        return (0);
}

/*
 * MPSAFE
 */
int
sys_clock_getres(struct clock_getres_args *uap)
{
        int error;
        struct timespec ts;

        error = kern_clock_getres(uap->clock_id, &ts);
        if (error == 0)
                error = copyout(&ts, uap->tp, sizeof(ts));

        return (error);
}
/*
 * nanosleep1()
 *
 *	This is a general helper function for nanosleep() (aka sleep() aka
 *	usleep()).
 *
 *	If there is less than one tick's worth of time left and
 *	we haven't done a yield, or the remaining microseconds is
 *	ridiculously low, do a yield.  This avoids having
 *	to deal with systimer overheads when the system is under
 *	heavy loads.  If we have done a yield already then use
 *	a systimer and an uninterruptible thread wait.
 *
 *	If there is more than a tick's worth of time left,
 *	calculate the baseline ticks and use an interruptible
 *	tsleep, then handle the fine-grained delay on the next
 *	loop.  This usually results in two sleeps occurring, a long one
 *	and a short one.
 *
 * MPSAFE
 */
static void
ns1_systimer(systimer_t info, int in_ipi __unused,
    struct intrframe *frame __unused)
{
        lwkt_schedule(info->data);
}

static int
nanosleep1(struct timespec *rqt, struct timespec *rmt)
{
        static int nanowait;
        struct timespec ts, ts2, ts3;
        struct timeval tv;
        int error;

        if (rqt->tv_nsec < 0 || rqt->tv_nsec >= 1000000000)
                return (EINVAL);
        /* XXX: imho this should return EINVAL at least for tv_sec < 0 */
        if (rqt->tv_sec < 0 || (rqt->tv_sec == 0 && rqt->tv_nsec == 0))
                return (0);
        nanouptime(&ts);
        timespecadd(&ts, rqt);          /* ts = target timestamp compare */
        TIMESPEC_TO_TIMEVAL(&tv, rqt);  /* tv = sleep interval */

        for (;;) {
                int ticks;
                struct systimer info;

                ticks = tv.tv_usec / ustick;    /* approximate */

                if (tv.tv_sec == 0 && ticks == 0) {
                        thread_t td = curthread;
                        if (tv.tv_usec > 0 && tv.tv_usec < nanosleep_min_us)
                                tv.tv_usec = nanosleep_min_us;
                        if (tv.tv_usec < nanosleep_hard_us) {
                                lwkt_user_yield();
                                cpu_pause();
                        } else {
                                crit_enter_quick(td);
                                systimer_init_oneshot(&info, ns1_systimer,
                                                td, tv.tv_usec);
                                lwkt_deschedule_self(td);
                                crit_exit_quick(td);
                                lwkt_switch();
                                systimer_del(&info); /* make sure it's gone */
                        }
                        error = iscaught(td->td_lwp);
                } else if (tv.tv_sec == 0) {
                        error = tsleep(&nanowait, PCATCH, "nanslp", ticks);
                } else {
                        ticks = tvtohz_low(&tv); /* also handles overflow */
                        error = tsleep(&nanowait, PCATCH, "nanslp", ticks);
                }
                nanouptime(&ts2);
                if (error && error != EWOULDBLOCK) {
                        if (error == ERESTART)
                                error = EINTR;
                        if (rmt != NULL) {
                                timespecsub(&ts, &ts2);
                                if (ts.tv_sec < 0)
                                        timespecclear(&ts);
                                *rmt = ts;
                        }
                        return (error);
                }
                if (timespeccmp(&ts2, &ts, >=))
                        return (0);
                ts3 = ts;
                timespecsub(&ts3, &ts2);
                TIMESPEC_TO_TIMEVAL(&tv, &ts3);
        }
}
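
/*
 * Worked example of the two-stage strategy above (numbers assumed for
 * illustration): with hz=100 (ustick = 10000us), a 25ms request first
 * takes the interruptible tsleep() path for 2 ticks (~20ms); on the
 * next loop iteration the ~5ms residual is below one tick, so it is
 * finished with a one-shot systimer (or, below nanosleep_hard_us, with
 * a plain yield/pause loop).
 */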
/*
 * MPSAFE
 */
int
sys_nanosleep(struct nanosleep_args *uap)
{
        int error;
        struct timespec rqt;
        struct timespec rmt;

        error = copyin(uap->rqtp, &rqt, sizeof(rqt));
        if (error)
                return (error);

        error = nanosleep1(&rqt, &rmt);

        /*
         * copyout the residual if nanosleep was interrupted.
         */
        if (error && uap->rmtp) {
                int error2;

                error2 = copyout(&rmt, uap->rmtp, sizeof(rmt));
                if (error2)
                        error = error2;
        }
        return (error);
}
/*
 * The gettimeofday() system call is supposed to return a fine-grained
 * realtime stamp.  However, acquiring a fine-grained stamp can create a
 * bottleneck when multiple cpu cores are trying to access e.g. the
 * HPET hardware timer all at the same time, so we have a sysctl that
 * allows its behavior to be changed to a more coarse-grained timestamp
 * which does not have to access a hardware timer.
 */
int
sys_gettimeofday(struct gettimeofday_args *uap)
{
        struct timeval atv;
        int error = 0;

        if (uap->tp) {
                if (gettimeofday_quick)
                        getmicrotime(&atv);
                else
                        microtime(&atv);
                if ((error = copyout((caddr_t)&atv, (caddr_t)uap->tp,
                    sizeof (atv))))
                        return (error);
        }
        if (uap->tzp)
                error = copyout((caddr_t)&tz, (caddr_t)uap->tzp,
                    sizeof (tz));
        return (error);
}
/*
 * MPALMOSTSAFE
 */
int
sys_settimeofday(struct settimeofday_args *uap)
{
        struct thread *td = curthread;
        struct timeval atv;
        struct timezone atz;
        int error;

        if ((error = priv_check(td, PRIV_SETTIMEOFDAY)))
                return (error);
        /*
         * Verify all parameters before changing time.
         *
         * XXX: We do not allow the time to be set to 0.0, which also by
         *      happy coincidence works around a pkgsrc bulk build bug.
         */
        if (uap->tv) {
                if ((error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
                                    sizeof(atv))))
                        return (error);
                if (atv.tv_usec < 0 || atv.tv_usec >= 1000000)
                        return (EINVAL);
                if (atv.tv_sec == 0 && atv.tv_usec == 0)
                        return (EINVAL);
        }
        if (uap->tzp &&
            (error = copyin((caddr_t)uap->tzp, (caddr_t)&atz, sizeof(atz))))
                return (error);

        lockmgr(&masterclock_lock, LK_EXCLUSIVE);
        if (uap->tv && (error = settime(&atv))) {
                lockmgr(&masterclock_lock, LK_RELEASE);
                return (error);
        }
        lockmgr(&masterclock_lock, LK_RELEASE);

        if (uap->tzp)
                tz = atz;
        return (0);
}
/*
 * WARNING! Run with ntp_spin held
 */
static void
kern_adjtime_common(void)
{
        if ((ntp_delta >= 0 && ntp_delta < ntp_default_tick_delta) ||
            (ntp_delta < 0 && ntp_delta > -ntp_default_tick_delta))
                ntp_tick_delta = ntp_delta;
        else if (ntp_delta > ntp_big_delta)
                ntp_tick_delta = 10 * ntp_default_tick_delta;
        else if (ntp_delta < -ntp_big_delta)
                ntp_tick_delta = -10 * ntp_default_tick_delta;
        else if (ntp_delta > 0)
                ntp_tick_delta = ntp_default_tick_delta;
        else
                ntp_tick_delta = -ntp_default_tick_delta;
}

void
kern_adjtime(int64_t delta, int64_t *odelta)
{
        spin_lock(&ntp_spin);
        *odelta = ntp_delta;
        ntp_delta = delta;
        kern_adjtime_common();
        spin_unlock(&ntp_spin);
}

static void
kern_get_ntp_delta(int64_t *delta)
{
        *delta = ntp_delta;
}

void
kern_reladjtime(int64_t delta)
{
        spin_lock(&ntp_spin);
        ntp_delta += delta;
        kern_adjtime_common();
        spin_unlock(&ntp_spin);
}

static void
kern_adjfreq(int64_t rate)
{
        spin_lock(&ntp_spin);
        ntp_tick_permanent = rate;
        spin_unlock(&ntp_spin);
}
/*
 * MPALMOSTSAFE
 */
int
sys_adjtime(struct adjtime_args *uap)
{
        struct thread *td = curthread;
        struct timeval atv;
        int64_t ndelta, odelta;
        int error;

        if ((error = priv_check(td, PRIV_ADJTIME)))
                return (error);
        error = copyin(uap->delta, &atv, sizeof(struct timeval));
        if (error)
                return (error);

        /*
         * Compute the total correction and the rate at which to apply it.
         * Round the adjustment down to a whole multiple of the per-tick
         * delta, so that after some number of incremental changes in
         * hardclock(), tickdelta will become zero, lest the correction
         * overshoot and start taking us away from the desired final time.
         */
        ndelta = (int64_t)atv.tv_sec * 1000000000 + atv.tv_usec * 1000;
        kern_adjtime(ndelta, &odelta);

        if (uap->olddelta) {
                atv.tv_sec = odelta / 1000000000;
                atv.tv_usec = odelta % 1000000000 / 1000;
                copyout(&atv, uap->olddelta, sizeof(struct timeval));
        }
        return (0);
}
static int
sysctl_adjtime(SYSCTL_HANDLER_ARGS)
{
        int64_t delta;
        int error;

        if (req->newptr != NULL) {
                if (priv_check(curthread, PRIV_ROOT))
                        return (EPERM);
                error = SYSCTL_IN(req, &delta, sizeof(delta));
                if (error)
                        return (error);
                kern_reladjtime(delta);
        }

        if (req->oldptr)
                kern_get_ntp_delta(&delta);
        error = SYSCTL_OUT(req, &delta, sizeof(delta));
        return (error);
}

/*
 * delta is in nanoseconds.
 */
static int
sysctl_delta(SYSCTL_HANDLER_ARGS)
{
        int64_t delta, old_delta;
        int error;

        if (req->newptr != NULL) {
                if (priv_check(curthread, PRIV_ROOT))
                        return (EPERM);
                error = SYSCTL_IN(req, &delta, sizeof(delta));
                if (error)
                        return (error);
                kern_adjtime(delta, &old_delta);
        }

        if (req->oldptr != NULL)
                kern_get_ntp_delta(&old_delta);
        error = SYSCTL_OUT(req, &old_delta, sizeof(old_delta));
        return (error);
}

/*
 * frequency is in nanoseconds per second shifted left 32.
 * kern_adjfreq() needs it in nanoseconds per tick shifted left 32.
 */
static int
sysctl_adjfreq(SYSCTL_HANDLER_ARGS)
{
        int64_t freqdelta;
        int error;

        if (req->newptr != NULL) {
                if (priv_check(curthread, PRIV_ROOT))
                        return (EPERM);
                error = SYSCTL_IN(req, &freqdelta, sizeof(freqdelta));
                if (error)
                        return (error);

                freqdelta /= hz;
                kern_adjfreq(freqdelta);
        }

        if (req->oldptr != NULL)
                freqdelta = ntp_tick_permanent * hz;
        error = SYSCTL_OUT(req, &freqdelta, sizeof(freqdelta));
        if (error)
                return (error);

        return (0);
}
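
/*
 * Worked example of the fixed-point conversion above (values assumed
 * for illustration): to slew permanently at 1 ppm (1000 nanoseconds
 * per second), userland writes 1000 << 32 to kern.ntp.permanent; with
 * hz=100 the handler stores (1000 << 32) / 100, i.e. 10 nanoseconds
 * per tick, still shifted left 32 bits for sub-nanosecond resolution.
 */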
SYSCTL_NODE(_kern, OID_AUTO, ntp, CTLFLAG_RW, 0, "NTP related controls");
SYSCTL_PROC(_kern_ntp, OID_AUTO, permanent,
    CTLTYPE_QUAD|CTLFLAG_RW, 0, 0,
    sysctl_adjfreq, "Q", "permanent correction per second");
SYSCTL_PROC(_kern_ntp, OID_AUTO, delta,
    CTLTYPE_QUAD|CTLFLAG_RW, 0, 0,
    sysctl_delta, "Q", "one-time delta");
SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, big_delta, CTLFLAG_RD,
    &ntp_big_delta, sizeof(ntp_big_delta), "Q",
    "threshold for fast adjustment");
SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, tick_delta, CTLFLAG_RD,
    &ntp_tick_delta, sizeof(ntp_tick_delta), "LU",
    "per-tick adjustment");
SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, default_tick_delta, CTLFLAG_RD,
    &ntp_default_tick_delta, sizeof(ntp_default_tick_delta), "LU",
    "default per-tick adjustment");
SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, next_leap_second, CTLFLAG_RW,
    &ntp_leap_second, sizeof(ntp_leap_second), "LU",
    "next leap second");
SYSCTL_INT(_kern_ntp, OID_AUTO, insert_leap_second, CTLFLAG_RW,
    &ntp_leap_insert, 0, "insert or remove leap second");
SYSCTL_PROC(_kern_ntp, OID_AUTO, adjust,
    CTLTYPE_QUAD|CTLFLAG_RW, 0, 0,
    sysctl_adjtime, "Q", "relative adjust for delta");
/*
 * Get value of an interval timer.  The process virtual and
 * profiling virtual time timers are kept in the p_stats area, since
 * they can be swapped out.  These are kept internally in the
 * way they are specified externally: in time until they expire.
 *
 * The real time interval timer is kept in the process table slot
 * for the process, and its value (it_value) is kept as an
 * absolute time rather than as a delta, so that it is easy to keep
 * periodic real-time signals from drifting.
 *
 * Virtual time timers are processed in the hardclock() routine of
 * kern_clock.c.  The real time timer is processed by a timeout
 * routine, called from the softclock() routine.  Since a callout
 * may be delayed in real time due to interrupt processing in the system,
 * it is possible for the real time timeout routine (realitexpire, given
 * below) to be delayed in real time past when it is supposed to occur.
 * It does not suffice, therefore, to reload the real timer's .it_value
 * from its .it_interval.  Rather, we compute the next time in absolute
 * time the timer should go off.
 *
 * MPALMOSTSAFE
 */
int
sys_getitimer(struct getitimer_args *uap)
{
        struct proc *p = curproc;
        struct timeval ctv;
        struct itimerval aitv;

        if (uap->which > ITIMER_PROF)
                return (EINVAL);
        lwkt_gettoken(&p->p_token);
        if (uap->which == ITIMER_REAL) {
                /*
                 * Convert from absolute to relative time in .it_value
                 * part of real time timer.  If time for real time timer
                 * has passed return 0, else return difference between
                 * current time and time for the timer to go off.
                 */
                aitv = p->p_realtimer;
                if (timevalisset(&aitv.it_value)) {
                        getmicrouptime(&ctv);
                        if (timevalcmp(&aitv.it_value, &ctv, <))
                                timevalclear(&aitv.it_value);
                        else
                                timevalsub(&aitv.it_value, &ctv);
                }
        } else {
                aitv = p->p_timer[uap->which];
        }
        lwkt_reltoken(&p->p_token);
        return (copyout(&aitv, uap->itv, sizeof (struct itimerval)));
}
/*
 * MPALMOSTSAFE
 */
int
sys_setitimer(struct setitimer_args *uap)
{
        struct itimerval aitv;
        struct timeval ctv;
        struct itimerval *itvp;
        struct proc *p = curproc;
        int error;

        if (uap->which > ITIMER_PROF)
                return (EINVAL);
        itvp = uap->itv;
        if (itvp && (error = copyin((caddr_t)itvp, (caddr_t)&aitv,
            sizeof(struct itimerval))))
                return (error);
        if ((uap->itv = uap->oitv) &&
            (error = sys_getitimer((struct getitimer_args *)uap)))
                return (error);
        if (itvp == NULL)
                return (0);
        if (itimerfix(&aitv.it_value))
                return (EINVAL);
        if (!timevalisset(&aitv.it_value))
                timevalclear(&aitv.it_interval);
        else if (itimerfix(&aitv.it_interval))
                return (EINVAL);
        lwkt_gettoken(&p->p_token);
        if (uap->which == ITIMER_REAL) {
                if (timevalisset(&p->p_realtimer.it_value))
                        callout_stop_sync(&p->p_ithandle);
                if (timevalisset(&aitv.it_value))
                        callout_reset(&p->p_ithandle,
                            tvtohz_high(&aitv.it_value), realitexpire, p);
                getmicrouptime(&ctv);
                timevaladd(&aitv.it_value, &ctv);
                p->p_realtimer = aitv;
        } else {
                p->p_timer[uap->which] = aitv;
                switch(uap->which) {
                case ITIMER_VIRTUAL:
                        p->p_flags &= ~P_SIGVTALRM;
                        break;
                case ITIMER_PROF:
                        p->p_flags &= ~P_SIGPROF;
                        break;
                }
        }
        lwkt_reltoken(&p->p_token);
        return (0);
}
/*
 * Real interval timer expired:
 * send process whose timer expired an alarm signal.
 * If time is not set up to reload, then just return.
 * Else compute next time timer should go off which is > current time.
 * This is where delay in processing this timeout causes multiple
 * SIGALRM calls to be compressed into one.
 * tvtohz_high() always adds 1 to allow for the time until the next clock
 * interrupt being strictly less than 1 clock tick, but we don't want
 * that here since we want to appear to be in sync with the clock
 * interrupt even when we're delayed.
 */
static
void
realitexpire(void *arg)
{
        struct proc *p;
        struct timeval ctv, ntv;

        p = (struct proc *)arg;
        PHOLD(p);
        lwkt_gettoken(&p->p_token);
        ksignal(p, SIGALRM);
        if (!timevalisset(&p->p_realtimer.it_interval)) {
                timevalclear(&p->p_realtimer.it_value);
                goto done;
        }
        for (;;) {
                timevaladd(&p->p_realtimer.it_value,
                           &p->p_realtimer.it_interval);
                getmicrouptime(&ctv);
                if (timevalcmp(&p->p_realtimer.it_value, &ctv, >)) {
                        ntv = p->p_realtimer.it_value;
                        timevalsub(&ntv, &ctv);
                        callout_reset(&p->p_ithandle, tvtohz_low(&ntv),
                                      realitexpire, p);
                        goto done;
                }
        }
done:
        lwkt_reltoken(&p->p_token);
        PRELE(p);
}
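
/*
 * Worked example (numbers assumed): with it_interval = 10ms and this
 * callout delayed 35ms past the expiry it is signalling, the for (;;)
 * loop above advances it_value by three more intervals before finding
 * a value greater than the current time, so the missed expirations are
 * compressed into the single SIGALRM already sent and the timer stays
 * phase-locked to its original schedule rather than drifting by the
 * delay.
 */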
/*
 * Used to validate itimer timeouts and utimes*() timespecs.
 */
int
itimerfix(struct timeval *tv)
{
        if (tv->tv_sec < 0 || tv->tv_usec < 0 || tv->tv_usec >= 1000000)
                return (EINVAL);
        if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < ustick)
                tv->tv_usec = ustick;
        return (0);
}

/*
 * Used to validate timeouts and utimes*() timespecs.
 */
int
itimespecfix(struct timespec *ts)
{
        if (ts->tv_sec < 0 || ts->tv_nsec < 0 || ts->tv_nsec >= 1000000000ULL)
                return (EINVAL);
        if (ts->tv_sec == 0 && ts->tv_nsec != 0 && ts->tv_nsec < nstick)
                ts->tv_nsec = nstick;
        return (0);
}
/*
 * Decrement an interval timer by a specified number
 * of microseconds, which must be less than a second,
 * i.e. < 1000000.  If the timer expires, then reload
 * it.  In this case, carry over (usec - old value) to
 * reduce the value reloaded into the timer so that
 * the timer does not drift.  This routine assumes
 * that it is called in a context where the timers
 * on which it is operating cannot change in value.
 */
int
itimerdecr(struct itimerval *itp, int usec)
{
        if (itp->it_value.tv_usec < usec) {
                if (itp->it_value.tv_sec == 0) {
                        /* expired, and already in next interval */
                        usec -= itp->it_value.tv_usec;
                        goto expire;
                }
                itp->it_value.tv_usec += 1000000;
                itp->it_value.tv_sec--;
        }
        itp->it_value.tv_usec -= usec;
        usec = 0;
        if (timevalisset(&itp->it_value))
                return (1);
        /* expired, exactly at end of interval */
expire:
        if (timevalisset(&itp->it_interval)) {
                itp->it_value = itp->it_interval;
                itp->it_value.tv_usec -= usec;
                if (itp->it_value.tv_usec < 0) {
                        itp->it_value.tv_usec += 1000000;
                        itp->it_value.tv_sec--;
                }
        } else
                itp->it_value.tv_usec = 0;      /* sec is already 0 */
        return (0);
}
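
/*
 * Worked example of the carry logic above (values assumed): with
 * it_value = 0.000500 and it_interval = 0.010000, a call with
 * usec = 1000 expires the timer with 500us of overshoot; the reload
 * becomes 0.010000 - 0.000500 = 0.009500, so successive expirations
 * stay on a strict 10ms grid instead of accumulating the overshoot.
 */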
/*
 * Add and subtract routines for timevals.
 * N.B.: subtract routine doesn't deal with
 * results which are before the beginning,
 * it just gets very confused in this case.
 * Caveat emptor.
 */
void
timevaladd(struct timeval *t1, const struct timeval *t2)
{
        t1->tv_sec += t2->tv_sec;
        t1->tv_usec += t2->tv_usec;
        timevalfix(t1);
}

void
timevalsub(struct timeval *t1, const struct timeval *t2)
{
        t1->tv_sec -= t2->tv_sec;
        t1->tv_usec -= t2->tv_usec;
        timevalfix(t1);
}

static void
timevalfix(struct timeval *t1)
{
        if (t1->tv_usec < 0) {
                t1->tv_sec--;
                t1->tv_usec += 1000000;
        }
        if (t1->tv_usec >= 1000000) {
                t1->tv_sec++;
                t1->tv_usec -= 1000000;
        }
}
/*
 * ratecheck(): simple time-based rate-limit checking.
 */
int
ratecheck(struct timeval *lasttime, const struct timeval *mininterval)
{
        struct timeval tv, delta;
        int rv = 0;

        getmicrouptime(&tv);            /* NB: 10ms precision */
        delta = tv;
        timevalsub(&delta, lasttime);

        /*
         * check for 0,0 is so that the message will be seen at least once,
         * even if interval is huge.
         */
        if (timevalcmp(&delta, mininterval, >=) ||
            (lasttime->tv_sec == 0 && lasttime->tv_usec == 0)) {
                *lasttime = tv;
                rv = 1;
        }

        return (rv);
}
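
/*
 * Illustrative usage sketch (caller-side names are assumptions, not
 * part of this file): a driver that wants to log a recurring fault at
 * most once every 10 seconds might do
 *
 *	static struct timeval lasterr;
 *	static const struct timeval errrate = { 10, 0 };
 *
 *	if (ratecheck(&lasterr, &errrate))
 *		kprintf("mydev0: transient bus error\n");
 */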
/*
 * ppsratecheck(): packets (or events) per second limitation.
 *
 * Return 0 if the limit is to be enforced (e.g. the caller
 * should drop a packet because of the rate limitation).
 *
 * maxpps of 0 always causes zero to be returned.  maxpps of -1
 * always causes 1 to be returned; this effectively defeats rate
 * limiting.
 *
 * Note that we maintain the struct timeval for compatibility
 * with other bsd systems.  We reuse the storage and just monitor
 * clock ticks for minimal overhead.
 */
int
ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps)
{
        int now;

        /*
         * Reset the last time and counter if this is the first call
         * or more than a second has passed since the last update of
         * lasttime.
         */
        now = ticks;
        if (lasttime->tv_sec == 0 || (u_int)(now - lasttime->tv_sec) >= hz) {
                lasttime->tv_sec = now;
                *curpps = 1;
                return (maxpps != 0);
        } else {
                (*curpps)++;            /* NB: ignore potential overflow */
                return (maxpps < 0 || *curpps < maxpps);
        }
}
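
/*
 * Illustrative usage sketch (caller-side names are assumptions, not
 * part of this file): to drop all but roughly one diagnostic per
 * second,
 *
 *	static struct timeval lastlog;
 *	static int curpps;
 *
 *	if (ppsratecheck(&lastlog, &curpps, 1))
 *		kprintf("mydev0: rx overrun\n");
 */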