/*
 *  Kernel internal timers, kernel timekeeping, basic process system calls
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  1997-01-28  Modified by Finn Arne Gangstad to make timers scale better.
 *
 *  1997-09-10  Updated NTP code according to technical memorandum Jan '96
 *              "A Kernel Model for Precision Timekeeping" by Dave Mills
 *  1998-12-24  Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
 *              serialize accesses to xtime/lost_ticks).
 *                              Copyright (C) 1998  Andrea Arcangeli
 *  1999-03-10  Improved NTP compatibility by Ulrich Windl
 *  2002-05-31  Move sys_sysinfo here and make its locking sane, Robert Love
 *  2000-10-05  Implemented scalable SMP per-CPU timer handling.
 *                              Copyright (C) 2000, 2001, 2002  Ingo Molnar
 *              Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar
 */
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/swap.h>
#include <linux/notifier.h>
#include <linux/thread_info.h>
#include <linux/time.h>
#include <linux/jiffies.h>
#include <linux/cpu.h>

#include <asm/uaccess.h>
#include <asm/div64.h>
#include <asm/timex.h>
/*
 * per-CPU timer vector definitions:
 */
#define TVN_BITS 6
#define TVR_BITS 8
#define TVN_SIZE (1 << TVN_BITS)
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)
typedef struct tvec_s {
	struct list_head vec[TVN_SIZE];
} tvec_t;

typedef struct tvec_root_s {
	struct list_head vec[TVR_SIZE];
} tvec_root_t;

struct tvec_t_base_s {
	spinlock_t lock;
	unsigned long timer_jiffies;
	struct timer_list *running_timer;
	tvec_root_t tv1;
	tvec_t tv2;
	tvec_t tv3;
	tvec_t tv4;
	tvec_t tv5;
} ____cacheline_aligned_in_smp;

typedef struct tvec_t_base_s tvec_base_t;
static inline void set_running_timer(tvec_base_t *base,
					struct timer_list *timer)
{
	base->running_timer = timer;
}
/* Fake initialization */
static DEFINE_PER_CPU(tvec_base_t, tvec_bases) = { SPIN_LOCK_UNLOCKED };
static void check_timer_failed(struct timer_list *timer)
{
	static int whine_count;

	if (whine_count < 16) {
		whine_count++;
		printk("Uninitialised timer!\n");
		printk("This is just a warning. Your computer is OK\n");
		printk("function=0x%p, data=0x%lx\n",
			timer->function, timer->data);
	}
	/*
	 * Now fix it up
	 */
	spin_lock_init(&timer->lock);
	timer->magic = TIMER_MAGIC;
}
static inline void check_timer(struct timer_list *timer)
{
	if (timer->magic != TIMER_MAGIC)
		check_timer_failed(timer);
}
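
/*
 * Example (illustrative sketch; my_timer and my_timer_fn are placeholder
 * names, not defined in this file): a timer has to be initialised before
 * it is first added or modified, otherwise the magic check above fires.
 *
 *	static void my_timer_fn(unsigned long data);
 *	static struct timer_list my_timer =
 *		TIMER_INITIALIZER(my_timer_fn, 0, 0);
 *
 *	or, for a timer set up at runtime:
 *
 *	init_timer(&my_timer);
 *	my_timer.function = my_timer_fn;
 *	my_timer.data = 0;
 */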

static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
{
	unsigned long expires = timer->expires;
	unsigned long idx = expires - base->timer_jiffies;
	struct list_head *vec;

	if (idx < TVR_SIZE) {
		int i = expires & TVR_MASK;
		vec = base->tv1.vec + i;
	} else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
		int i = (expires >> TVR_BITS) & TVN_MASK;
		vec = base->tv2.vec + i;
	} else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
		vec = base->tv3.vec + i;
	} else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
		vec = base->tv4.vec + i;
	} else if ((signed long) idx < 0) {
		/*
		 * Can happen if you add a timer with expires == jiffies,
		 * or you set a timer to go off in the past
		 */
		vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
	} else {
		int i;
		/* If the timeout is larger than 0xffffffff on 64-bit
		 * architectures then we use the maximum timeout:
		 */
		if (idx > 0xffffffffUL) {
			idx = 0xffffffffUL;
			expires = idx + base->timer_jiffies;
		}
		i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
		vec = base->tv5.vec + i;
	}
	/*
	 * Timers are FIFO:
	 */
	list_add_tail(&timer->entry, vec);
}
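
/*
 * Worked example of the bucketing above (illustrative, using the
 * TVR_BITS=8 / TVN_BITS=6 values defined at the top of this file): a timer
 * whose expiry is less than 256 jiffies ahead of base->timer_jiffies goes
 * straight into tv1, indexed by the low 8 bits of its expiry.  An expiry
 * 10,000 jiffies ahead (256 <= idx < 2^14) lands in tv2, indexed by bits
 * 8..13 of the expiry, and so on for tv3 (bits 14..19), tv4 (bits 20..25)
 * and tv5 (bits 26 and up).  Each cascade() call below later re-hashes a
 * whole coarse slot back down into the finer-grained vectors.
 */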

int __mod_timer(struct timer_list *timer, unsigned long expires)
{
	tvec_base_t *old_base, *new_base;
	unsigned long flags;
	int ret = 0;

	BUG_ON(!timer->function);

	check_timer(timer);

	spin_lock_irqsave(&timer->lock, flags);
	new_base = &__get_cpu_var(tvec_bases);
repeat:
	old_base = timer->base;

	/*
	 * Prevent deadlocks via ordering by old_base < new_base.
	 */
	if (old_base && (new_base != old_base)) {
		if (old_base < new_base) {
			spin_lock(&new_base->lock);
			spin_lock(&old_base->lock);
		} else {
			spin_lock(&old_base->lock);
			spin_lock(&new_base->lock);
		}
		/*
		 * The timer base might have been cancelled while we were
		 * trying to take the lock(s):
		 */
		if (timer->base != old_base) {
			spin_unlock(&new_base->lock);
			spin_unlock(&old_base->lock);
			goto repeat;
		}
	} else {
		spin_lock(&new_base->lock);
		if (timer->base != old_base) {
			spin_unlock(&new_base->lock);
			goto repeat;
		}
	}

	/*
	 * Delete the previous timeout (if there was any), and install
	 * the new one:
	 */
	if (old_base) {
		list_del(&timer->entry);
		ret = 1;
	}
	timer->expires = expires;
	internal_add_timer(new_base, timer);
	timer->base = new_base;

	if (old_base && (new_base != old_base))
		spin_unlock(&old_base->lock);
	spin_unlock(&new_base->lock);
	spin_unlock_irqrestore(&timer->lock, flags);

	return ret;
}

/***
 * add_timer_on - start a timer on a particular CPU
 * @timer: the timer to be added
 * @cpu: the CPU to start it on
 *
 * This is not very scalable on SMP. Double adds are not possible.
 */
void add_timer_on(struct timer_list *timer, int cpu)
{
	tvec_base_t *base = &per_cpu(tvec_bases, cpu);
	unsigned long flags;

	BUG_ON(timer_pending(timer) || !timer->function);

	check_timer(timer);

	spin_lock_irqsave(&base->lock, flags);
	internal_add_timer(base, timer);
	timer->base = base;
	spin_unlock_irqrestore(&base->lock, flags);
}

/***
 * mod_timer - modify a timer's timeout
 * @timer: the timer to be modified
 *
 * mod_timer is a more efficient way to update the expire field of an
 * active timer (if the timer is inactive it will be activated)
 *
 * mod_timer(timer, expires) is equivalent to:
 *
 *     del_timer(timer); timer->expires = expires; add_timer(timer);
 *
 * Note that if there are multiple unserialized concurrent users of the
 * same timer, then mod_timer() is the only safe way to modify the timeout,
 * since add_timer() cannot modify an already running timer.
 *
 * The function returns whether it has modified a pending timer or not.
 * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
 * active timer returns 1.)
 */
int mod_timer(struct timer_list *timer, unsigned long expires)
{
	BUG_ON(!timer->function);

	check_timer(timer);

	/*
	 * This is a common optimization triggered by the
	 * networking code - if the timer is re-modified
	 * to be the same thing then just return:
	 */
	if (timer->expires == expires && timer_pending(timer))
		return 1;

	return __mod_timer(timer, expires);
}
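
/*
 * Example usage (illustrative; my_timer and my_timer_fn are placeholder
 * names): re-arming a periodic timer from its own handler, the common
 * pattern mod_timer() is optimised for.
 *
 *	static void my_timer_fn(unsigned long data)
 *	{
 *		...
 *		mod_timer(&my_timer, jiffies + HZ);	* fire again in ~1s *
 *	}
 */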

/***
 * del_timer - deactivate a timer.
 * @timer: the timer to be deactivated
 *
 * del_timer() deactivates a timer - this works on both active and inactive
 * timers.
 *
 * The function returns whether it has deactivated a pending timer or not.
 * (ie. del_timer() of an inactive timer returns 0, del_timer() of an
 * active timer returns 1.)
 */
int del_timer(struct timer_list *timer)
{
	unsigned long flags;
	tvec_base_t *base;

	check_timer(timer);

repeat:
	base = timer->base;
	if (!base)
		return 0;
	spin_lock_irqsave(&base->lock, flags);
	if (base != timer->base) {
		spin_unlock_irqrestore(&base->lock, flags);
		goto repeat;
	}
	list_del(&timer->entry);
	timer->base = NULL;
	spin_unlock_irqrestore(&base->lock, flags);

	return 1;
}

/***
 * del_timer_sync - deactivate a timer and wait for the handler to finish.
 * @timer: the timer to be deactivated
 *
 * This function only differs from del_timer() on SMP: besides deactivating
 * the timer it also makes sure the handler has finished executing on other
 * CPUs.
 *
 * Synchronization rules: callers must prevent restarting of the timer,
 * otherwise this function is meaningless. It must not be called from
 * interrupt contexts. Upon exit the timer is not queued and the handler
 * is not running on any CPU.
 *
 * The function returns whether it has deactivated a pending timer or not.
 */
int del_timer_sync(struct timer_list *timer)
{
	tvec_base_t *base;
	int i, ret = 0;

	check_timer(timer);

del_again:
	ret += del_timer(timer);

	for (i = 0; i < NR_CPUS; i++) {
		if (!cpu_online(i))
			continue;

		base = &per_cpu(tvec_bases, i);
		if (base->running_timer == timer) {
			while (base->running_timer == timer) {
				cpu_relax();
				preempt_check_resched();
			}
			break;
		}
	}
	if (timer_pending(timer))
		goto del_again;

	return ret;
}
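
/*
 * Example of the synchronization rule above (illustrative sketch;
 * shutting_down, my_timer and my_timer_fn are placeholder names): a
 * self-rearming timer must be stopped from re-adding itself before
 * del_timer_sync() is called, otherwise the timer can be queued again
 * right after this function returns.
 *
 *	static void my_timer_fn(unsigned long data)
 *	{
 *		if (!shutting_down)
 *			mod_timer(&my_timer, jiffies + HZ);
 *	}
 *
 *	teardown path:
 *
 *	shutting_down = 1;
 *	smp_wmb();
 *	del_timer_sync(&my_timer);
 */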

static int cascade(tvec_base_t *base, tvec_t *tv, int index)
{
	/* cascade all the timers from tv up one level */
	struct list_head *head, *curr;

	head = tv->vec + index;
	curr = head->next;
	/*
	 * We are removing _all_ timers from the list, so we don't have to
	 * detach them individually, just clear the list afterwards.
	 */
	while (curr != head) {
		struct timer_list *tmp;

		tmp = list_entry(curr, struct timer_list, entry);
		BUG_ON(tmp->base != base);
		curr = curr->next;
		internal_add_timer(base, tmp);
	}
	INIT_LIST_HEAD(head);

	return index;
}

/***
 * __run_timers - run all expired timers (if any) on this CPU.
 * @base: the timer vector to be processed.
 *
 * This function cascades all vectors and executes all expired timer
 * vectors.
 */
#define INDEX(N) (base->timer_jiffies >> (TVR_BITS + N * TVN_BITS)) & TVN_MASK

static inline void __run_timers(tvec_base_t *base)
{
	struct timer_list *timer;

	spin_lock_irq(&base->lock);
	while (time_after_eq(jiffies, base->timer_jiffies)) {
		struct list_head work_list = LIST_HEAD_INIT(work_list);
		struct list_head *head = &work_list;
		int index = base->timer_jiffies & TVR_MASK;

		/*
		 * Cascade timers:
		 */
		if (!index &&
			(!cascade(base, &base->tv2, INDEX(0))) &&
				(!cascade(base, &base->tv3, INDEX(1))) &&
					!cascade(base, &base->tv4, INDEX(2)))
			cascade(base, &base->tv5, INDEX(3));
		++base->timer_jiffies;
		list_splice_init(base->tv1.vec + index, &work_list);
repeat:
		if (!list_empty(head)) {
			void (*fn)(unsigned long);
			unsigned long data;

			timer = list_entry(head->next, struct timer_list, entry);
			fn = timer->function;
			data = timer->data;

			list_del(&timer->entry);
			timer->base = NULL;
			set_running_timer(base, timer);
			spin_unlock_irq(&base->lock);
			fn(data);
			spin_lock_irq(&base->lock);
			goto repeat;
		}
	}
	set_running_timer(base, NULL);
	spin_unlock_irq(&base->lock);
}
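
/*
 * Worked example of the cascade step above (illustrative): with TVR_BITS=8,
 * index wraps to 0 every 256 jiffies.  At that point the tv2 slot selected
 * by INDEX(0) (bits 8..13 of timer_jiffies) is emptied and its timers are
 * re-hashed by internal_add_timer(), now landing in tv1 because their
 * expiry is less than 256 jiffies away.  Only when the cascaded slot
 * happens to be slot 0 (cascade() returns 0) does the next-coarser vector
 * cascade as well, which is what the chained !cascade() tests implement.
 */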

/******************************************************************/

/*
 * Timekeeping variables
 */
unsigned long tick_usec = TICK_USEC;		/* USER_HZ period (usec) */
unsigned long tick_nsec = TICK_NSEC;		/* ACTHZ period (nsec) */

/*
 * wall_to_monotonic is what we need to add to xtime (or xtime corrected
 * for sub jiffie times) to get to monotonic time.  Monotonic is pegged
 * at zero at system boot time, so wall_to_monotonic will be negative;
 * however, we will ALWAYS keep the tv_nsec part positive so we can use
 * the usual normalization.
 */
struct timespec xtime __attribute__ ((aligned (16)));
struct timespec wall_to_monotonic __attribute__ ((aligned (16)));

/* Don't completely fail for HZ > 500.  */
int tickadj = 500/HZ ? : 1;		/* microsecs */

/*
 * phase-lock loop variables
 */
/* TIME_ERROR prevents overwriting the CMOS clock */
int time_state = TIME_OK;		/* clock synchronization status	*/
int time_status = STA_UNSYNC;		/* clock status bits		*/
long time_offset;			/* time adjustment (us)		*/
long time_constant = 2;			/* pll time constant		*/
long time_tolerance = MAXFREQ;		/* frequency tolerance (ppm)	*/
long time_precision = 1;		/* clock precision (us)		*/
long time_maxerror = NTP_PHASE_LIMIT;	/* maximum error (us)		*/
long time_esterror = NTP_PHASE_LIMIT;	/* estimated error (us)		*/
long time_phase;			/* phase offset (scaled us)	*/
long time_freq = (((NSEC_PER_SEC + HZ/2) % HZ - HZ/2) << SHIFT_USEC) / NSEC_PER_USEC;
					/* frequency offset (scaled ppm)*/
long time_adj;				/* tick adjust (scaled 1 / HZ)	*/
long time_reftime;			/* time at last adjustment (s)	*/

long time_adjust;

/*
 * this routine handles the overflow of the microsecond field
 *
 * The tricky bits of code to handle the accurate clock support
 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
 * They were originally developed for SUN and DEC kernels.
 * All the kudos should go to Dave for this stuff.
 */
static void second_overflow(void)
{
	long ltemp;

	/* Bump the maxerror field */
	time_maxerror += time_tolerance >> SHIFT_USEC;
	if (time_maxerror > NTP_PHASE_LIMIT) {
		time_maxerror = NTP_PHASE_LIMIT;
		time_status |= STA_UNSYNC;
	}

	/*
	 * Leap second processing. If in leap-insert state at
	 * the end of the day, the system clock is set back one
	 * second; if in leap-delete state, the system clock is
	 * set ahead one second. The microtime() routine or
	 * external clock driver will insure that reported time
	 * is always monotonic. The ugly divides should be
	 * replaced.
	 */
	switch (time_state) {

	case TIME_OK:
		if (time_status & STA_INS)
			time_state = TIME_INS;
		else if (time_status & STA_DEL)
			time_state = TIME_DEL;
		break;

	case TIME_INS:
		if (xtime.tv_sec % 86400 == 0) {
			xtime.tv_sec--;
			wall_to_monotonic.tv_sec++;
			time_interpolator_update(-NSEC_PER_SEC);
			time_state = TIME_OOP;
			printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
		}
		break;

	case TIME_DEL:
		if ((xtime.tv_sec + 1) % 86400 == 0) {
			xtime.tv_sec++;
			wall_to_monotonic.tv_sec--;
			time_interpolator_update(NSEC_PER_SEC);
			time_state = TIME_WAIT;
			printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
		}
		break;

	case TIME_OOP:
		time_state = TIME_WAIT;
		break;

	case TIME_WAIT:
		if (!(time_status & (STA_INS | STA_DEL)))
			time_state = TIME_OK;
	}

	/*
	 * Compute the phase adjustment for the next second. In
	 * PLL mode, the offset is reduced by a fixed factor
	 * times the time constant. In FLL mode the offset is
	 * used directly. In either mode, the maximum phase
	 * adjustment for each second is clamped so as to spread
	 * the adjustment over not more than the number of
	 * seconds between updates.
	 */
	if (time_offset < 0) {
		ltemp = -time_offset;
		if (!(time_status & STA_FLL))
			ltemp >>= SHIFT_KG + time_constant;
		if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
			ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
		time_offset += ltemp;
		time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
	} else {
		ltemp = time_offset;
		if (!(time_status & STA_FLL))
			ltemp >>= SHIFT_KG + time_constant;
		if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
			ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
		time_offset -= ltemp;
		time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
	}

	/*
	 * Compute the frequency estimate and additional phase
	 * adjustment due to frequency error for the next
	 * second. When the PPS signal is engaged, gnaw on the
	 * watchdog counter and update the frequency computed by
	 * the pll and the PPS signal.
	 */
	pps_valid++;
	if (pps_valid == PPS_VALID) {	/* PPS signal lost */
		pps_jitter = MAXTIME;
		pps_stabil = MAXFREQ;
		time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
				 STA_PPSWANDER | STA_PPSERROR);
	}
	ltemp = time_freq + pps_freq;
	if (ltemp < 0)
		time_adj -= -ltemp >>
			(SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
	else
		time_adj += ltemp >>
			(SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);

#if HZ == 100
	/* Compensate for (HZ==100) != (1 << SHIFT_HZ).
	 * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14)
	 */
	if (time_adj < 0)
		time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
	else
		time_adj += (time_adj >> 2) + (time_adj >> 5);
#endif
#if HZ == 1000
	/* Compensate for (HZ==1000) != (1 << SHIFT_HZ).
	 * Add 1.5625% and 0.78125% to get 1023.4375; => only 0.05% error (p. 14)
	 */
	if (time_adj < 0)
		time_adj -= (-time_adj >> 6) + (-time_adj >> 7);
	else
		time_adj += (time_adj >> 6) + (time_adj >> 7);
#endif
}

/* in the NTP reference this is called "hardclock()" */
static void update_wall_time_one_tick(void)
{
	long time_adjust_step, delta_nsec;

	if ((time_adjust_step = time_adjust) != 0) {
		/* We are doing an adjtime thing.
		 *
		 * Prepare time_adjust_step to be within bounds.
		 * Note that a positive time_adjust means we want the clock
		 * to run faster.
		 *
		 * Limit the amount of the step to be in the range
		 * -tickadj .. +tickadj
		 */
		if (time_adjust > tickadj)
			time_adjust_step = tickadj;
		else if (time_adjust < -tickadj)
			time_adjust_step = -tickadj;

		/* Reduce by this step the amount of time left */
		time_adjust -= time_adjust_step;
	}
	delta_nsec = tick_nsec + time_adjust_step * 1000;
	/*
	 * Advance the phase, once it gets to one microsecond, then
	 * advance the tick more.
	 */
	time_phase += time_adj;
	if (time_phase <= -FINENSEC) {
		long ltemp = -time_phase >> (SHIFT_SCALE - 10);
		time_phase += ltemp << (SHIFT_SCALE - 10);
		delta_nsec -= ltemp;
	}
	else if (time_phase >= FINENSEC) {
		long ltemp = time_phase >> (SHIFT_SCALE - 10);
		time_phase -= ltemp << (SHIFT_SCALE - 10);
		delta_nsec += ltemp;
	}
	xtime.tv_nsec += delta_nsec;
	time_interpolator_update(delta_nsec);
}
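
/*
 * Worked example of the adjtime stepping above (illustrative, assuming
 * HZ=1000 so that tickadj evaluates to 1 microsecond): an adjtime()
 * request of +5000 microseconds sets time_adjust = 5000.  Each tick then
 * takes a time_adjust_step of at most +1 usec, so delta_nsec becomes
 * tick_nsec + 1000 and the remaining time_adjust drops by 1, spreading
 * the 5 ms correction over roughly 5000 ticks instead of stepping the
 * clock at once.
 */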

/*
 * Using a loop looks inefficient, but "ticks" is
 * usually just one (we shouldn't be losing ticks,
 * we're doing it this way mainly for interrupt
 * latency reasons, not because we think we'll
 * have lots of lost timer ticks)
 */
static void update_wall_time(unsigned long ticks)
{
	do {
		ticks--;
		update_wall_time_one_tick();
	} while (ticks);

	if (xtime.tv_nsec >= 1000000000) {
		xtime.tv_nsec -= 1000000000;
		xtime.tv_sec++;
		time_interpolator_update(NSEC_PER_SEC);
		second_overflow();
	}
}

static inline void do_process_times(struct task_struct *p,
	unsigned long user, unsigned long system)
{
	unsigned long psecs;

	psecs = (p->utime += user);
	psecs += (p->stime += system);
	if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_cur) {
		/* Send SIGXCPU every second.. */
		if (!(psecs % HZ))
			send_sig(SIGXCPU, p, 1);
		/* and SIGKILL when we go over max.. */
		if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_max)
			send_sig(SIGKILL, p, 1);
	}
}

static inline void do_it_virt(struct task_struct * p, unsigned long ticks)
{
	unsigned long it_virt = p->it_virt_value;

	if (it_virt) {
		it_virt -= ticks;
		if (!it_virt) {
			it_virt = p->it_virt_incr;
			send_sig(SIGVTALRM, p, 1);
		}
		p->it_virt_value = it_virt;
	}
}

static inline void do_it_prof(struct task_struct *p)
{
	unsigned long it_prof = p->it_prof_value;

	if (it_prof) {
		if (--it_prof == 0) {
			it_prof = p->it_prof_incr;
			send_sig(SIGPROF, p, 1);
		}
		p->it_prof_value = it_prof;
	}
}

void update_one_process(struct task_struct *p, unsigned long user,
			unsigned long system, int cpu)
{
	do_process_times(p, user, system);
	do_it_virt(p, user);
	do_it_prof(p);
}

/*
 * Called from the timer interrupt handler to charge one tick to the current
 * process.  user_tick is 1 if the tick is user time, 0 for system.
 */
void update_process_times(int user_tick)
{
	struct task_struct *p = current;
	int cpu = smp_processor_id(), system = user_tick ^ 1;

	update_one_process(p, user_tick, system, cpu);
	run_local_timers();
	scheduler_tick(user_tick, system);
}

/*
 * Nr of active tasks - counted in fixed-point numbers
 */
static unsigned long count_active_tasks(void)
{
	return (nr_running() + nr_uninterruptible()) * FIXED_1;
}

/*
 * Hmm.. Changed this, as the GNU make sources (load.c) seem to
 * imply that avenrun[] is the standard name for this kind of thing.
 * Nothing else seems to be standardized: the fractional size etc
 * all seem to differ on different machines.
 *
 * Requires xtime_lock to access.
 */
unsigned long avenrun[3];

/*
 * calc_load - given tick count, update the avenrun load estimates.
 * This is called while holding a write_lock on xtime_lock.
 */
static inline void calc_load(unsigned long ticks)
{
	unsigned long active_tasks; /* fixed-point */
	static int count = LOAD_FREQ;

	count -= ticks;
	if (count < 0) {
		count += LOAD_FREQ;
		active_tasks = count_active_tasks();
		CALC_LOAD(avenrun[0], EXP_1, active_tasks);
		CALC_LOAD(avenrun[1], EXP_5, active_tasks);
		CALC_LOAD(avenrun[2], EXP_15, active_tasks);
	}
}
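
/*
 * Illustrative note on the fixed-point math (CALC_LOAD, FIXED_1, FSHIFT
 * and the EXP_* decay constants come from <linux/sched.h>, not this file):
 * loads are kept with FSHIFT fractional bits, so FIXED_1 represents 1.0.
 * Every LOAD_FREQ ticks the new sample is folded in as an exponential
 * moving average, roughly
 *
 *	avenrun[i] = (avenrun[i]*EXP_i + active_tasks*(FIXED_1-EXP_i)) >> FSHIFT;
 *
 * so with two runnable tasks active_tasks is 2*FIXED_1 and avenrun[0]
 * decays toward that value, which /proc/loadavg prints as 2.00.
 */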

/* jiffies at the most recent update of wall time */
unsigned long wall_jiffies = INITIAL_JIFFIES;

/*
 * This read-write spinlock protects us from races in SMP while
 * playing with xtime and avenrun.
 */
#ifndef ARCH_HAVE_XTIME_LOCK
seqlock_t xtime_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED;
#endif

/*
 * This function runs timers and the timer-tq in bottom half context.
 */
static void run_timer_softirq(struct softirq_action *h)
{
	tvec_base_t *base = &__get_cpu_var(tvec_bases);

	if (time_after_eq(jiffies, base->timer_jiffies))
		__run_timers(base);
}

/*
 * Called by the local, per-CPU timer interrupt on SMP.
 */
void run_local_timers(void)
{
	raise_softirq(TIMER_SOFTIRQ);
}

/*
 * Called by the timer interrupt. xtime_lock must already be taken
 * by the timer IRQ.
 */
static inline void update_times(void)
{
	unsigned long ticks;

	ticks = jiffies - wall_jiffies;
	if (ticks) {
		wall_jiffies += ticks;
		update_wall_time(ticks);
	}
	calc_load(ticks);
}

/*
 * The 64-bit jiffies value is not atomic - you MUST NOT read it
 * without sampling the sequence number in xtime_lock.
 * jiffies is defined in the linker script...
 */
void do_timer(struct pt_regs *regs)
{
	jiffies_64++;
#ifndef CONFIG_SMP
	/* SMP process accounting uses the local APIC timer */

	update_process_times(user_mode(regs));
#endif
	update_times();
}

#if !defined(__alpha__) && !defined(__ia64__)

/*
 * For backwards compatibility?  This can be done in libc so Alpha
 * and all newer ports shouldn't need it.
 */
asmlinkage unsigned long sys_alarm(unsigned int seconds)
{
	struct itimerval it_new, it_old;
	unsigned int oldalarm;

	it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
	it_new.it_value.tv_sec = seconds;
	it_new.it_value.tv_usec = 0;
	do_setitimer(ITIMER_REAL, &it_new, &it_old);
	oldalarm = it_old.it_value.tv_sec;
	/* ehhh.. We can't return 0 if we have an alarm pending.. */
	/* And we'd better return too much than too little anyway */
	if (it_old.it_value.tv_usec)
		oldalarm++;
	return oldalarm;
}

#endif

/*
 * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
 * should be moved into arch/i386 instead?
 */

/**
 * sys_getpid - return the thread group id of the current process
 *
 * Note, despite the name, this returns the tgid not the pid.  The tgid and
 * the pid are identical unless CLONE_THREAD was specified on clone() in
 * which case the tgid is the same in all threads of the same group.
 *
 * This is SMP safe as current->tgid does not change.
 */
asmlinkage long sys_getpid(void)
{
	return current->tgid;
}

/*
 * Accessing ->group_leader->real_parent is not SMP-safe, it could
 * change from under us. However, rather than getting any lock
 * we can use an optimistic algorithm: get the parent
 * pid, and go back and check that the parent is still
 * the same. If it has changed (which is extremely unlikely
 * indeed), we just try again..
 *
 * NOTE! This depends on the fact that even if we _do_
 * get an old value of "parent", we can happily dereference
 * the pointer (it was and remains a dereferencable kernel pointer
 * no matter what): we just can't necessarily trust the result
 * until we know that the parent pointer is valid.
 *
 * NOTE2: ->group_leader never changes from under us.
 */
asmlinkage long sys_getppid(void)
{
	int pid;
	struct task_struct *me = current;
	struct task_struct *parent;

	parent = me->group_leader->real_parent;
	for (;;) {
		pid = parent->tgid;
		{
			struct task_struct *old = parent;

			/*
			 * Make sure we read the pid before re-reading the
			 * parent pointer:
			 */
			rmb();
			parent = me->group_leader->real_parent;
			if (old != parent)
				continue;
		}
		break;
	}
	return pid;
}

asmlinkage long sys_getuid(void)
{
	/* Only we change this so SMP safe */
	return current->uid;
}

asmlinkage long sys_geteuid(void)
{
	/* Only we change this so SMP safe */
	return current->euid;
}

asmlinkage long sys_getgid(void)
{
	/* Only we change this so SMP safe */
	return current->gid;
}

asmlinkage long sys_getegid(void)
{
	/* Only we change this so SMP safe */
	return current->egid;
}

static void process_timeout(unsigned long __data)
{
	wake_up_process((task_t *)__data);
}

/**
 * schedule_timeout - sleep until timeout
 * @timeout: timeout value in jiffies
 *
 * Make the current task sleep until @timeout jiffies have
 * elapsed. The routine will return immediately unless
 * the current task state has been set (see set_current_state()).
 *
 * You can set the task state as follows -
 *
 * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
 * pass before the routine returns. The routine will return 0
 *
 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
 * delivered to the current task. In this case the remaining time
 * in jiffies will be returned, or 0 if the timer expired in time
 *
 * The current task state is guaranteed to be TASK_RUNNING when this
 * routine returns.
 *
 * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
 * the CPU away without a bound on the timeout. In this case the return
 * value will be %MAX_SCHEDULE_TIMEOUT.
 *
 * In all cases the return value is guaranteed to be non-negative.
 */
signed long schedule_timeout(signed long timeout)
{
	struct timer_list timer;
	unsigned long expire;

	switch (timeout)
	{
	case MAX_SCHEDULE_TIMEOUT:
		/*
		 * These two special cases are useful to be comfortable
		 * in the caller. Nothing more. We could take
		 * MAX_SCHEDULE_TIMEOUT from one of the negative value
		 * but I'd like to return a valid offset (>=0) to allow
		 * the caller to do everything it want with the retval.
		 */
		schedule();
		goto out;
	default:
		/*
		 * Another bit of PARANOID. Note that the retval will be
		 * 0 since no piece of kernel is supposed to do a check
		 * for a negative retval of schedule_timeout() (since it
		 * should never happen anyway). You just have the printk()
		 * that will tell you if something has gone wrong and where.
		 */
		if (timeout < 0) {
			printk(KERN_ERR "schedule_timeout: wrong timeout "
			       "value %lx from %p\n", timeout,
			       __builtin_return_address(0));
			current->state = TASK_RUNNING;
			goto out;
		}
	}

	expire = timeout + jiffies;

	init_timer(&timer);
	timer.expires = expire;
	timer.data = (unsigned long) current;
	timer.function = process_timeout;

	add_timer(&timer);
	schedule();
	del_timer_sync(&timer);

	timeout = expire - jiffies;

 out:
	return timeout < 0 ? 0 : timeout;
}
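
/*
 * Example usage (illustrative): sleep for about one second, but wake up
 * early if a signal arrives, as described in the comment block above.
 *
 *	signed long remaining;
 *
 *	set_current_state(TASK_INTERRUPTIBLE);
 *	remaining = schedule_timeout(HZ);
 *	if (remaining)
 *		... woken early, 'remaining' jiffies were left ...
 */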

/* Thread ID - the internal kernel "pid" */
asmlinkage long sys_gettid(void)
{
	return current->pid;
}

#ifndef FOLD_NANO_SLEEP_INTO_CLOCK_NANO_SLEEP

static long nanosleep_restart(struct restart_block *restart)
{
	unsigned long expire = restart->arg0, now = jiffies;
	struct timespec *rmtp = (struct timespec *) restart->arg1;
	long ret;

	/* Did it expire while we handled signals? */
	if (!time_after(expire, now))
		return 0;

	current->state = TASK_INTERRUPTIBLE;
	expire = schedule_timeout(expire - now);

	ret = 0;
	if (expire) {
		struct timespec t;
		jiffies_to_timespec(expire, &t);

		ret = -ERESTART_RESTARTBLOCK;
		if (rmtp && copy_to_user(rmtp, &t, sizeof(t)))
			ret = -EFAULT;
		/* The 'restart' block is already filled in */
	}
	return ret;
}

asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
{
	struct timespec t;
	unsigned long expire;
	long ret;

	if (copy_from_user(&t, rqtp, sizeof(t)))
		return -EFAULT;

	if ((t.tv_nsec >= 1000000000L) || (t.tv_nsec < 0) || (t.tv_sec < 0))
		return -EINVAL;

	expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
	current->state = TASK_INTERRUPTIBLE;
	expire = schedule_timeout(expire);

	ret = 0;
	if (expire) {
		struct restart_block *restart;
		jiffies_to_timespec(expire, &t);
		if (rmtp && copy_to_user(rmtp, &t, sizeof(t)))
			return -EFAULT;

		restart = &current_thread_info()->restart_block;
		restart->fn = nanosleep_restart;
		restart->arg0 = jiffies + expire;
		restart->arg1 = (unsigned long) rmtp;
		ret = -ERESTART_RESTARTBLOCK;
	}
	return ret;
}

#endif /* ! FOLD_NANO_SLEEP_INTO_CLOCK_NANO_SLEEP */
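
/*
 * Note on the rounding in sys_nanosleep() above (illustrative, assuming
 * HZ=100): a request of 10 ms converts to one jiffy via
 * timespec_to_jiffies(), and the extra "+ (t.tv_sec || t.tv_nsec)" jiffy
 * accounts for the tick that is already in progress, so the task sleeps
 * for at least the full requested interval rather than possibly returning
 * one tick early.
 */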

/*
 * sys_sysinfo - fill in sysinfo struct
 */
asmlinkage long sys_sysinfo(struct sysinfo __user *info)
{
	struct sysinfo val;
	unsigned long mem_total, sav_total;
	unsigned int mem_unit, bitcount;
	unsigned long seq;

	memset((char *)&val, 0, sizeof(struct sysinfo));

	do {
		struct timespec tp;
		seq = read_seqbegin(&xtime_lock);

		/*
		 * This is annoying.  The below is the same thing
		 * posix_get_clock_monotonic() does, but it wants to
		 * take the lock which we want to cover the loads stuff
		 * too.
		 */

		do_gettimeofday((struct timeval *)&tp);
		tp.tv_nsec *= NSEC_PER_USEC;
		tp.tv_sec += wall_to_monotonic.tv_sec;
		tp.tv_nsec += wall_to_monotonic.tv_nsec;
		if (tp.tv_nsec - NSEC_PER_SEC >= 0) {
			tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
			tp.tv_sec++;
		}
		val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);

		val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
		val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
		val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);

		val.procs = nr_threads;
	} while (read_seqretry(&xtime_lock, seq));

	si_meminfo(&val);
	si_swapinfo(&val);

	/*
	 * If the sum of all the available memory (i.e. ram + swap)
	 * is less than can be stored in a 32 bit unsigned long then
	 * we can be binary compatible with 2.2.x kernels.  If not,
	 * well, in that case 2.2.x was broken anyways...
	 *
	 *  -Erik Andersen <andersee@debian.org>
	 */

	mem_total = val.totalram + val.totalswap;
	if (mem_total < val.totalram || mem_total < val.totalswap)
		goto out;
	bitcount = 0;
	mem_unit = val.mem_unit;
	while (mem_unit > 1) {
		bitcount++;
		mem_unit >>= 1;
		sav_total = mem_total;
		mem_total <<= 1;
		if (mem_total < sav_total)
			goto out;
	}

	/*
	 * If mem_total did not overflow, multiply all memory values by
	 * val.mem_unit and set it to 1.  This leaves things compatible
	 * with 2.2.x, and also retains compatibility with earlier 2.4.x
	 * kernels...
	 */

	val.mem_unit = 1;
	val.totalram <<= bitcount;
	val.freeram <<= bitcount;
	val.sharedram <<= bitcount;
	val.bufferram <<= bitcount;
	val.totalswap <<= bitcount;
	val.freeswap <<= bitcount;
	val.totalhigh <<= bitcount;
	val.freehigh <<= bitcount;

 out:
	if (copy_to_user(info, &val, sizeof(struct sysinfo)))
		return -EFAULT;

	return 0;
}
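
/*
 * Worked example of the mem_unit folding above (illustrative): if the
 * memory counters are reported in units of 4096-byte pages
 * (val.mem_unit = 4096), the loop counts bitcount = 12 while verifying
 * that shifting mem_total left 12 bits does not overflow an unsigned
 * long.  If it fits, all the memory fields are converted to plain bytes
 * and mem_unit is set to 1, matching what 2.2.x-era binaries expect.
 */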

static void __devinit init_timers_cpu(int cpu)
{
	int j;
	tvec_base_t *base;

	base = &per_cpu(tvec_bases, cpu);
	spin_lock_init(&base->lock);
	for (j = 0; j < TVN_SIZE; j++) {
		INIT_LIST_HEAD(base->tv5.vec + j);
		INIT_LIST_HEAD(base->tv4.vec + j);
		INIT_LIST_HEAD(base->tv3.vec + j);
		INIT_LIST_HEAD(base->tv2.vec + j);
	}
	for (j = 0; j < TVR_SIZE; j++)
		INIT_LIST_HEAD(base->tv1.vec + j);

	base->timer_jiffies = jiffies;
}

static int __devinit timer_cpu_notify(struct notifier_block *self,
				unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;

	switch (action) {
	case CPU_UP_PREPARE:
		init_timers_cpu(cpu);
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block __devinitdata timers_nb = {
	.notifier_call	= timer_cpu_notify,
};

void __init init_timers(void)
{
	timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
				(void *)(long)smp_processor_id());
	register_cpu_notifier(&timers_nb);
	open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL);
}

#ifdef CONFIG_TIME_INTERPOLATION
volatile unsigned long last_nsec_offset;
#ifndef __HAVE_ARCH_CMPXCHG
spinlock_t last_nsec_offset_lock = SPIN_LOCK_UNLOCKED;
#endif

struct time_interpolator *time_interpolator;
static struct time_interpolator *time_interpolator_list;
static spinlock_t time_interpolator_lock = SPIN_LOCK_UNLOCKED;

static inline int
is_better_time_interpolator(struct time_interpolator *new)
{
	if (!time_interpolator)
		return 1;
	return new->frequency > 2*time_interpolator->frequency ||
	    (unsigned long)new->drift < (unsigned long)time_interpolator->drift;
}

void
register_time_interpolator(struct time_interpolator *ti)
{
	spin_lock(&time_interpolator_lock);
	write_seqlock_irq(&xtime_lock);
	if (is_better_time_interpolator(ti))
		time_interpolator = ti;
	write_sequnlock_irq(&xtime_lock);

	ti->next = time_interpolator_list;
	time_interpolator_list = ti;
	spin_unlock(&time_interpolator_lock);
}
1271 unregister_time_interpolator(struct time_interpolator
*ti
)
1273 struct time_interpolator
*curr
, **prev
;
1275 spin_lock(&time_interpolator_lock
);
1276 prev
= &time_interpolator_list
;
1277 for (curr
= *prev
; curr
; curr
= curr
->next
) {
1285 write_seqlock_irq(&xtime_lock
);
1286 if (ti
== time_interpolator
) {
1287 /* we lost the best time-interpolator: */
1288 time_interpolator
= NULL
;
1289 /* find the next-best interpolator */
1290 for (curr
= time_interpolator_list
; curr
; curr
= curr
->next
)
1291 if (is_better_time_interpolator(curr
))
1292 time_interpolator
= curr
;
1294 write_sequnlock_irq(&xtime_lock
);
1295 spin_unlock(&time_interpolator_lock
);
1297 #endif /* CONFIG_TIME_INTERPOLATION */