[PATCH] i386: VMI timer patches
arch/i386/kernel/vmitime.c
/*
 * VMI paravirtual timer support routines.
 *
 * Copyright (C) 2005, VMware, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * Send feedback to dhecht@vmware.com
 */

/*
 * Portions of this code from arch/i386/kernel/timers/timer_tsc.c.
 * Portions of the CONFIG_NO_IDLE_HZ code from arch/s390/kernel/time.c.
 * See comments there for proper credits.
 */
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/rcupdate.h>
#include <linux/clocksource.h>

#include <asm/timer.h>
#include <asm/io.h>
#include <asm/apic.h>
#include <asm/div64.h>
#include <asm/desc.h>

#include <asm/vmi.h>
#include <asm/vmi_time.h>

#include <mach_timer.h>
#include <io_ports.h>
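
/*
 * Alarm wiring: when a local APIC is configured, VMI alarms are delivered
 * through the APIC timer vector (LVTT); otherwise they are wired to IRQ0.
 * VMI_ALARM_WIRING below selects the delivery mode used once the system
 * is fully set up (see the CONFIG_NO_IDLE_HZ alarm rearming code).
 */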
#ifdef CONFIG_X86_LOCAL_APIC
#define VMI_ALARM_WIRING VMI_ALARM_WIRED_LVTT
#else
#define VMI_ALARM_WIRING VMI_ALARM_WIRED_IRQ0
#endif
/* Cached VMI operations */
struct vmi_timer_ops vmi_timer_ops;

#ifdef CONFIG_NO_IDLE_HZ

/* /proc/sys/kernel/hz_timer state. */
int sysctl_hz_timer;

/* Some stats */
static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_irqs);
static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_jiffies);
static DEFINE_PER_CPU(unsigned long, idle_start_jiffies);

#endif /* CONFIG_NO_IDLE_HZ */

/* Number of alarms per second. By default this is CONFIG_VMI_ALARM_HZ. */
static int alarm_hz = CONFIG_VMI_ALARM_HZ;

/* Cache of the value get_cycle_frequency / HZ. */
static signed long long cycles_per_jiffy;

/* Cache of the value get_cycle_frequency / alarm_hz. */
static signed long long cycles_per_alarm;

/* The number of cycles accounted for by the 'jiffies'/'xtime' count.
 * Protected by xtime_lock. */
static unsigned long long real_cycles_accounted_system;

/* The number of cycles accounted for by update_process_times(), per cpu. */
static DEFINE_PER_CPU(unsigned long long, process_times_cycles_accounted_cpu);

/* The number of stolen cycles accounted, per cpu. */
static DEFINE_PER_CPU(unsigned long long, stolen_cycles_accounted_cpu);
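
/*
 * Accounting model: the hypervisor exports monotonic cycle counters
 * (REAL, AVAILABLE, STOLEN). Each accounting routine below keeps a
 * "cycles accounted" baseline and, on every alarm, converts the delta
 * since that baseline into whole jiffies, carrying any remainder forward.
 */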
/* Clock source. */
static cycle_t read_real_cycles(void)
{
	return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
}

static cycle_t read_available_cycles(void)
{
	return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE);
}

#if 0
static cycle_t read_stolen_cycles(void)
{
	return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_STOLEN);
}
#endif /* 0 */
static struct clocksource clocksource_vmi = {
	.name		= "vmi-timer",
	.rating		= 450,
	.read		= read_real_cycles,
	.mask		= CLOCKSOURCE_MASK(64),
	.mult		= 0, /* to be set */
	.shift		= 22,
	.is_continuous	= 1,
};
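
/*
 * The clocksource core converts cycles to nanoseconds as
 * (cycles * mult) >> shift; .mult is left zero here and filled in by
 * vmi_time_init() from the hypervisor-reported cycle frequency.
 */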
/* Timer interrupt handler. */
static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id);

static struct irqaction vmi_timer_irq = {
	.handler	= vmi_timer_interrupt,
	.flags		= SA_INTERRUPT,
	.mask		= CPU_MASK_NONE,
	.name		= "VMI-alarm",
};
/* Alarm rate */
static int __init vmi_timer_alarm_rate_setup(char *str)
{
	int alarm_rate;

	if (get_option(&str, &alarm_rate) == 1 && alarm_rate > 0) {
		alarm_hz = alarm_rate;
		printk(KERN_WARNING "VMI timer alarm HZ set to %d\n", alarm_hz);
	}
	return 1;
}
__setup("vmi_timer_alarm_hz=", vmi_timer_alarm_rate_setup);
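
/*
 * Example (illustrative value): booting with "vmi_timer_alarm_hz=512"
 * raises the alarm rate from CONFIG_VMI_ALARM_HZ to 512 alarms per second.
 */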
/* Initialization */
static void vmi_get_wallclock_ts(struct timespec *ts)
{
	unsigned long long wallclock;

	wallclock = vmi_timer_ops.get_wallclock(); /* nsec units */
	ts->tv_nsec = do_div(wallclock, 1000000000);
	ts->tv_sec = wallclock;
}

static void update_xtime_from_wallclock(void)
{
	struct timespec ts;

	vmi_get_wallclock_ts(&ts);
	do_settimeofday(&ts);
}

unsigned long vmi_get_wallclock(void)
{
	struct timespec ts;

	vmi_get_wallclock_ts(&ts);
	return ts.tv_sec;
}

int vmi_set_wallclock(unsigned long now)
{
	return -1;
}

unsigned long long vmi_sched_clock(void)
{
	return read_available_cycles();
}
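
/*
 * sched_clock() runs on AVAILABLE cycles rather than REAL cycles, so the
 * scheduler does not charge tasks for time during which the hypervisor
 * had this vcpu descheduled.
 */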
void __init vmi_time_init(void)
{
	unsigned long long cycles_per_sec, cycles_per_msec;

	setup_irq(0, &vmi_timer_irq);
#ifdef CONFIG_X86_LOCAL_APIC
	set_intr_gate(LOCAL_TIMER_VECTOR, apic_vmi_timer_interrupt);
#endif

	no_sync_cmos_clock = 1;

	vmi_get_wallclock_ts(&xtime);
	set_normalized_timespec(&wall_to_monotonic,
				-xtime.tv_sec, -xtime.tv_nsec);

	real_cycles_accounted_system = read_real_cycles();
	update_xtime_from_wallclock();
	per_cpu(process_times_cycles_accounted_cpu, 0) = read_available_cycles();

	cycles_per_sec = vmi_timer_ops.get_cycle_frequency();

	cycles_per_jiffy = cycles_per_sec;
	(void)do_div(cycles_per_jiffy, HZ);
	cycles_per_alarm = cycles_per_sec;
	(void)do_div(cycles_per_alarm, alarm_hz);
	cycles_per_msec = cycles_per_sec;
	(void)do_div(cycles_per_msec, 1000);
	cpu_khz = cycles_per_msec;

	printk(KERN_WARNING "VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ; "
	       "cycles/alarm = %llu\n", cycles_per_sec, cycles_per_jiffy,
	       cycles_per_alarm);

	clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec,
						    clocksource_vmi.shift);
	if (clocksource_register(&clocksource_vmi))
		printk(KERN_WARNING "Error registering VMITIME clocksource.\n");

	/* Disable PIT. */
	outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */

	/* Schedule the alarm. Do this in phase with
	 * process_times_cycles_accounted_cpu to reduce the latency of
	 * calling update_process_times. */
	vmi_timer_ops.set_alarm(
		VMI_ALARM_WIRED_IRQ0 | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
		per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
		cycles_per_alarm);
}
#ifdef CONFIG_X86_LOCAL_APIC

void __init vmi_timer_setup_boot_alarm(void)
{
	local_irq_disable();

	/* Route the interrupt to the correct vector. */
	apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);

	/* Cancel the IRQ0 wired alarm, and set up the LVTT alarm. */
	vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
	vmi_timer_ops.set_alarm(
		VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
		per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
		cycles_per_alarm);
	local_irq_enable();
}

/* Initialize the time accounting variables for an AP on an SMP system.
 * Also, set the local alarm for the AP. */
void __init vmi_timer_setup_secondary_alarm(void)
{
	int cpu = smp_processor_id();

	/* Route the interrupt to the correct vector. */
	apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);

	per_cpu(process_times_cycles_accounted_cpu, cpu) = read_available_cycles();

	vmi_timer_ops.set_alarm(
		VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
		per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
		cycles_per_alarm);
}

#endif
/* Update system-wide (real) time accounting (e.g. jiffies, xtime). */
static void vmi_account_real_cycles(unsigned long long cur_real_cycles)
{
	long long cycles_not_accounted;

	write_seqlock(&xtime_lock);

	cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system;
	while (cycles_not_accounted >= cycles_per_jiffy) {
		/* System-wide jiffies and wallclock. */
		do_timer(1);

		cycles_not_accounted -= cycles_per_jiffy;
		real_cycles_accounted_system += cycles_per_jiffy;
	}

	if (vmi_timer_ops.wallclock_updated())
		update_xtime_from_wallclock();

	write_sequnlock(&xtime_lock);
}
/* Update per-cpu process times. */
static void vmi_account_process_times_cycles(struct pt_regs *regs, int cpu,
					     unsigned long long cur_process_times_cycles)
{
	long long cycles_not_accounted;

	cycles_not_accounted = cur_process_times_cycles -
		per_cpu(process_times_cycles_accounted_cpu, cpu);

	while (cycles_not_accounted >= cycles_per_jiffy) {
		/* Account time to the current process. This includes
		 * calling into the scheduler to decrement the timeslice
		 * and possibly reschedule. */
		update_process_times(user_mode(regs));
		/* XXX handle /proc/profile multiplier. */
		profile_tick(CPU_PROFILING);

		cycles_not_accounted -= cycles_per_jiffy;
		per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
	}
}
#ifdef CONFIG_NO_IDLE_HZ

/* Update per-cpu idle times. Used when a no-hz halt is ended. */
static void vmi_account_no_hz_idle_cycles(int cpu,
					  unsigned long long cur_process_times_cycles)
{
	long long cycles_not_accounted;
	unsigned long no_idle_hz_jiffies = 0;

	cycles_not_accounted = cur_process_times_cycles -
		per_cpu(process_times_cycles_accounted_cpu, cpu);

	while (cycles_not_accounted >= cycles_per_jiffy) {
		no_idle_hz_jiffies++;
		cycles_not_accounted -= cycles_per_jiffy;
		per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
	}
	/* Account time to the idle process. */
	account_steal_time(idle_task(cpu), jiffies_to_cputime(no_idle_hz_jiffies));
}

#endif
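
/*
 * Stolen time is the gap between the REAL and AVAILABLE counters: cycles
 * during which this vcpu was runnable but the hypervisor was running
 * something else. It is folded into cpustat->steal a jiffy at a time.
 */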
/* Update per-cpu stolen time. */
static void vmi_account_stolen_cycles(int cpu,
				      unsigned long long cur_real_cycles,
				      unsigned long long cur_avail_cycles)
{
	long long stolen_cycles_not_accounted;
	unsigned long stolen_jiffies = 0;

	if (cur_real_cycles < cur_avail_cycles)
		return;

	stolen_cycles_not_accounted = cur_real_cycles - cur_avail_cycles -
		per_cpu(stolen_cycles_accounted_cpu, cpu);

	while (stolen_cycles_not_accounted >= cycles_per_jiffy) {
		stolen_jiffies++;
		stolen_cycles_not_accounted -= cycles_per_jiffy;
		per_cpu(stolen_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
	}
	/* HACK: pass NULL to force time onto cpustat->steal. */
	account_steal_time(NULL, jiffies_to_cputime(stolen_jiffies));
}
/* Body of either IRQ0 interrupt handler (UP no local-APIC) or
 * local-APIC LVTT interrupt handler (UP & local-APIC or SMP). */
static void vmi_local_timer_interrupt(int cpu)
{
	unsigned long long cur_real_cycles, cur_process_times_cycles;

	cur_real_cycles = read_real_cycles();
	cur_process_times_cycles = read_available_cycles();

	/* Update system-wide (real) time state (xtime, jiffies). */
	vmi_account_real_cycles(cur_real_cycles);
	/* Update per-cpu process times. */
	vmi_account_process_times_cycles(get_irq_regs(), cpu, cur_process_times_cycles);
	/* Update time stolen from this cpu by the hypervisor. */
	vmi_account_stolen_cycles(cpu, cur_real_cycles, cur_process_times_cycles);
}
#ifdef CONFIG_NO_IDLE_HZ

/* Must be called only from the idle loop, with interrupts disabled. */
int vmi_stop_hz_timer(void)
{
	/* Note that cpu_set, cpu_clear are (SMP safe) atomic on x86. */

	unsigned long seq, next;
	unsigned long long real_cycles_expiry;
	int cpu = smp_processor_id();
	int idle;

	BUG_ON(!irqs_disabled());
	if (sysctl_hz_timer != 0)
		return 0;

	cpu_set(cpu, nohz_cpu_mask);
	smp_mb();
	if (rcu_needs_cpu(cpu) || local_softirq_pending() ||
	    (next = next_timer_interrupt(), time_before_eq(next, jiffies))) {
		cpu_clear(cpu, nohz_cpu_mask);
		next = jiffies;
		idle = 0;
	} else
		idle = 1;

	/* Convert jiffies to the real cycle counter. */
	do {
		seq = read_seqbegin(&xtime_lock);
		real_cycles_expiry = real_cycles_accounted_system +
			(long)(next - jiffies) * cycles_per_jiffy;
	} while (read_seqretry(&xtime_lock, seq));

	/* This cpu is going idle. Disable the periodic alarm. */
	if (idle) {
		vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
		per_cpu(idle_start_jiffies, cpu) = jiffies;
	}

	/* Set the real time alarm to expire at the next event. */
	vmi_timer_ops.set_alarm(
		VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL,
		real_cycles_expiry, 0);

	return idle;
}
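
/*
 * While a cpu is no-hz idle the periodic AVAILABLE-cycle alarm stays
 * cancelled; only the one-shot REAL-cycle alarm armed above can fire.
 * vmi_account_time_restart_hz_timer() below settles the accounting and
 * rearms the periodic alarm when the cpu leaves the idle state.
 */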
static void vmi_reenable_hz_timer(int cpu)
{
	/* For /proc/vmi/info idle_hz stat. */
	per_cpu(vmi_idle_no_hz_jiffies, cpu) += jiffies - per_cpu(idle_start_jiffies, cpu);
	per_cpu(vmi_idle_no_hz_irqs, cpu)++;

	/* Don't bother explicitly cancelling the one-shot alarm -- at
	 * worst we will receive a spurious timer interrupt. */
	vmi_timer_ops.set_alarm(
		VMI_ALARM_WIRING | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
		per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
		cycles_per_alarm);
	/* Indicate this cpu is no longer nohz idle. */
	cpu_clear(cpu, nohz_cpu_mask);
}
/* Called from interrupt handlers when the (local) HZ timer is disabled. */
void vmi_account_time_restart_hz_timer(void)
{
	unsigned long long cur_real_cycles, cur_process_times_cycles;
	int cpu = smp_processor_id();

	BUG_ON(!irqs_disabled());

	/* Account the time during which the HZ timer was disabled. */
	cur_real_cycles = read_real_cycles();
	cur_process_times_cycles = read_available_cycles();
	/* Update system-wide (real) time state (xtime, jiffies). */
	vmi_account_real_cycles(cur_real_cycles);
	/* Update per-cpu idle times. */
	vmi_account_no_hz_idle_cycles(cpu, cur_process_times_cycles);
	/* Update time stolen from this cpu by the hypervisor. */
	vmi_account_stolen_cycles(cpu, cur_real_cycles, cur_process_times_cycles);
	/* Re-enable the HZ timer. */
	vmi_reenable_hz_timer(cpu);
}

#endif /* CONFIG_NO_IDLE_HZ */
/* UP (and no local-APIC) VMI-timer alarm interrupt handler.
 * Handler for IRQ0. Not used on SMP or X86_LOCAL_APIC kernels once
 * APIC setup is done and vmi_timer_setup_boot_alarm() has been called. */
static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id)
{
	vmi_local_timer_interrupt(smp_processor_id());
	return IRQ_HANDLED;
}
#ifdef CONFIG_X86_LOCAL_APIC

/* SMP VMI-timer alarm interrupt handler. Handler for the LVTT vector.
 * Also used in UP when CONFIG_X86_LOCAL_APIC.
 * The wrapper code is from arch/i386/kernel/apic.c#smp_apic_timer_interrupt. */
void smp_apic_vmi_timer_interrupt(struct pt_regs *regs)
{
	struct pt_regs *old_regs = set_irq_regs(regs);
	int cpu = smp_processor_id();

	/*
	 * The NMI deadlock-detector uses this.
	 */
	per_cpu(irq_stat, cpu).apic_timer_irqs++;

	/*
	 * NOTE! We'd better ACK the irq immediately,
	 * because timer handling can be slow.
	 */
	ack_APIC_irq();

	/*
	 * update_process_times() expects us to have done irq_enter().
	 * Besides, if we don't, timer interrupts ignore the global
	 * interrupt lock, which is the WrongThing (tm) to do.
	 */
	irq_enter();
	vmi_local_timer_interrupt(cpu);
	irq_exit();
	set_irq_regs(old_regs);
}

#endif /* CONFIG_X86_LOCAL_APIC */