[PATCH] disable lost tick compensation before TSCs are synced
[linux-2.6/sactl.git] / arch / i386 / kernel / timers / timer_tsc.c
blob7c86e3c5f1c1eb48fd4a30acba88416c488efcdd
1 /*
2 * This code largely moved from arch/i386/kernel/time.c.
3 * See comments there for proper credits.
5 * 2004-06-25 Jesper Juhl
6 * moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4
7 * failing to inline.
8 */
10 #include <linux/spinlock.h>
11 #include <linux/init.h>
12 #include <linux/timex.h>
13 #include <linux/errno.h>
14 #include <linux/cpufreq.h>
15 #include <linux/string.h>
16 #include <linux/jiffies.h>
18 #include <asm/timer.h>
19 #include <asm/io.h>
20 /* processor.h for tsc_disable flag */
21 #include <asm/processor.h>
23 #include "io_ports.h"
24 #include "mach_timer.h"
26 #include <asm/hpet.h>
27 #include <asm/i8253.h>
29 #ifdef CONFIG_HPET_TIMER
30 static unsigned long hpet_usec_quotient;
31 static unsigned long hpet_last;
32 static struct timer_opts timer_tsc;
33 #endif
35 static inline void cpufreq_delayed_get(void);
37 int tsc_disable __devinitdata = 0;
39 static int use_tsc;
40 /* Number of usecs that the last interrupt was delayed */
41 static int delay_at_last_interrupt;
43 static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
44 static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
45 static unsigned long long monotonic_base;
46 static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
48 /* Avoid compensating for lost ticks before TSCs are synched */
49 static int detect_lost_ticks;
50 static int __init start_lost_tick_compensation(void)
52 detect_lost_ticks = 1;
53 return 0;
55 late_initcall(start_lost_tick_compensation);
/* convert from cycles(64bits) => nanoseconds (64bits)
 *  basic equation:
 *		ns = cycles / (freq / ns_per_sec)
 *		ns = cycles * (ns_per_sec / freq)
 *		ns = cycles * (10^9 / (cpu_khz * 10^3))
 *		ns = cycles * (10^6 / cpu_khz)
 *
 *	Then we use scaling math (suggested by george@mvista.com) to get:
 *		ns = cycles * (10^6 * SC / cpu_khz) / SC
 *		ns = cycles * cyc2ns_scale / SC
 *
 *	And since SC is a constant power of two, we can convert the div
 *  into a shift.
 *
 *  We can use khz divisor instead of mhz to keep a better precision, since
 *  cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
 *  (mathieu.desnoyers@polymtl.ca)
 *
 *			-johnstul@us.ibm.com "math is hard, lets go shopping!"
 */
static unsigned long cyc2ns_scale;
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */

/*
 * Recompute cyc2ns_scale for a CPU clock of @cpu_khz kHz.
 * A zero frequency would divide by zero, so it is ignored and the
 * previous scale is kept.
 */
static inline void set_cyc2ns_scale(unsigned long cpu_khz)
{
	if (!cpu_khz)
		return;
	cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
}

/*
 * Convert @cyc TSC cycles to nanoseconds using cyc2ns_scale.
 *
 * The straightforward (cyc * cyc2ns_scale) >> SC overflows 64 bits once
 * cyc exceeds 2^64 / cyc2ns_scale, long before the result itself stops
 * fitting.  Splitting cyc into its multiple-of-2^SC part and the
 * remainder gives exactly the same value without the oversized
 * intermediate product:
 *	cyc = q * 2^SC + r   =>   ns = q * scale + ((r * scale) >> SC)
 */
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
{
	unsigned long long quot = cyc >> CYC2NS_SCALE_FACTOR;
	unsigned long long rem = cyc & ((1ULL << CYC2NS_SCALE_FACTOR) - 1);

	return quot * cyc2ns_scale +
		((rem * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR);
}
/*
 * Oldest of the PIT count samples kept by mark_offset_tsc() for its
 * buggy-latch filter; seeded with LATCH in init_tsc().
 */
static int count2; /* counter for mark_offset_tsc() */

/* Cached *multiplier* to convert TSC counts to microseconds.
 * (see the equation in get_offset_tsc()).
 * Equal to 2^32 * (1 / (clocks per usec) ).
 * Set by init_tsc() from the TSC calibration result.
 */
static unsigned long fast_gettimeoffset_quotient;
99 static unsigned long get_offset_tsc(void)
101 register unsigned long eax, edx;
103 /* Read the Time Stamp Counter */
105 rdtsc(eax,edx);
107 /* .. relative to previous jiffy (32 bits is enough) */
108 eax -= last_tsc_low; /* tsc_low delta */
111 * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
112 * = (tsc_low delta) * (usecs_per_clock)
113 * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
115 * Using a mull instead of a divl saves up to 31 clock cycles
116 * in the critical path.
119 __asm__("mull %2"
120 :"=a" (eax), "=d" (edx)
121 :"rm" (fast_gettimeoffset_quotient),
122 "0" (eax));
124 /* our adjusted time offset in microseconds */
125 return delay_at_last_interrupt + edx;
128 static unsigned long long monotonic_clock_tsc(void)
130 unsigned long long last_offset, this_offset, base;
131 unsigned seq;
133 /* atomically read monotonic base & last_offset */
134 do {
135 seq = read_seqbegin(&monotonic_lock);
136 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
137 base = monotonic_base;
138 } while (read_seqretry(&monotonic_lock, seq));
140 /* Read the Time Stamp Counter */
141 rdtscll(this_offset);
143 /* return the value in ns */
144 return base + cycles_2_ns(this_offset - last_offset);
148 * Scheduler clock - returns current time in nanosec units.
150 unsigned long long sched_clock(void)
152 unsigned long long this_offset;
155 * In the NUMA case we dont use the TSC as they are not
156 * synchronized across all CPUs.
158 #ifndef CONFIG_NUMA
159 if (!use_tsc)
160 #endif
161 /* no locking but a rare wrong value is not a big deal */
162 return jiffies_64 * (1000000000 / HZ);
164 /* Read the Time Stamp Counter */
165 rdtscll(this_offset);
167 /* return the value in ns */
168 return cycles_2_ns(this_offset);
/*
 * Busy-wait until the low 32 bits of the TSC have advanced by at least
 * @loops counts since entry, relaxing the CPU with rep_nop() between
 * reads.
 */
static void delay_tsc(unsigned long loops)
{
	unsigned long bclock, now;

	rdtscl(bclock);
	do {
		rep_nop();
		rdtscl(now);
	} while ((now-bclock) < loops);
}
#ifdef CONFIG_HPET_TIMER
/*
 * mark_offset handler used when the HPET drives the timer tick
 * (installed by init_tsc()).  Records the current TSC, compensates
 * jiffies_64 for lost ticks, advances monotonic_base and computes
 * delay_at_last_interrupt from the HPET counter.
 */
static void mark_offset_tsc_hpet(void)
{
	unsigned long long this_offset, last_offset;
	unsigned long offset, temp, hpet_current;

	write_seqlock(&monotonic_lock);
	last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
	/*
	 * It is important that these two operations happen almost at
	 * the same time. We do the RDTSC stuff first, since it's
	 * faster. To avoid any inconsistencies, we need interrupts
	 * disabled locally.
	 *
	 * NOTE(review): the code below actually reads the HPET counter
	 * before the TSC, the reverse of what this inherited comment says.
	 */
	/*
	 * Interrupts are just disabled locally since the timer irq
	 * has the SA_INTERRUPT flag set. -arca
	 */
	/* read Pentium cycle counter */

	hpet_current = hpet_readl(HPET_COUNTER);
	rdtsc(last_tsc_low, last_tsc_high);

	/* lost tick compensation */
	offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
	if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))
					&& detect_lost_ticks) {
		int lost_ticks = (offset - hpet_last) / hpet_tick;
		jiffies_64 += lost_ticks;
	}
	hpet_last = hpet_current;

	/* update the monotonic base value */
	this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
	monotonic_base += cycles_2_ns(this_offset - last_offset);
	write_sequnlock(&monotonic_lock);

	/* calculate delay_at_last_interrupt */
	/*
	 * Time offset = (hpet delta) * ( usecs per HPET clock )
	 *             = (hpet delta) * ( usecs per tick / HPET clocks per tick)
	 *             = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
	 * Where,
	 * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
	 */
	delay_at_last_interrupt = hpet_current - offset;
	ASM_MUL64_REG(temp, delay_at_last_interrupt,
			hpet_usec_quotient, delay_at_last_interrupt);
}
#endif
235 #ifdef CONFIG_CPU_FREQ
236 #include <linux/workqueue.h>
238 static unsigned int cpufreq_delayed_issched = 0;
239 static unsigned int cpufreq_init = 0;
240 static struct work_struct cpufreq_delayed_get_work;
242 static void handle_cpufreq_delayed_get(void *v)
244 unsigned int cpu;
245 for_each_online_cpu(cpu) {
246 cpufreq_get(cpu);
248 cpufreq_delayed_issched = 0;
251 /* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
252 * to verify the CPU frequency the timing core thinks the CPU is running
253 * at is still correct.
255 static inline void cpufreq_delayed_get(void)
257 if (cpufreq_init && !cpufreq_delayed_issched) {
258 cpufreq_delayed_issched = 1;
259 printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n");
260 schedule_work(&cpufreq_delayed_get_work);
264 /* If the CPU frequency is scaled, TSC-based delays will need a different
265 * loops_per_jiffy value to function properly.
268 static unsigned int ref_freq = 0;
269 static unsigned long loops_per_jiffy_ref = 0;
271 #ifndef CONFIG_SMP
272 static unsigned long fast_gettimeoffset_ref = 0;
273 static unsigned int cpu_khz_ref = 0;
274 #endif
276 static int
277 time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
278 void *data)
280 struct cpufreq_freqs *freq = data;
282 if (val != CPUFREQ_RESUMECHANGE)
283 write_seqlock_irq(&xtime_lock);
284 if (!ref_freq) {
285 ref_freq = freq->old;
286 loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
287 #ifndef CONFIG_SMP
288 fast_gettimeoffset_ref = fast_gettimeoffset_quotient;
289 cpu_khz_ref = cpu_khz;
290 #endif
293 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
294 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
295 (val == CPUFREQ_RESUMECHANGE)) {
296 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
297 cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
298 #ifndef CONFIG_SMP
299 if (cpu_khz)
300 cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
301 if (use_tsc) {
302 if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
303 fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);
304 set_cyc2ns_scale(cpu_khz);
307 #endif
310 if (val != CPUFREQ_RESUMECHANGE)
311 write_sequnlock_irq(&xtime_lock);
313 return 0;
316 static struct notifier_block time_cpufreq_notifier_block = {
317 .notifier_call = time_cpufreq_notifier
321 static int __init cpufreq_tsc(void)
323 int ret;
324 INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
325 ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
326 CPUFREQ_TRANSITION_NOTIFIER);
327 if (!ret)
328 cpufreq_init = 1;
329 return ret;
331 core_initcall(cpufreq_tsc);
333 #else /* CONFIG_CPU_FREQ */
334 static inline void cpufreq_delayed_get(void) { return; }
335 #endif
337 int recalibrate_cpu_khz(void)
339 #ifndef CONFIG_SMP
340 unsigned int cpu_khz_old = cpu_khz;
342 if (cpu_has_tsc) {
343 local_irq_disable();
344 init_cpu_khz();
345 local_irq_enable();
346 cpu_data[0].loops_per_jiffy =
347 cpufreq_scale(cpu_data[0].loops_per_jiffy,
348 cpu_khz_old,
349 cpu_khz);
350 return 0;
351 } else
352 return -ENODEV;
353 #else
354 return -ENODEV;
355 #endif
357 EXPORT_SYMBOL(recalibrate_cpu_khz);
359 static void mark_offset_tsc(void)
361 unsigned long lost,delay;
362 unsigned long delta = last_tsc_low;
363 int count;
364 int countmp;
365 static int count1 = 0;
366 unsigned long long this_offset, last_offset;
367 static int lost_count = 0;
369 write_seqlock(&monotonic_lock);
370 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
372 * It is important that these two operations happen almost at
373 * the same time. We do the RDTSC stuff first, since it's
374 * faster. To avoid any inconsistencies, we need interrupts
375 * disabled locally.
379 * Interrupts are just disabled locally since the timer irq
380 * has the SA_INTERRUPT flag set. -arca
383 /* read Pentium cycle counter */
385 rdtsc(last_tsc_low, last_tsc_high);
387 spin_lock(&i8253_lock);
388 outb_p(0x00, PIT_MODE); /* latch the count ASAP */
390 count = inb_p(PIT_CH0); /* read the latched count */
391 count |= inb(PIT_CH0) << 8;
394 * VIA686a test code... reset the latch if count > max + 1
395 * from timer_pit.c - cjb
397 if (count > LATCH) {
398 outb_p(0x34, PIT_MODE);
399 outb_p(LATCH & 0xff, PIT_CH0);
400 outb(LATCH >> 8, PIT_CH0);
401 count = LATCH - 1;
404 spin_unlock(&i8253_lock);
406 if (pit_latch_buggy) {
407 /* get center value of last 3 time lutch */
408 if ((count2 >= count && count >= count1)
409 || (count1 >= count && count >= count2)) {
410 count2 = count1; count1 = count;
411 } else if ((count1 >= count2 && count2 >= count)
412 || (count >= count2 && count2 >= count1)) {
413 countmp = count;count = count2;
414 count2 = count1;count1 = countmp;
415 } else {
416 count2 = count1; count1 = count; count = count1;
420 /* lost tick compensation */
421 delta = last_tsc_low - delta;
423 register unsigned long eax, edx;
424 eax = delta;
425 __asm__("mull %2"
426 :"=a" (eax), "=d" (edx)
427 :"rm" (fast_gettimeoffset_quotient),
428 "0" (eax));
429 delta = edx;
431 delta += delay_at_last_interrupt;
432 lost = delta/(1000000/HZ);
433 delay = delta%(1000000/HZ);
434 if (lost >= 2 && detect_lost_ticks) {
435 jiffies_64 += lost-1;
437 /* sanity check to ensure we're not always losing ticks */
438 if (lost_count++ > 100) {
439 printk(KERN_WARNING "Losing too many ticks!\n");
440 printk(KERN_WARNING "TSC cannot be used as a timesource. \n");
441 printk(KERN_WARNING "Possible reasons for this are:\n");
442 printk(KERN_WARNING " You're running with Speedstep,\n");
443 printk(KERN_WARNING " You don't have DMA enabled for your hard disk (see hdparm),\n");
444 printk(KERN_WARNING " Incorrect TSC synchronization on an SMP system (see dmesg).\n");
445 printk(KERN_WARNING "Falling back to a sane timesource now.\n");
447 clock_fallback();
449 /* ... but give the TSC a fair chance */
450 if (lost_count > 25)
451 cpufreq_delayed_get();
452 } else
453 lost_count = 0;
454 /* update the monotonic base value */
455 this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
456 monotonic_base += cycles_2_ns(this_offset - last_offset);
457 write_sequnlock(&monotonic_lock);
459 /* calculate delay_at_last_interrupt */
460 count = ((LATCH-1) - count) * TICK_SIZE;
461 delay_at_last_interrupt = (count + LATCH/2) / LATCH;
463 /* catch corner case where tick rollover occured
464 * between tsc and pit reads (as noted when
465 * usec delta is > 90% # of usecs/tick)
467 if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
468 jiffies_64++;
471 static int __init init_tsc(char* override)
474 /* check clock override */
475 if (override[0] && strncmp(override,"tsc",3)) {
476 #ifdef CONFIG_HPET_TIMER
477 if (is_hpet_enabled()) {
478 printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n");
479 } else
480 #endif
482 return -ENODEV;
487 * If we have APM enabled or the CPU clock speed is variable
488 * (CPU stops clock on HLT or slows clock to save power)
489 * then the TSC timestamps may diverge by up to 1 jiffy from
490 * 'real time' but nothing will break.
491 * The most frequent case is that the CPU is "woken" from a halt
492 * state by the timer interrupt itself, so we get 0 error. In the
493 * rare cases where a driver would "wake" the CPU and request a
494 * timestamp, the maximum error is < 1 jiffy. But timestamps are
495 * still perfectly ordered.
496 * Note that the TSC counter will be reset if APM suspends
497 * to disk; this won't break the kernel, though, 'cuz we're
498 * smart. See arch/i386/kernel/apm.c.
501 * Firstly we have to do a CPU check for chips with
502 * a potentially buggy TSC. At this point we haven't run
503 * the ident/bugs checks so we must run this hook as it
504 * may turn off the TSC flag.
506 * NOTE: this doesn't yet handle SMP 486 machines where only
507 * some CPU's have a TSC. Thats never worked and nobody has
508 * moaned if you have the only one in the world - you fix it!
511 count2 = LATCH; /* initialize counter for mark_offset_tsc() */
513 if (cpu_has_tsc) {
514 unsigned long tsc_quotient;
515 #ifdef CONFIG_HPET_TIMER
516 if (is_hpet_enabled() && hpet_use_timer) {
517 unsigned long result, remain;
518 printk("Using TSC for gettimeofday\n");
519 tsc_quotient = calibrate_tsc_hpet(NULL);
520 timer_tsc.mark_offset = &mark_offset_tsc_hpet;
522 * Math to calculate hpet to usec multiplier
523 * Look for the comments at get_offset_tsc_hpet()
525 ASM_DIV64_REG(result, remain, hpet_tick,
526 0, KERNEL_TICK_USEC);
527 if (remain > (hpet_tick >> 1))
528 result++; /* rounding the result */
530 hpet_usec_quotient = result;
531 } else
532 #endif
534 tsc_quotient = calibrate_tsc();
537 if (tsc_quotient) {
538 fast_gettimeoffset_quotient = tsc_quotient;
539 use_tsc = 1;
541 * We could be more selective here I suspect
542 * and just enable this for the next intel chips ?
544 /* report CPU clock rate in Hz.
545 * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
546 * clock/second. Our precision is about 100 ppm.
548 { unsigned long eax=0, edx=1000;
549 __asm__("divl %2"
550 :"=a" (cpu_khz), "=d" (edx)
551 :"r" (tsc_quotient),
552 "0" (eax), "1" (edx));
553 printk("Detected %u.%03u MHz processor.\n",
554 cpu_khz / 1000, cpu_khz % 1000);
556 set_cyc2ns_scale(cpu_khz);
557 return 0;
560 return -ENODEV;
563 static int tsc_resume(void)
565 write_seqlock(&monotonic_lock);
566 /* Assume this is the last mark offset time */
567 rdtsc(last_tsc_low, last_tsc_high);
568 #ifdef CONFIG_HPET_TIMER
569 if (is_hpet_enabled() && hpet_use_timer)
570 hpet_last = hpet_readl(HPET_COUNTER);
571 #endif
572 write_sequnlock(&monotonic_lock);
573 return 0;
576 #ifndef CONFIG_X86_TSC
577 /* disable flag for tsc. Takes effect by clearing the TSC cpu flag
578 * in cpu/common.c */
579 static int __init tsc_setup(char *str)
581 tsc_disable = 1;
582 return 1;
584 #else
585 static int __init tsc_setup(char *str)
587 printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
588 "cannot disable TSC.\n");
589 return 1;
591 #endif
592 __setup("notsc", tsc_setup);
596 /************************************************************/
598 /* tsc timer_opts struct */
599 static struct timer_opts timer_tsc = {
600 .name = "tsc",
601 .mark_offset = mark_offset_tsc,
602 .get_offset = get_offset_tsc,
603 .monotonic_clock = monotonic_clock_tsc,
604 .delay = delay_tsc,
605 .read_timer = read_timer_tsc,
606 .resume = tsc_resume,
609 struct init_timer_opts __initdata timer_tsc_init = {
610 .init = init_tsc,
611 .opts = &timer_tsc,