1 #include <linux/kernel.h>
2 #include <linux/sched.h>
3 #include <linux/interrupt.h>
4 #include <linux/init.h>
5 #include <linux/clocksource.h>
6 #include <linux/time.h>
7 #include <linux/acpi.h>
8 #include <linux/cpufreq.h>
9 #include <linux/acpi_pmtmr.h>
12 #include <asm/timex.h>
13 #include <asm/timer.h>
14 #include <asm/vgtod.h>
16 static int notsc __initdata
= 0;
18 unsigned int cpu_khz
; /* TSC clocks / usec, not used here */
19 EXPORT_SYMBOL(cpu_khz
);
21 EXPORT_SYMBOL(tsc_khz
);
23 /* Accelerators for sched_clock()
24 * convert from cycles(64bits) => nanoseconds (64bits)
26 * ns = cycles / (freq / ns_per_sec)
27 * ns = cycles * (ns_per_sec / freq)
28 * ns = cycles * (10^9 / (cpu_khz * 10^3))
29 * ns = cycles * (10^6 / cpu_khz)
31 * Then we use scaling math (suggested by george@mvista.com) to get:
32 * ns = cycles * (10^6 * SC / cpu_khz) / SC
33 * ns = cycles * cyc2ns_scale / SC
35 * And since SC is a constant power of two, we can convert the div
38 * We can use khz divisor instead of mhz to keep a better precision, since
39 * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
40 * (mathieu.desnoyers@polymtl.ca)
42 * -johnstul@us.ibm.com "math is hard, lets go shopping!"
44 DEFINE_PER_CPU(unsigned long, cyc2ns
);
46 static void set_cyc2ns_scale(unsigned long cpu_khz
, int cpu
)
48 unsigned long flags
, prev_scale
, *scale
;
49 unsigned long long tsc_now
, ns_now
;
51 local_irq_save(flags
);
52 sched_clock_idle_sleep_event();
54 scale
= &per_cpu(cyc2ns
, cpu
);
57 ns_now
= __cycles_2_ns(tsc_now
);
61 *scale
= (NSEC_PER_MSEC
<< CYC2NS_SCALE_FACTOR
)/cpu_khz
;
63 sched_clock_idle_wakeup_event(0);
64 local_irq_restore(flags
);
/*
 * native_sched_clock - scheduler clock derived directly from the TSC
 *
 * Returns the current TSC reading converted to nanoseconds by
 * cycles_2_ns().
 */
unsigned long long native_sched_clock(void)
{
	unsigned long a = 0;

	/* Could do CPU core sync here. Opteron can execute rdtsc speculatively,
	 * which means it is not completely exact and may not be monotonic
	 * between CPUs. But the errors should be too small to matter for
	 * scheduling purposes.
	 */

	rdtscll(a);
	return cycles_2_ns(a);
}
/* We need to define a real function for sched_clock, to override the
   weak default version */
#ifdef CONFIG_PARAVIRT
/* Paravirtualized kernels route the scheduler clock through paravirt ops. */
unsigned long long sched_clock(void)
{
	return paravirt_sched_clock();
}
#else
/* Otherwise sched_clock() is simply another name for native_sched_clock(). */
unsigned long long
sched_clock(void) __attribute__((alias("native_sched_clock")));
#endif
/* Non-zero once the TSC has been declared unusable as a time source. */
static int tsc_unstable;

/*
 * check_tsc_unstable - report whether the TSC has been marked unstable
 *
 * Returns the current value of the tsc_unstable flag.
 */
int check_tsc_unstable(void)
{
	return tsc_unstable;
}
EXPORT_SYMBOL_GPL(check_tsc_unstable);
102 #ifdef CONFIG_CPU_FREQ
104 /* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
107 * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's
108 * not that important because current Opteron setups do not support
109 * scaling on SMP anyroads.
111 * Should fix up last_tsc too. Currently gettimeofday in the
112 * first tick after the change will be slightly wrong.
115 static unsigned int ref_freq
;
116 static unsigned long loops_per_jiffy_ref
;
117 static unsigned long tsc_khz_ref
;
119 static int time_cpufreq_notifier(struct notifier_block
*nb
, unsigned long val
,
122 struct cpufreq_freqs
*freq
= data
;
123 unsigned long *lpj
, dummy
;
125 if (cpu_has(&cpu_data(freq
->cpu
), X86_FEATURE_CONSTANT_TSC
))
129 if (!(freq
->flags
& CPUFREQ_CONST_LOOPS
))
131 lpj
= &cpu_data(freq
->cpu
).loops_per_jiffy
;
133 lpj
= &boot_cpu_data
.loops_per_jiffy
;
137 ref_freq
= freq
->old
;
138 loops_per_jiffy_ref
= *lpj
;
139 tsc_khz_ref
= tsc_khz
;
141 if ((val
== CPUFREQ_PRECHANGE
&& freq
->old
< freq
->new) ||
142 (val
== CPUFREQ_POSTCHANGE
&& freq
->old
> freq
->new) ||
143 (val
== CPUFREQ_RESUMECHANGE
)) {
145 cpufreq_scale(loops_per_jiffy_ref
, ref_freq
, freq
->new);
147 tsc_khz
= cpufreq_scale(tsc_khz_ref
, ref_freq
, freq
->new);
148 if (!(freq
->flags
& CPUFREQ_CONST_LOOPS
))
149 mark_tsc_unstable("cpufreq changes");
153 set_cyc2ns_scale(tsc_khz_ref
, smp_processor_id());
159 static struct notifier_block time_cpufreq_notifier_block
= {
160 .notifier_call
= time_cpufreq_notifier
163 static int __init
cpufreq_tsc(void)
165 cpufreq_register_notifier(&time_cpufreq_notifier_block
,
166 CPUFREQ_TRANSITION_NOTIFIER
);
170 core_initcall(cpufreq_tsc
);
174 #define MAX_RETRIES 5
175 #define SMI_TRESHOLD 50000
178 * Read TSC and the reference counters. Take care of SMI disturbance
180 static unsigned long __init
tsc_read_refs(unsigned long *pm
,
183 unsigned long t1
, t2
;
186 for (i
= 0; i
< MAX_RETRIES
; i
++) {
189 *hpet
= hpet_readl(HPET_COUNTER
) & 0xFFFFFFFF;
191 *pm
= acpi_pm_read_early();
193 if ((t2
- t1
) < SMI_TRESHOLD
)
200 * tsc_calibrate - calibrate the tsc on boot
202 void __init
tsc_calibrate(void)
204 unsigned long flags
, tsc1
, tsc2
, tr1
, tr2
, pm1
, pm2
, hpet1
, hpet2
;
205 int hpet
= is_hpet_enabled(), cpu
;
207 local_irq_save(flags
);
209 tsc1
= tsc_read_refs(&pm1
, hpet
? &hpet1
: NULL
);
211 outb((inb(0x61) & ~0x02) | 0x01, 0x61);
214 outb((CLOCK_TICK_RATE
/ (1000 / 50)) & 0xff, 0x42);
215 outb((CLOCK_TICK_RATE
/ (1000 / 50)) >> 8, 0x42);
217 while ((inb(0x61) & 0x20) == 0);
220 tsc2
= tsc_read_refs(&pm2
, hpet
? &hpet2
: NULL
);
222 local_irq_restore(flags
);
225 * Preset the result with the raw and inaccurate PIT
228 tsc_khz
= (tr2
- tr1
) / 50;
230 /* hpet or pmtimer available ? */
231 if (!hpet
&& !pm1
&& !pm2
) {
232 printk(KERN_INFO
"TSC calibrated against PIT\n");
236 /* Check, whether the sampling was disturbed by an SMI */
237 if (tsc1
== ULONG_MAX
|| tsc2
== ULONG_MAX
) {
238 printk(KERN_WARNING
"TSC calibration disturbed by SMI, "
239 "using PIT calibration result\n");
243 tsc2
= (tsc2
- tsc1
) * 1000000L;
246 printk(KERN_INFO
"TSC calibrated against HPET\n");
248 hpet2
+= 0x100000000;
250 tsc1
= (hpet2
* hpet_readl(HPET_PERIOD
)) / 1000000;
252 printk(KERN_INFO
"TSC calibrated against PM_TIMER\n");
254 pm2
+= ACPI_PM_OVRRUN
;
256 tsc1
= (pm2
* 1000000000) / PMTMR_TICKS_PER_SEC
;
259 tsc_khz
= tsc2
/ tsc1
;
261 for_each_possible_cpu(cpu
)
262 set_cyc2ns_scale(tsc_khz
, cpu
);
266 * Make an educated guess if the TSC is trustworthy and synchronized
269 __cpuinit
int unsynchronized_tsc(void)
275 if (apic_is_clustered_box())
279 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC
))
282 /* Assume multi socket systems are not synchronized */
283 return num_present_cpus() > 1;
286 int __init
notsc_setup(char *s
)
292 __setup("notsc", notsc_setup
);
294 static struct clocksource clocksource_tsc
;
297 * We compare the TSC to the cycle_last value in the clocksource
298 * structure to avoid a nasty time-warp. This can be observed in a
299 * very small window right after one CPU updated cycle_last under
300 * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which
301 * is smaller than the cycle_last reference value due to a TSC which
302 * is slighty behind. This delta is nowhere else observable, but in
303 * that case it results in a forward time jump in the range of hours
304 * due to the unsigned delta calculation of the time keeping core
305 * code, which is necessary to support wrapping clocksources like pm
308 static cycle_t
read_tsc(void)
310 cycle_t ret
= (cycle_t
)get_cycles();
312 return ret
>= clocksource_tsc
.cycle_last
?
313 ret
: clocksource_tsc
.cycle_last
;
316 static cycle_t __vsyscall_fn
vread_tsc(void)
318 cycle_t ret
= (cycle_t
)vget_cycles();
320 return ret
>= __vsyscall_gtod_data
.clock
.cycle_last
?
321 ret
: __vsyscall_gtod_data
.clock
.cycle_last
;
324 static struct clocksource clocksource_tsc
= {
328 .mask
= CLOCKSOURCE_MASK(64),
330 .flags
= CLOCK_SOURCE_IS_CONTINUOUS
|
331 CLOCK_SOURCE_MUST_VERIFY
,
335 void mark_tsc_unstable(char *reason
)
339 printk("Marking TSC unstable due to %s\n", reason
);
340 /* Change only the rating, when not registered */
341 if (clocksource_tsc
.mult
)
342 clocksource_change_rating(&clocksource_tsc
, 0);
344 clocksource_tsc
.rating
= 0;
347 EXPORT_SYMBOL_GPL(mark_tsc_unstable
);
349 void __init
init_tsc_clocksource(void)
352 clocksource_tsc
.mult
= clocksource_khz2mult(tsc_khz
,
353 clocksource_tsc
.shift
);
354 if (check_tsc_unstable())
355 clocksource_tsc
.rating
= 0;
357 clocksource_register(&clocksource_tsc
);