/* smp.c: Sparc64 SMP support.
 *
 * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
 */

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/threads.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/spinlock.h>

#include <asm/ptrace.h>
#include <asm/atomic.h>

#include <asm/pgtable.h>
#include <asm/oplib.h>
#include <asm/hardirq.h>
#include <asm/softirq.h>
#include <asm/uaccess.h>
#include <asm/timer.h>
#include <asm/starfire.h>

#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
extern int linux_num_cpus;
extern void calibrate_delay(void);
extern unsigned prom_cpu_nodes[];

struct cpuinfo_sparc cpu_data[NR_CPUS] __attribute__ ((aligned (64)));

volatile int __cpu_number_map[NR_CPUS] __attribute__ ((aligned (64)));
volatile int __cpu_logical_map[NR_CPUS] __attribute__ ((aligned (64)));
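/* __cpu_number_map translates a hardware cpu id (the prom-reported mid) into
 * the dense logical index assigned at boot time; __cpu_logical_map is the
 * inverse mapping.  The 64-byte alignment starts each array on its own
 * cache-line-sized boundary so the frequently read maps do not share lines
 * with unrelated data.
 */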
/* Please don't make this stuff initdata!!!  --DaveM */
static unsigned char boot_cpu_id = 0;
static int smp_activated = 0;

spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED;

volatile int smp_processors_ready = 0;
unsigned long cpu_present_map = 0;

int smp_threads_ready = 0;
void __init smp_setup(char *str, int *ints)
{
	/* XXX implement me XXX */
}
int smp_info(char *buf)
{
	int len = 7, i;

	strcpy(buf, "State:\n");
	for (i = 0; i < NR_CPUS; i++)
		if(cpu_present_map & (1UL << i))
			len += sprintf(buf + len,
				       "CPU%d:\t\tonline\n", i);
	return len;
}
int smp_bogo(char *buf)
{
	int len = 0, i;

	for (i = 0; i < NR_CPUS; i++)
		if(cpu_present_map & (1UL << i))
			len += sprintf(buf + len,
				       "Cpu%dBogo\t: %lu.%02lu\n",
				       i, cpu_data[i].udelay_val / 500000,
				       (cpu_data[i].udelay_val / 5000) % 100);
	return len;
}
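/* Worked example of the formatting above: a udelay_val (i.e. loops_per_sec)
 * of 99532800 prints as "199.06" -- 99532800 / 500000 = 199 gives the integer
 * part, and (99532800 / 5000) % 100 = 6 gives the two fractional digits.
 */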
void __init smp_store_cpu_info(int id)
{
	int i;

	/* multiplier and counter set by
	   smp_setup_percpu_timer()  */
	cpu_data[id].udelay_val = loops_per_sec;

	cpu_data[id].pgcache_size = 0;
	cpu_data[id].pte_cache[0] = NULL;
	cpu_data[id].pte_cache[1] = NULL;
	cpu_data[id].pgdcache_size = 0;
	cpu_data[id].pgd_cache = NULL;
	cpu_data[id].idle_volume = 1;

	for(i = 0; i < 16; i++)
		cpu_data[id].irq_worklists[i] = 0;
}
void __init smp_commence(void)
{
}
static void smp_setup_percpu_timer(void);
static void smp_tune_scheduling(void);

static volatile unsigned long callin_flag = 0;

extern void inherit_locked_prom_mappings(int save_p);
extern void cpu_probe(void);
void __init smp_callin(void)
{
	int cpuid = hard_smp_processor_id();
	unsigned long pstate;

	inherit_locked_prom_mappings(0);

	/* Guarantee that the following sequences execute
	 * uninterrupted.
	 */
	__asm__ __volatile__("rdpr	%%pstate, %0\n\t"
			     "wrpr	%0, %1, %%pstate"
			     : "=r" (pstate)
			     : "i" (PSTATE_IE));
	/* Set things up so user can access tick register for profiling
	 * purposes.  Also workaround BB_ERRATA_1 by doing a dummy
	 * read back of %tick after writing it.
	 */
	__asm__ __volatile__("
	sethi	%%hi(0x80000000), %%g1
	ba,pt	%%xcc, 1f
	 sllx	%%g1, 32, %%g1
	.align	64
1:	rd	%%tick, %%g2
	add	%%g2, 6, %%g2
	andn	%%g2, %%g1, %%g2
	wrpr	%%g2, 0, %%tick
	rdpr	%%tick, %%g0"
	: /* no outputs */
	: /* no inputs */
	: "g1", "g2");

	/* Restore PSTATE_IE. */
	__asm__ __volatile__("wrpr	%0, 0x0, %%pstate"
			     : /* no outputs */
			     : "r" (pstate));
	smp_setup_percpu_timer();

	calibrate_delay();
	smp_store_cpu_info(cpuid);
	callin_flag = 1;
	__asm__ __volatile__("membar #Sync\n\t"
			     "flush  %%g6" : : : "memory");

	/* Clear this or we will die instantly when we
	 * schedule back to this idler...
	 */
	current->thread.flags &= ~(SPARC_FLAG_NEWCHILD);

	/* Attach to the address space of init_task. */
	atomic_inc(&init_mm.mm_count);
	current->active_mm = &init_mm;

	while(!smp_processors_ready)
		membar("#LoadLoad");
}
extern int cpu_idle(void);
extern void init_IRQ(void);

void initialize_secondary(void)
{
}

int start_secondary(void *unused)
{
	trap_init();
	init_IRQ();
	smp_callin();
	return cpu_idle();
}

void cpu_panic(void)
{
	printk("CPU[%d]: Returns from cpu_idle!\n", smp_processor_id());
	panic("SMP bolixed\n");
}
extern struct prom_cpuinfo linux_cpus[64];

extern unsigned long sparc64_cpu_startup;

/* The OBP cpu startup callback truncates the 3rd arg cookie to
 * 32-bits (I think) so to be safe we have it read the pointer
 * contained here so we work on >4GB machines. -DaveM
 */
static struct task_struct *cpu_new_task = NULL;
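/* In effect the startup trampoline performs (sketch, not the actual asm):
 *
 *	new_task = *(struct task_struct **) (unsigned long) cookie32;
 *
 * so only the kernel-image address of cpu_new_task has to survive the
 * 32-bit truncation, not the 64-bit task pointer stored in it.
 */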
void __init smp_boot_cpus(void)
{
	int cpucount = 0, i;

	printk("Entering UltraSMPenguin Mode...\n");
	smp_store_cpu_info(boot_cpu_id);
	smp_tune_scheduling();

	if(linux_num_cpus == 1)
		return;
	for(i = 0; i < NR_CPUS; i++) {
		if(i == boot_cpu_id)
			continue;

		if(cpu_present_map & (1UL << i)) {
			unsigned long entry = (unsigned long)(&sparc64_cpu_startup);
			unsigned long cookie = (unsigned long)(&cpu_new_task);
			struct task_struct *p;
			int timeout, no;

			prom_printf("Starting CPU %d... ", i);
			kernel_thread(start_secondary, NULL, CLONE_PID);
			cpucount++;

			p = init_task.prev_task;
			init_tasks[cpucount] = p;

			p->processor = i;
			p->has_cpu = 1; /* we schedule the first task manually */

			del_from_runqueue(p);
			callin_flag = 0;
			for (no = 0; no < linux_num_cpus; no++)
				if (linux_cpus[no].mid == i)
					break;

			cpu_new_task = p;
			prom_startcpu(linux_cpus[no].prom_node,
				      entry, cookie);
			for(timeout = 0; timeout < 5000000; timeout++) {
				if(callin_flag)
					break;
				udelay(100);
			}
			if(callin_flag) {
				__cpu_number_map[i] = cpucount;
				__cpu_logical_map[cpucount] = i;
				prom_cpu_nodes[i] = linux_cpus[no].prom_node;
			} else {
				cpucount--;
				printk("Processor %d is stuck.\n", i);
				prom_printf("FAILED\n");
			}
		}
		if(!callin_flag) {
			cpu_present_map &= ~(1UL << i);
			__cpu_number_map[i] = -1;
		}
	}
282 printk("Error: only one processor found.\n");
283 cpu_present_map
= (1UL << smp_processor_id());
285 unsigned long bogosum
= 0;
287 for(i
= 0; i
< NR_CPUS
; i
++) {
288 if(cpu_present_map
& (1UL << i
))
289 bogosum
+= cpu_data
[i
].udelay_val
;
291 printk("Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
293 (bogosum
+ 2500)/500000,
294 ((bogosum
+ 2500)/5000)%100);
296 smp_num_cpus
= cpucount
+ 1;
298 smp_processors_ready
= 1;
299 membar("#StoreStore | #StoreLoad");
/* #define XCALL_DEBUG */

static inline void xcall_deliver(u64 data0, u64 data1, u64 data2, u64 pstate, unsigned long cpu)
{
	u64 result, target;
	int stuck, tmp;

	if (this_is_starfire) {
		/* map to real upaid */
		cpu = (((cpu & 0x3c) << 1) |
			((cpu & 0x40) >> 4) |
			(cpu & 0x3));
	}

	target = (cpu << 14) | 0x70;
#ifdef XCALL_DEBUG
	printk("CPU[%d]: xcall(data[%016lx:%016lx:%016lx],tgt[%016lx])\n",
	       smp_processor_id(), data0, data1, data2, target);
#endif
again:
	/* Ok, this is the real Spitfire Errata #54.
	 * One must read back from a UDB internal register
	 * after writes to the UDB interrupt dispatch, but
	 * before the membar Sync for that write.
	 * So we use the high UDB control register (ASI 0x7f,
	 * ADDR 0x20) for the dummy read. -DaveM
	 */
	tmp = 0x40;
	__asm__ __volatile__("
	wrpr	%1, %2, %%pstate
	stxa	%4, [%0] %3
	stxa	%5, [%0+%8] %3
	add	%0, %8, %0
	stxa	%6, [%0+%8] %3
	membar	#Sync
	stxa	%%g0, [%7] %3
	membar	#Sync
	mov	0x20, %%g1
	ldxa	[%%g1] 0x7f, %%g0
	membar	#Sync"
	: "=r" (tmp)
	: "r" (pstate), "i" (PSTATE_IE), "i" (ASI_UDB_INTR_W),
	  "r" (data0), "r" (data1), "r" (data2), "r" (target), "r" (0x10), "0" (tmp)
	: "g1");
	/* NOTE: PSTATE_IE is still clear. */
	stuck = 100000;
	do {
		__asm__ __volatile__("ldxa [%%g0] %1, %0"
			: "=r" (result)
			: "i" (ASI_INTR_DISPATCH_STAT));
		if (result == 0) {
			/* Dispatch accepted, restore PSTATE_IE and return. */
			__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
					     : : "r" (pstate));
			return;
		}
		stuck -= 1;
		if (stuck == 0)
			break;
	} while(result & 0x1);
	__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
			     : : "r" (pstate));
	if (stuck == 0) {
		printk("CPU[%d]: mondo stuckage result[%016lx]\n",
		       smp_processor_id(), result);
	} else {
#ifdef XCALL_DEBUG
		printk("CPU[%d]: Penguin %d NACK's master.\n", smp_processor_id(), cpu);
#endif
		udelay(2);
		goto again;
	}
}
void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2)
{
	if(smp_processors_ready) {
		unsigned long mask = (cpu_present_map & ~(1UL<<smp_processor_id()));
		u64 pstate, data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff));
		int i, ncpus = smp_num_cpus - 1;

		__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
		for(i = 0; i < NR_CPUS; i++) {
			if(mask & (1UL << i)) {
				xcall_deliver(data0, data1, data2, pstate, i);
				ncpus--;
				if (!ncpus)
					break;
			}
		}

		/* NOTE: Caller runs local copy on master. */
	}
}
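/* The data0 word built above encodes the whole request: the MMU context
 * (when one is needed) rides in the upper 32 bits, and the low 32 bits of
 * the handler's kernel-image address fill the lower half; data1/data2 are
 * free-form arguments that the receiving xcall_* handler interprets itself.
 */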
struct call_data_struct {
	void (*func) (void *info);
	void *info;
	atomic_t finished;
	int wait;
};

extern unsigned long xcall_call_function;

int smp_call_function(void (*func)(void *info), void *info,
		      int nonatomic, int wait)
{
	struct call_data_struct data;
	int cpus = smp_num_cpus - 1;

	if (!cpus)
		return 0;

	data.func = func;
	data.info = info;
	atomic_set(&data.finished, 0);
	data.wait = wait;

	smp_cross_call(&xcall_call_function,
		       0, (u64) &data, 0);

	/* Wait for the other cpus to run the function (or at least
	 * to have snapped up the call data).
	 */
	while (atomic_read(&data.finished) != cpus)
		barrier();

	return 0;
}
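/* Minimal usage sketch (hypothetical caller, not from this file): run a
 * routine on every other cpu and wait, then run it locally, since the
 * cross call never targets the initiating cpu:
 *
 *	static void drain_something(void *ignored) { ... }
 *
 *	smp_call_function(drain_something, NULL, 0, 1);
 *	drain_something(NULL);
 */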
void smp_call_function_client(struct call_data_struct *call_data)
{
	call_data->func(call_data->info);

	atomic_inc(&call_data->finished);
}
extern unsigned long xcall_flush_tlb_page;
extern unsigned long xcall_flush_tlb_mm;
extern unsigned long xcall_flush_tlb_range;
extern unsigned long xcall_flush_tlb_all;
extern unsigned long xcall_tlbcachesync;
extern unsigned long xcall_flush_cache_all;
extern unsigned long xcall_report_regs;
extern unsigned long xcall_receive_signal;
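/* These are not C functions but entry points of assembly cross-call handlers
 * (presumably the sparc64 trampolines in mm/ultra.S); only their addresses
 * are taken here, hence the dummy 'unsigned long' declarations.
 */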
void smp_receive_signal(int cpu)
{
	if(smp_processors_ready &&
	   (cpu_present_map & (1UL<<cpu)) != 0) {
		u64 pstate, data0 = (((u64)&xcall_receive_signal) & 0xffffffff);

		__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
		xcall_deliver(data0, 0, 0, pstate, cpu);
	}
}
void smp_report_regs(void)
{
	smp_cross_call(&xcall_report_regs, 0, 0, 0);
}

void smp_flush_cache_all(void)
{
	smp_cross_call(&xcall_flush_cache_all, 0, 0, 0);
	__flush_cache_all();
}

void smp_flush_tlb_all(void)
{
	smp_cross_call(&xcall_flush_tlb_all, 0, 0, 0);
	__flush_tlb_all();
}
/* We know that the window frames of the user have been flushed
 * to the stack before we get here because all callers of us
 * are flush_tlb_*() routines, and these run after flush_cache_*()
 * which performs the flushw.
 *
 * XXX I diked out the fancy flush avoidance code for the
 * XXX swapping cases for now until the new MM code stabilizes. -DaveM
 *
 * The SMP TLB coherency scheme we use works as follows:
 *
 * 1) mm->cpu_vm_mask is a bit mask of which cpus an address
 *    space has (potentially) executed on, this is the heuristic
 *    we use to avoid doing cross calls.
 *
 * 2) TLB context numbers are shared globally across all processors
 *    in the system, this allows us to play several games to avoid
 *    cross calls.
 *
 *    One invariant is that when a cpu switches to a process, and
 *    that process's tsk->active_mm->cpu_vm_mask does not have the
 *    current cpu's bit set, that tlb context is flushed locally.
 *
 *    If the address space is non-shared (ie. mm->count == 1) we avoid
 *    cross calls when we want to flush the currently running process's
 *    tlb state.  This is done by clearing all cpu bits except the current
 *    processor's in current->active_mm->cpu_vm_mask and performing the
 *    flush locally only.  This will force any subsequent cpus which run
 *    this task to flush the context from the local tlb if the process
 *    migrates to another cpu (again).
 *
 * 3) For shared address spaces (threads) and swapping we bite the
 *    bullet for most cases and perform the cross call.
 *
 *    The performance gain from "optimizing" away the cross call for threads is
 *    questionable (in theory the big win for threads is the massive sharing of
 *    address space state across processors).
 *
 * For the swapping case the locking is difficult to get right, we'd have to
 * enforce strict ordered access to mm->cpu_vm_mask via a spinlock for example.
 * Then again one could argue that when you are swapping, the cost of a cross
 * call won't even show up on the performance radar.  But in any case we do get
 * rid of the cross-call when the task has a dead context or the task has only
 * ever run on the local cpu.
 */
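/* Illustrative sketch (not the actual switch_mm code) of invariant 1 above:
 *
 *	if (!(mm->cpu_vm_mask & (1UL << cpu))) {
 *		mm->cpu_vm_mask |= (1UL << cpu);
 *		__flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT);
 *	}
 *
 * which is why the routines below may simply clear the other cpus' bits:
 * any stale entries get flushed lazily if the task ever returns there.
 */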
void smp_flush_tlb_mm(struct mm_struct *mm)
{
	if (CTX_VALID(mm->context)) {
		u32 ctx = CTX_HWBITS(mm->context);
		int cpu = smp_processor_id();

		if (mm == current->active_mm && atomic_read(&mm->mm_users) == 1) {
			/* See smp_flush_tlb_page for info about this. */
			mm->cpu_vm_mask = (1UL << cpu);
			goto local_flush_and_out;
		}

		smp_cross_call(&xcall_flush_tlb_mm, ctx, 0, 0);

	local_flush_and_out:
		__flush_tlb_mm(ctx, SECONDARY_CONTEXT);
	}
}
void smp_flush_tlb_range(struct mm_struct *mm, unsigned long start,
			 unsigned long end)
{
	if (CTX_VALID(mm->context)) {
		u32 ctx = CTX_HWBITS(mm->context);
		int cpu = smp_processor_id();

		if (mm == current->active_mm && atomic_read(&mm->mm_users) == 1) {
			mm->cpu_vm_mask = (1UL << cpu);
			goto local_flush_and_out;
		}

		smp_cross_call(&xcall_flush_tlb_range, ctx, start, end);

	local_flush_and_out:
		__flush_tlb_range(ctx, start, SECONDARY_CONTEXT, end, PAGE_SIZE, (end-start));
	}
}
void smp_flush_tlb_page(struct mm_struct *mm, unsigned long page)
{
	if (CTX_VALID(mm->context)) {
		u32 ctx = CTX_HWBITS(mm->context);
		int cpu = smp_processor_id();

		if (mm == current->active_mm && atomic_read(&mm->mm_users) == 1) {
			/* By virtue of being the current address space, and
			 * having the only reference to it, the following
			 * operation is safe.
			 *
			 * It would not be a win to perform the xcall tlb flush in
			 * this case, because even if we switch back to one of the
			 * other processors in cpu_vm_mask it is almost certain that
			 * all TLB entries for this context will be replaced by the
			 * time that happens.
			 */
			mm->cpu_vm_mask = (1UL << cpu);
			goto local_flush_and_out;
		}

		/* By virtue of running under the mm->page_table_lock,
		 * and mmu_context.h:switch_mm doing the same, the following
		 * operation is safe.
		 */
		if (mm->cpu_vm_mask == (1UL << cpu))
			goto local_flush_and_out;

		/* OK, we have to actually perform the cross call.  Most likely
		 * this is a cloned mm or kswapd is kicking out pages for a task
		 * which has run recently on another cpu.
		 */
		smp_cross_call(&xcall_flush_tlb_page, ctx, page, 0);

	local_flush_and_out:
		__flush_tlb_page(ctx, page, SECONDARY_CONTEXT);
	}
}
/* #define CAPTURE_DEBUG */
extern unsigned long xcall_capture;

static atomic_t smp_capture_depth = ATOMIC_INIT(0);
static atomic_t smp_capture_registry = ATOMIC_INIT(0);
static unsigned long penguins_are_doing_time = 0;
void smp_capture(void)
{
	if (smp_processors_ready) {
		int result = __atomic_add(1, &smp_capture_depth);

		membar("#StoreStore | #LoadStore");
		if (result == 1) {
			int ncpus = smp_num_cpus;

#ifdef CAPTURE_DEBUG
			printk("CPU[%d]: Sending penguins to jail...",
			       smp_processor_id());
#endif
			penguins_are_doing_time = 1;
			membar("#StoreStore | #LoadStore");
			atomic_inc(&smp_capture_registry);
			smp_cross_call(&xcall_capture, 0, 0, 0);
			while(atomic_read(&smp_capture_registry) != ncpus)
				membar("#LoadLoad");
#ifdef CAPTURE_DEBUG
			printk("done\n");
#endif
		}
	}
}
void smp_release(void)
{
	if(smp_processors_ready) {
		if(atomic_dec_and_test(&smp_capture_depth)) {
#ifdef CAPTURE_DEBUG
			printk("CPU[%d]: Giving pardon to imprisoned penguins\n",
			       smp_processor_id());
#endif
			penguins_are_doing_time = 0;
			membar("#StoreStore | #StoreLoad");
			atomic_dec(&smp_capture_registry);
		}
	}
}
/* Imprisoned penguins run with %pil == 15, but PSTATE_IE set, so they
 * can service tlb flush xcalls...
 */
extern void prom_world(int);
extern void save_alternate_globals(unsigned long *);
extern void restore_alternate_globals(unsigned long *);
void smp_penguin_jailcell(void)
{
	unsigned long global_save[24];

	__asm__ __volatile__("flushw");
	save_alternate_globals(global_save);
	prom_world(1);
	atomic_inc(&smp_capture_registry);
	membar("#StoreLoad | #StoreStore");
	while(penguins_are_doing_time)
		membar("#LoadLoad");
	prom_world(0);
	restore_alternate_globals(global_save);
	atomic_dec(&smp_capture_registry);
}
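/* Typical (hypothetical) use of the capture API: quiesce every other cpu
 * around an operation that cannot tolerate concurrent activity, e.g.
 *
 *	smp_capture();
 *	...poke delicate hardware or firmware state...
 *	smp_release();
 *
 * Captured cpus sit in smp_penguin_jailcell() above until released.
 */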
extern unsigned long xcall_promstop;

void smp_promstop_others(void)
{
	if (smp_processors_ready)
		smp_cross_call(&xcall_promstop, 0, 0, 0);
}
extern void sparc64_do_profile(unsigned long pc, unsigned long o7);

static unsigned long current_tick_offset;

#define prof_multiplier(__cpu)		cpu_data[(__cpu)].multiplier
#define prof_counter(__cpu)		cpu_data[(__cpu)].counter
void smp_percpu_timer_interrupt(struct pt_regs *regs)
{
	unsigned long compare, tick, pstate;
	int cpu = smp_processor_id();
	int user = user_mode(regs);

	/*
	 * Check for level 14 softint.
	 */
	if (!(get_softint() & (1UL << 0))) {
		extern void handler_irq(int, struct pt_regs *);

		handler_irq(14, regs);
		return;
	}

	clear_softint((1UL << 0));

	do {
		if (!user)
			sparc64_do_profile(regs->tpc, regs->u_regs[UREG_RETPC]);
		if (!--prof_counter(cpu)) {
			if (cpu == boot_cpu_id) {
				kstat.irqs[cpu][0]++;
				timer_tick_interrupt(regs);
			}

			update_process_times(user);

			prof_counter(cpu) = prof_multiplier(cpu);
		}
		/* Guarantee that the following sequences execute
		 * uninterrupted.
		 */
		__asm__ __volatile__("rdpr	%%pstate, %0\n\t"
				     "wrpr	%0, %1, %%pstate"
				     : "=r" (pstate)
				     : "i" (PSTATE_IE));

		/* Workaround for Spitfire Errata (#54 I think??), I discovered
		 * this via Sun BugID 4008234, mentioned in Solaris-2.5.1 patch
		 * number 103640.
		 *
		 * On Blackbird writes to %tick_cmpr can fail, the
		 * workaround seems to be to execute the wr instruction
		 * at the start of an I-cache line, and perform a dummy
		 * read back from %tick_cmpr right after writing to it. -DaveM
		 *
		 * Just to be anal we add a workaround for Spitfire
		 * Errata 50 by preventing pipeline bypasses on the
		 * final read of the %tick register into a compare
		 * instruction.  The Errata 50 description states
		 * that %tick is not prone to this bug, but I am not
		 * taking any chances.
		 */
		__asm__ __volatile__("rd	%%tick_cmpr, %0\n\t"
				     "ba,pt	%%xcc, 1f\n\t"
				     " add	%0, %2, %0\n\t"
				     ".align	64\n"
				  "1: wr	%0, 0x0, %%tick_cmpr\n\t"
				     "rd	%%tick_cmpr, %%g0\n\t"
				     "rd	%%tick, %1\n\t"
				     "mov	%1, %1"
				     : "=&r" (compare), "=r" (tick)
				     : "r" (current_tick_offset));

		/* Restore PSTATE_IE. */
		__asm__ __volatile__("wrpr	%0, 0x0, %%pstate"
				     : /* no outputs */
				     : "r" (pstate));
	} while (tick >= compare);
}
static void __init smp_setup_percpu_timer(void)
{
	int cpu = smp_processor_id();
	unsigned long pstate;

	prof_counter(cpu) = prof_multiplier(cpu) = 1;

	/* Guarantee that the following sequences execute
	 * uninterrupted.
	 */
	__asm__ __volatile__("rdpr	%%pstate, %0\n\t"
			     "wrpr	%0, %1, %%pstate"
			     : "=r" (pstate)
			     : "i" (PSTATE_IE));
	/* Workaround for Spitfire Errata (#54 I think??), I discovered
	 * this via Sun BugID 4008234, mentioned in Solaris-2.5.1 patch
	 * number 103640.
	 *
	 * On Blackbird writes to %tick_cmpr can fail, the
	 * workaround seems to be to execute the wr instruction
	 * at the start of an I-cache line, and perform a dummy
	 * read back from %tick_cmpr right after writing to it. -DaveM
	 */
	__asm__ __volatile__("
		rd	%%tick, %%g1
		ba,pt	%%xcc, 1f
		 add	%%g1, %0, %%g1
		.align	64
	1:	wr	%%g1, 0x0, %%tick_cmpr
		rd	%%tick_cmpr, %%g0"
	: /* no outputs */
	: "r" (current_tick_offset)
	: "g1");

	/* Restore PSTATE_IE. */
	__asm__ __volatile__("wrpr	%0, 0x0, %%pstate"
			     : /* no outputs */
			     : "r" (pstate));
}
void __init smp_tick_init(void)
{
	int i;

	boot_cpu_id = hard_smp_processor_id();
	current_tick_offset = timer_tick_offset;

	for(i = 0; i < linux_num_cpus; i++)
		cpu_present_map |= (1UL << linux_cpus[i].mid);
	for(i = 0; i < NR_CPUS; i++) {
		__cpu_number_map[i] = -1;
		__cpu_logical_map[i] = -1;
	}
	__cpu_number_map[boot_cpu_id] = 0;
	prom_cpu_nodes[boot_cpu_id] = linux_cpus[0].prom_node;
	__cpu_logical_map[0] = boot_cpu_id;
	current->processor = boot_cpu_id;
	prof_counter(boot_cpu_id) = prof_multiplier(boot_cpu_id) = 1;
}
static inline unsigned long find_flush_base(unsigned long size)
{
	struct page *p = mem_map;
	unsigned long found, base;

	size = PAGE_ALIGN(size);
	found = size;
	base = (unsigned long) page_address(p);
	while(found != 0) {
		/* Failure. */
		if(p >= (mem_map + max_mapnr))
			return 0UL;
		if(PageReserved(p)) {
			found = size;
			base = (unsigned long) page_address(p);
		} else {
			found -= PAGE_SIZE;
		}
		p++;
	}
	return base;
}
cycles_t cacheflush_time;

static void __init smp_tune_scheduling (void)
{
	unsigned long orig_flush_base, flush_base, flags, *p;
	unsigned int ecache_size, order;
	cycles_t tick1, tick2, raw;

	/* Approximate heuristic for SMP scheduling.  It is an
	 * estimation of the time it takes to flush the L2 cache
	 * on the local processor.
	 *
	 * The ia32 chooses to use the L1 cache flush time instead,
	 * and I consider this complete nonsense.  The Ultra can service
	 * a miss to the L1 with a hit to the L2 in 7 or 8 cycles, and
	 * L2 misses are what create extra bus traffic (ie. the "cost"
	 * of moving a process from one cpu to another).
	 */
	printk("SMP: Calibrating ecache flush... ");
	ecache_size = prom_getintdefault(linux_cpus[0].prom_node,
					 "ecache-size", (512 * 1024));
	if (ecache_size > (4 * 1024 * 1024))
		ecache_size = (4 * 1024 * 1024);
	orig_flush_base = flush_base =
		__get_free_pages(GFP_KERNEL, order = get_order(ecache_size));

	if (flush_base != 0UL) {
		__save_and_cli(flags);

		/* Scan twice the cache size in one pass, just to get the
		 * TLB entries loaded and to make sure the measured scan
		 * below sees pure misses.
		 */
		for (p = (unsigned long *)flush_base;
		     ((unsigned long)p) < (flush_base + (ecache_size<<1));
		     p += (64 / sizeof(unsigned long)))
			*((volatile unsigned long *)p);
		/* Now the real measurement. */
		__asm__ __volatile__("
		b,pt	%%xcc, 1f
		 rd	%%tick, %0

		.align	64
	1:	ldx	[%2 + 0x000], %%g1
		ldx	[%2 + 0x040], %%g2
		ldx	[%2 + 0x080], %%g3
		ldx	[%2 + 0x0c0], %%g5
		add	%2, 0x100, %2
		cmp	%2, %4
		bne,pt	%%xcc, 1b
		 nop

		rd	%%tick, %1"
		: "=&r" (tick1), "=&r" (tick2), "=&r" (flush_base)
		: "2" (flush_base), "r" (flush_base + ecache_size)
		: "g1", "g2", "g3", "g5");

		__restore_flags(flags);

		raw = (tick2 - tick1);

		/* Dampen it a little, considering two processes
		 * sharing the cache and fitting.
		 */
		cacheflush_time = (raw - (raw >> 2));

		free_pages(orig_flush_base, order);
	} else {
		cacheflush_time = ((ecache_size << 2) +
				   (ecache_size << 1));
	}

	printk("Using heuristic of %d cycles.\n",
	       (int) cacheflush_time);
}
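/* cacheflush_time is exported to the generic scheduler of this era as its
 * SMP cache-affinity cost estimate: a larger value biases it against moving
 * a recently-run task to another cpu, a smaller value makes migration look
 * cheaper.
 */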
/* /proc/profile writes can call this, don't __init it please. */
int setup_profiling_timer(unsigned int multiplier)
{
	unsigned long flags;
	int i;

	if((!multiplier) || (timer_tick_offset / multiplier) < 1000)
		return -EINVAL;

	save_and_cli(flags);
	for(i = 0; i < NR_CPUS; i++) {
		if(cpu_present_map & (1UL << i))
			prof_multiplier(i) = multiplier;
	}
	current_tick_offset = (timer_tick_offset / multiplier);
	restore_flags(flags);

	return 0;
}
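/* Worked example: a multiplier of 4 quarters current_tick_offset, so each
 * cpu takes four times as many level-14 tick interrupts (4x profiling
 * resolution), while the prof_counter countdown in
 * smp_percpu_timer_interrupt() keeps timer_tick_interrupt() and
 * update_process_times() running only once per original tick.  The guard at
 * the top rejects multipliers that would leave fewer than 1000 %tick cycles
 * between interrupts.
 */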