/*
 * SN2 Platform specific SMP Support
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved.
 */
11 #include <linux/init.h>
12 #include <linux/kernel.h>
13 #include <linux/spinlock.h>
14 #include <linux/threads.h>
15 #include <linux/sched.h>
16 #include <linux/smp.h>
17 #include <linux/interrupt.h>
18 #include <linux/irq.h>
19 #include <linux/mmzone.h>
20 #include <linux/module.h>
21 #include <linux/bitops.h>
22 #include <linux/nodemask.h>
23 #include <linux/proc_fs.h>
24 #include <linux/seq_file.h>
26 #include <asm/processor.h>
29 #include <asm/system.h>
30 #include <asm/delay.h>
35 #include <asm/hw_irq.h>
36 #include <asm/current.h>
37 #include <asm/sn/sn_cpuid.h>
38 #include <asm/sn/sn_sal.h>
39 #include <asm/sn/addrs.h>
40 #include <asm/sn/shub_mmr.h>
41 #include <asm/sn/nodepda.h>
42 #include <asm/sn/rw_mmr.h>
44 DEFINE_PER_CPU(struct ptc_stats
, ptcstats
);
45 DECLARE_PER_CPU(struct ptc_stats
, ptcstats
);
47 static __cacheline_aligned
DEFINE_SPINLOCK(sn2_global_ptc_lock
);
49 void sn2_ptc_deadlock_recovery(short *, short, int, volatile unsigned long *, unsigned long data0
,
50 volatile unsigned long *, unsigned long data1
);
/*
 * ptctest:
 *
 *	xyz - 3 digit hex number:
 *		x - Force PTC purges to use shub:
 *			0 - no force
 *			1 - force
 *		y - interrupt enable
 *			0 - disable interrupts
 *			1 - leave interrupts enabled
 *		z - type of lock:
 *			0 - global lock
 *			1 - node local lock
 *			2 - no lock
 *
 * Note: on shub1, only ptctest == 0 is supported. Don't try other values!
 */
71 static unsigned int sn2_ptctest
= 0;
73 static int __init
ptc_test(char *str
)
75 get_option(&str
, &sn2_ptctest
);
78 __setup("ptctest=", ptc_test
);
80 static inline int ptc_lock(unsigned long *flagp
)
82 unsigned long opt
= sn2_ptctest
& 255;
86 spin_lock_irqsave(&sn2_global_ptc_lock
, *flagp
);
89 spin_lock_irqsave(&sn_nodepda
->ptc_lock
, *flagp
);
92 local_irq_save(*flagp
);
95 spin_lock(&sn2_global_ptc_lock
);
98 spin_lock(&sn_nodepda
->ptc_lock
);
108 static inline void ptc_unlock(unsigned long flags
, int opt
)
112 spin_unlock_irqrestore(&sn2_global_ptc_lock
, flags
);
115 spin_unlock_irqrestore(&sn_nodepda
->ptc_lock
, flags
);
118 local_irq_restore(flags
);
121 spin_unlock(&sn2_global_ptc_lock
);
124 spin_unlock(&sn_nodepda
->ptc_lock
);
134 #define sn2_ptctest 0
136 static inline int ptc_lock(unsigned long *flagp
)
138 spin_lock_irqsave(&sn2_global_ptc_lock
, *flagp
);
142 static inline void ptc_unlock(unsigned long flags
, int opt
)
144 spin_unlock_irqrestore(&sn2_global_ptc_lock
, flags
);
/*
 * Per-cpu TLB-purge statistics (see sn2_ptc_seq_show for the /proc report).
 * NOTE(review): struct header and the leading ptc_l member reconstructed
 * from their uses in sn2_global_tlb_purge/sn2_ptc_seq_show -- confirm.
 */
struct ptc_stats {
	unsigned long ptc_l;			/* purges done with local ptc.l */
	unsigned long change_rid;		/* purges done by flushing the whole mm */
	unsigned long shub_ptc_flushes;		/* global purges via shub PTC MMRs */
	unsigned long nodes_flushed;		/* cumulative count of nodes targeted */
	unsigned long deadlocks;		/* PTC deadlock recoveries */
	unsigned long lock_itc_clocks;		/* itc cycles spent acquiring the ptc lock */
	unsigned long shub_itc_clocks;		/* itc cycles spent in the shub flush loop */
	unsigned long shub_itc_clocks_max;	/* worst single shub flush, in itc cycles */
};
159 static inline unsigned long wait_piowc(void)
161 volatile unsigned long *piows
, zeroval
;
164 piows
= pda
->pio_write_status_addr
;
165 zeroval
= pda
->pio_write_status_val
;
168 } while (((ws
= *piows
) & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK
) != zeroval
);
172 void sn_tlb_migrate_finish(struct mm_struct
*mm
)
174 if (mm
== current
->mm
)
/**
 * sn2_global_tlb_purge - globally purge translation cache of virtual address range
 * @start: start of virtual address range
 * @end: end of virtual address range
 * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc))
 *
 * Purges the translation caches of all processors of the given virtual address
 * range.
 *
 * Note:
 *	- cpu_vm_mask is a bit mask that indicates which cpus have loaded the context.
 *	- cpu_vm_mask is converted into a nodemask of the nodes containing the
 *	  cpus in cpu_vm_mask.
 *	- if only one bit is set in cpu_vm_mask & it is the current cpu,
 *	  then only the local TLB needs to be flushed. This flushing can be done
 *	  using ptc.l. This is the common case & avoids the global spinlock.
 *	- if multiple cpus have loaded the context, then flushing has to be
 *	  done with ptc.g/MMRs under protection of the global ptc_lock.
 */
199 sn2_global_tlb_purge(unsigned long start
, unsigned long end
,
202 int i
, opt
, shub1
, cnode
, mynasid
, cpu
, lcpu
= 0, nasid
, flushed
= 0;
203 volatile unsigned long *ptc0
, *ptc1
;
204 unsigned long itc
, itc2
, flags
, data0
= 0, data1
= 0;
205 struct mm_struct
*mm
= current
->active_mm
;
206 short nasids
[MAX_NUMNODES
], nix
;
207 nodemask_t nodes_flushed
;
209 nodes_clear(nodes_flushed
);
212 for_each_cpu_mask(cpu
, mm
->cpu_vm_mask
) {
213 cnode
= cpu_to_node(cpu
);
214 node_set(cnode
, nodes_flushed
);
221 if (likely(i
== 1 && lcpu
== smp_processor_id())) {
223 ia64_ptcl(start
, nbits
<< 2);
224 start
+= (1UL << nbits
);
225 } while (start
< end
);
227 __get_cpu_var(ptcstats
).ptc_l
++;
232 if (atomic_read(&mm
->mm_users
) == 1) {
234 __get_cpu_var(ptcstats
).change_rid
++;
239 itc
= ia64_get_itc();
241 for_each_node_mask(cnode
, nodes_flushed
)
242 nasids
[nix
++] = cnodeid_to_nasid(cnode
);
246 data0
= (1UL << SH1_PTC_0_A_SHFT
) |
247 (nbits
<< SH1_PTC_0_PS_SHFT
) |
248 ((ia64_get_rr(start
) >> 8) << SH1_PTC_0_RID_SHFT
) |
249 (1UL << SH1_PTC_0_START_SHFT
);
250 ptc0
= (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0
);
251 ptc1
= (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1
);
253 data0
= (1UL << SH2_PTC_A_SHFT
) |
254 (nbits
<< SH2_PTC_PS_SHFT
) |
255 (1UL << SH2_PTC_START_SHFT
);
256 ptc0
= (long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC
+
257 ((ia64_get_rr(start
) >> 8) << SH2_PTC_RID_SHFT
) );
262 mynasid
= get_nasid();
264 itc
= ia64_get_itc();
265 opt
= ptc_lock(&flags
);
266 itc2
= ia64_get_itc();
267 __get_cpu_var(ptcstats
).lock_itc_clocks
+= itc2
- itc
;
268 __get_cpu_var(ptcstats
).shub_ptc_flushes
++;
269 __get_cpu_var(ptcstats
).nodes_flushed
+= nix
;
273 data1
= start
| (1UL << SH1_PTC_1_START_SHFT
);
275 data0
= (data0
& ~SH2_PTC_ADDR_MASK
) | (start
& SH2_PTC_ADDR_MASK
);
276 for (i
= 0; i
< nix
; i
++) {
278 if ((!(sn2_ptctest
& 3)) && unlikely(nasid
== mynasid
)) {
279 ia64_ptcga(start
, nbits
<< 2);
282 ptc0
= CHANGE_NASID(nasid
, ptc0
);
284 ptc1
= CHANGE_NASID(nasid
, ptc1
);
285 pio_atomic_phys_write_mmrs(ptc0
, data0
, ptc1
,
292 (SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK
))) {
293 sn2_ptc_deadlock_recovery(nasids
, nix
, mynasid
, ptc0
, data0
, ptc1
, data1
);
296 start
+= (1UL << nbits
);
298 } while (start
< end
);
300 itc2
= ia64_get_itc() - itc2
;
301 __get_cpu_var(ptcstats
).shub_itc_clocks
+= itc2
;
302 if (itc2
> __get_cpu_var(ptcstats
).shub_itc_clocks_max
)
303 __get_cpu_var(ptcstats
).shub_itc_clocks_max
= itc2
;
305 ptc_unlock(flags
, opt
);
/*
 * sn2_ptc_deadlock_recovery
 *
 * Recover from PTC deadlocks conditions. Recovery requires stepping thru each
 * TLB flush transaction.  The recovery sequence is somewhat tricky & is
 * coded in assembly language.
 */
317 void sn2_ptc_deadlock_recovery(short *nasids
, short nix
, int mynasid
, volatile unsigned long *ptc0
, unsigned long data0
,
318 volatile unsigned long *ptc1
, unsigned long data1
)
320 extern void sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long,
321 volatile unsigned long *, unsigned long, volatile unsigned long *, unsigned long);
323 unsigned long *piows
, zeroval
;
325 __get_cpu_var(ptcstats
).deadlocks
++;
327 piows
= (unsigned long *) pda
->pio_write_status_addr
;
328 zeroval
= pda
->pio_write_status_val
;
330 for (i
=0; i
< nix
; i
++) {
332 if (!(sn2_ptctest
& 3) && nasid
== mynasid
)
334 ptc0
= CHANGE_NASID(nasid
, ptc0
);
336 ptc1
= CHANGE_NASID(nasid
, ptc1
);
337 sn2_ptc_deadlock_recovery_core(ptc0
, data0
, ptc1
, data1
, piows
, zeroval
);
/**
 * sn_send_IPI_phys - send an IPI to a Nasid and slice
 * @nasid: nasid to receive the interrupt (may be outside partition)
 * @physid: physical cpuid to receive the interrupt.
 * @vector: command to send
 * @delivery_mode: delivery mechanism
 *
 * Sends an IPI (interprocessor interrupt) to the processor specified by
 * @physid
 *
 * @delivery_mode can be one of the following
 *
 * %IA64_IPI_DM_INT - pend an interrupt
 * %IA64_IPI_DM_PMI - pend a PMI
 * %IA64_IPI_DM_NMI - pend an NMI
 * %IA64_IPI_DM_INIT - pend an INIT interrupt
 */
359 void sn_send_IPI_phys(int nasid
, long physid
, int vector
, int delivery_mode
)
362 unsigned long flags
= 0;
365 p
= (long *)GLOBAL_MMR_PHYS_ADDR(nasid
, SH_IPI_INT
);
366 val
= (1UL << SH_IPI_INT_SEND_SHFT
) |
367 (physid
<< SH_IPI_INT_PID_SHFT
) |
368 ((long)delivery_mode
<< SH_IPI_INT_TYPE_SHFT
) |
369 ((long)vector
<< SH_IPI_INT_IDX_SHFT
) |
370 (0x000feeUL
<< SH_IPI_INT_BASE_SHFT
);
373 if (enable_shub_wars_1_1()) {
374 spin_lock_irqsave(&sn2_global_ptc_lock
, flags
);
376 pio_phys_write_mmr(p
, val
);
377 if (enable_shub_wars_1_1()) {
379 spin_unlock_irqrestore(&sn2_global_ptc_lock
, flags
);
384 EXPORT_SYMBOL(sn_send_IPI_phys
);
/**
 * sn2_send_IPI - send an IPI to a processor
 * @cpuid: target of the IPI
 * @vector: command to send
 * @delivery_mode: delivery mechanism
 * @redirect: redirect the IPI?
 *
 * Sends an IPI (InterProcessor Interrupt) to the processor specified by
 * @cpuid.  @vector specifies the command to send, while @delivery_mode can
 * be one of the following
 *
 * %IA64_IPI_DM_INT - pend an interrupt
 * %IA64_IPI_DM_PMI - pend a PMI
 * %IA64_IPI_DM_NMI - pend an NMI
 * %IA64_IPI_DM_INIT - pend an INIT interrupt
 */
402 void sn2_send_IPI(int cpuid
, int vector
, int delivery_mode
, int redirect
)
407 physid
= cpu_physical_id(cpuid
);
408 nasid
= cpuid_to_nasid(cpuid
);
410 /* the following is used only when starting cpus at boot time */
411 if (unlikely(nasid
== -1))
412 ia64_sn_get_sapic_info(physid
, &nasid
, NULL
, NULL
);
414 sn_send_IPI_phys(nasid
, physid
, vector
, delivery_mode
);
417 #ifdef CONFIG_PROC_FS
419 #define PTC_BASENAME "sgi_sn/ptc_statistics"
421 static void *sn2_ptc_seq_start(struct seq_file
*file
, loff_t
* offset
)
423 if (*offset
< NR_CPUS
)
428 static void *sn2_ptc_seq_next(struct seq_file
*file
, void *data
, loff_t
* offset
)
431 if (*offset
< NR_CPUS
)
/* seq_file stop: nothing to release. */
static void sn2_ptc_seq_stop(struct seq_file *file, void *data)
{
}
440 static int sn2_ptc_seq_show(struct seq_file
*file
, void *data
)
442 struct ptc_stats
*stat
;
445 cpu
= *(loff_t
*) data
;
448 seq_printf(file
, "# ptc_l change_rid shub_ptc_flushes shub_nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max\n");
449 seq_printf(file
, "# ptctest %d\n", sn2_ptctest
);
452 if (cpu
< NR_CPUS
&& cpu_online(cpu
)) {
453 stat
= &per_cpu(ptcstats
, cpu
);
454 seq_printf(file
, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu
, stat
->ptc_l
,
455 stat
->change_rid
, stat
->shub_ptc_flushes
, stat
->nodes_flushed
,
457 1000 * stat
->lock_itc_clocks
/ per_cpu(cpu_info
, cpu
).cyc_per_usec
,
458 1000 * stat
->shub_itc_clocks
/ per_cpu(cpu_info
, cpu
).cyc_per_usec
,
459 1000 * stat
->shub_itc_clocks_max
/ per_cpu(cpu_info
, cpu
).cyc_per_usec
);
465 static struct seq_operations sn2_ptc_seq_ops
= {
466 .start
= sn2_ptc_seq_start
,
467 .next
= sn2_ptc_seq_next
,
468 .stop
= sn2_ptc_seq_stop
,
469 .show
= sn2_ptc_seq_show
472 int sn2_ptc_proc_open(struct inode
*inode
, struct file
*file
)
474 return seq_open(file
, &sn2_ptc_seq_ops
);
477 static struct file_operations proc_sn2_ptc_operations
= {
478 .open
= sn2_ptc_proc_open
,
481 .release
= seq_release
,
static struct proc_dir_entry *proc_sn2_ptc;
486 static int __init
sn2_ptc_init(void)
488 if (!(proc_sn2_ptc
= create_proc_entry(PTC_BASENAME
, 0444, NULL
))) {
489 printk(KERN_ERR
"unable to create %s proc entry", PTC_BASENAME
);
492 proc_sn2_ptc
->proc_fops
= &proc_sn2_ptc_operations
;
493 spin_lock_init(&sn2_global_ptc_lock
);
497 static void __exit
sn2_ptc_exit(void)
499 remove_proc_entry(PTC_BASENAME
, NULL
);
module_init(sn2_ptc_init);
module_exit(sn2_ptc_exit);
504 #endif /* CONFIG_PROC_FS */