2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
8 * Copyright (C) 2000, 2001 Paolo Alberelli
9 * Copyright (C) 2003 <Richard.Curnow@superh.com> (/proc/tlb, audit_mm, bug fixes)
10 * Copyright (C) 2003 Paul Mundt
14 #include <linux/signal.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/errno.h>
18 #include <linux/string.h>
19 #include <linux/types.h>
20 #include <linux/ptrace.h>
21 #include <linux/mman.h>
23 #include <linux/smp.h>
24 #include <linux/smp_lock.h>
25 #include <linux/interrupt.h>
27 #include <asm/system.h>
30 #include <asm/uaccess.h>
31 #include <asm/pgalloc.h>
32 #include <asm/hardirq.h>
33 #include <asm/mmu_context.h>
34 #include <asm/registers.h> /* required by inline asm statements */
36 #if defined(CONFIG_SH64_PROC_TLB)
37 #include <linux/init.h>
38 #include <linux/proc_fs.h>
/* Count numbers of tlb refills in each region */
static unsigned long long calls_to_update_mmu_cache;
static unsigned long long calls_to_flush_tlb_page;
static unsigned long long calls_to_flush_tlb_range;
static unsigned long long calls_to_flush_tlb_mm;
static unsigned long long calls_to_flush_tlb_all;
/* Non-static: referenced outside this file -- presumably the TLB miss
   fast path bumps calls_to_do_fast_page_fault; confirm against callers. */
unsigned long long calls_to_do_slow_page_fault;
unsigned long long calls_to_do_fast_page_fault;

/* Count size of ranges for flush_tlb_range */
static unsigned long long flush_tlb_range_1;
static unsigned long long flush_tlb_range_2;
static unsigned long long flush_tlb_range_3_4;
static unsigned long long flush_tlb_range_5_7;
static unsigned long long flush_tlb_range_8_11;
static unsigned long long flush_tlb_range_12_15;
static unsigned long long flush_tlb_range_16_up;

/* NOTE(review): increment site not visible in this chunk. */
static unsigned long long page_not_present;
/* die() lives in the sh64 trap handling code; declared here rather than
 * pulled in via a header: (message, regs, error_code). */
extern void die(const char *,struct pt_regs *,long);

/* PFLAG(val,flag): expands to the stringised flag name when (val) has
 * (flag) set, otherwise to "" -- used to pretty-print protection bits. */
#define PFLAG(val,flag) (( (val) & (flag) ) ? #flag : "" )
/* PPROT(flag): shorthand testing a flag against a local `prot' argument;
 * only valid inside functions that have `prot' in scope (print_prots). */
#define PPROT(flag) PFLAG(pgprot_val(prot),flag)
66 static __inline__
void print_prots(pgprot_t prot
)
68 printk("prot is 0x%08lx\n",pgprot_val(prot
));
70 printk("%s %s %s %s %s\n",PPROT(_PAGE_SHARED
),PPROT(_PAGE_READ
),
71 PPROT(_PAGE_EXECUTE
),PPROT(_PAGE_WRITE
),PPROT(_PAGE_USER
));
75 static __inline__
void print_vma(struct vm_area_struct
*vma
)
77 printk("vma start 0x%08lx\n",vma
->vm_start
);
78 printk("vma end 0x%08lx\n",vma
->vm_end
);
80 print_prots(vma
->vm_page_prot
);
81 printk("vm_flags 0x%08lx\n",vma
->vm_flags
);
84 static __inline__
void print_task(struct task_struct
*tsk
)
86 printk("Task pid %d\n",tsk
->pid
);
89 static pte_t
*lookup_pte(struct mm_struct
*mm
, unsigned long address
)
96 dir
= pgd_offset(mm
, address
);
101 pmd
= pmd_offset(dir
, address
);
102 if (pmd_none(*pmd
)) {
106 pte
= pte_offset(pmd
, address
);
109 if (pte_none(entry
)) {
112 if (!pte_present(entry
)) {
/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 *
 * NOTE(review): this chunk is missing many of the function's original
 * lines -- the opening brace, tsk/mm initialisation, the
 * good_area/bad_area/no_context/out_of_memory/do_sigbus labels and
 * their goto's, the handle_mm_fault() switch arms, and most closing
 * braces.  The statements below are preserved exactly as found; do not
 * assume they are contiguous in the original file.
 */
asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
			      unsigned long textaccess, unsigned long address)
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct * vma;
	unsigned long long lpage;

#if defined(CONFIG_SH64_PROC_TLB)
	/* Slow-path fault counter exported through /proc/tlb. */
	++calls_to_do_slow_page_fault;

	/*
	 * Note this is now called with interrupts still disabled
	 * This is to cope with being called for a missing IO port
	 * address with interrupts disabled. This should be fixed as
	 * soon as we have a better 'fast path' miss handler.
	 *
	 * Plus take care how you try and debug this stuff.
	 * For example, writing debug data to a port which you
	 * have just faulted on is not going to work.
	 */

	/* Not an IO address, so reenable interrupts */

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */
	if (in_interrupt() || !mm)

	/* TLB misses upon some cache flushes get done under cli() */
	down_read(&mm->mmap_sem);

	vma = find_vma(mm, address);

	/* Debug trace -- NOTE(review): %08x is used for an unsigned long
	 * `address'; should be %08lx, left as found (same in the two
	 * traces below). */
	printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n",
	       __FUNCTION__,__LINE__,
	       address,regs->pc,textaccess,writeaccess);

	/* Fault below vma->vm_start may still be a legal stack access if
	 * the vma grows downward. */
	if (vma->vm_start <= address) {
	if (!(vma->vm_flags & VM_GROWSDOWN)) {
	printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n",
	       __FUNCTION__,__LINE__,
	       address,regs->pc,textaccess,writeaccess);
	if (expand_stack(vma, address)) {
	printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n",
	       __FUNCTION__,__LINE__,
	       address,regs->pc,textaccess,writeaccess);

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it..
	 */

	/* Access-rights checks: write access, read/exec access, and
	 * text (instruction fetch) access against the vma's flags. */
	if (!(vma->vm_flags & VM_WRITE))
	if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
	if (!(vma->vm_flags & VM_EXEC))

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	switch (handle_mm_fault(mm, vma, address, writeaccess)) {

	/* If we get here, the page fault has been handled.  Do the TLB refill
	   now from the newly-setup PTE, to avoid having to fault again right
	   away on the same instruction. */
	pte = lookup_pte (mm, address);
	/* From empirical evidence, we can get here, due to
	   !pte_present(pte). (e.g. if a swap-in occurs, and the page
	   is swapped back out again before the process that wanted it
	   gets rescheduled?) */

	__do_tlb_refill(address, textaccess, pte);

	up_read(&mm->mmap_sem);

	/*
	 * Something tried to access memory that isn't in our memory map..
	 * Fix it, but check if it's kernel or user first..
	 */
	printk("fault:bad area\n");
	up_read(&mm->mmap_sem);

	if (user_mode(regs)) {
		/* Deliver SIGSEGV to the faulting user task, recording the
		 * fault address and reusing error_code for the access type. */
		tsk->thread.address = address;
		tsk->thread.error_code = writeaccess;
		force_sig(SIGSEGV, tsk);

	printk("fault:No context\n");

	/* Are we prepared to handle this kernel fault? */
	fixup = search_exception_table(regs->pc);

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	if (address < PAGE_SIZE)
		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
		printk(KERN_ALERT "Unable to handle kernel paging request");
	printk(" at virtual address %08lx\n", address);
	/* regs->pc is 64-bit; printed as two 32-bit halves. */
	printk(KERN_ALERT "pc = %08Lx%08Lx\n", regs->pc >> 32, regs->pc & 0xffffffff);

	die("Oops", regs, writeaccess);

	/*
	 * We ran out of memory, or some other thing happened to us that made
	 * us unable to handle the page fault gracefully.
	 */
	if (current->pid == 1) {
	printk("fault:Out of memory\n");
	up_read(&mm->mmap_sem);
	printk("VM: killing process %s\n", tsk->comm);

	printk("fault:Do sigbus\n");
	up_read(&mm->mmap_sem);

	/*
	 * Send a sigbus, regardless of whether we were in kernel
	 * or user mode.
	 */
	tsk->thread.address = address;
	tsk->thread.error_code = writeaccess;
	tsk->thread.trap_no = 14;
	force_sig(SIGBUS, tsk);

	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
336 void flush_tlb_all(void);
338 void update_mmu_cache(struct vm_area_struct
* vma
,
339 unsigned long address
, pte_t pte
)
341 #if defined(CONFIG_SH64_PROC_TLB)
342 ++calls_to_update_mmu_cache
;
344 /* This appears to get called once for every pte entry that gets
345 established => I don't think it's efficient to try refilling the
346 TLBs with the pages - some may not get accessed even. Also, for
347 executable pages, it is impossible to determine reliably here which
348 TLB they should be mapped into (or both even).
350 So, just do nothing here and handle faults on demand. In the
351 TLBMISS handling case, the refill is now done anyway after the pte
352 has been fixed up, so that deals with most useful cases.
358 static void __flush_tlb_page(struct vm_area_struct
*vma
, unsigned long page
)
360 unsigned long long match
, pteh
=0, lpage
;
362 struct mm_struct
*mm
;
366 if (mm
->context
== NO_CONTEXT
)
370 * Sign-extend based on neff.
372 lpage
= (page
& NEFF_SIGN
) ? (page
| NEFF_MASK
) : page
;
373 match
= ((mm
->context
& MMU_CONTEXT_ASID_MASK
) << PTEH_ASID_SHIFT
) | PTEH_VALID
;
376 /* Do ITLB : don't bother for pages in non-exectutable VMAs */
377 if (vma
->vm_flags
& VM_EXEC
) {
378 for_each_itlb_entry(tlb
) {
379 asm volatile ("getcfg %1, 0, %0"
384 __flush_tlb_slot(tlb
);
391 /* Do DTLB : any page could potentially be in here. */
392 for_each_dtlb_entry(tlb
) {
393 asm volatile ("getcfg %1, 0, %0"
398 __flush_tlb_slot(tlb
);
405 void flush_tlb_page(struct vm_area_struct
*vma
, unsigned long page
)
409 #if defined(CONFIG_SH64_PROC_TLB)
410 ++calls_to_flush_tlb_page
;
416 __flush_tlb_page(vma
, page
);
417 restore_flags(flags
);
421 void flush_tlb_range(struct mm_struct
*mm
, unsigned long start
,
425 unsigned long long match
, pteh
=0, pteh_epn
, pteh_low
;
428 #if defined(CONFIG_SH64_PROC_TLB)
429 ++calls_to_flush_tlb_range
;
432 unsigned long size
= (end
- 1) - start
;
433 size
>>= 12; /* divide by PAGE_SIZE */
434 size
++; /* end=start+4096 => 1 page */
436 case 1 : flush_tlb_range_1
++; break;
437 case 2 : flush_tlb_range_2
++; break;
438 case 3 ... 4 : flush_tlb_range_3_4
++; break;
439 case 5 ... 7 : flush_tlb_range_5_7
++; break;
440 case 8 ... 11 : flush_tlb_range_8_11
++; break;
441 case 12 ... 15 : flush_tlb_range_12_15
++; break;
442 default : flush_tlb_range_16_up
++; break;
447 if (mm
->context
== NO_CONTEXT
)
455 match
= ((mm
->context
& MMU_CONTEXT_ASID_MASK
) << PTEH_ASID_SHIFT
) | PTEH_VALID
;
458 for_each_itlb_entry(tlb
) {
459 asm volatile ("getcfg %1, 0, %0"
463 pteh_epn
= pteh
& PAGE_MASK
;
464 pteh_low
= pteh
& ~PAGE_MASK
;
466 if (pteh_low
== match
&& pteh_epn
>= start
&& pteh_epn
<= end
)
467 __flush_tlb_slot(tlb
);
471 for_each_dtlb_entry(tlb
) {
472 asm volatile ("getcfg %1, 0, %0"
476 pteh_epn
= pteh
& PAGE_MASK
;
477 pteh_low
= pteh
& ~PAGE_MASK
;
479 if (pteh_low
== match
&& pteh_epn
>= start
&& pteh_epn
<= end
)
480 __flush_tlb_slot(tlb
);
483 restore_flags(flags
);
486 void flush_tlb_mm(struct mm_struct
*mm
)
490 #if defined(CONFIG_SH64_PROC_TLB)
491 ++calls_to_flush_tlb_mm
;
494 if (mm
->context
== NO_CONTEXT
)
499 mm
->context
=NO_CONTEXT
;
501 activate_context(mm
);
503 restore_flags(flags
);
507 void flush_tlb_all(void)
509 /* Invalidate all, including shared pages, excluding fixed TLBs */
511 unsigned long flags
, tlb
;
513 #if defined(CONFIG_SH64_PROC_TLB)
514 ++calls_to_flush_tlb_all
;
519 /* Flush each ITLB entry */
520 for_each_itlb_entry(tlb
) {
521 __flush_tlb_slot(tlb
);
524 /* Flush each DTLB entry */
525 for_each_dtlb_entry(tlb
) {
526 __flush_tlb_slot(tlb
);
529 restore_flags(flags
);
532 #ifdef CONFIG_SH64_PAGE_TABLE_AUDIT
533 /* Scan the page table structure of an entire struct mm for obvious anomalies. */
/* Check if virtual address starts with 'a'. The kernel's superpage
 * lives at a0000000-bfffffff, and the lower part maps the RAM
 * directly. So if a kernel pointer doesn't start with its top nibble
 * = 'a' (at least up to 256Mb of RAM), it's bad. - RPC
 *
 * Returns 1 when bits 31..28 of `xx' are 0xa, else 0. */
static inline int starts_a(unsigned long xx)
{
	return (((xx >> 28) & 0xf) == 0xa) ? 1 : 0;
}
/* Heuristic validity test for a page-table entry value: builds a mask of
 * bit 31 OR'd with the kernel page-table flags.
 * NOTE(review): the function's opening brace and the comparison/return
 * that uses `mask' are missing from this chunk -- presumably it returns
 * whether (x & mask) == mask; confirm against the original file. */
static inline int is_present(unsigned long x)
	unsigned long mask = 0x80000000UL | _KERNPG_TABLE;
/*
 * Scan the page table structure of the current mm for obvious anomalies
 * (corrupt or non-kernel pointers, ptes pointing outside the known PCI /
 * EMI physical windows).  expevt/intevt/tra/vec identify the event that
 * triggered the audit, for the SHOW_SRC report.
 *
 * NOTE(review): this chunk is missing many original lines -- the opening
 * brace, the pgd/pmd/pte/i/j/k/in_pci/in_emi declarations, the per-level
 * pointer fetches, the SHOW_SRC switch header and invocations, and most
 * closing braces.  Statements are preserved exactly as found.
 */
void audit_mm (unsigned long long expevt, unsigned long long intevt, unsigned long long tra, unsigned long long vec)
	/* It seems simpler to have pgd,pmd,pte declared as pointers of
	   appropriate types that we can just apply array indices to.  What
	   we're doing here didn't fit with the existing macros very well. */
	typedef unsigned long long my_pte_t;
	typedef my_pte_t *my_pmd_t;
	typedef my_pmd_t *my_pgd_t;

	/* Report where this syscall/irq/exception came from (fragment:
	   the switch header of this macro is missing in this chunk). */
#define SHOW_SRC do { \
	case 0x100: case 0x400: \
		printk("EXPEVT=%08llx TRA=%08llx VEC=%08llx\n", expevt, tra, vec); break; \
		printk("INTEVT=%08llx\n", intevt); break; \
		printk("Source of this syscall/irq/exc unknown\n"); break; \

	unsigned long xx_pgd, xx_pmd, xx_pte;
	int pid = current->pid;
	struct mm_struct *mm;

	/* Ought to check for the page_present condition also. */

	mm = current->active_mm;
	if (in_interrupt() || !mm) return;

	pgd = (my_pgd_t *) mm->pgd;
	xx_pgd = (unsigned long) pgd;

	/* The pgd pointer itself must look like a kernel superpage address. */
	if (!starts_a(xx_pgd)) {
		printk("PID %5d, corrupt mm->pgd = %08lx\n", pid, xx_pgd);
		return; /* not a lot of point going on. */

	/* Level 1: walk the user part of the pgd. */
	for (i=0; i<USER_PTRS_PER_PGD; i++) {
		xx_pmd = (unsigned long) pmd;
		if (xx_pmd == _PGD_EMPTY) continue;
		if (!is_present(xx_pmd)) {
			printk("PID %5d, pgd[%d] not present (=%08lx)\n", pid, i, xx_pmd);
		if (!starts_a(xx_pmd)) {
			printk("PID %5d, pgd[%d] corrupted (=%08lx)\n", pid, i, xx_pmd);
			continue; /* no point scanning further. */

		/* Level 2: walk the pmd. */
		for (j=0; j<PTRS_PER_PMD; j++) {
			xx_pte = (unsigned long) pte;
			if (xx_pte == _PMD_EMPTY) continue;
			if (!is_present(xx_pte)) {
				printk("PID %5d, pmd[%d] not present (=%08lx) (in pgd[%d])\n", pid, j, xx_pte, i);
			if (!starts_a(xx_pte)) {
				printk("PID %5d, pmd[%d] corrupted (=%08lx) (in pgd[%d])\n", pid, j, xx_pte, i);
				continue; /* no point scanning further. */

			/* Level 3: walk the ptes themselves. */
			for (k=0; k<PTRS_PER_PTE; k++) {
				unsigned long long entry;
				unsigned long masked_entry;

				/* All user pages should be mapped onto EMI or be absent? */
				if (entry & _PAGE_PRESENT) {
					/* Frame number must fall in the PCI (0x40000000..)
					   or EMI RAM (0x80000000..) windows. */
					masked_entry = (unsigned long) entry & 0xfffff000UL;
					in_pci = (masked_entry >= 0x40000000UL) && (masked_entry < 0x5fffffffUL);
					in_emi = (masked_entry >= 0x80000000UL) && (masked_entry < 0x8fffffffUL); /* Assume 256Mb of RAM at most */
					if (!in_emi && !in_pci) { /* Assume 128Mb of RAM */
						printk("PID %5d, pte[%d] corrupted (=%08lx%08lx) (in pmd[%d], pgd[%d])\n", pid, k, (unsigned long) (entry>>32), (unsigned long) entry, j, i);
						continue; /* no point scanning further. */
655 #endif /* CONFIG_SH64_PAGE_TABLE_AUDIT */
657 #if defined(CONFIG_SH64_PROC_TLB)
658 /* Procfs interface to read the performance information */
/*
 * Procfs read handler for /proc/tlb: formats the fault/flush counters
 * defined at the top of this file into `buf' and returns the length.
 *
 * NOTE(review): the storage/return type line preceding the name, the
 * `len' declaration, the format string of the flush_tlb_range size
 * histogram, and the final return are missing from this chunk;
 * statements are preserved exactly as found.
 */
tlb_proc_info(char *buf, char **start, off_t fpos, int length, int *eof, void *data)
	len += sprintf(buf+len, "do_fast_page_fault called %12lld times\n", calls_to_do_fast_page_fault);
	len += sprintf(buf+len, "do_slow_page_fault called %12lld times\n", calls_to_do_slow_page_fault);
	len += sprintf(buf+len, "update_mmu_cache called %12lld times\n", calls_to_update_mmu_cache);
	len += sprintf(buf+len, "flush_tlb_page called %12lld times\n", calls_to_flush_tlb_page);
	len += sprintf(buf+len, "flush_tlb_range called %12lld times\n", calls_to_flush_tlb_range);
	len += sprintf(buf+len, "flush_tlb_mm called %12lld times\n", calls_to_flush_tlb_mm);
	len += sprintf(buf+len, "flush_tlb_all called %12lld times\n", calls_to_flush_tlb_all);
	/* Size histogram (format-string lines missing from this chunk). */
	len += sprintf(buf+len, "flush_tlb_range_sizes\n"
		       flush_tlb_range_1, flush_tlb_range_2, flush_tlb_range_3_4,
		       flush_tlb_range_5_7, flush_tlb_range_8_11, flush_tlb_range_12_15,
		       flush_tlb_range_16_up);
	len += sprintf(buf+len, "page not present %12lld times\n", page_not_present);
687 static int __init
register_proc_tlb(void)
689 create_proc_read_entry("tlb", 0, NULL
, tlb_proc_info
, NULL
);
693 __initcall(register_proc_tlb
);