Original kernel 2.4.37.5
[tomato.git] release/src/linux/linux/arch/sh64/mm/fault.c
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * arch/sh64/mm/fault.c
 *
 * Copyright (C) 2000, 2001  Paolo Alberelli
 * Copyright (C) 2003  <Richard.Curnow@superh.com> (/proc/tlb, audit_mm, bug fixes)
 * Copyright (C) 2003  Paul Mundt
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/interrupt.h>

#include <asm/system.h>
#include <asm/io.h>
#include <asm/tlb.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/hardirq.h>
#include <asm/mmu_context.h>
#include <asm/registers.h>	/* required by inline asm statements */

#if defined(CONFIG_SH64_PROC_TLB)
#include <linux/init.h>
#include <linux/proc_fs.h>
/* Count numbers of tlb refills in each region */
static unsigned long long calls_to_update_mmu_cache = 0ULL;
static unsigned long long calls_to_flush_tlb_page = 0ULL;
static unsigned long long calls_to_flush_tlb_range = 0ULL;
static unsigned long long calls_to_flush_tlb_mm = 0ULL;
static unsigned long long calls_to_flush_tlb_all = 0ULL;
unsigned long long calls_to_do_slow_page_fault = 0ULL;
unsigned long long calls_to_do_fast_page_fault = 0ULL;

/* Count size of ranges for flush_tlb_range */
static unsigned long long flush_tlb_range_1 = 0ULL;
static unsigned long long flush_tlb_range_2 = 0ULL;
static unsigned long long flush_tlb_range_3_4 = 0ULL;
static unsigned long long flush_tlb_range_5_7 = 0ULL;
static unsigned long long flush_tlb_range_8_11 = 0ULL;
static unsigned long long flush_tlb_range_12_15 = 0ULL;
static unsigned long long flush_tlb_range_16_up = 0ULL;

static unsigned long long page_not_present = 0ULL;

#endif

extern void die(const char *, struct pt_regs *, long);

#define PFLAG(val,flag)	(( (val) & (flag) ) ? #flag : "" )
#define PPROT(flag)	PFLAG(pgprot_val(prot),flag)

static __inline__ void print_prots(pgprot_t prot)
{
        printk("prot is 0x%08lx\n", pgprot_val(prot));

        printk("%s %s %s %s %s\n", PPROT(_PAGE_SHARED), PPROT(_PAGE_READ),
               PPROT(_PAGE_EXECUTE), PPROT(_PAGE_WRITE), PPROT(_PAGE_USER));
}

static __inline__ void print_vma(struct vm_area_struct *vma)
{
        printk("vma start 0x%08lx\n", vma->vm_start);
        printk("vma end   0x%08lx\n", vma->vm_end);

        print_prots(vma->vm_page_prot);
        printk("vm_flags 0x%08lx\n", vma->vm_flags);
}

static __inline__ void print_task(struct task_struct *tsk)
{
        printk("Task pid %d\n", tsk->pid);
}
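
/*
 * Walk the page tables of 'mm' for 'address': pgd -> pmd -> pte.
 * Returns a pointer to the pte if a present mapping exists, NULL otherwise.
 */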
static pte_t *lookup_pte(struct mm_struct *mm, unsigned long address)
{
        pgd_t *dir;
        pmd_t *pmd;
        pte_t *pte;
        pte_t entry;

        dir = pgd_offset(mm, address);
        if (pgd_none(*dir)) {
                return NULL;
        }

        pmd = pmd_offset(dir, address);
        if (pmd_none(*pmd)) {
                return NULL;
        }

        pte = pte_offset(pmd, address);
        entry = *pte;

        if (pte_none(entry)) {
                return NULL;
        }
        if (!pte_present(entry)) {
                return NULL;
        }

        return pte;
}

/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
                              unsigned long textaccess, unsigned long address)
{
        struct task_struct *tsk;
        struct mm_struct *mm;
        struct vm_area_struct *vma;
        unsigned long page;
        unsigned long long lpage;
        unsigned long fixup;
        pte_t *pte;

#if defined(CONFIG_SH64_PROC_TLB)
        ++calls_to_do_slow_page_fault;
#endif

        /* SIM
         * Note this is now called with interrupts still disabled.
         * This is to cope with being called for a missing IO port
         * address with interrupts disabled. This should be fixed as
         * soon as we have a better 'fast path' miss handler.
         *
         * Plus take care how you try and debug this stuff.
         * For example, writing debug data to a port which you
         * have just faulted on is not going to work.
         */

        tsk = current;
        mm = tsk->mm;

        /* Not an IO address, so reenable interrupts */
        sti();

        /*
         * If we're in an interrupt or have no user
         * context, we must not take the fault..
         */
        if (in_interrupt() || !mm)
                goto no_context;

        /* TLB misses upon some cache flushes get done under cli() */
        down_read(&mm->mmap_sem);

        vma = find_vma(mm, address);

        if (!vma) {
#ifdef DEBUG_FAULT
                print_task(tsk);
                printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n",
                       __FUNCTION__, __LINE__,
                       address, regs->pc, textaccess, writeaccess);
                show_regs(regs);
#endif
                goto bad_area;
        }

        if (vma->vm_start <= address) {
                goto good_area;
        }

        if (!(vma->vm_flags & VM_GROWSDOWN)) {
#ifdef DEBUG_FAULT
                print_task(tsk);
                printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n",
                       __FUNCTION__, __LINE__,
                       address, regs->pc, textaccess, writeaccess);
                show_regs(regs);

                print_vma(vma);
#endif
                goto bad_area;
        }

        if (expand_stack(vma, address)) {
#ifdef DEBUG_FAULT
                print_task(tsk);
                printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n",
                       __FUNCTION__, __LINE__,
                       address, regs->pc, textaccess, writeaccess);
                show_regs(regs);
#endif
                goto bad_area;
        }

        /*
         * Ok, we have a good vm_area for this memory access, so
         * we can handle it..
         */
good_area:
        if (writeaccess) {
                if (!(vma->vm_flags & VM_WRITE))
                        goto bad_area;
        } else {
                if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
                        goto bad_area;
        }

        if (textaccess) {
                if (!(vma->vm_flags & VM_EXEC))
                        goto bad_area;
        }

        /*
         * If for any reason at all we couldn't handle the fault,
         * make sure we exit gracefully rather than endlessly redo
         * the fault.
         */
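        /*
         * In the 2.4 VM, handle_mm_fault() returns 1 for a minor fault,
         * 2 for a major fault, 0 when the fault could not be handled
         * (treated as a bus error below), and anything else when we are
         * out of memory.
         */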
survive:
        switch (handle_mm_fault(mm, vma, address, writeaccess)) {
        case 1:
                tsk->min_flt++;
                break;
        case 2:
                tsk->maj_flt++;
                break;
        case 0:
                goto do_sigbus;
        default:
                goto out_of_memory;
        }
        /* If we get here, the page fault has been handled.  Do the TLB refill
           now from the newly-setup PTE, to avoid having to fault again right
           away on the same instruction. */
        pte = lookup_pte(mm, address);
        if (!pte) {
                /* From empirical evidence, we can get here, due to
                   !pte_present(pte). (e.g. if a swap-in occurs, and the page
                   is swapped back out again before the process that wanted it
                   gets rescheduled?) */
                goto no_pte;
        }

        __do_tlb_refill(address, textaccess, pte);

no_pte:

        up_read(&mm->mmap_sem);
        return;

/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
bad_area:
#ifdef DEBUG_FAULT
        printk("fault:bad area\n");
#endif
        up_read(&mm->mmap_sem);

        if (user_mode(regs)) {
                tsk->thread.address = address;
                tsk->thread.error_code = writeaccess;
                force_sig(SIGSEGV, tsk);
                return;
        }

no_context:
#ifdef DEBUG_FAULT
        printk("fault:No context\n");
#endif
        /* Are we prepared to handle this kernel fault? */
        fixup = search_exception_table(regs->pc);
        if (fixup != 0) {
                regs->pc = fixup;
                return;
        }

        /*
         * Oops. The kernel tried to access some bad page. We'll have to
         * terminate things with extreme prejudice.
         */
        if (address < PAGE_SIZE)
                printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
        else
                printk(KERN_ALERT "Unable to handle kernel paging request");
        printk(" at virtual address %08lx\n", address);
        printk(KERN_ALERT "pc = %08Lx%08Lx\n", regs->pc >> 32, regs->pc & 0xffffffff);
        die("Oops", regs, writeaccess);
        do_exit(SIGKILL);

        /*
         * We ran out of memory, or some other thing happened to us that made
         * us unable to handle the page fault gracefully.
         */
out_of_memory:
        if (current->pid == 1) {
                yield();
                goto survive;
        }
        printk("fault:Out of memory\n");
        up_read(&mm->mmap_sem);
        printk("VM: killing process %s\n", tsk->comm);
        if (user_mode(regs))
                do_exit(SIGKILL);
        goto no_context;

do_sigbus:
        printk("fault:Do sigbus\n");
        up_read(&mm->mmap_sem);

        /*
         * Send a sigbus, regardless of whether we were in kernel
         * or user mode.
         */
        tsk->thread.address = address;
        tsk->thread.error_code = writeaccess;
        tsk->thread.trap_no = 14;
        force_sig(SIGBUS, tsk);

        /* Kernel mode? Handle exceptions or die */
        if (!user_mode(regs))
                goto no_context;
}

void flush_tlb_all(void);

void update_mmu_cache(struct vm_area_struct *vma,
                      unsigned long address, pte_t pte)
{
#if defined(CONFIG_SH64_PROC_TLB)
        ++calls_to_update_mmu_cache;
#endif
        /* This appears to get called once for every pte entry that gets
           established => I don't think it's efficient to try refilling the
           TLBs with the pages - some may not get accessed even.  Also, for
           executable pages, it is impossible to determine reliably here which
           TLB they should be mapped into (or both even).

           So, just do nothing here and handle faults on demand.  In the
           TLBMISS handling case, the refill is now done anyway after the pte
           has been fixed up, so that deals with most useful cases.
           */
        return;
}
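
/*
 * Flush one page's translation from the hardware TLBs.  Build the PTEH
 * value we expect to find (the mm's ASID, the valid bit, and the
 * sign-extended page address), then scan the ITLB (only for executable
 * VMAs) and the DTLB with getcfg, invalidating the slot that matches.
 */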
static void __flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
{
        unsigned long long match, pteh=0, lpage;
        unsigned long tlb;
        struct mm_struct *mm;

        mm = vma->vm_mm;

        if (mm->context == NO_CONTEXT)
                return;

        /*
         * Sign-extend based on neff.
         */
        lpage = (page & NEFF_SIGN) ? (page | NEFF_MASK) : page;
        match = ((mm->context & MMU_CONTEXT_ASID_MASK) << PTEH_ASID_SHIFT) | PTEH_VALID;
        match |= lpage;

        /* Do ITLB : don't bother for pages in non-executable VMAs */
        if (vma->vm_flags & VM_EXEC) {
                for_each_itlb_entry(tlb) {
                        asm volatile ("getcfg %1, 0, %0"
                                      : "=r" (pteh)
                                      : "r" (tlb) );

                        if (pteh == match) {
                                __flush_tlb_slot(tlb);
                                break;
                        }
                }
        }

        /* Do DTLB : any page could potentially be in here. */
        for_each_dtlb_entry(tlb) {
                asm volatile ("getcfg %1, 0, %0"
                              : "=r" (pteh)
                              : "r" (tlb) );

                if (pteh == match) {
                        __flush_tlb_slot(tlb);
                        break;
                }
        }
}
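
/*
 * Public entry point: mask the address down to its page and flush it with
 * interrupts disabled, but only if the VMA actually belongs to an mm.
 */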
void flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
{
        unsigned long flags;

#if defined(CONFIG_SH64_PROC_TLB)
        ++calls_to_flush_tlb_page;
#endif

        if (vma->vm_mm) {
                page &= PAGE_MASK;
                save_and_cli(flags);
                __flush_tlb_page(vma, page);
                restore_flags(flags);
        }
}
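
/*
 * Flush every TLB entry whose low bits (ASID + valid) match this mm and
 * whose effective page number falls within [start, end].  Both the ITLB
 * and the DTLB are scanned entry by entry with getcfg, and matching slots
 * are invalidated.
 */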
void flush_tlb_range(struct mm_struct *mm, unsigned long start,
                     unsigned long end)
{
        unsigned long flags;
        unsigned long long match, pteh=0, pteh_epn, pteh_low;
        unsigned long tlb;

#if defined(CONFIG_SH64_PROC_TLB)
        ++calls_to_flush_tlb_range;

        {
                unsigned long size = (end - 1) - start;
                size >>= 12; /* divide by PAGE_SIZE */
                size++; /* end=start+4096 => 1 page */
                switch (size) {
                case  1        : flush_tlb_range_1++;     break;
                case  2        : flush_tlb_range_2++;     break;
                case  3 ...  4 : flush_tlb_range_3_4++;   break;
                case  5 ...  7 : flush_tlb_range_5_7++;   break;
                case  8 ... 11 : flush_tlb_range_8_11++;  break;
                case 12 ... 15 : flush_tlb_range_12_15++; break;
                default        : flush_tlb_range_16_up++; break;
                }
        }
#endif

        if (mm->context == NO_CONTEXT)
                return;

        save_and_cli(flags);

        start &= PAGE_MASK;
        end &= PAGE_MASK;

        match = ((mm->context & MMU_CONTEXT_ASID_MASK) << PTEH_ASID_SHIFT) | PTEH_VALID;

        /* Flush ITLB */
        for_each_itlb_entry(tlb) {
                asm volatile ("getcfg %1, 0, %0"
                              : "=r" (pteh)
                              : "r" (tlb) );

                pteh_epn = pteh & PAGE_MASK;
                pteh_low = pteh & ~PAGE_MASK;

                if (pteh_low == match && pteh_epn >= start && pteh_epn <= end)
                        __flush_tlb_slot(tlb);
        }

        /* Flush DTLB */
        for_each_dtlb_entry(tlb) {
                asm volatile ("getcfg %1, 0, %0"
                              : "=r" (pteh)
                              : "r" (tlb) );

                pteh_epn = pteh & PAGE_MASK;
                pteh_low = pteh & ~PAGE_MASK;

                if (pteh_low == match && pteh_epn >= start && pteh_epn <= end)
                        __flush_tlb_slot(tlb);
        }

        restore_flags(flags);
}
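
/*
 * Flush all of this mm's translations lazily: drop its context (ASID) so
 * stale entries can no longer match, and, if this mm is the one currently
 * running, immediately pick up a new context via activate_context().
 */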
void flush_tlb_mm(struct mm_struct *mm)
{
        unsigned long flags;

#if defined(CONFIG_SH64_PROC_TLB)
        ++calls_to_flush_tlb_mm;
#endif

        if (mm->context == NO_CONTEXT)
                return;

        save_and_cli(flags);

        mm->context = NO_CONTEXT;
        if (mm == current->mm)
                activate_context(mm);

        restore_flags(flags);
}
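
/*
 * Invalidate every ITLB and DTLB slot the iterators cover, with interrupts
 * disabled for the duration of the walk.
 */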
void flush_tlb_all(void)
{
        /* Invalidate all, including shared pages, excluding fixed TLBs */

        unsigned long flags, tlb;

#if defined(CONFIG_SH64_PROC_TLB)
        ++calls_to_flush_tlb_all;
#endif

        save_and_cli(flags);

        /* Flush each ITLB entry */
        for_each_itlb_entry(tlb) {
                __flush_tlb_slot(tlb);
        }

        /* Flush each DTLB entry */
        for_each_dtlb_entry(tlb) {
                __flush_tlb_slot(tlb);
        }

        restore_flags(flags);
}

#ifdef CONFIG_SH64_PAGE_TABLE_AUDIT
/* Scan the page table structure of an entire struct mm for obvious anomalies. */

static inline int starts_a(unsigned long xx)
{
        /* Check if the virtual address starts with 'a'.  The kernel's
         * superpage lives at a0000000-bfffffff, and the lower part maps the
         * RAM directly.  So if a kernel pointer's top nibble isn't 'a' (at
         * least up to 256Mb of RAM), it's bad. - RPC */
        if (((xx >> 28) & 0xf) == 0xa) {
                return 1;
        } else {
                return 0;
        }
}
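
/*
 * Loose "present" test for a pgd/pmd entry value: report it as present if
 * any of the _KERNPG_TABLE bits (or bit 31) is set.
 */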
static inline int is_present(unsigned long x)
{
        unsigned long mask = 0x80000000UL | _KERNPG_TABLE;
        unsigned long y;

        y = x & mask;
        if (y) return 1;
        else return 0;
}
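
/*
 * Walk the current mm's page tables (treating the pgd/pmd levels as plain
 * pointer arrays) and report entries that don't look sane: non-present
 * entries, pointers outside the kernel's 'a' superpage, and present PTEs
 * whose physical frame falls outside the assumed EMI/PCI windows.  The
 * expevt/intevt/tra/vec arguments identify the exception or interrupt that
 * triggered the audit, for the diagnostic output.
 */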
void audit_mm(unsigned long long expevt, unsigned long long intevt, unsigned long long tra, unsigned long long vec)
{
        /* It seems simpler to have pgd,pmd,pte declared as pointers of
           appropriate types that we can just apply array indices to.  What
           we're doing here didn't fit with the existing macros very well. */
        typedef unsigned long long my_pte_t;
        typedef my_pte_t *my_pmd_t;
        typedef my_pmd_t *my_pgd_t;

#define SHOW_SRC do { \
        switch (vec) { \
        case 0x100: case 0x400: \
                printk("EXPEVT=%08llx TRA=%08llx VEC=%08llx\n", expevt, tra, vec); break; \
        case 0x600: \
                printk("INTEVT=%08llx\n", intevt); break; \
        default: \
                printk("Source of this syscall/irq/exc unknown\n"); break; \
        } \
        } while (0)

        my_pgd_t *pgd;
        my_pmd_t *pmd;
        my_pte_t *pte;

        unsigned long xx_pgd, xx_pmd, xx_pte;
        int i, j, k;
        int pid = current->pid;
        struct mm_struct *mm;

        /* Ought to check for the page_present condition also. */

        mm = current->active_mm;
        if (in_interrupt() || !mm) return;

        pgd = (my_pgd_t *) mm->pgd;
        xx_pgd = (unsigned long) pgd;

        if (!starts_a(xx_pgd)) {
                printk("PID %5d, corrupt mm->pgd = %08lx\n", pid, xx_pgd);
                SHOW_SRC;
                return; /* not a lot of point going on. */
        }

        for (i=0; i<USER_PTRS_PER_PGD; i++) {
                pmd = pgd[i];
                xx_pmd = (unsigned long) pmd;
                if (xx_pmd == _PGD_EMPTY) continue;
                if (!is_present(xx_pmd)) {
                        printk("PID %5d, pgd[%d] not present (=%08lx)\n", pid, i, xx_pmd);
                        SHOW_SRC;
                        continue;
                }

                if (!starts_a(xx_pmd)) {
                        printk("PID %5d, pgd[%d] corrupted (=%08lx)\n", pid, i, xx_pmd);
                        SHOW_SRC;
                        continue; /* no point scanning further. */
                }

                for (j=0; j<PTRS_PER_PMD; j++) {
                        pte = pmd[j];
                        xx_pte = (unsigned long) pte;
                        if (xx_pte == _PMD_EMPTY) continue;
                        if (!is_present(xx_pte)) {
                                printk("PID %5d, pmd[%d] not present (=%08lx) (in pgd[%d])\n", pid, j, xx_pte, i);
                                SHOW_SRC;
                                continue;
                        }
                        if (!starts_a(xx_pte)) {
                                printk("PID %5d, pmd[%d] corrupted (=%08lx) (in pgd[%d])\n", pid, j, xx_pte, i);
                                SHOW_SRC;
                                continue; /* no point scanning further. */
                        }

                        for (k=0; k<PTRS_PER_PTE; k++) {
                                unsigned long long entry;
                                unsigned long masked_entry;

                                entry = pte[k];

                                /* All user pages should be mapped onto EMI or be absent? */
                                if (entry & _PAGE_PRESENT) {
                                        int in_pci, in_emi;
                                        masked_entry = (unsigned long) entry & 0xfffff000UL;
                                        in_pci = (masked_entry >= 0x40000000UL) && (masked_entry < 0x5fffffffUL);
                                        in_emi = (masked_entry >= 0x80000000UL) && (masked_entry < 0x8fffffffUL); /* Assume 256Mb of RAM at most */
                                        if (!in_emi && !in_pci) {
                                                printk("PID %5d, pte[%d] corrupted (=%08lx%08lx) (in pmd[%d], pgd[%d])\n", pid, k, (unsigned long) (entry>>32), (unsigned long) entry, j, i);
                                                SHOW_SRC;
                                                continue; /* no point scanning further. */
                                        }
                                }
                        }
                }
        }
}
#endif /* CONFIG_SH64_PAGE_TABLE_AUDIT */

#if defined(CONFIG_SH64_PROC_TLB)
/* Procfs interface to read the performance information */

static int
tlb_proc_info(char *buf, char **start, off_t fpos, int length, int *eof, void *data)
{
        int len=0;
        len += sprintf(buf+len, "do_fast_page_fault called %12lld times\n", calls_to_do_fast_page_fault);
        len += sprintf(buf+len, "do_slow_page_fault called %12lld times\n", calls_to_do_slow_page_fault);
        len += sprintf(buf+len, "update_mmu_cache called %12lld times\n", calls_to_update_mmu_cache);
        len += sprintf(buf+len, "flush_tlb_page called %12lld times\n", calls_to_flush_tlb_page);
        len += sprintf(buf+len, "flush_tlb_range called %12lld times\n", calls_to_flush_tlb_range);
        len += sprintf(buf+len, "flush_tlb_mm called %12lld times\n", calls_to_flush_tlb_mm);
        len += sprintf(buf+len, "flush_tlb_all called %12lld times\n", calls_to_flush_tlb_all);
        len += sprintf(buf+len, "flush_tlb_range_sizes\n"
                       " 1      : %12lld\n"
                       " 2      : %12lld\n"
                       " 3 -  4 : %12lld\n"
                       " 5 -  7 : %12lld\n"
                       " 8 - 11 : %12lld\n"
                       "12 - 15 : %12lld\n"
                       "16+     : %12lld\n",
                       flush_tlb_range_1, flush_tlb_range_2, flush_tlb_range_3_4,
                       flush_tlb_range_5_7, flush_tlb_range_8_11, flush_tlb_range_12_15,
                       flush_tlb_range_16_up);
        len += sprintf(buf+len, "page not present %12lld times\n", page_not_present);
        *eof = 1;
        return len;
}

static int __init register_proc_tlb(void)
{
        create_proc_read_entry("tlb", 0, NULL, tlb_proc_info, NULL);
        return 0;
}

__initcall(register_proc_tlb);

#endif