/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * arch/sh64/mm/cache.c
 *
 * Original version Copyright (C) 2000, 2001  Paolo Alberelli
 * Second version Copyright (C) benedict.gaster@superh.com 2002
 * Third version Copyright Richard.Curnow@superh.com 2003
 * Hacks to third version Copyright (C) 2003 Paul Mundt
 */

/****************************************************************************/

#include <linux/config.h>
#include <linux/init.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/threads.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/tlb.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h> /* for flush_itlb_range */

#include <linux/proc_fs.h>

/* This function is in entry.S */
extern unsigned long switch_and_save_asid(unsigned long new_asid);

/* Wired TLB entry for the D-cache */
static unsigned long long dtlb_cache_slot;

/**
 * sh64_cache_init()
 *
 * This is pretty much just a straightforward clone of the SH
 * detect_cpu_and_cache_system().
 *
 * This function is responsible for setting up all of the cache
 * info dynamically as well as taking care of CPU probing and
 * setting up the relevant subtype data.
 *
 * FIXME: For the time being, we only really support the SH5-101
 * out of the box, and don't support dynamic probing for things
 * like the SH5-103 or even cut2 of the SH5-101. Implement this
 * later!
 */
int __init sh64_cache_init(void)
{
        /*
         * First, setup some sane values for the I-cache.
         */
        cpu_data->icache.ways = 4;
        cpu_data->icache.sets = 256;
        cpu_data->icache.linesz = L1_CACHE_BYTES;

        /*
         * FIXME: This can probably be cleaned up a bit as well.. for example,
         * do we really need the way shift _and_ the way_step_shift ?? Judging
         * by the existing code, I would guess no.. is there any valid reason
         * why we need to be tracking this around?
         */
        cpu_data->icache.way_shift = 13;
        cpu_data->icache.entry_shift = 5;
        cpu_data->icache.set_shift = 4;
        cpu_data->icache.way_step_shift = 16;
        cpu_data->icache.asid_shift = 2;

        /*
         * way offset = cache size / associativity, so just don't factor in
         * associativity in the first place..
         */
        cpu_data->icache.way_ofs = cpu_data->icache.sets *
                                   cpu_data->icache.linesz;

        cpu_data->icache.asid_mask = 0x3fc;
        cpu_data->icache.idx_mask = 0x1fe0;
        cpu_data->icache.epn_mask = 0xffffe000;
        cpu_data->icache.flags = 0;
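        /*
         * For reference: together with the 32-byte line size implied by the
         * icbi/ocbp loops below (assuming L1_CACHE_BYTES is 32 here), the
         * values above describe a 4-way, 256-set cache: 8 KB per way (hence
         * way_shift = 13) and 32 KB in total.
         */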

        /*
         * Next, setup some sane values for the D-cache.
         *
         * On the SH5, these are pretty consistent with the I-cache settings,
         * so we just copy over the existing definitions.. these can be fixed
         * up later, especially if we add runtime CPU probing.
         *
         * Though in the meantime it saves us from having to duplicate all of
         * the above definitions..
         */
        cpu_data->dcache = cpu_data->icache;

        /*
         * Setup any cache-related flags here
         */
#if defined(CONFIG_DCACHE_WRITE_THROUGH)
        set_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags));
#elif defined(CONFIG_DCACHE_WRITE_BACK)
        set_bit(SH_CACHE_MODE_WB, &(cpu_data->dcache.flags));
#endif

        /*
         * We also need to reserve a slot for the D-cache in the DTLB, so we
         * do this now ..
         */
        dtlb_cache_slot = sh64_get_wired_dtlb_entry();

        return 0;
}

/*##########################################################################*/

/* From here onwards, a rewrite of the implementation,
   by Richard.Curnow@superh.com.

   The major changes in this compared to the old version are;
   1. use more selective purging through OCBP instead of using ALLOCO to purge
      by natural replacement.  This avoids purging out unrelated cache lines
      that happen to be in the same set.
   2. exploit the APIs copy_user_page and clear_user_page better
   3. be more selective about I-cache purging, in particular use invalidate_all
      more sparingly.
   */

/*##########################################################################
                             SUPPORT FUNCTIONS
  ##########################################################################*/
/****************************************************************************/
/* The following group of functions deals with mapping and unmapping a temporary
   page into the DTLB slot that has been set aside for our exclusive use. */
/* In order to accomplish this, we use the generic interface for adding and
   removing a wired slot entry as defined in arch/sh64/mm/tlb.c */
/****************************************************************************/
static unsigned long slot_own_flags;

static inline void sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid, unsigned long paddr)
{
        local_irq_save(slot_own_flags);
        sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr);
}

static inline void sh64_teardown_dtlb_cache_slot(void)
{
        sh64_teardown_tlb_slot(dtlb_cache_slot);
        local_irq_restore(slot_own_flags);
}
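
/* The pattern used with this pair in the rest of this file is, roughly:

        sh64_setup_dtlb_cache_slot(alias_eaddr, get_asid(), paddr);
        ... ocbp / copy / clear through the alias mapping ...
        sh64_teardown_dtlb_cache_slot();

   ('alias_eaddr' is only an illustrative name, not something defined here.)
   Interrupts stay disabled across the whole sequence via slot_own_flags, so
   the wired slot cannot be reused underneath us. */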
/****************************************************************************/

#ifndef CONFIG_ICACHE_DISABLED

static void __inline__ sh64_icache_inv_all(void)
{
        unsigned long long addr, flag, data;
        unsigned int flags;

        addr = ICCR0;
        flag = ICCR0_ICI;
        data = 0;

        /* Make this a critical section for safety (probably not strictly necessary.) */
        local_irq_save(flags);

        /* Without %1 it gets inexplicably wrong */
        asm volatile("getcfg %3, 0, %0\n\t"
                     "or %0, %2, %0\n\t"
                     "putcfg %3, 0, %0\n\t"
                     "synci"
                     : "=&r" (data)
                     : "0" (data), "r" (flag), "r" (addr));

        local_irq_restore(flags);
}

static void sh64_icache_inv_kernel_range(unsigned long start, unsigned long end)
{
        /* Invalidate range of addresses [start,end] from the I-cache, where
           the addresses lie in the kernel superpage. */

        unsigned long long ullend, addr, aligned_start;
#if (NEFF == 32)
        aligned_start = (unsigned long long)(signed long long)(signed long) start;
#else
#error "NEFF != 32"
#endif
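        /* The cast chain above sign-extends the 32-bit kernel pointer into a
           64-bit effective address (with NEFF == 32, bits 63:32 must mirror
           bit 31).  For example, 0xec000000 would become 0xffffffffec000000,
           matching the form of MAGIC_PAGE0_START further down. */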
        aligned_start &= L1_CACHE_ALIGN_MASK;
        addr = aligned_start;
#if (NEFF == 32)
        ullend = (unsigned long long) (signed long long) (signed long) end;
#else
#error "NEFF != 32"
#endif
        while (addr <= ullend) {
                asm __volatile__ ("icbi %0, 0" : : "r" (addr));
                addr += L1_CACHE_BYTES;
        }
}

static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long eaddr)
{
        /* If we get called, we know that vma->vm_flags contains VM_EXEC.
           Also, eaddr is page-aligned. */

        unsigned long long addr, end_addr;
        unsigned long flags = 0;
        unsigned long running_asid, vma_asid;
        addr = eaddr;
        end_addr = addr + PAGE_SIZE;

        /* Check whether we can use the current ASID for the I-cache
           invalidation.  For example, if we're called via
           access_process_vm->flush_cache_page->here, (e.g. when reading from
           /proc), 'running_asid' will be that of the reader, not of the
           victim.

           Also, note the risk that we might get pre-empted between the ASID
           compare and blocking IRQs, and before we regain control, the
           pid->ASID mapping changes.  However, the whole cache will get
           invalidated when the mapping is renewed, so the worst that can
           happen is that the loop below ends up invalidating somebody else's
           cache entries.
        */

        running_asid = get_asid();
        vma_asid = (vma->vm_mm->context & MMU_CONTEXT_ASID_MASK);
        if (running_asid != vma_asid) {
                local_irq_save(flags);
                switch_and_save_asid(vma_asid);
        }
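        /* Each pass of the loop below covers 128 bytes: four icbi's at
           offsets 0/32/64/96 and then an advance of 128, matching the
           32-byte line size.  With 4 KB pages (an assumption, not something
           stated in this file) that is 32 iterations per page. */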
        while (addr < end_addr) {
                /* Worth unrolling a little */
                asm __volatile__("icbi %0, 0" : : "r" (addr));
                asm __volatile__("icbi %0, 32" : : "r" (addr));
                asm __volatile__("icbi %0, 64" : : "r" (addr));
                asm __volatile__("icbi %0, 96" : : "r" (addr));
                addr += 128;
        }

        if (running_asid != vma_asid) {
                switch_and_save_asid(running_asid);
                local_irq_restore(flags);
        }
}

/****************************************************************************/

static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
                        unsigned long start, unsigned long end)
{
        /* Used for invalidating big chunks of I-cache, i.e. assume the range
           is whole pages.  If 'start' or 'end' is not page aligned, the code
           is conservative and invalidates to the ends of the enclosing pages.
           This is functionally OK, just a performance loss. */

        /* See the comments below in sh64_dcache_purge_user_range() regarding
           the choice of algorithm.  However, for the I-cache, option (2) isn't
           available because there are no physical tags so aliases can't be
           resolved.  The icbi instruction has to be used through the user
           mapping.  Because icbi is cheaper than ocbp on a cache hit, the
           selective code remains worthwhile up to a larger range than it does
           for the D-cache.  Just assume 64 pages for now as a working figure.
           */

        int n_pages;

        if (!mm) return;

        n_pages = ((end - start) >> PAGE_SHIFT);
        if (n_pages >= 64) {
                sh64_icache_inv_all();
        } else {
                unsigned long aligned_start;
                unsigned long eaddr;
                unsigned long after_last_page_start;
                unsigned long mm_asid, current_asid;
                unsigned long long flags = 0ULL;

                mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
                current_asid = get_asid();

                if (mm_asid != current_asid) {
                        /* Switch ASID and run the invalidate loop under cli */
                        local_irq_save(flags);
                        switch_and_save_asid(mm_asid);
                }

                aligned_start = start & PAGE_MASK;
                after_last_page_start = PAGE_SIZE + ((end - 1) & PAGE_MASK);
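                /* 'after_last_page_start' is the first byte beyond the last
                   page the range touches.  For instance, with 4 KB pages and
                   end = 0x40001001, (end - 1) & PAGE_MASK gives 0x40001000
                   and adding PAGE_SIZE gives 0x40002000. */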
                while (aligned_start < after_last_page_start) {
                        struct vm_area_struct *vma;
                        unsigned long vma_end;
                        vma = find_vma(mm, aligned_start);
                        if (!vma || (aligned_start < vma->vm_start)) {
                                /* Avoid getting stuck in an error condition :
                                   no VMA covers this page, so just step over it */
                                aligned_start += PAGE_SIZE;
                                continue;
                        }
                        vma_end = vma->vm_end;
                        if (vma->vm_flags & VM_EXEC) {
                                /* Executable */
                                eaddr = aligned_start;
                                while (eaddr < vma_end) {
                                        sh64_icache_inv_user_page(vma, eaddr);
                                        eaddr += PAGE_SIZE;
                                }
                        }
                        aligned_start = vma->vm_end; /* Skip to start of next region */
                }

                if (mm_asid != current_asid) {
                        switch_and_save_asid(current_asid);
                        local_irq_restore(flags);
                }
        }
}

static void sh64_icache_inv_user_small_range(struct mm_struct *mm,
                        unsigned long start, int len)
{
        /* Invalidate a small range of user context I-cache, not necessarily
           page (or even cache-line) aligned. */

        unsigned long long eaddr = start;
        unsigned long long eaddr_end = start + len;
        unsigned long current_asid, mm_asid;
        unsigned long long flags;
        unsigned long long epage_start;

        /* Since this is used inside ptrace, the ASID in the mm context
           typically won't match current_asid.  We'll have to switch ASID to do
           this.  For safety, and given that the range will be small, do all
           this under cli.

           Note, there is a hazard that the ASID in mm->context is no longer
           actually associated with mm, i.e. if the mm->context has started a
           new cycle since mm was last active.  However, this is just a
           performance issue: all that happens is that we invalidate lines
           belonging to another mm, so the owning process has to refill them
           when that mm goes live again.  mm itself can't have any cache
           entries because there will have been a flush_cache_all when the new
           mm->context cycle started. */

        /* Align to start of cache line.  Otherwise, suppose len==8 and start
           was at 32N+28 : the last 4 bytes wouldn't get invalidated. */
        eaddr = start & L1_CACHE_ALIGN_MASK;
        eaddr_end = start + len;

        local_irq_save(flags);
        mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
        current_asid = switch_and_save_asid(mm_asid);

        epage_start = eaddr & PAGE_MASK;

        while (eaddr < eaddr_end)
        {
                asm __volatile__("icbi %0, 0" : : "r" (eaddr));
                eaddr += L1_CACHE_BYTES;
        }
        switch_and_save_asid(current_asid);
        local_irq_restore(flags);
}

static void sh64_icache_inv_current_user_range(unsigned long start, unsigned long end)
{
        /* The icbi instruction never raises ITLBMISS.  i.e. if there's not a
           cache hit on the virtual tag the instruction ends there, without a
           TLB lookup. */

        unsigned long long aligned_start;
        unsigned long long ull_end;
        unsigned long long addr;

        ull_end = end;

        /* Just invalidate over the range using the natural addresses.  TLB
           miss handling will be OK (TBC).  Since it's for the current process,
           either we're already in the right ASID context, or the ASIDs have
           been recycled since we were last active in which case we might just
           invalidate another process's I-cache entries : no worries, just a
           performance drop for him. */
        aligned_start = start & L1_CACHE_ALIGN_MASK;
        addr = aligned_start;
        while (addr < ull_end) {
                asm __volatile__ ("icbi %0, 0" : : "r" (addr));
                asm __volatile__ ("nop");
                asm __volatile__ ("nop");
                addr += L1_CACHE_BYTES;
        }
}

#endif /* !CONFIG_ICACHE_DISABLED */

/****************************************************************************/

#ifndef CONFIG_DCACHE_DISABLED

/* Buffer used as the target of alloco instructions to purge data from cache
   sets by natural eviction. -- RPC */
#define DUMMY_ALLOCO_AREA_SIZE L1_CACHE_SIZE_BYTES + (1024 * 4)
static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, };

/****************************************************************************/

static void __inline__ sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
{
        /* Purge all ways in a particular block of sets, specified by the base
           set number and number of sets.  Can handle wrap-around, if that's
           needed. */

        int dummy_buffer_base_set;
        unsigned long long eaddr, eaddr0, eaddr1;
        int j;
        int set_offset;

        dummy_buffer_base_set = ((int)&dummy_alloco_area & cpu_data->dcache.idx_mask) >> cpu_data->dcache.entry_shift;
        set_offset = sets_to_purge_base - dummy_buffer_base_set;

        for (j=0; j<n_sets; j++, set_offset++) {
                set_offset &= (cpu_data->dcache.sets - 1);
                eaddr0 = (unsigned long long)dummy_alloco_area + (set_offset << cpu_data->dcache.entry_shift);

                /* Do one alloco which hits the required set per cache way.  For
                   write-back mode, this will purge the #ways resident lines.  There's
                   little point unrolling this loop because the allocos stall more if
                   they're too close together. */
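                /* The stride below is way_ofs = sets * linesz (8 KB with the
                   geometry set up in sh64_cache_init()), so all of the 'ways'
                   addresses generated for a set index that same cache set;
                   allocating that many distinct lines into one set pushes out
                   whatever was previously resident there. */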
                eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
                for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) {
                        asm __volatile__ ("alloco %0, 0" : : "r" (eaddr));
                }

                eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
                for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) {
                        /* Load from each address.  Required because alloco is a NOP if
                           the cache is write-through.  Write-through is a config option. */
                        if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)))
                                *(volatile unsigned char *)(int)eaddr;
                }
        }

        /* Don't use OCBI to invalidate the lines.  That costs cycles directly.
           If the dummy block is just left resident, it will naturally get
           evicted as required. */

        return;
}

/****************************************************************************/

static void sh64_dcache_purge_all(void)
{
        /* Purge the entire contents of the dcache.  The most efficient way to
           achieve this is to use alloco instructions on a region of unused
           memory equal in size to the cache, thereby causing the current
           contents to be discarded by natural eviction.  The alternative,
           namely reading every tag, setting up a mapping for the corresponding
           page and doing an OCBP for the line, would be much more expensive.
           */

        sh64_dcache_purge_sets(0, cpu_data->dcache.sets);

        return;
}

/****************************************************************************/

static void sh64_dcache_purge_kernel_range(unsigned long start, unsigned long end)
{
        /* Purge the range of addresses [start,end] from the D-cache.  The
           addresses lie in the superpage mapping.  There's no harm if we
           overpurge at either end - just a small performance loss. */
        unsigned long long ullend, addr, aligned_start;
#if (NEFF == 32)
        aligned_start = (unsigned long long)(signed long long)(signed long) start;
#else
#error "NEFF != 32"
#endif
        aligned_start &= L1_CACHE_ALIGN_MASK;
        addr = aligned_start;
#if (NEFF == 32)
        ullend = (unsigned long long) (signed long long) (signed long) end;
#else
#error "NEFF != 32"
#endif
        while (addr <= ullend) {
                asm __volatile__ ("ocbp %0, 0" : : "r" (addr));
                addr += L1_CACHE_BYTES;
        }
        return;
}

/* Assumes this address (+ (2**n_synbits) pages up from it) aren't used for
   anything else in the kernel */
#define MAGIC_PAGE0_START 0xffffffffec000000ULL

static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr, unsigned long eaddr)
{
        /* Purge the physical page 'paddr' from the cache.  It's known that any
           cache lines requiring attention have the same page colour as the
           address 'eaddr'.

           This relies on the fact that the D-cache matches on physical tags
           when no virtual tag matches.  So we create an alias for the original
           page and purge through that.  (Alternatively, we could have done
           this by switching ASID to match the original mapping and purged
           through that, but that involves ASID switching cost + probably a
           TLBMISS + refill anyway.)
        */

        unsigned long long magic_page_start;
        unsigned long long magic_eaddr, magic_eaddr_end;

        magic_page_start = MAGIC_PAGE0_START + (eaddr & CACHE_OC_SYN_MASK);
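        /* Adding (eaddr & CACHE_OC_SYN_MASK) picks the alias page whose
           synonym ("colour") bits match the user's mapping, so the OCBPs
           issued through the alias index exactly the cache sets that could
           hold lines for 'eaddr'. */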
        /* As long as the kernel is not pre-emptible, this doesn't need to be
           under cli/sti. */

        sh64_setup_dtlb_cache_slot(magic_page_start, get_asid(), paddr);

        magic_eaddr = magic_page_start;
        magic_eaddr_end = magic_eaddr + PAGE_SIZE;
        while (magic_eaddr < magic_eaddr_end) {
                /* Little point in unrolling this loop - the OCBPs are blocking
                   and won't go any quicker (i.e. the loop overhead is parallel
                   to part of the OCBP execution.) */
                asm __volatile__ ("ocbp %0, 0" : : "r" (magic_eaddr));
                magic_eaddr += L1_CACHE_BYTES;
        }

        sh64_teardown_dtlb_cache_slot();
}

/****************************************************************************/

static void sh64_dcache_purge_phy_page(unsigned long paddr)
{
        /* Purge a page given its physical start address, by creating a
           temporary 1 page mapping and purging across that.  Even if we know
           the virtual address (& vma or mm) of the page, the method here is
           more elegant because it avoids issues of coping with page faults on
           the purge instructions (i.e. no special-case code required in the
           critical path in the TLB miss handling). */

        unsigned long long eaddr_start, eaddr, eaddr_end;
        int i;

        /* As long as the kernel is not pre-emptible, this doesn't need to be
           under cli/sti. */

        eaddr_start = MAGIC_PAGE0_START;
        for (i=0; i < (1 << CACHE_OC_N_SYNBITS); i++) {
                sh64_setup_dtlb_cache_slot(eaddr_start, get_asid(), paddr);

                eaddr = eaddr_start;
                eaddr_end = eaddr + PAGE_SIZE;
                while (eaddr < eaddr_end) {
                        asm __volatile__ ("ocbp %0, 0" : : "r" (eaddr));
                        eaddr += L1_CACHE_BYTES;
                }

                sh64_teardown_dtlb_cache_slot();
                eaddr_start += PAGE_SIZE;
        }
}

static void sh64_dcache_purge_virt_page(struct mm_struct *mm, unsigned long eaddr)
{
        unsigned long phys;
        pgd_t *pgd;
        pmd_t *pmd;
        pte_t *pte;
        pte_t entry;

        pgd = pgd_offset(mm, eaddr);
        pmd = pmd_offset(pgd, eaddr);

        if (pmd_none(*pmd) || pmd_bad(*pmd))
                return;

        pte = pte_offset_kernel(pmd, eaddr);
        entry = *pte;

        if (pte_none(entry) || !pte_present(entry))
                return;

        phys = pte_val(entry) & PAGE_MASK;

        sh64_dcache_purge_phy_page(phys);
}

static void sh64_dcache_purge_user_page(struct mm_struct *mm, unsigned long eaddr)
{
        pgd_t *pgd;
        pmd_t *pmd;
        pte_t *pte;
        pte_t entry;
        unsigned long paddr;

        /* NOTE : all the callers of this have mm->page_table_lock held, so the
           following page table traversal is safe even on SMP/pre-emptible. */

        if (!mm) return; /* No way to find physical address of page */
        pgd = pgd_offset(mm, eaddr);
        if (pgd_bad(*pgd)) return;

        pmd = pmd_offset(pgd, eaddr);
        if (pmd_none(*pmd) || pmd_bad(*pmd)) return;

        pte = pte_offset_kernel(pmd, eaddr);
        entry = *pte;
        if (pte_none(entry) || !pte_present(entry)) return;

        paddr = pte_val(entry) & PAGE_MASK;

        sh64_dcache_purge_coloured_phy_page(paddr, eaddr);
}

/****************************************************************************/

static void sh64_dcache_purge_user_range(struct mm_struct *mm,
                        unsigned long start, unsigned long end)
{
        /* There are at least 5 choices for the implementation of this, with
           pros (+), cons(-), comments(*):

           1. ocbp each line in the range through the original user's ASID
              + no lines spuriously evicted
              - tlbmiss handling (must either handle faults on demand => extra
                special-case code in tlbmiss critical path), or map the page in
                advance (=> flush_tlb_range in advance to avoid multiple hits)
              - ASID switching
              - expensive for large ranges

           2. temporarily map each page in the range to a special effective
              address and ocbp through the temporary mapping; relies on the
              fact that SH-5 OCB* always do TLB lookup and match on ptags (they
              never look at the etags)
              + no spurious evictions
              - expensive for large ranges
              * surely cheaper than (1)

           3. walk all the lines in the cache, check the tags, if a match
              occurs create a page mapping to ocbp the line through
              + no spurious evictions
              - tag inspection overhead
              - (especially for small ranges)
              - potential cost of setting up/tearing down page mapping for
                every line that matches the range
              * cost partly independent of range size

           4. walk all the lines in the cache, check the tags, if a match
              occurs use 4 * alloco to purge the line (+3 other probably
              innocent victims) by natural eviction
              + no tlb mapping overheads
              - spurious evictions
              - tag inspection overhead

           5. implement like flush_cache_all
              + no tag inspection overhead
              - spurious evictions
              - bad for small ranges

           (1) can be ruled out as more expensive than (2).  (2) appears best
           for small ranges.  The choice between (3), (4) and (5) for large
           ranges and the range size for the large/small boundary need
           benchmarking to determine.

           For now use approach (2) for small ranges and (5) for large ones.
           */

        int n_pages;

        n_pages = ((end - start) >> PAGE_SHIFT);
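        /* 64 pages is the same working figure used for the I-cache above;
           with 4 KB pages that puts the small/large boundary at 256 KB.
           Per the comment above, that value is assumed rather than
           benchmarked. */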
        if (n_pages >= 64) {
#if 1
                sh64_dcache_purge_all();
#else
                unsigned long long set, way;
                unsigned long mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
                for (set = 0; set < cpu_data->dcache.sets; set++) {
                        unsigned long long set_base_config_addr = CACHE_OC_ADDRESS_ARRAY + (set << cpu_data->dcache.set_shift);
                        for (way = 0; way < cpu_data->dcache.ways; way++) {
                                unsigned long long config_addr = set_base_config_addr + (way << cpu_data->dcache.way_step_shift);
                                unsigned long long tag0;
                                unsigned long line_valid;

                                asm __volatile__("getcfg %1, 0, %0" : "=r" (tag0) : "r" (config_addr));
                                line_valid = tag0 & SH_CACHE_VALID;
                                if (line_valid) {
                                        unsigned long cache_asid;
                                        unsigned long epn;

                                        cache_asid = (tag0 & cpu_data->dcache.asid_mask) >> cpu_data->dcache.asid_shift;
                                        /* The next line needs some
                                           explanation.  The virtual tags
                                           encode bits [31:13] of the virtual
                                           address, bit [12] of the 'tag' being
                                           implied by the cache set index. */
                                        epn = (tag0 & cpu_data->dcache.epn_mask) | ((set & 0x80) << cpu_data->dcache.entry_shift);

                                        if ((cache_asid == mm_asid) && (start <= epn) && (epn < end)) {
                                                /* TODO : could optimise this
                                                   call by batching multiple
                                                   adjacent sets together. */
                                                sh64_dcache_purge_sets(set, 1);
                                                break; /* Don't waste time inspecting other ways for this set */
                                        }
                                }
                        }
                }
#endif
        } else {
                /* 'Small' range */
                unsigned long aligned_start;
                unsigned long eaddr;
                unsigned long last_page_start;

                aligned_start = start & PAGE_MASK;
                /* 'end' is 1 byte beyond the end of the range */
                last_page_start = (end - 1) & PAGE_MASK;

                eaddr = aligned_start;
                while (eaddr <= last_page_start) {
                        sh64_dcache_purge_user_page(mm, eaddr);
                        eaddr += PAGE_SIZE;
                }
        }
        return;
}

static void sh64_dcache_wback_current_user_range(unsigned long start, unsigned long end)
{
        unsigned long long aligned_start;
        unsigned long long ull_end;
        unsigned long long addr;

        ull_end = end;

        /* Just wback over the range using the natural addresses.  TLB miss
           handling will be OK (TBC) : the range has just been written to by
           the signal frame setup code, so the PTEs must exist.

           Note, if we have CONFIG_PREEMPT and get preempted inside this loop,
           it doesn't matter, even if the pid->ASID mapping changes whilst
           we're away.  In that case the cache will have been flushed when the
           mapping was renewed.  So the writebacks below will be nugatory (and
           we'll doubtless have to fault the TLB entry/ies in again with the
           new ASID), but it's a rare case.
           */
        aligned_start = start & L1_CACHE_ALIGN_MASK;
        addr = aligned_start;
        while (addr < ull_end) {
                asm __volatile__ ("ocbwb %0, 0" : : "r" (addr));
                addr += L1_CACHE_BYTES;
        }
}

#endif /* !CONFIG_DCACHE_DISABLED */

/****************************************************************************/

/* These *MUST* lie in an area of virtual address space that's otherwise unused. */
#define UNIQUE_EADDR_START 0xe0000000UL
#define UNIQUE_EADDR_END   0xe8000000UL
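
/* That window is 0xe8000000 - 0xe0000000 = 128 MB.  Each call to
   sh64_make_unique_eaddr() below consumes PAGE_SIZE << CACHE_OC_N_SYNBITS
   bytes of it; with 4 KB pages and a single synonym bit (assumptions, not
   spelled out in this file) that is 8 KB per call, so the pointer wraps
   (forcing a full purge) after 16384 allocations. */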

static unsigned long sh64_make_unique_eaddr(unsigned long user_eaddr, unsigned long paddr)
{
        /* Given a physical address paddr, and a user virtual address
           user_eaddr which will eventually be mapped to it, create a one-off
           kernel-private eaddr mapped to the same paddr.  This is used for
           creating special destination pages for copy_user_page and
           clear_user_page */

        static unsigned long current_pointer = UNIQUE_EADDR_START;
        unsigned long coloured_pointer;

        if (current_pointer == UNIQUE_EADDR_END) {
                sh64_dcache_purge_all();
                current_pointer = UNIQUE_EADDR_START;
        }

        coloured_pointer = (current_pointer & ~CACHE_OC_SYN_MASK) | (user_eaddr & CACHE_OC_SYN_MASK);
        sh64_setup_dtlb_cache_slot(coloured_pointer, get_asid(), paddr);

        current_pointer += (PAGE_SIZE << CACHE_OC_N_SYNBITS);

        return coloured_pointer;
}

/****************************************************************************/

static void sh64_copy_user_page_coloured(void *to, void *from, unsigned long address)
{
        void *coloured_to;

        /* Discard any existing cache entries of the wrong colour.  These are
           present quite often, if the kernel has recently used the page
           internally, then given it up, then it's been allocated to the user.
           */
        sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);

        coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
        sh64_page_copy(from, coloured_to);

        sh64_teardown_dtlb_cache_slot();
}

static void sh64_clear_user_page_coloured(void *to, unsigned long address)
{
        void *coloured_to;

        /* Discard any existing kernel-originated lines of the wrong colour (as
           above) */
        sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);

        coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
        sh64_page_clear(coloured_to);

        sh64_teardown_dtlb_cache_slot();
}

/****************************************************************************/

/*##########################################################################
                          EXTERNALLY CALLABLE API.
  ##########################################################################*/

/* These functions are described in Documentation/cachetlb.txt.
   Each one of these functions varies in behaviour depending on whether the
   I-cache and/or D-cache are configured out.

   Note that the Linux term 'flush' corresponds to what is termed 'purge' in
   the sh/sh64 jargon for the D-cache, i.e. write back dirty data then
   invalidate the cache lines, and 'invalidate' for the I-cache.
   */

#undef FLUSH_TRACE

void flush_cache_all(void)
{
        /* Invalidate the entire contents of both caches, after writing back to
           memory any dirty data from the D-cache. */
        sh64_dcache_purge_all();
        sh64_icache_inv_all();
}

/****************************************************************************/

void flush_cache_mm(struct mm_struct *mm)
{
        /* Invalidate an entire user-address space from both caches, after
           writing back dirty data (e.g. for shared mmap etc). */

        /* This could be coded selectively by inspecting all the tags then
           doing 4*alloco on any set containing a match (as for
           flush_cache_range), but fork/exit/execve (where this is called from)
           are expensive anyway. */

        /* Have to do a purge here, despite the comments re I-cache below.
           There could be odd-coloured dirty data associated with the mm still
           in the cache - if this gets written out through natural eviction
           after the kernel has reused the page there will be chaos.
           */

        sh64_dcache_purge_all();

        /* The mm being torn down won't ever be active again, so any Icache
           lines tagged with its ASID won't be visible for the rest of the
           lifetime of this ASID cycle.  Before the ASID gets reused, there
           will be a flush_cache_all.  Hence we don't need to touch the
           I-cache.  This is similar to the lack of action needed in
           flush_tlb_mm - see fault.c. */
}

/****************************************************************************/

void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
                       unsigned long end)
{
        struct mm_struct *mm = vma->vm_mm;

        /* Invalidate (from both caches) the range [start,end) of virtual
           addresses from the user address space specified by mm, after writing
           back any dirty data.

           Note(1), 'end' is 1 byte beyond the end of the range to flush.

           Note(2), this is called with mm->page_table_lock held.*/

        sh64_dcache_purge_user_range(mm, start, end);
        sh64_icache_inv_user_page_range(mm, start, end);
}

/****************************************************************************/

void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr)
{
        /* Invalidate any entries in either cache for the vma within the user
           address space vma->vm_mm for the page starting at virtual address
           'eaddr'.  This seems to be used primarily in breaking COW.  Note,
           the I-cache must be searched too in case the page in question is
           both writable and being executed from (e.g. stack trampolines.)

           Note(1), this is called with mm->page_table_lock held.
           */

        sh64_dcache_purge_virt_page(vma->vm_mm, eaddr);

        if (vma->vm_flags & VM_EXEC) {
                sh64_icache_inv_user_page(vma, eaddr);
        }
}
927 #ifndef CONFIG_DCACHE_DISABLED
929 void copy_user_page(void *to, void *from, unsigned long address, struct page *page)
931 /* 'from' and 'to' are kernel virtual addresses (within the superpage
932 mapping of the physical RAM). 'address' is the user virtual address
933 where the copy 'to' will be mapped after. This allows a custom
934 mapping to be used to ensure that the new copy is placed in the
935 right cache sets for the user to see it without having to bounce it
936 out via memory. Note however : the call to flush_page_to_ram in
937 (generic)/mm/memory.c:(break_cow) undoes all this good work in that one
938 very important case!
940 TBD : can we guarantee that on every call, any cache entries for
941 'from' are in the same colour sets as 'address' also? i.e. is this
942 always used just to deal with COW? (I suspect not). */
944 /* There are two possibilities here for when the page 'from' was last accessed:
945 * by the kernel : this is OK, no purge required.
946 * by the/a user (e.g. for break_COW) : need to purge.
948 If the potential user mapping at 'address' is the same colour as
949 'from' there is no need to purge any cache lines from the 'from'
950 page mapped into cache sets of colour 'address'. (The copy will be
951 accessing the page through 'from').
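        /* The checks below compare only the synonym ("colour") bits of the
           two addresses.  Assuming 4 KB pages and the 8 KB cache way implied
           by sh64_cache_init() above, CACHE_OC_SYN_MASK amounts to bit 12, so
           two mappings share cache sets exactly when they agree in that bit. */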
        if (((address ^ (unsigned long) from) & CACHE_OC_SYN_MASK) != 0) {
                sh64_dcache_purge_coloured_phy_page(__pa(from), address);
        }

        if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
                /* No synonym problem on destination */
                sh64_page_copy(from, to);
        } else {
                sh64_copy_user_page_coloured(to, from, address);
        }

        /* Note, don't need to flush 'from' page from the cache again - it's
           done anyway by the generic code */
}

void clear_user_page(void *to, unsigned long address, struct page *page)
{
        /* 'to' is a kernel virtual address (within the superpage
           mapping of the physical RAM).  'address' is the user virtual address
           where the 'to' page will be mapped after.  This allows a custom
           mapping to be used to ensure that the new copy is placed in the
           right cache sets for the user to see it without having to bounce it
           out via memory.
           */

        if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
                /* No synonym problem on destination */
                sh64_page_clear(to);
        } else {
                sh64_clear_user_page_coloured(to, address);
        }
}

#endif /* !CONFIG_DCACHE_DISABLED */

/****************************************************************************/

void flush_dcache_page(struct page *page)
{
        sh64_dcache_purge_phy_page(page_to_phys(page));
        wmb();
}

/****************************************************************************/

void flush_icache_range(unsigned long start, unsigned long end)
{
        /* Flush the range [start,end] of kernel virtual address space from
           the I-cache.  The corresponding range must be purged from the
           D-cache also because the SH-5 doesn't have cache snooping between
           the caches.  The addresses will be visible through the superpage
           mapping, therefore it's guaranteed that there are no cache entries
           for the range in cache sets of the wrong colour.

           Primarily used for cohering the I-cache after a module has
           been loaded. */

        /* We also make sure to purge the same range from the D-cache since
           flush_page_to_ram() won't be doing this for us! */

        sh64_dcache_purge_kernel_range(start, end);
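        /* The barrier below orders the OCBPs issued above against the ICBIs
           that follow, so instruction fetches performed after the I-cache
           invalidation see the freshly written-back text. */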
        wmb();
        sh64_icache_inv_kernel_range(start, end);
}

/****************************************************************************/

void flush_icache_user_range(struct vm_area_struct *vma,
                        struct page *page, unsigned long addr, int len)
{
        /* Flush the range of user (defined by vma->vm_mm) address space
           starting at 'addr' for 'len' bytes from the cache.  The range does
           not straddle a page boundary, the unique physical page containing
           the range is 'page'.  This seems to be used mainly for invalidating
           an address range following a poke into the program text through the
           ptrace() call from another process (e.g. for BRK instruction
           insertion). */

        sh64_dcache_purge_coloured_phy_page(page_to_phys(page), addr);
        mb();

        if (vma->vm_flags & VM_EXEC) {
                sh64_icache_inv_user_small_range(vma->vm_mm, addr, len);
        }
}

/*##########################################################################
                        ARCH/SH64 PRIVATE CALLABLE API.
  ##########################################################################*/

void flush_cache_sigtramp(unsigned long start, unsigned long end)
{
        /* For the address range [start,end), write back the data from the
           D-cache and invalidate the corresponding region of the I-cache for
           the current process.  Used to flush signal trampolines on the stack
           to make them executable. */

        sh64_dcache_wback_current_user_range(start, end);
        wmb();
        sh64_icache_inv_current_user_range(start, end);
}