/**************************************************************************
 * Copyright (c) 2007, Intel Corporation.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 **************************************************************************/
#include <drm/drmP.h>
#include "psb_drv.h"
#include "psb_reg.h"
/*
 * Code for the SGX MMU:
 */

/*
 * clflush on one processor only:
 * clflush should apparently flush the cache line on all processors in an
 * SMP system.
 */

/*
 * kmap atomic:
 * The usage of the slots must be completely encapsulated within a spinlock, and
 * no other functions that may be using the locks for other purposes may be
 * called from within the locked region.
 * Since the slots are per processor, this will guarantee that we are the only
 * user.
 */
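/*
 * Illustrative sketch of that locking pattern, kept disabled; the real
 * users are psb_mmu_pt_alloc_map_lock() and friends below. The helper
 * name is hypothetical and exists only to show the required bracketing.
 */
#if 0
static void psb_mmu_slot_usage_example(struct psb_mmu_pd *pd)
{
        uint32_t *v;

        spin_lock(&pd->driver->lock);   /* serializes the per-CPU kmap slot */
        v = kmap_atomic(pd->p, KM_USER0);
        /* ... touch page directory entries here; no sleeping, no nested
         * KM_USER0 use, no calls that might take pd->driver->lock ... */
        kunmap_atomic(v, KM_USER0);
        spin_unlock(&pd->driver->lock);
}
#endif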
/*
 * TODO: Inserting ptes from an interrupt handler:
 * This may be desirable for some SGX functionality where the GPU can fault in
 * needed pages. For that, we need to make an atomic insert_pages function, that
 * may fail.
 * If it fails, the caller needs to insert the page using a workqueue function,
 * but on average it should be fast.
 */
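/*
 * A minimal sketch of what such an atomic variant could look like, kept
 * disabled. psb_mmu_insert_page_atomic() is hypothetical and not part of
 * this driver: it refuses to allocate a missing page table (allocation
 * could sleep) and returns -EBUSY so the caller can fall back to a
 * workqueue. It also ignores the irq-safety of the spinlock and of the
 * driver semaphore, which a real version would have to address.
 */
#if 0
static int psb_mmu_insert_page_atomic(struct psb_mmu_pd *pd,
                                      struct page *page,
                                      unsigned long addr, int type)
{
        struct psb_mmu_pt *pt;

        pt = psb_mmu_pt_map_lock(pd, addr); /* fails instead of allocating */
        if (!pt)
                return -EBUSY;
        psb_mmu_set_pte(pt, addr, psb_mmu_mask_pte(page_to_pfn(page), type));
        pt->count++;
        psb_mmu_pt_unmap_unlock(pt);
        return 0;
}
#endif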
struct psb_mmu_driver {
        /* protects driver- and pd structures. Always take in read mode
         * before taking the page table spinlock.
         */
        struct rw_semaphore sem;

        /* protects page tables, directory tables and pt tables.
         * Also pt update operations.
         */
        spinlock_t lock;

        atomic_t needs_tlbflush;

        uint8_t __iomem *register_map;
        struct psb_mmu_pd *default_pd;
        /*uint32_t bif_ctrl;*/
        int has_clflush;
        int clflush_add;
        unsigned long clflush_mask;

        struct drm_psb_private *dev_priv;
};
struct psb_mmu_pt {
        struct psb_mmu_pd *pd;
        uint32_t index;
        uint32_t count;
        struct page *p;
        uint32_t *v;
};

struct psb_mmu_pd {
        struct psb_mmu_driver *driver;
        int hw_context;
        struct psb_mmu_pt **tables;
        struct page *p;
        struct page *dummy_pt;
        struct page *dummy_page;
        uint32_t pd_mask;
        uint32_t invalid_pde;
        uint32_t invalid_pte;
};
static inline uint32_t psb_mmu_pt_index(uint32_t offset)
{
        return (offset >> PSB_PTE_SHIFT) & 0x3FF;
}
static inline uint32_t psb_mmu_pd_index(uint32_t offset)
{
        return offset >> PSB_PDE_SHIFT;
}
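/*
 * Worked example, assuming the usual two-level split with PSB_PDE_SHIFT
 * of 22 and PSB_PTE_SHIFT of 12 (4kB pages, 1024-entry tables): for GPU
 * offset 0x00c03000, psb_mmu_pd_index() gives 0x00c03000 >> 22 = 3 and
 * psb_mmu_pt_index() gives (0x00c03000 >> 12) & 0x3FF = 3, i.e. PDE
 * index 3 and PTE index 3 within that page table.
 */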
static inline void psb_clflush(void *addr)
{
        __asm__ __volatile__("clflush (%0)\n" : : "r"(addr) : "memory");
}
static inline void psb_mmu_clflush(struct psb_mmu_driver *driver,
                                   void *addr)
{
        if (!driver->has_clflush)
                return;

        mb();
        psb_clflush(addr);
        mb();
}
static void psb_page_clflush(struct psb_mmu_driver *driver,
                             struct page *page)
{
        uint32_t clflush_add = driver->clflush_add >> PAGE_SHIFT;
        uint32_t clflush_count = PAGE_SIZE / clflush_add;
        int i;
        uint8_t *clf;

        clf = kmap_atomic(page, KM_USER0);
        mb();
        for (i = 0; i < clflush_count; ++i) {
                psb_clflush(clf);
                clf += clflush_add;
        }
        mb();
        kunmap_atomic(clf, KM_USER0);
}
static void psb_pages_clflush(struct psb_mmu_driver *driver,
                              struct page *page[], unsigned long num_pages)
{
        int i;

        if (!driver->has_clflush)
                return;

        for (i = 0; i < num_pages; i++)
                psb_page_clflush(driver, *page++);
}
static void psb_mmu_flush_pd_locked(struct psb_mmu_driver *driver,
                                    int force)
{
        atomic_set(&driver->needs_tlbflush, 0);
}
static void psb_mmu_flush_pd(struct psb_mmu_driver *driver, int force)
{
        down_write(&driver->sem);
        psb_mmu_flush_pd_locked(driver, force);
        up_write(&driver->sem);
}
void psb_mmu_flush(struct psb_mmu_driver *driver, int rc_prot)
{
        if (rc_prot)
                down_write(&driver->sem);
        if (rc_prot)
                up_write(&driver->sem);
}
void psb_mmu_set_pd_context(struct psb_mmu_pd *pd, int hw_context)
{
        /*ttm_tt_cache_flush(&pd->p, 1);*/
        psb_pages_clflush(pd->driver, &pd->p, 1);
        down_write(&pd->driver->sem);
        wmb();
        psb_mmu_flush_pd_locked(pd->driver, 1);
        pd->hw_context = hw_context;
        up_write(&pd->driver->sem);
}
static inline unsigned long psb_pd_addr_end(unsigned long addr,
                                            unsigned long end)
{
        addr = (addr + PSB_PDE_MASK + 1) & ~PSB_PDE_MASK;
        return (addr < end) ? addr : end;
}
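/*
 * Worked example, assuming PSB_PDE_MASK == 0x3fffff (one PDE covers 4MB):
 * psb_pd_addr_end(0x00401000, 0x01000000) returns 0x00800000, the next
 * PDE boundary, while psb_pd_addr_end(0x00c01000, 0x00e00000) returns
 * the clamped end 0x00e00000. The pte walkers below rely on this to
 * visit exactly one page table per loop iteration.
 */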
static inline uint32_t psb_mmu_mask_pte(uint32_t pfn, int type)
{
        uint32_t mask = PSB_PTE_VALID;

        if (type & PSB_MMU_CACHED_MEMORY)
                mask |= PSB_PTE_CACHED;
        if (type & PSB_MMU_RO_MEMORY)
                mask |= PSB_PTE_RO;
        if (type & PSB_MMU_WO_MEMORY)
                mask |= PSB_PTE_WO;

        return (pfn << PAGE_SHIFT) | mask;
}
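/*
 * Example: psb_mmu_mask_pte(0x1234, PSB_MMU_CACHED_MEMORY) yields
 * (0x1234 << PAGE_SHIFT) | PSB_PTE_VALID | PSB_PTE_CACHED, i.e.
 * 0x01234000 plus the flag bits, assuming the usual 4kB PAGE_SHIFT
 * of 12.
 */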
struct psb_mmu_pd *psb_mmu_alloc_pd(struct psb_mmu_driver *driver,
                                    int trap_pagefaults, int invalid_type)
{
        struct psb_mmu_pd *pd = kmalloc(sizeof(*pd), GFP_KERNEL);
        uint32_t *v;
        int i;

        if (!pd)
                return NULL;

        pd->p = alloc_page(GFP_DMA32);
        if (!pd->p)
                goto out_err1;
        pd->dummy_pt = alloc_page(GFP_DMA32);
        if (!pd->dummy_pt)
                goto out_err2;
        pd->dummy_page = alloc_page(GFP_DMA32);
        if (!pd->dummy_page)
                goto out_err3;

        if (!trap_pagefaults) {
                pd->invalid_pde =
                    psb_mmu_mask_pte(page_to_pfn(pd->dummy_pt),
                                     invalid_type);
                pd->invalid_pte =
                    psb_mmu_mask_pte(page_to_pfn(pd->dummy_page),
                                     invalid_type);
        } else {
                pd->invalid_pde = 0;
                pd->invalid_pte = 0;
        }

        v = kmap(pd->dummy_pt);
        for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i)
                v[i] = pd->invalid_pte;

        kunmap(pd->dummy_pt);

        v = kmap(pd->p);
        for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i)
                v[i] = pd->invalid_pde;

        kunmap(pd->p);

        clear_page(kmap(pd->dummy_page));
        kunmap(pd->dummy_page);

        pd->tables = vmalloc_user(sizeof(struct psb_mmu_pt *) * 1024);
        if (!pd->tables)
                goto out_err4;

        pd->hw_context = -1;
        pd->pd_mask = PSB_PTE_VALID;
        pd->driver = driver;

        return pd;

out_err4:
        __free_page(pd->dummy_page);
out_err3:
        __free_page(pd->dummy_pt);
out_err2:
        __free_page(pd->p);
out_err1:
        kfree(pd);
        return NULL;
}
void psb_mmu_free_pt(struct psb_mmu_pt *pt)
{
        __free_page(pt->p);
        kfree(pt);
}
void psb_mmu_free_pagedir(struct psb_mmu_pd *pd)
{
        struct psb_mmu_driver *driver = pd->driver;
        struct psb_mmu_pt *pt;
        int i;

        down_write(&driver->sem);
        if (pd->hw_context != -1)
                psb_mmu_flush_pd_locked(driver, 1);

        /* Should take the spinlock here, but we don't need to do that
           since we have the semaphore in write mode. */

        for (i = 0; i < 1024; ++i) {
                pt = pd->tables[i];
                if (pt)
                        psb_mmu_free_pt(pt);
        }

        vfree(pd->tables);
        __free_page(pd->dummy_page);
        __free_page(pd->dummy_pt);
        __free_page(pd->p);
        kfree(pd);
        up_write(&driver->sem);
}
static struct psb_mmu_pt *psb_mmu_alloc_pt(struct psb_mmu_pd *pd)
{
        struct psb_mmu_pt *pt = kmalloc(sizeof(*pt), GFP_KERNEL);
        void *v;
        uint32_t clflush_add = pd->driver->clflush_add >> PAGE_SHIFT;
        uint32_t clflush_count = PAGE_SIZE / clflush_add;
        spinlock_t *lock = &pd->driver->lock;
        uint8_t *clf;
        uint32_t *ptes;
        int i;

        if (!pt)
                return NULL;

        pt->p = alloc_page(GFP_DMA32);
        if (!pt->p) {
                kfree(pt);
                return NULL;
        }

        spin_lock(lock);

        v = kmap_atomic(pt->p, KM_USER0);
        clf = (uint8_t *) v;
        ptes = (uint32_t *) v;
        for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i)
                *ptes++ = pd->invalid_pte;

        if (pd->driver->has_clflush && pd->hw_context != -1) {
                mb();
                for (i = 0; i < clflush_count; ++i) {
                        psb_clflush(clf);
                        clf += clflush_add;
                }
                mb();
        }

        kunmap_atomic(v, KM_USER0);
        spin_unlock(lock);

        pt->count = 0;
        pt->pd = pd;
        pt->index = 0;

        return pt;
}
struct psb_mmu_pt *psb_mmu_pt_alloc_map_lock(struct psb_mmu_pd *pd,
                                             unsigned long addr)
{
        uint32_t index = psb_mmu_pd_index(addr);
        struct psb_mmu_pt *pt;
        uint32_t *v;
        spinlock_t *lock = &pd->driver->lock;

        spin_lock(lock);
        pt = pd->tables[index];
        while (!pt) {
                spin_unlock(lock);
                pt = psb_mmu_alloc_pt(pd);
                if (!pt)
                        return NULL;
                spin_lock(lock);

                if (pd->tables[index]) {
                        spin_unlock(lock);
                        psb_mmu_free_pt(pt);
                        spin_lock(lock);
                        pt = pd->tables[index];
                        continue;
                }

                v = kmap_atomic(pd->p, KM_USER0);
                pd->tables[index] = pt;
                v[index] = (page_to_pfn(pt->p) << 12) | pd->pd_mask;
                pt->index = index;
                kunmap_atomic((void *) v, KM_USER0);

                if (pd->hw_context != -1) {
                        psb_mmu_clflush(pd->driver, (void *) &v[index]);
                        atomic_set(&pd->driver->needs_tlbflush, 1);
                }
        }
        pt->v = kmap_atomic(pt->p, KM_USER0);
        return pt;
}
static struct psb_mmu_pt *psb_mmu_pt_map_lock(struct psb_mmu_pd *pd,
                                              unsigned long addr)
{
        uint32_t index = psb_mmu_pd_index(addr);
        struct psb_mmu_pt *pt;
        spinlock_t *lock = &pd->driver->lock;

        spin_lock(lock);
        pt = pd->tables[index];
        if (!pt) {
                spin_unlock(lock);
                return NULL;
        }
        pt->v = kmap_atomic(pt->p, KM_USER0);
        return pt;
}
static void psb_mmu_pt_unmap_unlock(struct psb_mmu_pt *pt)
{
        struct psb_mmu_pd *pd = pt->pd;
        uint32_t *v;

        kunmap_atomic(pt->v, KM_USER0);
        if (pt->count == 0) {
                v = kmap_atomic(pd->p, KM_USER0);
                v[pt->index] = pd->invalid_pde;
                pd->tables[pt->index] = NULL;

                if (pd->hw_context != -1) {
                        psb_mmu_clflush(pd->driver,
                                        (void *) &v[pt->index]);
                        atomic_set(&pd->driver->needs_tlbflush, 1);
                }
                kunmap_atomic(v, KM_USER0);
                spin_unlock(&pd->driver->lock);
                psb_mmu_free_pt(pt);
                return;
        }
        spin_unlock(&pd->driver->lock);
}
static inline void psb_mmu_set_pte(struct psb_mmu_pt *pt,
                                   unsigned long addr, uint32_t pte)
{
        pt->v[psb_mmu_pt_index(addr)] = pte;
}
static inline void psb_mmu_invalidate_pte(struct psb_mmu_pt *pt,
                                          unsigned long addr)
{
        pt->v[psb_mmu_pt_index(addr)] = pt->pd->invalid_pte;
}
static uint32_t psb_mmu_check_pte_locked(struct psb_mmu_pd *pd,
                                         uint32_t mmu_offset)
{
        uint32_t *v;
        uint32_t pfn;

        v = kmap_atomic(pd->p, KM_USER0);
        if (!v) {
                printk(KERN_INFO "Could not kmap pde page.\n");
                return 0;
        }
        pfn = v[psb_mmu_pd_index(mmu_offset)];
        /* printk(KERN_INFO "pde is 0x%08x\n",pfn); */
        kunmap_atomic(v, KM_USER0);
        if (((pfn & 0x0F) != PSB_PTE_VALID)) {
                printk(KERN_INFO "Strange pde at 0x%08x: 0x%08x.\n",
                       mmu_offset, pfn);
        }
        v = ioremap(pfn & 0xFFFFF000, 4096);
        if (!v) {
                printk(KERN_INFO "Could not kmap pte page.\n");
                return 0;
        }
        pfn = v[psb_mmu_pt_index(mmu_offset)];
        /* printk(KERN_INFO "pte is 0x%08x\n",pfn); */
        iounmap(v);
        if (((pfn & 0x0F) != PSB_PTE_VALID)) {
                printk(KERN_INFO "Strange pte at 0x%08x: 0x%08x.\n",
                       mmu_offset, pfn);
        }
        return pfn >> PAGE_SHIFT;
}
static void psb_mmu_check_mirrored_gtt(struct psb_mmu_pd *pd,
                                       uint32_t mmu_offset,
                                       uint32_t gtt_pages)
{
        uint32_t start;
        uint32_t next;

        printk(KERN_INFO "Checking mirrored gtt 0x%08x %d\n",
               mmu_offset, gtt_pages);
        down_read(&pd->driver->sem);
        start = psb_mmu_check_pte_locked(pd, mmu_offset);
        mmu_offset += PAGE_SIZE;
        gtt_pages -= 1;
        while (gtt_pages--) {
                next = psb_mmu_check_pte_locked(pd, mmu_offset);
                if (next != start + 1) {
                        printk(KERN_INFO
                               "Ptes out of order: 0x%08x, 0x%08x.\n",
                               start, next);
                }
                start = next;
                mmu_offset += PAGE_SIZE;
        }
        up_read(&pd->driver->sem);
}
void psb_mmu_mirror_gtt(struct psb_mmu_pd *pd,
                        uint32_t mmu_offset, uint32_t gtt_start,
                        uint32_t gtt_pages)
{
        uint32_t *v;
        uint32_t start = psb_mmu_pd_index(mmu_offset);
        struct psb_mmu_driver *driver = pd->driver;
        int num_pages = gtt_pages;

        down_read(&driver->sem);
        spin_lock(&driver->lock);

        v = kmap_atomic(pd->p, KM_USER0);
        v += start;

        while (gtt_pages--) {
                *v++ = gtt_start | pd->pd_mask;
                gtt_start += PAGE_SIZE;
        }

        /*ttm_tt_cache_flush(&pd->p, num_pages);*/
        psb_pages_clflush(pd->driver, &pd->p, num_pages);
        kunmap_atomic(v, KM_USER0);
        spin_unlock(&driver->lock);

        if (pd->hw_context != -1)
                atomic_set(&pd->driver->needs_tlbflush, 1);

        up_read(&pd->driver->sem);
        psb_mmu_flush_pd(pd->driver, 0);
}
struct psb_mmu_pd *psb_mmu_get_default_pd(struct psb_mmu_driver *driver)
{
        struct psb_mmu_pd *pd;

        /* down_read(&driver->sem); */
        pd = driver->default_pd;
        /* up_read(&driver->sem); */

        return pd;
}
/* Returns the physical address of the PD shared by sgx/msvdx */
uint32_t psb_get_default_pd_addr(struct psb_mmu_driver *driver)
{
        struct psb_mmu_pd *pd;

        pd = psb_mmu_get_default_pd(driver);
        return page_to_pfn(pd->p) << PAGE_SHIFT;
}
void psb_mmu_driver_takedown(struct psb_mmu_driver *driver)
{
        psb_mmu_free_pagedir(driver->default_pd);
        kfree(driver);
}
struct psb_mmu_driver *psb_mmu_driver_init(uint8_t __iomem *registers,
                                           int trap_pagefaults,
                                           int invalid_type,
                                           struct drm_psb_private *dev_priv)
{
        struct psb_mmu_driver *driver;

        driver = kmalloc(sizeof(*driver), GFP_KERNEL);
        if (!driver)
                return NULL;

        driver->dev_priv = dev_priv;

        driver->default_pd = psb_mmu_alloc_pd(driver, trap_pagefaults,
                                              invalid_type);
        if (!driver->default_pd)
                goto out_err1;

        spin_lock_init(&driver->lock);
        init_rwsem(&driver->sem);
        down_write(&driver->sem);
        driver->register_map = registers;
        atomic_set(&driver->needs_tlbflush, 1);

        driver->has_clflush = 0;

        if (boot_cpu_has(X86_FEATURE_CLFLSH)) {
                uint32_t tfms, misc, cap0, cap4, clflush_size;

                /*
                 * clflush size is determined at kernel setup for x86_64
                 * but not for i386. We have to do it here.
                 */

                cpuid(0x00000001, &tfms, &misc, &cap0, &cap4);
                clflush_size = ((misc >> 8) & 0xff) * 8;
                driver->has_clflush = 1;
                driver->clflush_add =
                    PAGE_SIZE * clflush_size / sizeof(uint32_t);
                driver->clflush_mask = driver->clflush_add - 1;
                driver->clflush_mask = ~driver->clflush_mask;
        }

        up_write(&driver->sem);
        return driver;

out_err1:
        kfree(driver);
        return NULL;
}
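/*
 * Typical init/takedown pairing, as an illustrative disabled sketch only;
 * the register mapping and dev_priv plumbing shown here (dev_priv->sgx_reg)
 * are placeholders for whatever the caller already holds, and error
 * handling is elided:
 */
#if 0
        driver = psb_mmu_driver_init(dev_priv->sgx_reg, trap_pagefaults,
                                     invalid_type, dev_priv);
        if (!driver)
                return -ENOMEM;
        pd = psb_mmu_get_default_pd(driver);
        /* ... psb_mmu_insert_pages()/psb_mmu_remove_pages() on pd ... */
        psb_mmu_driver_takedown(driver);
#endif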
static void psb_mmu_flush_ptes(struct psb_mmu_pd *pd,
                               unsigned long address, uint32_t num_pages,
                               uint32_t desired_tile_stride,
                               uint32_t hw_tile_stride)
{
        struct psb_mmu_pt *pt;
        unsigned long rows = 1;
        unsigned long i;
        unsigned long addr;
        unsigned long end;
        unsigned long next;
        unsigned long add;
        unsigned long row_add;
        unsigned long clflush_add = pd->driver->clflush_add;
        unsigned long clflush_mask = pd->driver->clflush_mask;

        if (!pd->driver->has_clflush) {
                /*ttm_tt_cache_flush(&pd->p, num_pages);*/
                psb_pages_clflush(pd->driver, &pd->p, num_pages);
                return;
        }

        if (hw_tile_stride)
                rows = num_pages / desired_tile_stride;
        else
                desired_tile_stride = num_pages;

        add = desired_tile_stride << PAGE_SHIFT;
        row_add = hw_tile_stride << PAGE_SHIFT;
        mb();
        for (i = 0; i < rows; ++i) {

                addr = address;
                end = addr + add;

                do {
                        next = psb_pd_addr_end(addr, end);
                        pt = psb_mmu_pt_map_lock(pd, addr);
                        if (!pt)
                                continue;
                        do {
                                psb_clflush(&pt->v
                                            [psb_mmu_pt_index(addr)]);
                        } while (addr += clflush_add,
                                 (addr & clflush_mask) < next);

                        psb_mmu_pt_unmap_unlock(pt);
                } while (addr = next, next != end);

                address += row_add;
        }
        mb();
}
void psb_mmu_remove_pfn_sequence(struct psb_mmu_pd *pd,
                                 unsigned long address, uint32_t num_pages)
{
        struct psb_mmu_pt *pt;
        unsigned long addr;
        unsigned long end;
        unsigned long next;
        unsigned long f_address = address;

        down_read(&pd->driver->sem);

        addr = address;
        end = addr + (num_pages << PAGE_SHIFT);

        do {
                next = psb_pd_addr_end(addr, end);
                pt = psb_mmu_pt_alloc_map_lock(pd, addr);
                if (!pt)
                        goto out;
                do {
                        psb_mmu_invalidate_pte(pt, addr);
                        --pt->count;
                } while (addr += PAGE_SIZE, addr < next);
                psb_mmu_pt_unmap_unlock(pt);

        } while (addr = next, next != end);

out:
        if (pd->hw_context != -1)
                psb_mmu_flush_ptes(pd, f_address, num_pages, 1, 1);

        up_read(&pd->driver->sem);

        if (pd->hw_context != -1)
                psb_mmu_flush(pd->driver, 0);

        return;
}
void psb_mmu_remove_pages(struct psb_mmu_pd *pd, unsigned long address,
                          uint32_t num_pages, uint32_t desired_tile_stride,
                          uint32_t hw_tile_stride)
{
        struct psb_mmu_pt *pt;
        unsigned long rows = 1;
        unsigned long i;
        unsigned long addr;
        unsigned long end;
        unsigned long next;
        unsigned long add;
        unsigned long row_add;
        unsigned long f_address = address;

        if (hw_tile_stride)
                rows = num_pages / desired_tile_stride;
        else
                desired_tile_stride = num_pages;

        add = desired_tile_stride << PAGE_SHIFT;
        row_add = hw_tile_stride << PAGE_SHIFT;

        /* down_read(&pd->driver->sem); */

        /* Make sure we only need to flush this processor's cache */

        for (i = 0; i < rows; ++i) {

                addr = address;
                end = addr + add;

                do {
                        next = psb_pd_addr_end(addr, end);
                        pt = psb_mmu_pt_map_lock(pd, addr);
                        if (!pt)
                                continue;
                        do {
                                psb_mmu_invalidate_pte(pt, addr);
                                --pt->count;
                        } while (addr += PAGE_SIZE, addr < next);
                        psb_mmu_pt_unmap_unlock(pt);

                } while (addr = next, next != end);

                address += row_add;
        }
        if (pd->hw_context != -1)
                psb_mmu_flush_ptes(pd, f_address, num_pages,
                                   desired_tile_stride, hw_tile_stride);

        /* up_read(&pd->driver->sem); */

        if (pd->hw_context != -1)
                psb_mmu_flush(pd->driver, 0);
}
int psb_mmu_insert_pfn_sequence(struct psb_mmu_pd *pd, uint32_t start_pfn,
                                unsigned long address, uint32_t num_pages,
                                int type)
{
        struct psb_mmu_pt *pt;
        uint32_t pte;
        unsigned long addr;
        unsigned long end;
        unsigned long next;
        unsigned long f_address = address;
        int ret = 0;

        down_read(&pd->driver->sem);

        addr = address;
        end = addr + (num_pages << PAGE_SHIFT);

        do {
                next = psb_pd_addr_end(addr, end);
                pt = psb_mmu_pt_alloc_map_lock(pd, addr);
                if (!pt) {
                        ret = -ENOMEM;
                        goto out;
                }
                do {
                        pte = psb_mmu_mask_pte(start_pfn++, type);
                        psb_mmu_set_pte(pt, addr, pte);
                        pt->count++;
                } while (addr += PAGE_SIZE, addr < next);
                psb_mmu_pt_unmap_unlock(pt);

        } while (addr = next, next != end);

out:
        if (pd->hw_context != -1)
                psb_mmu_flush_ptes(pd, f_address, num_pages, 1, 1);

        up_read(&pd->driver->sem);

        if (pd->hw_context != -1)
                psb_mmu_flush(pd->driver, 1);

        return ret;
}
int psb_mmu_insert_pages(struct psb_mmu_pd *pd, struct page **pages,
                         unsigned long address, uint32_t num_pages,
                         uint32_t desired_tile_stride,
                         uint32_t hw_tile_stride, int type)
{
        struct psb_mmu_pt *pt;
        uint32_t rows = 1;
        uint32_t i;
        uint32_t pte;
        unsigned long addr;
        unsigned long end;
        unsigned long next;
        unsigned long add;
        unsigned long row_add;
        unsigned long f_address = address;
        int ret = 0;

        if (hw_tile_stride) {
                if (num_pages % desired_tile_stride != 0)
                        return -EINVAL;
                rows = num_pages / desired_tile_stride;
        } else {
                desired_tile_stride = num_pages;
        }

        add = desired_tile_stride << PAGE_SHIFT;
        row_add = hw_tile_stride << PAGE_SHIFT;

        down_read(&pd->driver->sem);

        for (i = 0; i < rows; ++i) {

                addr = address;
                end = addr + add;

                do {
                        next = psb_pd_addr_end(addr, end);
                        pt = psb_mmu_pt_alloc_map_lock(pd, addr);
                        if (!pt) {
                                ret = -ENOMEM;
                                goto out;
                        }
                        do {
                                pte =
                                    psb_mmu_mask_pte(page_to_pfn(*pages++),
                                                     type);
                                psb_mmu_set_pte(pt, addr, pte);
                                pt->count++;
                        } while (addr += PAGE_SIZE, addr < next);
                        psb_mmu_pt_unmap_unlock(pt);

                } while (addr = next, next != end);

                address += row_add;
        }
out:
        if (pd->hw_context != -1)
                psb_mmu_flush_ptes(pd, f_address, num_pages,
                                   desired_tile_stride, hw_tile_stride);

        up_read(&pd->driver->sem);

        if (pd->hw_context != -1)
                psb_mmu_flush(pd->driver, 1);

        return ret;
}
int psb_mmu_virtual_to_pfn(struct psb_mmu_pd *pd, uint32_t virtual,
                           unsigned long *pfn)
{
        int ret;
        struct psb_mmu_pt *pt;
        uint32_t tmp;
        spinlock_t *lock = &pd->driver->lock;

        down_read(&pd->driver->sem);
        pt = psb_mmu_pt_map_lock(pd, virtual);
        if (!pt) {
                uint32_t *v;

                spin_lock(lock);
                v = kmap_atomic(pd->p, KM_USER0);
                tmp = v[psb_mmu_pd_index(virtual)];
                kunmap_atomic(v, KM_USER0);
                spin_unlock(lock);

                if (tmp != pd->invalid_pde || !(tmp & PSB_PTE_VALID) ||
                    !(pd->invalid_pte & PSB_PTE_VALID)) {
                        ret = -EINVAL;
                        goto out;
                }
                ret = 0;
                *pfn = pd->invalid_pte >> PAGE_SHIFT;
                goto out;
        }
        tmp = pt->v[psb_mmu_pt_index(virtual)];
        if (!(tmp & PSB_PTE_VALID)) {
                ret = -EINVAL;
        } else {
                ret = 0;
                *pfn = tmp >> PAGE_SHIFT;
        }
        psb_mmu_pt_unmap_unlock(pt);
out:
        up_read(&pd->driver->sem);
        return ret;
}