/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * UNIX machine dependent virtual memory support.
 */

#include <sys/exechdr.h>
#include <vm/seg_kmem.h>
#include <sys/atomic.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/cpu_module.h>
#include <vm/hat_sfmmu.h>
#include <sys/memnode.h>
#include <sys/mem_config.h>
#include <sys/mem_cage.h>
#include <vm/vm_dep.h>
#include <sys/platform_module.h>
/*
 * These variables are set by module specific config routines.
 * They are only set by modules which will use physical cache page coloring.
 */
int do_pg_coloring = 0;

/*
 * These variables can be conveniently patched at kernel load time to
 * prevent do_pg_coloring from being enabled by
 * module specific config routines.
 */
int use_page_coloring = 1;

/*
 * initialized by page_coloring_init()
 */
extern uint_t page_colors;
extern uint_t page_colors_mask;
extern uint_t page_coloring_shift;

uint_t vac_colors = 0;
uint_t vac_colors_mask = 0;

/* cpu specific coloring initialization */
extern void page_coloring_init_cpu();
#pragma weak page_coloring_init_cpu

/*
 * get the ecache setsize for the current cpu.
 */
#define	CPUSETSIZE()	(cpunodes[CPU->cpu_id].ecache_setsize)

plcnt_t plcnt;			/* page list count */

/*
 * This variable is set by the cpu module to contain the lowest
 * address not affected by the SF_ERRATA_57 workaround. It should
 * remain 0 if the workaround is not needed.
 */
#if defined(SF_ERRATA_57)
caddr_t errata57_limit;
#endif

extern void page_relocate_hash(page_t *, page_t *);

/*
 * these must be defined in platform specific areas
 */
extern void map_addr_proc(caddr_t *, size_t, offset_t, int, caddr_t,
    struct proc *, uint_t);
extern page_t *page_get_freelist(struct vnode *, u_offset_t, struct seg *,
    caddr_t, size_t, uint_t, struct lgrp *);

/*
 * Convert page frame number to an OBMEM page frame number
 * (i.e. put in the type bits -- zero for this implementation)
 */
impl_obmem_pfnum(pfn_t pf)
/*
 * Use physmax to determine the highest physical page of DRAM memory.
 * It is assumed that any physical address above physmax is in IO space.
 * We don't bother checking the low end because we assume that memory space
 * begins at physical page frame 0.
 *
 * Return 1 if the page frame is onboard DRAM memory, else 0.
 * Returns 0 for nvram so it won't be cached.
 */
pf_is_memory(pfn_t pf)

	/* We must be IO space */

	/* We must be memory space */

/*
 * Handle a pagefault.
 */
pagefault(caddr_t addr, enum fault_type type, enum seg_rw rw, int iskernel)

	if (INVALID_VADDR(addr))
#if defined(SF_ERRATA_57)
	/*
	 * Prevent infinite loops due to a segment driver
	 * setting the execute permissions and the sfmmu hat
	 * silently ignoring them.
	 */
	if (rw == S_EXEC && AS_TYPE_64BIT(as) &&
	    addr < errata57_limit) {

	/*
	 * Dispatch pagefault.
	 */
	res = as_fault(as->a_hat, as, addr, 1, type, rw);

	/*
	 * If this isn't a potential unmapped hole in the user's
	 * UNIX data or stack segments, just return status info.
	 */
	if (!(res == FC_NOMAP && iskernel == 0))
	/*
	 * Check to see if we happened to fault on a currently unmapped
	 * part of the UNIX data or stack segments. If so, create a zfod
	 * mapping there and then try calling the fault routine again.
	 */
	if (addr < base || addr >= base + len) {		/* data seg? */
		base = (caddr_t)(p->p_usrstack - p->p_stksize);
		if (addr < base || addr >= p->p_usrstack) {	/* stack seg? */
			/* not in either UNIX data or stack segments */

	/* the rest of this function implements 3.X/4.X/5.X compatibility */
	/* This code is probably not needed anymore */

	/* expand the gap to the page boundaries on each side */
	len = (((uintptr_t)base + len + PAGEOFFSET) & PAGEMASK) -
	    ((uintptr_t)base & PAGEMASK);
	base = (caddr_t)((uintptr_t)base & PAGEMASK);

	if (as_gap(as, PAGESIZE, &base, &len, AH_CONTAIN, addr) == 0) {
		err = as_map(as, base, len, segvn_create, zfod_argsp);
		res = FC_MAKE_ERR(err);

	/*
	 * This page is already mapped by another thread after we
	 * returned from as_fault() above. We just fall through.
	 */
	res = as_fault(as->a_hat, as, addr, 1, F_INVAL, rw);
/*
 * This is the routine which defines the address limit implied
 * by the flag '_MAP_LOW32'. USERLIMIT32 matches the highest
 * mappable address in a 32-bit process on this platform (though
 * perhaps we should make it be UINT32_MAX here?)
 */
map_addr(caddr_t *addrp, size_t len, offset_t off, int vacalign, uint_t flags)

	struct proc *p = curproc;
	caddr_t userlimit = flags & _MAP_LOW32 ?
	    (caddr_t)USERLIMIT32 : p->p_as->a_userlimit;

	map_addr_proc(addrp, len, off, vacalign, userlimit, p, flags);

/*
 * Some V9 CPUs have holes in the middle of the 64-bit virtual address range.
 */
caddr_t	hole_start, hole_end;

uchar_t kpm_size_shift;

int valid_va_range_aligned_wraparound;
/*
 * Determine whether [*basep, *basep + *lenp) contains a mappable range of
 * addresses at least "minlen" long, where the base of the range is at "off"
 * phase from an "align" boundary and there is space for a "redzone"-sized
 * redzone on either side of the range. On success, 1 is returned and *basep
 * and *lenp are adjusted to describe the acceptable range (including
 * the redzone). On failure, 0 is returned.
 */
valid_va_range_aligned(caddr_t *basep, size_t *lenp, size_t minlen, int dir,
    size_t align, size_t redzone, size_t off)

	ASSERT(align == 0 ? off == 0 : off < align);
	ASSERT(align == 0 || align >= PAGESIZE);

	tot_len = minlen + 2 * redzone;	/* need at least this much space */
	/* If hi rolled over the top try cutting back. */
		*lenp = 0UL - (uintptr_t)lo - 1UL;
		/* Trying to see if this really happens, and then if so, why */
		valid_va_range_aligned_wraparound++;

	if (*lenp < tot_len) {

	/*
	 * Deal with a possible hole in the address range between
	 * hole_start and hole_end that should never be mapped by the MMU.
	 */
	if (lo < hole_start) {
		/* lo < hole_start && hi >= hole_end */
			/*
			 * prefer lowest range
			 */
			if (hole_start - lo >= tot_len)
			else if (hi - hole_end >= tot_len)
			/*
			 * prefer highest range
			 */
			if (hi - hole_end >= tot_len)
			else if (hole_start - lo >= tot_len)
	/* lo >= hole_start */
	/* Check if remaining length is too small */
	if (hi - lo < tot_len) {

	caddr_t tlo = lo + redzone;
	caddr_t thi = hi - redzone;

	tlo = (caddr_t)P2PHASEUP((uintptr_t)tlo, align, off);
	if (tlo < lo + redzone) {

	if (thi < tlo || thi - tlo < minlen) {
/*
 * Determine whether [*basep, *basep + *lenp) contains a mappable range of
 * addresses at least "minlen" long. On success, 1 is returned and *basep
 * and *lenp are adjusted to describe the acceptable range. On failure, 0
 * is returned.
 */
valid_va_range(caddr_t *basep, size_t *lenp, size_t minlen, int dir)

	return (valid_va_range_aligned(basep, lenp, minlen, dir, 0, 0, 0));
/*
 * Determine whether [addr, addr+len] with protections `prot' is valid
 * for a user address space.
 */
valid_usr_range(caddr_t addr, size_t len, uint_t prot, struct as *as,
    caddr_t userlimit)

	caddr_t eaddr = addr + len;

	if (eaddr <= addr || addr >= userlimit || eaddr > userlimit)
		return (RANGE_BADADDR);

	/*
	 * Determine if the address range falls within an illegal
	 * range of the address space.
	 */
	if (eaddr > hole_start && addr < hole_end)
		return (RANGE_BADADDR);

#if defined(SF_ERRATA_57)
	/*
	 * Make sure USERLIMIT isn't raised too high
	 */
	ASSERT64(addr <= (caddr_t)0xffffffff80000000ul ||
	    errata57_limit == 0);

	if (AS_TYPE_64BIT(as) &&
	    (addr < errata57_limit) &&
		return (RANGE_BADPROT);
#endif /* SF_ERRATA_57 */
/*
 * Routine used to check to see if an a.out can be executed
 * by the current machine/architecture.
 */
chkaout(struct exdata *exp)

	if (exp->ux_mach == M_SPARC)

/*
 * The following functions return information about an a.out
 * which is used when a program is executed.
 */

/*
 * Return the load memory address for the data segment.
 */
getdmem(struct exec *exp)

	/*
	 * XXX - Sparc Reference Hack approaching
	 * Remember that we are loading
	 * 8k executables into a 4k machine
	 * DATA_ALIGN == 2 * PAGESIZE
	 */
		return ((caddr_t)(roundup(USRTEXT + exp->a_text, DATA_ALIGN)));
	return ((caddr_t)USRTEXT);

/*
 * Return the starting disk address for the data segment.
 */
getdfile(struct exec *exp)

	if (exp->a_magic == ZMAGIC)
		return (exp->a_text);
	return (sizeof (struct exec) + exp->a_text);

/*
 * Return the load memory address for the text segment.
 */
gettmem(struct exec *exp)

	return ((caddr_t)USRTEXT);

/*
 * Return the file byte offset for the text segment.
 */
gettfile(struct exec *exp)

	if (exp->a_magic == ZMAGIC)
	return (sizeof (struct exec));
	struct exdata *edp_in,
	struct exdata *edp_out,

	*edp_out = *edp_in;	/* structure copy */

	if ((edp_in->ux_mag == ZMAGIC) &&
	    ((edp_in->vp->v_flag & VNOMAP) == 0)) {
/*
 * Return a non-zero value if the address may cause a VAC alias with KPM
 * mappings. KPM selects an address such that its offset is equal modulo
 * shm_alignment and assumes it can't be in VAC conflict with any mapping
 * larger than PAGESIZE.
 */
map_addr_vacalign_check(caddr_t addr, u_offset_t off)

	return (((uintptr_t)addr ^ off) & shm_alignment - 1);
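
/*
 * Illustrative sketch, not part of the original source: the same XOR/mask
 * test evaluated on example values. The 64K shm_alignment figure is an
 * assumption made only for this example; the real value is derived from the
 * VAC geometry elsewhere.
 */
static int
example_vacalign_conflict(void)
{
	uintptr_t example_shm_alignment = 0x10000;	/* assumed 64K */
	uintptr_t addr = 0x30000;	/* proposed mapping address */
	uint64_t off = 0x8000;		/* offset being mapped at addr */

	/*
	 * Non-zero means addr and off disagree within a shm_alignment-sized
	 * window, so the mapping could alias a KPM mapping in the VAC.
	 * Here (0x30000 ^ 0x8000) & 0xffff == 0x8000, i.e. a conflict.
	 */
	return ((((addr ^ (uintptr_t)off) & (example_shm_alignment - 1)) != 0));
}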
/*
 * Sanity control. Don't use large pages regardless of user
 * settings if there's less than priv or shm_lpg_min_physmem memory installed.
 * The units for these variables are 8K pages.
 */
pgcnt_t shm_lpg_min_physmem = 131072;	/* 1GB */
pgcnt_t privm_lpg_min_physmem = 131072;	/* 1GB */
map_pgszheap(struct proc *p, caddr_t addr, size_t len)

	size_t pgsz = MMU_PAGESIZE;

	/*
	 * If len is zero, retrieve from proc and don't demote the page size.
	 * Use at least the default pagesize.
	 */
	len = p->p_brkbase + p->p_brksize - p->p_bssbase;
	len = MAX(len, default_uheap_lpsize);

	for (szc = mmu_page_sizes - 1; szc >= 0; szc--) {
		pgsz = hw_page_array[szc].hp_size;
		if ((disable_auto_data_large_pages & (1 << szc)) ||
		    pgsz > max_uheap_lpsize)

	/*
	 * If addr == 0 we were called by memcntl() when the
	 * size code is 0. Don't set pgsz less than current size.
	 */
	if (addr == 0 && (pgsz < hw_page_array[p->p_brkpageszc].hp_size)) {
		pgsz = hw_page_array[p->p_brkpageszc].hp_size;
map_pgszstk(struct proc *p, caddr_t addr, size_t len)

	size_t pgsz = MMU_PAGESIZE;

	/*
	 * If len is zero, retrieve from proc and don't demote the page size.
	 * Use at least the default pagesize.
	 */
	len = MAX(len, default_ustack_lpsize);

	for (szc = mmu_page_sizes - 1; szc >= 0; szc--) {
		pgsz = hw_page_array[szc].hp_size;
		if ((disable_auto_data_large_pages & (1 << szc)) ||
		    pgsz > max_ustack_lpsize)

	/*
	 * If addr == 0 we were called by memcntl() or exec_args() when the
	 * size code is 0. Don't set pgsz less than current size.
	 */
	if (addr == 0 && (pgsz < hw_page_array[p->p_stkpageszc].hp_size)) {
		pgsz = hw_page_array[p->p_stkpageszc].hp_size;
map_pgszism(caddr_t addr, size_t len)

	for (szc = mmu_page_sizes - 1; szc >= TTE4M; szc--) {
		if (disable_ism_large_pages & (1 << szc))

		pgsz = hw_page_array[szc].hp_size;
		if ((len >= pgsz) && IS_P2ALIGNED(addr, pgsz))

	return (DEFAULT_ISM_PAGESIZE);
/*
 * Suggest a page size to be used to map a segment of type maptype and length
 * len. Returns a page size (not a size code).
 */
map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int memcntl)

	size_t pgsz = MMU_PAGESIZE;

	ASSERT(maptype != MAPPGSZ_VA);

	if (maptype != MAPPGSZ_ISM && physmem < privm_lpg_min_physmem) {
		return (MMU_PAGESIZE);

		pgsz = map_pgszism(addr, len);

		if (max_ustack_lpsize > MMU_PAGESIZE) {
			pgsz = map_pgszstk(p, addr, len);

		if (max_uheap_lpsize > MMU_PAGESIZE) {
			pgsz = map_pgszheap(p, addr, len);
/* assumes TTE8K...TTE4M == szc */

map_szcvec(caddr_t addr, size_t size, uintptr_t off, int disable_lpgs,
    size_t max_lpsize, size_t min_physmem)

	caddr_t eaddr = addr + size;

	if (physmem < min_physmem || max_lpsize <= MMU_PAGESIZE) {

	for (i = mmu_page_sizes - 1; i > 0; i--) {
		if (disable_lpgs & (1 << i)) {

		pgsz = page_get_pagesize(i);
		if (pgsz > max_lpsize) {

		raddr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
		readdr = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
		if (raddr < addr || raddr >= readdr) {

		if (P2PHASE((uintptr_t)addr ^ off, pgsz)) {

	/*
	 * And OR in the remaining enabled page sizes.
	 */
	szcvec |= P2PHASE(~disable_lpgs, (1 << i));
	szcvec &= ~1;		/* no need to return 8K pagesize */
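
/*
 * Illustrative sketch, not part of the original source: how the final OR
 * above folds the smaller enabled size codes into szcvec once the largest
 * usable size code i has been found. P2PHASE(x, 1 << i) keeps only the bits
 * of x below bit i; the disable mask and size code used here are assumed
 * example values.
 */
static uint_t
example_szcvec_tail(void)
{
	uint_t szcvec = 1 << 3;		/* suppose size code 3 already fit */
	int i = 3;			/* largest size code accepted above */
	int disable_lpgs = 1 << 2;	/* assume size code 2 is disabled */

	/* expanded form of P2PHASE(~disable_lpgs, 1 << i) */
	szcvec |= ~disable_lpgs & ((1 << i) - 1);	/* adds bits 0 and 1 */
	szcvec &= ~1;		/* size code 0 (8K) is never reported */

	return (szcvec);	/* 0xa: size codes 1 and 3 remain set */
}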
/*
 * Return a bit vector of large page size codes that
 * can be used to map the [addr, addr + len) region.
 */
map_pgszcvec(caddr_t addr, size_t size, uintptr_t off, int flags, int type,

	if (flags & MAP_TEXT) {
		return (map_szcvec(addr, size, off,
		    disable_auto_text_large_pages,
		    max_utext_lpsize, shm_lpg_min_physmem));

	} else if (flags & MAP_INITDATA) {
		return (map_szcvec(addr, size, off,
		    disable_auto_data_large_pages,
		    max_uidata_lpsize, privm_lpg_min_physmem));

	} else if (type == MAPPGSZC_SHM) {
		return (map_szcvec(addr, size, off,
		    disable_auto_data_large_pages,
		    max_shm_lpsize, shm_lpg_min_physmem));

	} else if (type == MAPPGSZC_HEAP) {
		return (map_szcvec(addr, size, off,
		    disable_auto_data_large_pages,
		    max_uheap_lpsize, privm_lpg_min_physmem));

	} else if (type == MAPPGSZC_STACK) {
		return (map_szcvec(addr, size, off,
		    disable_auto_data_large_pages,
		    max_ustack_lpsize, privm_lpg_min_physmem));

	return (map_szcvec(addr, size, off,
	    disable_auto_data_large_pages,
	    max_privmap_lpsize, privm_lpg_min_physmem));
/*
 * Anchored in the table below are counters used to keep track
 * of free contiguous physical memory. Each element of the table contains
 * the array of counters, the size of the array (allocated during
 * startup based on physmax), and a shift value used to convert a pagenum
 * into a counter array index or vice versa. The table has page size
 * for rows and region size for columns:
 *
 *	page_counters[page_size][region_size]
 *
 *	page_size:	TTE size code of pages on page_size freelist.
 *
 *	region_size:	TTE size code of a candidate larger page made up
 *			of contiguous free page_size pages.
 *
 * As you go across a page_size row increasing region_size each
 * element keeps track of how many (region_size - 1) size groups
 * made up of page_size free pages can be coalesced into a
 * region_size page. Yuck! Let's try an example:
 *
 * page_counters[1][3] is the table element used for identifying
 * candidate 4M pages from contiguous pages off the 64K free list.
 * Each index in the page_counters[1][3].array spans 4M. It's the
 * number of free 512K size (region_size - 1) groups of contiguous
 * 64K free pages. So when page_counters[1][3].counters[n] == 8
 * we know we have a candidate 4M page made up of 512K size groups
 * of 64K free pages.
 */
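
/*
 * Illustrative sketch, not part of the original source: a scaled-down
 * stand-in for one page_counters[page_size][region_size] element showing how
 * a counter equal to the number of (region_size - 1) groups per region marks
 * a candidate large page. The structure layout and names are assumptions for
 * the example only.
 */
struct example_page_ctr {
	uchar_t	*counters;	/* one counter per region_size region */
	size_t	nentries;	/* number of regions covered */
	int	shift;		/* pagenum -> counter index shift */
};

static int
example_region_is_candidate(struct example_page_ctr *ctr, pfn_t pfnum,
    uchar_t groups_per_region)
{
	size_t idx = pfnum >> ctr->shift;	/* region that pfnum lands in */

	/*
	 * For the 64K/4M case described above, groups_per_region is 8:
	 * eight free 512K groups of 64K pages fill the 4M region completely.
	 */
	return (ctr->counters[idx] == groups_per_region);
}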
/*
 * Per page size free lists. 3rd (max_mem_nodes) and 4th (page coloring bins)
 * dimensions are allocated dynamically.
 */
page_t ***page_freelists[MMU_PAGE_SIZES][MAX_MEM_TYPES];

/*
 * For now there is only a single size cache list.
 * Allocated dynamically.
 */
page_t ***page_cachelists[MAX_MEM_TYPES];

kmutex_t *fpc_mutex[NPC_MUTEX];
kmutex_t *cpc_mutex[NPC_MUTEX];

/*
 * Calculate space needed for page freelists and counters
 */
calc_free_pagelist_sz(void)

	size_t alloc_sz, cache_sz, free_sz;

	/*
	 * one cachelist per color, node, and type
	 */
	cache_sz = (page_get_pagecolors(0) * sizeof (page_t *)) +
	cache_sz *= max_mem_nodes * MAX_MEM_TYPES;

	/*
	 * one freelist per size, color, node, and type
	 */
	free_sz = sizeof (page_t **);
	for (szc = 0; szc < mmu_page_sizes; szc++)
		free_sz += sizeof (page_t *) * page_get_pagecolors(szc);
	free_sz *= max_mem_nodes * MAX_MEM_TYPES;

	alloc_sz = cache_sz + free_sz + page_ctrs_sz();
alloc_page_freelists(caddr_t alloc_base)

	/*
	 * We only support small pages in the cachelist.
	 */
	for (mtype = 0; mtype < MAX_MEM_TYPES; mtype++) {
		page_cachelists[mtype] = (page_t ***)alloc_base;
		alloc_base += (max_mem_nodes * sizeof (page_t **));
		for (mnode = 0; mnode < max_mem_nodes; mnode++) {
			page_cachelists[mtype][mnode] = (page_t **)alloc_base;
			    (page_get_pagecolors(0) * sizeof (page_t *));

	/*
	 * Allocate freelist bins for all
	 * supported page sizes.
	 */
	for (szc = 0; szc < mmu_page_sizes; szc++) {
		clrs = page_get_pagecolors(szc);
		for (mtype = 0; mtype < MAX_MEM_TYPES; mtype++) {
			page_freelists[szc][mtype] = (page_t ***)alloc_base;
			alloc_base += (max_mem_nodes * sizeof (page_t **));
			for (mnode = 0; mnode < max_mem_nodes; mnode++) {
				page_freelists[szc][mtype][mnode] =
				    (page_t **)alloc_base;
				alloc_base += (clrs * (sizeof (page_t *)));

	alloc_base = page_ctrs_alloc(alloc_base);
/*
 * Allocate page_freelists locks for a memnode from the nucleus data
 * area. This is the first time that mmu_page_sizes is used during
 * bootup, so check mmu_page_sizes initialization.
 */
ndata_alloc_page_mutexs(struct memlist *ndata)

	void page_coloring_init();

	page_coloring_init();
	if (&mmu_init_mmu_page_sizes) {
		if (!mmu_init_mmu_page_sizes(0)) {
			cmn_err(CE_PANIC, "mmu_page_sizes %d not initialized",

	ASSERT(mmu_page_sizes >= DEFAULT_MMU_PAGE_SIZES);

	/* fpc_mutex and cpc_mutex */
	alloc_sz = 2 * NPC_MUTEX * max_mem_nodes * sizeof (kmutex_t);

	alloc_base = ndata_alloc(ndata, alloc_sz, ecache_alignsize);
	if (alloc_base == NULL)

	ASSERT(((uintptr_t)alloc_base & (ecache_alignsize - 1)) == 0);

	for (i = 0; i < NPC_MUTEX; i++) {
		fpc_mutex[i] = (kmutex_t *)alloc_base;
		alloc_base += (sizeof (kmutex_t) * max_mem_nodes);
		cpc_mutex[i] = (kmutex_t *)alloc_base;
		alloc_base += (sizeof (kmutex_t) * max_mem_nodes);
/*
 * To select our starting bin, we stride through the bins with a stride
 * of 337. Why 337? It's prime, it's largeish, and it performs well both
 * in simulation and practice for different workloads on varying cache sizes.
 */
uint32_t color_start_current = 0;
uint32_t color_start_stride = 337;
int color_start_random = 0;
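
/*
 * Illustrative sketch, not part of the original source: because 337 is prime
 * it is coprime to any power-of-two number of color bins, so striding by it
 * visits every bin before the sequence repeats. The bin count below is an
 * assumed example value.
 */
static uint_t
example_stride_coverage(void)
{
	uint_t nbins = 128;		/* assumed power-of-two bin count */
	char seen[128] = { 0 };
	uint_t distinct = 0, bin = 0;
	uint_t i;

	for (i = 0; i < nbins; i++) {
		bin = (bin + 337) & (nbins - 1);	/* wrap with a mask */
		if (!seen[bin]) {
			seen[bin] = 1;
			distinct++;
		}
	}
	return (distinct);	/* 128: a prime stride touches every bin */
}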
get_color_start(struct as *as)

	if (consistent_coloring == 2 || color_start_random) {
		return ((uint_t)(((gettick()) << (vac_shift - MMU_PAGESHIFT)) &
		    (hw_page_array[0].hp_colors - 1)));

		old = color_start_current;
		new = old + (color_start_stride << (vac_shift - MMU_PAGESHIFT));
	} while (atomic_cas_32(&color_start_current, old, new) != old);

	return ((uint_t)(new));
/*
 * Called once at startup from kphysm_init() -- before memialloc()
 * is invoked to do the 1st page_free()/page_freelist_add().
 *
 * initializes page_colors and page_colors_mask based on ecache_setsize.
 *
 * Also initializes the counter locks.
 */
	if (do_pg_coloring == 0) {
		for (i = 0; i < mmu_page_sizes; i++) {
			colorequivszc[i] = 0;
			hw_page_array[i].hp_colors = 1;

	/*
	 * Calculate page_colors from ecache_setsize. ecache_setsize contains
	 * the max ecache setsize of all cpus configured in the system or, for
	 * cheetah+ systems, the max possible ecache setsize for all possible
	 * cheetah+ cpus.
	 */
	page_colors = ecache_setsize / MMU_PAGESIZE;
	page_colors_mask = page_colors - 1;

	vac_colors = vac_size / MMU_PAGESIZE;
	vac_colors_mask = vac_colors - 1;

	page_coloring_shift = 0;
		page_coloring_shift++;
	/* initialize number of colors per page size */
	for (i = 0; i < mmu_page_sizes; i++) {
		hw_page_array[i].hp_colors = (page_colors_mask >>
		    (hw_page_array[i].hp_shift - hw_page_array[0].hp_shift))
		colorequivszc[i] = 0;

	/*
	 * Initialize cpu_page_colors if ecache setsizes are homogeneous.
	 * cpu_page_colors is set to -1 during DR operation or during startup
	 * if setsizes are heterogeneous.
	 *
	 * The value of cpu_page_colors determines if additional color bins
	 * need to be checked for a particular color in the page_get routines.
	 */
	if (cpu_setsize > 0 && cpu_page_colors == 0 &&
	    cpu_setsize < ecache_setsize) {
		cpu_page_colors = cpu_setsize / MMU_PAGESIZE;
		a = lowbit(page_colors) - lowbit(cpu_page_colors);

		for (i = 0; i < mmu_page_sizes; i++) {
			if ((colors = hw_page_array[i].hp_colors) <= 1) {
			while ((colors >> a) == 0)

			/* higher 4 bits encode color equiv mask */
			colorequivszc[i] = (a << 4);

	/* do cpu specific color initialization */
	if (&page_coloring_init_cpu) {
		page_coloring_init_cpu();
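
/*
 * Illustrative sketch, not part of the original source: the page_colors and
 * page_coloring_shift arithmetic above worked on an assumed e-cache setsize.
 * With a 1MB setsize and 8K MMU pages there are 128 colors, a mask of 0x7f,
 * and a coloring shift of 7.
 */
static uint_t
example_color_math(void)
{
	size_t example_setsize = 0x100000;	/* assumed 1MB e-cache setsize */
	size_t pagesize = 0x2000;		/* 8K MMU page */
	uint_t colors = example_setsize / pagesize;	/* 128 colors, mask 0x7f */
	uint_t shift = 0;
	uint_t sz;

	/* the shift ends up as log2(colors): 7 for this example */
	for (sz = 1; sz < colors; sz <<= 1)
		shift++;

	return (shift);
}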
bp_color(struct buf *bp)

	if ((bp->b_flags & B_PAGEIO) != 0) {
		color = sfmmu_get_ppvcolor(bp->b_pages);
	} else if (bp->b_un.b_addr != NULL) {
		color = sfmmu_get_addrvcolor(bp->b_un.b_addr);

	return (color < 0 ? 0 : ptob(color));

/*
 * Function for flushing D-cache when performing module relocations
 * to an alternate mapping. Stubbed out on all platforms except sun4u,
 */
	sfmmu_cache_flushall();
kdi_range_overlap(uintptr_t va1, size_t sz1, uintptr_t va2, size_t sz2)

	if (va1 < va2 && va1 + sz1 <= va2)

	if (va2 < va1 && va2 + sz2 <= va1)

/*
 * Return the number of bytes, relative to the beginning of a given range, that
 * are non-toxic (can be read from and written to with relative impunity).
 */
kdi_range_is_nontoxic(uintptr_t va, size_t sz, int write)

	/* OBP reads are harmless, but we don't want people writing there */
	if (write && kdi_range_overlap(va, sz, OFW_START_ADDR, OFW_END_ADDR -
	    OFW_START_ADDR + 1))
		return (va < OFW_START_ADDR ? OFW_START_ADDR - va : 0);

	if (kdi_range_overlap(va, sz, PIOMAPBASE, PIOMAPSIZE))
		return (va < PIOMAPBASE ? PIOMAPBASE - va : 0);

	return (sz);	/* no overlap */
/*
 * Minimum physmem required for enabling large pages for kernel heap.
 * Currently we do not enable lp for kmem on systems with less
 * than 1GB of memory. This value can be changed via /etc/system.
 */
size_t segkmem_lpminphysmem = 0x40000000;	/* 1GB */

/*
 * This function chooses the large page size for the kernel heap.
 */
get_segkmem_lpsize(size_t lpsize)

	size_t memtotal = physmem * PAGESIZE;

	if (memtotal < segkmem_lpminphysmem)

	if (plat_lpkmem_is_supported != NULL &&
	    plat_lpkmem_is_supported() == 0)

	mmusz = mmu_get_kernel_lpsize(lpsize);
	szc = page_szc(mmusz);

	if (!(disable_large_pages & (1 << szc)))
		return (page_get_pagesize(szc));