/*
 * Info about, and flushing the host cpu caches.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/bitops.h"
#include "qemu/host-utils.h"
#include "qemu/atomic.h"

int qemu_icache_linesize = 0;
int qemu_icache_linesize_log;
int qemu_dcache_linesize = 0;
int qemu_dcache_linesize_log;

/*
 * Operating system specific cache detection mechanisms.
 */

#if defined(_WIN32)

static void sys_cache_info(int *isize, int *dsize)
{
    SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf;
    DWORD size = 0;
    BOOL success;
    size_t i, n;

    /*
     * Check for the required buffer size first.  Note that if the zero
     * size we use for the probe results in success, then there is no
     * data available; fail in that case.
     */
    success = GetLogicalProcessorInformation(0, &size);
    if (success || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
        return;
    }

    n = size / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
    size = n * sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
    buf = g_new0(SYSTEM_LOGICAL_PROCESSOR_INFORMATION, n);
    if (!GetLogicalProcessorInformation(buf, &size)) {
        goto fail;
    }

    for (i = 0; i < n; i++) {
        if (buf[i].Relationship == RelationCache
            && buf[i].Cache.Level == 1) {
            switch (buf[i].Cache.Type) {
            case CacheUnified:
                *isize = *dsize = buf[i].Cache.LineSize;
                break;
            case CacheInstruction:
                *isize = buf[i].Cache.LineSize;
                break;
            case CacheData:
                *dsize = buf[i].Cache.LineSize;
                break;
            default:
                break;
            }
        }
    }
 fail:
    g_free(buf);
}

#elif defined(CONFIG_DARWIN)
# include <sys/sysctl.h>
static void sys_cache_info(int *isize, int *dsize)
{
    /* There's only a single sysctl for both I/D cache line sizes.  */
    long size;
    size_t len = sizeof(size);
    if (!sysctlbyname("hw.cachelinesize", &size, &len, NULL, 0)) {
        *isize = *dsize = size;
    }
}

#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
# include <sys/sysctl.h>
static void sys_cache_info(int *isize, int *dsize)
{
    /* There's only a single sysctl for both I/D cache line sizes.  */
    int size;
    size_t len = sizeof(size);
    if (!sysctlbyname("machdep.cacheline_size", &size, &len, NULL, 0)) {
        *isize = *dsize = size;
    }
}

#else

static void sys_cache_info(int *isize, int *dsize)
{
# ifdef _SC_LEVEL1_ICACHE_LINESIZE
    int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
    if (tmp_isize > 0) {
        *isize = tmp_isize;
    }
# endif
# ifdef _SC_LEVEL1_DCACHE_LINESIZE
    int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
    if (tmp_dsize > 0) {
        *dsize = tmp_dsize;
    }
# endif
}
#endif /* sys_cache_info */

/*
 * Architecture (+ OS) specific cache detection mechanisms.
 */

#if defined(__powerpc__)
static bool have_coherent_icache;
#endif

#if defined(__aarch64__) && !defined(CONFIG_DARWIN) && !defined(CONFIG_WIN32)
/*
 * Apple does not expose CTR_EL0, so we must use system interfaces.
 * Windows does not expose it either; there we use the generic
 * flush_idcache_range implementation instead.
 */
static uint64_t save_ctr_el0;
static void arch_cache_info(int *isize, int *dsize)
{
    uint64_t ctr;

    /*
     * The real cache geometry is in CCSIDR_EL1/CLIDR_EL1/CSSELR_EL1,
     * but (at least under Linux) these are marked protected by the
     * kernel.  However, CTR_EL0 contains the minimum linesize in the
     * entire hierarchy, and is used by userspace cache flushing.
     *
     * We will also use this value in flush_idcache_range.
     */
    asm volatile("mrs\t%0, ctr_el0" : "=r"(ctr));
    save_ctr_el0 = ctr;

    if (*isize == 0 || *dsize == 0) {
        if (*isize == 0) {
            *isize = 4 << (ctr & 0xf);
        }
        if (*dsize == 0) {
            *dsize = 4 << ((ctr >> 16) & 0xf);
        }
    }
}
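
/*
 * Example of the decode above: CTR_EL0.IminLine (bits [3:0]) and
 * CTR_EL0.DminLine (bits [19:16]) hold log2 of the minimum line size
 * in 4-byte words, so a field value of 4 means 4 << 4 = 64-byte lines.
 */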

#elif defined(_ARCH_PPC) && defined(__linux__)
# include "elf.h"

static void arch_cache_info(int *isize, int *dsize)
{
    if (*isize == 0) {
        *isize = qemu_getauxval(AT_ICACHEBSIZE);
    }
    if (*dsize == 0) {
        *dsize = qemu_getauxval(AT_DCACHEBSIZE);
    }
    have_coherent_icache = qemu_getauxval(AT_HWCAP) & PPC_FEATURE_ICACHE_SNOOP;
}

#else
static void arch_cache_info(int *isize, int *dsize) { }
#endif /* arch_cache_info */

/*
 * ... and if all else fails ...
 */

static void fallback_cache_info(int *isize, int *dsize)
{
    /* If we can only find one of the two, assume they're the same.  */
    if (*isize) {
        if (*dsize == 0) {
            *dsize = *isize;
        }
    } else if (*dsize) {
        *isize = *dsize;
    } else {
#if defined(_ARCH_PPC)
        /*
         * For PPC, we're going to use the cache sizes computed for
         * flush_idcache_range.  Which means that we must use the
         * architecture minimum.
         */
        *isize = *dsize = 16;
#else
        /* Otherwise, 64 bytes is not uncommon.  */
        *isize = *dsize = 64;
#endif
    }
}

static void __attribute__((constructor)) init_cache_info(void)
{
    int isize = 0, dsize = 0;

    sys_cache_info(&isize, &dsize);
    arch_cache_info(&isize, &dsize);
    fallback_cache_info(&isize, &dsize);

    assert((isize & (isize - 1)) == 0);
    assert((dsize & (dsize - 1)) == 0);

    qemu_icache_linesize = isize;
    qemu_icache_linesize_log = ctz32(isize);
    qemu_dcache_linesize = dsize;
    qemu_dcache_linesize_log = ctz32(dsize);
}
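
/*
 * For example, a host with 64-byte cache lines ends up with
 * qemu_icache_linesize == 64 and qemu_icache_linesize_log == ctz32(64) == 6;
 * the asserts above rely on the line sizes being powers of two, which is
 * what makes ctz32() usable as log2().
 */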

/*
 * Architecture (+ OS) specific cache flushing mechanisms.
 */

#if defined(__i386__) || defined(__x86_64__) || defined(__s390__)

/*
 * Caches are coherent and do not require flushing; flush_idcache_range
 * is therefore provided as an inline no-op by the header.
 */

#elif defined(__aarch64__) && !defined(CONFIG_WIN32)
/*
 * For Windows, we use the generic implementation of flush_idcache_range,
 * which calls FlushInstructionCache through __builtin___clear_cache.
 */

#ifdef CONFIG_DARWIN
/* Apple does not expose CTR_EL0, so we must use system interfaces. */
#include <libkern/OSCacheControl.h>

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    /*
     * sys_icache_invalidate() syncs the dcache and icache, so a separate
     * sys_dcache_flush() is only needed when the writable and executable
     * mappings differ.
     */
    if (rx != rw) {
        sys_dcache_flush((void *)rw, len);
    }
    sys_icache_invalidate((void *)rx, len);
}

#else

/*
 * This is a copy of gcc's __aarch64_sync_cache_range, modified
 * to fit this three-operand interface.
 */
void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    const unsigned CTR_IDC = 1u << 28;
    const unsigned CTR_DIC = 1u << 29;
    const uint64_t ctr_el0 = save_ctr_el0;
    const uintptr_t icache_lsize = qemu_icache_linesize;
    const uintptr_t dcache_lsize = qemu_dcache_linesize;
    uintptr_t p;

    /*
     * If CTR_EL0.IDC is set, data cache clean to the Point of Unification
     * is not required for instruction to data coherence.
     */
    if (!(ctr_el0 & CTR_IDC)) {
        /*
         * Loop over the address range, cleaning one cache line at a time.
         * The data cache must be cleaned to the Point of Unification first,
         * so that the instruction cache fetches the updated data.
         */
        for (p = rw & -dcache_lsize; p < rw + len; p += dcache_lsize) {
            asm volatile("dc\tcvau, %0" : : "r" (p) : "memory");
        }
        asm volatile("dsb\tish" : : : "memory");
    }

    /*
     * If CTR_EL0.DIC is set, instruction cache invalidation to the Point
     * of Unification is not required for instruction to data coherence.
     */
    if (!(ctr_el0 & CTR_DIC)) {
        for (p = rx & -icache_lsize; p < rx + len; p += icache_lsize) {
            asm volatile("ic\tivau, %0" : : "r"(p) : "memory");
        }
        asm volatile("dsb\tish" : : : "memory");
    }

    asm volatile("isb" : : : "memory");
}
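
/*
 * On cores that advertise both CTR_EL0.IDC and CTR_EL0.DIC, both loops
 * above are skipped and the routine reduces to the final "isb".
 */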
#endif /* CONFIG_DARWIN */

#elif defined(__mips__)

#ifdef __OpenBSD__
#include <machine/sysarch.h>
#else
#include <sys/cachectl.h>
#endif

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    if (rx != rw) {
        cacheflush((void *)rw, len, DCACHE);
    }
    cacheflush((void *)rx, len, ICACHE);
}

#elif defined(__powerpc__)

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    uintptr_t p, b, e;
    size_t dsize, isize;

    /*
     * Some processors have coherent caches and support a simplified
     * flushing procedure.  See
     *   POWER9 UM, 4.6.2.2 Instruction Cache Block Invalidate (icbi)
     *   https://ibm.ent.box.com/s/tmklq90ze7aj8f4n32er1mu3sy9u8k3k
     */
    if (have_coherent_icache) {
        asm volatile ("sync\n\t"
                      "icbi 0,%0\n\t"
                      "isync"
                      : : "r"(rx) : "memory");
        return;
    }

    dsize = qemu_dcache_linesize;
    isize = qemu_icache_linesize;

    /* Flush the data cache to memory for the RW virtual address range. */
    b = rw & ~(dsize - 1);
    e = (rw + len + dsize - 1) & ~(dsize - 1);
    for (p = b; p < e; p += dsize) {
        asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
    }
    asm volatile ("sync" : : : "memory");

    /* Invalidate the instruction cache for the RX virtual address range. */
    b = rx & ~(isize - 1);
    e = (rx + len + isize - 1) & ~(isize - 1);
    for (p = b; p < e; p += isize) {
        asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
    }
    asm volatile ("sync" : : : "memory");
    asm volatile ("isync" : : : "memory");
}
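
/*
 * The rounding above covers every line touched by [addr, addr + len):
 * "b" rounds the start down to a line boundary and "e" rounds the end up.
 * E.g. with dsize == 64, rw == 0x1004 and len == 8, b == 0x1000 and
 * e == 0x1040, so the single line containing the range is flushed.
 */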

#elif defined(__sparc__)

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    /* No additional data flush to the RW virtual address required. */
    uintptr_t p, end = (rx + len + 7) & -8;
    for (p = rx & -8; p < end; p += 8) {
        __asm__ __volatile__("flush\t%0" : : "r" (p));
    }
}

#else

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    if (rw != rx) {
        __builtin___clear_cache((char *)rw, (char *)rw + len);
    }
    __builtin___clear_cache((char *)rx, (char *)rx + len);
}

#endif
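
/*
 * Usage sketch, whichever implementation above was selected: a JIT that
 * writes code through a writable mapping "rw" aliasing an executable
 * mapping "rx" must flush before executing ("code" and "len" below are
 * placeholders for the caller's buffer):
 *
 *     memcpy((void *)rw, code, len);      // write via the RW alias
 *     flush_idcache_range(rx, rw, len);   // sync the D- and I-side views
 *     // ... now safe to execute from rx ...
 */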