1 /* vim: set shiftwidth=4 tabstop=8 autoindent cindent expandtab: */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 /* compile-time and runtime tests for whether to use SSE instructions */
11 // cpuid.h is available on gcc 4.3 and higher on i386 and x86_64
13 #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))
14 // MSVC 2005 or newer on x86-32 or x86-64
20 // SSE.h has parallel #ifs which declare MOZILLA_SSE_HAVE_CPUID_DETECTION.
21 // We can't declare these functions in the header file, however, because
22 // <intrin.h> conflicts with <windows.h> on MSVC 2005, and some files want to
23 // include both SSE.h and <windows.h>.
// Selects one of the four CPUID output registers. The numeric values 0..3
// are used directly as an array index (regs[reg]) by has_cpuid_bits() and
// has_cpuid_bits_ex() in this branch.
27 enum CPUIDRegister
{ eax
= 0, ebx
= 1, ecx
= 2, edx
= 3 };
// <cpuid.h> variant: returns true when every bit of `bits` is set in output
// register `reg` of CPUID leaf `level`, queried with sub-leaf 0.
// Returns false when `level` is above the maximum reported by
// __get_cpuid_max().
29 static bool has_cpuid_bits(unsigned int level
, CPUIDRegister reg
,
32 unsigned int eax
, ebx
, ecx
, edx
;
// `level & 0x80000000u` is either 0 or 0x80000000, so the maximum is looked
// up for the same range (basic vs. extended) that `level` belongs to.
33 unsigned max
= __get_cpuid_max(level
& 0x80000000u
, nullptr);
34 if (level
> max
) return false;
// Second argument is the sub-leaf: always 0 in this helper.
35 __cpuid_count(level
, 0, eax
, ebx
, ecx
, edx
);
// All requested bits must be present, not just any one of them.
40 return (regs
[reg
] & bits
) == bits
;
// <cpuid.h> variant of the sub-leaf-1 query: returns true when every bit of
// `bits` is set in output register `reg` of CPUID leaf `level`, sub-leaf 1.
// Returns false when `level` is above the maximum reported by
// __get_cpuid_max().
// NOTE(review): only the leaf number is range-checked here; whether sub-leaf 1
// exists for `level` is not verified in the visible code -- confirm callers
// only use leaves where that is acceptable.
43 static bool has_cpuid_bits_ex(unsigned int level
, CPUIDRegister reg
,
46 unsigned int eax
, ebx
, ecx
, edx
;
// `level & 0x80000000u` is either 0 or 0x80000000, so the maximum is looked
// up for the same range (basic vs. extended) that `level` belongs to.
47 unsigned max
= __get_cpuid_max(level
& 0x80000000u
, nullptr);
48 if (level
> max
) return false;
// Second argument is the sub-leaf: always 1 in this helper.
49 __cpuid_count(level
, 1, eax
, ebx
, ecx
, edx
);
// All requested bits must be present, not just any one of them.
54 return (regs
[reg
] & bits
) == bits
;
57 #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))
// Selects one of the four CPUID output registers. The numeric values 0..3
// are used directly as an array index (regs[reg]) by has_cpuid_bits() in
// this MSVC branch.
59 enum CPUIDRegister
{ eax
= 0, ebx
= 1, ecx
= 2, edx
= 3 };
// MSVC variant: returns true when every bit of `bits` is set in output
// register `reg` of CPUID leaf `level`; returns false when regs[0] of the
// range-base query (leaf `level & 0x80000000u`) is smaller than `level`.
// NOTE(review): the documented MSVC intrinsics are spelled __cpuid and
// __cpuidex; confirm that __cpuid_ex resolves to something in this build.
// NOTE(review): both calls below pass 1 as the last argument (presumably the
// sub-leaf), while the quoted remark further down talks about ECX being
// cleared, i.e. sub-leaf 0 -- confirm which sub-leaf is intended.
61 static bool has_cpuid_bits(unsigned int level
, CPUIDRegister reg
,
63 // Check that the level in question is supported.
65 __cpuid_ex(regs
, level
& 0x80000000u
, 1);
66 if (unsigned(regs
[0]) < level
) return false;
68 // "The __cpuid intrinsic clears the ECX register before calling the cpuid
70 __cpuid_ex(regs
, level
, 1);
// regs holds signed ints, so convert to unsigned before masking. All
// requested bits must be present.
71 return (unsigned(regs
[reg
]) & bits
) == bits
;
74 #elif (defined(__GNUC__) || defined(__SUNPRO_CC)) && \
75 (defined(__i386) || defined(__x86_64__))
// Selects one of the four CPUID output registers. The numeric values 0..3
// are used directly as an array index (regs[reg]) by has_cpuid_bits() and
// has_cpuid_bits_ex() in this inline-assembly branch.
77 enum CPUIDRegister
{ eax
= 0, ebx
= 1, ecx
= 2, edx
= 3 };
// Inline-assembly CPUID helper using 32-bit register names.
// InfoType is loaded into %eax (the "a" input constraint) and %ecx is zeroed
// to select sub-leaf 0. The visible stores write %ebx, %ecx and %edx to byte
// offsets 4, 8 and 12 from the address in %edi, i.e. CPUInfo[1..3] for an
// int array.
// NOTE(review): %ebx is swapped with %esi on entry, presumably so that %ebx
// can be restored afterwards (it is not in the clobber list) -- confirm
// against the complete asm statement.
80 static void moz_cpuid(int CPUInfo
[4], int InfoType
) {
81 asm("xchg %esi, %ebx\n"
82 "xor %ecx, %ecx\n" // ecx is the sub-leaf (we only ever need 0)
85 "movl %ebx, 4(%edi)\n"
86 "movl %ecx, 8(%edi)\n"
87 "movl %edx, 12(%edi)\n"
90 : "a"(InfoType
), // %eax
// Registers the asm statement declares as modified.
92 : "%ecx", "%edx", "%esi");
// Inline-assembly CPUID helper using 32-bit register names; counterpart of
// moz_cpuid() that is used by has_cpuid_bits_ex().
// InfoType is loaded into %eax (the "a" input constraint). The visible stores
// write %ebx, %ecx and %edx to byte offsets 4, 8 and 12 from the address in
// %edi, i.e. CPUInfo[1..3] for an int array.
// NOTE(review): the instruction that sets the sub-leaf in %ecx is not among
// the lines shown here -- confirm which sub-leaf this variant selects.
94 static void moz_cpuid_ex(int CPUInfo
[4], int InfoType
) {
95 asm("xchg %esi, %ebx\n"
99 "movl %ebx, 4(%edi)\n"
100 "movl %ecx, 8(%edi)\n"
101 "movl %edx, 12(%edi)\n"
104 : "a"(InfoType
), // %eax
// Registers the asm statement declares as modified.
106 : "%ecx", "%edx", "%esi");
// Inline-assembly CPUID helper using 64-bit register names.
// InfoType is loaded into %eax (the "a" input constraint) and %ecx is zeroed
// to select sub-leaf 0. The four results %eax, %ebx, %ecx, %edx are stored at
// byte offsets 0, 4, 8 and 12 from the address in %rdi, i.e. CPUInfo[0..3]
// for an int array.
// NOTE(review): %rbx is swapped with %rsi on entry, presumably so that %rbx
// can be restored afterwards (it is not in the clobber list) -- confirm
// against the complete asm statement.
109 static void moz_cpuid(int CPUInfo
[4], int InfoType
) {
110 asm("xchg %rsi, %rbx\n"
111 "xor %ecx, %ecx\n" // ecx is the sub-leaf (we only ever need 0)
113 "movl %eax, (%rdi)\n"
114 "movl %ebx, 4(%rdi)\n"
115 "movl %ecx, 8(%rdi)\n"
116 "movl %edx, 12(%rdi)\n"
119 : "a"(InfoType
), // %eax
// Registers the asm statement declares as modified.
121 : "%ecx", "%edx", "%rsi");
// Inline-assembly CPUID helper using 64-bit register names; counterpart of
// moz_cpuid() that is used by has_cpuid_bits_ex().
// InfoType is loaded into %eax (the "a" input constraint). The four results
// %eax, %ebx, %ecx, %edx are stored at byte offsets 0, 4, 8 and 12 from the
// address in %rdi, i.e. CPUInfo[0..3] for an int array.
// NOTE(review): the instruction that sets the sub-leaf in %ecx is not among
// the lines shown here -- confirm which sub-leaf this variant selects.
123 static void moz_cpuid_ex(int CPUInfo
[4], int InfoType
) {
124 asm("xchg %rsi, %rbx\n"
127 "movl %eax, (%rdi)\n"
128 "movl %ebx, 4(%rdi)\n"
129 "movl %ecx, 8(%rdi)\n"
130 "movl %edx, 12(%rdi)\n"
133 : "a"(InfoType
), // %eax
// Registers the asm statement declares as modified.
135 : "%ecx", "%edx", "%rsi");
// Inline-assembly variant: returns true when every bit of `bits` is set in
// output register `reg` of CPUID leaf `level`, as reported by moz_cpuid().
// Returns false when regs[0] of the range-base query
// (leaf `level & 0x80000000u`) is smaller than `level`.
139 static bool has_cpuid_bits(unsigned int level
, CPUIDRegister reg
,
141 // Check that the level in question is supported.
// regs is declared volatile; the qualifier is cast away when the array is
// handed to moz_cpuid().
142 volatile int regs
[4];
143 moz_cpuid((int*)regs
, level
& 0x80000000u
);
144 if (unsigned(regs
[0]) < level
) return false;
// Query the requested leaf itself, reusing the same buffer.
146 moz_cpuid((int*)regs
, level
);
// regs holds signed ints, so convert to unsigned before masking. All
// requested bits must be present.
147 return (unsigned(regs
[reg
]) & bits
) == bits
;
// Inline-assembly variant built on moz_cpuid_ex(): returns true when every
// bit of `bits` is set in output register `reg` of CPUID leaf `level`.
// Returns false when regs[0] of the range-base query
// (leaf `level & 0x80000000u`) is smaller than `level`.
// NOTE(review): the range-base query also goes through moz_cpuid_ex(), not
// moz_cpuid() -- confirm that the maximum-leaf value is unaffected by the
// sub-leaf that moz_cpuid_ex() selects.
150 static bool has_cpuid_bits_ex(unsigned int level
, CPUIDRegister reg
,
152 // Check that the level in question is supported.
// regs is declared volatile; the qualifier is cast away when the array is
// handed to moz_cpuid_ex().
153 volatile int regs
[4];
154 moz_cpuid_ex((int*)regs
, level
& 0x80000000u
);
155 if (unsigned(regs
[0]) < level
) return false;
// Query the requested leaf itself, reusing the same buffer.
157 moz_cpuid_ex((int*)regs
, level
);
// regs holds signed ints, so convert to unsigned before masking. All
// requested bits must be present.
158 return (unsigned(regs
[reg
]) & bits
) == bits
;
161 #endif // end CPUID declarations
167 namespace sse_private
{
// Runtime feature flags. Each flag is a namespace-scope bool initialised from
// a has_cpuid_bits() query and is only defined when the matching
// MOZILLA_PRESUME_* macro is NOT defined.
169 #if defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
171 # if !defined(MOZILLA_PRESUME_MMX)
// CPUID leaf 1, EDX bit 23.
172 bool mmx_enabled
= has_cpuid_bits(1u, edx
, (1u << 23));
175 # if !defined(MOZILLA_PRESUME_SSE)
// CPUID leaf 1, EDX bit 25.
176 bool sse_enabled
= has_cpuid_bits(1u, edx
, (1u << 25));
179 # if !defined(MOZILLA_PRESUME_SSE2)
// CPUID leaf 1, EDX bit 26.
180 bool sse2_enabled
= has_cpuid_bits(1u, edx
, (1u << 26));
183 # if !defined(MOZILLA_PRESUME_SSE3)
// CPUID leaf 1, ECX bit 0.
184 bool sse3_enabled
= has_cpuid_bits(1u, ecx
, (1u << 0));
187 # if !defined(MOZILLA_PRESUME_SSSE3)
// CPUID leaf 1, ECX bit 9.
188 bool ssse3_enabled
= has_cpuid_bits(1u, ecx
, (1u << 9));
191 # if !defined(MOZILLA_PRESUME_SSE4A)
// Extended CPUID leaf 0x80000001, ECX bit 6.
192 bool sse4a_enabled
= has_cpuid_bits(0x80000001u
, ecx
, (1u << 6));
195 # if !defined(MOZILLA_PRESUME_SSE4_1)
// CPUID leaf 1, ECX bit 19.
196 bool sse4_1_enabled
= has_cpuid_bits(1u, ecx
, (1u << 19));
199 # if !defined(MOZILLA_PRESUME_SSE4_2)
// CPUID leaf 1, ECX bit 20.
200 bool sse4_2_enabled
= has_cpuid_bits(1u, ecx
, (1u << 20));
203 # if !defined(MOZILLA_PRESUME_FMA3)
// CPUID leaf 1, ECX bit 12.
204 bool fma3_enabled
= has_cpuid_bits(1u, ecx
, (1u << 12));
// has_avx(): helper shared by the avx_enabled and avx2_enabled initialisers;
// it is only compiled when at least one of MOZILLA_PRESUME_AVX /
// MOZILLA_PRESUME_AVX2 is not defined.
// In the runtime path it returns true only when BOTH hold:
//   1. CPUID leaf 1 ECX has bits 28 (AVX), 27 (OSXSAVE) and 26 (XSAVE) set;
//   2. xgetbv(0) reports bits 1 and 2 (XMM_STATE | YMM_STATE) set.
207 # if !defined(MOZILLA_PRESUME_AVX) || !defined(MOZILLA_PRESUME_AVX2)
208 static bool has_avx() {
209 # if defined(MOZILLA_PRESUME_AVX)
// Bit positions within ECX of CPUID leaf 1.
212 const unsigned AVX
= 1u << 28;
213 const unsigned OSXSAVE
= 1u << 27;
214 const unsigned XSAVE
= 1u << 26;
// Bit positions within the value returned by xgetbv(0).
216 const unsigned XMM_STATE
= 1u << 1;
217 const unsigned YMM_STATE
= 1u << 2;
218 const unsigned AVX_STATE
= XMM_STATE
| YMM_STATE
;
// The && short-circuits, so xgetbv() is only executed after the CPUID check
// (which includes OSXSAVE) has passed.
220 return has_cpuid_bits(1u, ecx
, AVX
| OSXSAVE
| XSAVE
) &&
221 // ensure the OS supports XSAVE of YMM registers
222 (xgetbv(0) & AVX_STATE
) == AVX_STATE
;
223 # endif // MOZILLA_PRESUME_AVX
225 # endif // !MOZILLA_PRESUME_AVX || !MOZILLA_PRESUME_AVX2
// Runtime feature flags for AVX-family, AES and TSC capabilities. Each flag is
// a namespace-scope bool initialised from CPUID queries.
227 # if !defined(MOZILLA_PRESUME_AVX)
// Result of has_avx() (CPU bits plus the xgetbv state check).
228 bool avx_enabled
= has_avx();
231 # if !defined(MOZILLA_PRESUME_AVX2)
// Requires has_avx() and CPUID leaf 7 (sub-leaf 0), EBX bit 5.
232 bool avx2_enabled
= has_avx() && has_cpuid_bits(7u, ebx
, (1u << 5));
235 # if !defined(MOZILLA_PRESUME_AVXVNNI)
// CPUID leaf 7 via has_cpuid_bits_ex(), EAX bit 4.
// NOTE(review): unlike avx2_enabled this is not gated on has_avx(); confirm
// whether the OS YMM-state check should apply here as well.
236 bool avxvnni_enabled
= has_cpuid_bits_ex(7u, eax
, (1u << 4));
239 # if !defined(MOZILLA_PRESUME_AES)
// CPUID leaf 1, ECX bit 25.
240 bool aes_enabled
= has_cpuid_bits(1u, ecx
, (1u << 25));
// Extended CPUID leaf 0x80000007, EDX bit 8. This definition is not directly
// preceded by a MOZILLA_PRESUME_* check in the visible lines.
243 bool has_constant_tsc
= has_cpuid_bits(0x80000007u
, edx
, (1u << 8));
247 } // namespace sse_private
// Reads an extended control register and returns its 64-bit value.
// `xcr` is placed in ECX (the "c" input constraint); the results come back in
// EAX (low 32 bits) and EDX (high 32 bits) and are combined as EDX:EAX.
// The instruction is emitted as the raw bytes 0x0f 0x01 0xd0 rather than by
// mnemonic.
// NOTE(review): presumably hand-encoded so that assemblers without the xgetbv
// mnemonic can still build this -- confirm.
251 uint64_t xgetbv(uint32_t xcr
) {
253 __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(eax
), "=d"(edx
) : "c"(xcr
));
// The cast binds before the shift, so edx is widened to 64 bits first and no
// high bits are lost.
254 return (uint64_t)(edx
) << 32 | eax
;
259 } // namespace mozilla