1 /* vim: set shiftwidth=4 tabstop=8 autoindent cindent expandtab: */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 /* compile-time and runtime tests for whether to use SSE instructions */
11 // cpuid.h is available on gcc 4.3 and higher on i386 and x86_64
13 #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))
14 // MSVC 2005 or newer on x86-32 or x86-64
20 // SSE.h has parallel #ifs which declare MOZILLA_SSE_HAVE_CPUID_DETECTION.
21 // We can't declare these functions in the header file, however, because
22 // <intrin.h> conflicts with <windows.h> on MSVC 2005, and some files want to
23 // include both SSE.h and <windows.h>.
// Selects one of the four CPUID output registers. The numeric value is used
// as the index into the four-element register array in has_cpuid_bits().
enum CPUIDRegister {
  eax = 0,
  ebx = 1,
  ecx = 2,
  edx = 3
};
29 static bool has_cpuid_bits(unsigned int level
, CPUIDRegister reg
,
32 unsigned int eax
, ebx
, ecx
, edx
;
33 unsigned max
= __get_cpuid_max(level
& 0x80000000u
, nullptr);
34 if (level
> max
) return false;
35 __cpuid_count(level
, 0, eax
, ebx
, ecx
, edx
);
40 return (regs
[reg
] & bits
) == bits
;
43 #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))
// Selects one of the four CPUID output registers. The numeric value is used
// as the index into the four-element register array in has_cpuid_bits().
enum CPUIDRegister {
  eax = 0,
  ebx = 1,
  ecx = 2,
  edx = 3
};
47 static bool has_cpuid_bits(unsigned int level
, CPUIDRegister reg
,
49 // Check that the level in question is supported.
51 __cpuid(regs
, level
& 0x80000000u
);
52 if (unsigned(regs
[0]) < level
) return false;
54 // "The __cpuid intrinsic clears the ECX register before calling the cpuid
57 return (unsigned(regs
[reg
]) & bits
) == bits
;
60 #elif (defined(__GNUC__) || defined(__SUNPRO_CC)) && \
61 (defined(__i386) || defined(__x86_64__))
// Selects one of the four CPUID output registers. The numeric value is used
// as the index into the four-element register array in has_cpuid_bits().
enum CPUIDRegister {
  eax = 0,
  ebx = 1,
  ecx = 2,
  edx = 3
};
66 static void moz_cpuid(int CPUInfo
[4], int InfoType
) {
67 asm("xchg %esi, %ebx\n"
68 "xor %ecx, %ecx\n" // ecx is the sub-leaf (we only ever need 0)
71 "movl %ebx, 4(%edi)\n"
72 "movl %ecx, 8(%edi)\n"
73 "movl %edx, 12(%edi)\n"
76 : "a"(InfoType
), // %eax
78 : "%ecx", "%edx", "%esi");
81 static void moz_cpuid(int CPUInfo
[4], int InfoType
) {
82 asm("xchg %rsi, %rbx\n"
83 "xor %ecx, %ecx\n" // ecx is the sub-leaf (we only ever need 0)
86 "movl %ebx, 4(%rdi)\n"
87 "movl %ecx, 8(%rdi)\n"
88 "movl %edx, 12(%rdi)\n"
91 : "a"(InfoType
), // %eax
93 : "%ecx", "%edx", "%rsi");
97 static bool has_cpuid_bits(unsigned int level
, CPUIDRegister reg
,
99 // Check that the level in question is supported.
100 volatile int regs
[4];
101 moz_cpuid((int*)regs
, level
& 0x80000000u
);
102 if (unsigned(regs
[0]) < level
) return false;
104 moz_cpuid((int*)regs
, level
);
105 return (unsigned(regs
[reg
]) & bits
) == bits
;
108 #endif // end CPUID declarations
114 namespace sse_private
{
116 #if defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
118 # if !defined(MOZILLA_PRESUME_MMX)
119 bool mmx_enabled
= has_cpuid_bits(1u, edx
, (1u << 23));
122 # if !defined(MOZILLA_PRESUME_SSE)
123 bool sse_enabled
= has_cpuid_bits(1u, edx
, (1u << 25));
126 # if !defined(MOZILLA_PRESUME_SSE2)
127 bool sse2_enabled
= has_cpuid_bits(1u, edx
, (1u << 26));
130 # if !defined(MOZILLA_PRESUME_SSE3)
131 bool sse3_enabled
= has_cpuid_bits(1u, ecx
, (1u << 0));
134 # if !defined(MOZILLA_PRESUME_SSSE3)
135 bool ssse3_enabled
= has_cpuid_bits(1u, ecx
, (1u << 9));
138 # if !defined(MOZILLA_PRESUME_SSE4A)
139 bool sse4a_enabled
= has_cpuid_bits(0x80000001u
, ecx
, (1u << 6));
142 # if !defined(MOZILLA_PRESUME_SSE4_1)
143 bool sse4_1_enabled
= has_cpuid_bits(1u, ecx
, (1u << 19));
146 # if !defined(MOZILLA_PRESUME_SSE4_2)
147 bool sse4_2_enabled
= has_cpuid_bits(1u, ecx
, (1u << 20));
150 # if !defined(MOZILLA_PRESUME_AVX) || !defined(MOZILLA_PRESUME_AVX2)
151 static bool has_avx() {
152 # if defined(MOZILLA_PRESUME_AVX)
155 const unsigned AVX
= 1u << 28;
156 const unsigned OSXSAVE
= 1u << 27;
157 const unsigned XSAVE
= 1u << 26;
159 const unsigned XMM_STATE
= 1u << 1;
160 const unsigned YMM_STATE
= 1u << 2;
161 const unsigned AVX_STATE
= XMM_STATE
| YMM_STATE
;
163 return has_cpuid_bits(1u, ecx
, AVX
| OSXSAVE
| XSAVE
) &&
164 // ensure the OS supports XSAVE of YMM registers
165 (xgetbv(0) & AVX_STATE
) == AVX_STATE
;
166 # endif // MOZILLA_PRESUME_AVX
168 # endif // !MOZILLA_PRESUME_AVX || !MOZILLA_PRESUME_AVX2
170 # if !defined(MOZILLA_PRESUME_AVX)
171 bool avx_enabled
= has_avx();
174 # if !defined(MOZILLA_PRESUME_AVX2)
175 bool avx2_enabled
= has_avx() && has_cpuid_bits(7u, ebx
, (1u << 5));
178 # if !defined(MOZILLA_PRESUME_AES)
179 bool aes_enabled
= has_cpuid_bits(1u, ecx
, (1u << 25));
182 bool has_constant_tsc
= has_cpuid_bits(0x80000007u
, edx
, (1u << 8));
186 } // namespace sse_private
190 uint64_t xgetbv(uint32_t xcr
) {
192 __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(eax
), "=d"(edx
) : "c"(xcr
));
193 return (uint64_t)(edx
) << 32 | eax
;
198 } // namespace mozilla