1 /* libFLAC - Free Lossless Audio Codec library
2 * Copyright (C) 2001-2009 Josh Coalson
3 * Copyright (C) 2011-2014 Xiph.Org Foundation
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * - Neither the name of the Xiph.org Foundation nor the names of its
17 * contributors may be used to endorse or promote products derived from
18 * this software without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
24 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 #include "private/cpu.h"
44 #if defined FLAC__CPU_IA32
47 static void disable_sse(FLAC__CPUInfo
*info
)
49 info
->ia32
.sse
= false;
50 info
->ia32
.sse2
= false;
51 info
->ia32
.sse3
= false;
52 info
->ia32
.ssse3
= false;
53 info
->ia32
.sse41
= false;
54 info
->ia32
.sse42
= false;
57 static void disable_avx(FLAC__CPUInfo
*info
)
59 info
->ia32
.avx
= false;
60 info
->ia32
.avx2
= false;
61 info
->ia32
.fma
= false;
64 #elif defined FLAC__CPU_X86_64
66 static void disable_avx(FLAC__CPUInfo
*info
)
68 info
->x86
.avx
= false;
69 info
->x86
.avx2
= false;
70 info
->x86
.fma
= false;
74 #if defined (__NetBSD__) || defined(__OpenBSD__)
75 #include <sys/param.h>
76 #include <sys/sysctl.h>
77 #include <machine/cpu.h>
80 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__) || defined(__APPLE__)
81 #include <sys/types.h>
82 #include <sys/sysctl.h>
/*
 * Feature bits reported in EDX by CPUID with AX=00000001.
 * Each mask is written as a shift so the bit position is explicit.
 */
static const unsigned FLAC__CPUINFO_IA32_CPUID_CMOV = 1u << 15; /* 0x00008000 */
static const unsigned FLAC__CPUINFO_IA32_CPUID_MMX  = 1u << 23; /* 0x00800000 */
static const unsigned FLAC__CPUINFO_IA32_CPUID_FXSR = 1u << 24; /* 0x01000000 */
static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE  = 1u << 25; /* 0x02000000 */
static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE2 = 1u << 26; /* 0x04000000 */
/*
 * Feature bits reported in ECX by CPUID with AX=00000001.
 * Each mask is written as a shift so the bit position is explicit.
 */
static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE3  = 1u << 0;  /* 0x00000001 */
static const unsigned FLAC__CPUINFO_IA32_CPUID_SSSE3 = 1u << 9;  /* 0x00000200 */
static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE41 = 1u << 19; /* 0x00080000 */
static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE42 = 1u << 20; /* 0x00100000 */
100 #if defined FLAC__AVX_SUPPORTED
/* Feature bits reported in ECX by CPUID with AX=00000001 */
static const unsigned FLAC__CPUINFO_IA32_CPUID_FMA     = 1u << 12; /* 0x00001000 */
static const unsigned FLAC__CPUINFO_IA32_CPUID_OSXSAVE = 1u << 27; /* 0x08000000 */
static const unsigned FLAC__CPUINFO_IA32_CPUID_AVX     = 1u << 28; /* 0x10000000 */
/* Feature bit reported in EBX by CPUID with AX=00000007 */
static const unsigned FLAC__CPUINFO_IA32_CPUID_AVX2    = 1u << 5;  /* 0x00000020 */
110 * Extra stuff needed for detection of OS support for SSE on IA-32
112 #if defined(FLAC__CPU_IA32) && !defined FLAC__NO_ASM && (defined FLAC__HAS_NASM || defined FLAC__HAS_X86INTRIN) && !FLAC__SSE_OS
113 # if defined(__linux__)
115 * If the OS doesn't support SSE, we will get here with a SIGILL. We
116 * modify the return address to jump over the offending SSE instruction
117 * and also the operation following it that indicates the instruction
118 * executed successfully. In this way we use no global variables and
122 * 3 bytes for "xorps xmm0,xmm0"
123 * 3 bytes for estimate of how long the following "inc var" instruction is
124 * 6 bytes extra in case our estimate is wrong
125 * 12 bytes puts us in the NOP "landing zone"
127 # include <sys/ucontext.h>
128 static void sigill_handler_sse_os(int signal
, siginfo_t
*si
, void *uc
)
130 (void)signal
, (void)si
;
131 ((ucontext_t
*)uc
)->uc_mcontext
.gregs
[14/*REG_EIP*/] += 3 + 3 + 6;
133 # elif defined(_MSC_VER)
134 # include <windows.h>
139 void FLAC__cpu_info(FLAC__CPUInfo
*info
)
144 #ifdef FLAC__CPU_IA32
145 FLAC__bool ia32_fxsr
= false;
146 FLAC__bool ia32_osxsave
= false;
147 (void) ia32_fxsr
; (void) ia32_osxsave
; /* to avoid warnings about unused variables */
148 memset(info
, 0, sizeof(*info
));
149 info
->type
= FLAC__CPUINFO_TYPE_IA32
;
150 #if !defined FLAC__NO_ASM && (defined FLAC__HAS_NASM || defined FLAC__HAS_X86INTRIN)
151 info
->use_asm
= true; /* we assume a minimum of 80386 with FLAC__CPU_IA32 */
152 #ifdef FLAC__HAS_X86INTRIN
153 if(!FLAC__cpu_have_cpuid_x86())
156 if(!FLAC__cpu_have_cpuid_asm_ia32())
160 /* http://www.sandpile.org/x86/cpuid.htm */
161 #ifdef FLAC__HAS_X86INTRIN
162 FLAC__uint32 flags_eax
, flags_ebx
, flags_ecx
, flags_edx
;
163 FLAC__cpu_info_x86(0, &flags_eax
, &flags_ebx
, &flags_ecx
, &flags_edx
);
164 info
->ia32
.intel
= (flags_ebx
== 0x756E6547 && flags_edx
== 0x49656E69 && flags_ecx
== 0x6C65746E)? true : false; /* GenuineIntel */
165 FLAC__cpu_info_x86(1, &flags_eax
, &flags_ebx
, &flags_ecx
, &flags_edx
);
167 FLAC__uint32 flags_ecx
, flags_edx
;
168 FLAC__cpu_info_asm_ia32(&flags_edx
, &flags_ecx
);
170 info
->ia32
.cmov
= (flags_edx
& FLAC__CPUINFO_IA32_CPUID_CMOV
)? true : false;
171 info
->ia32
.mmx
= (flags_edx
& FLAC__CPUINFO_IA32_CPUID_MMX
)? true : false;
172 ia32_fxsr
= (flags_edx
& FLAC__CPUINFO_IA32_CPUID_FXSR
)? true : false;
173 info
->ia32
.sse
= (flags_edx
& FLAC__CPUINFO_IA32_CPUID_SSE
)? true : false;
174 info
->ia32
.sse2
= (flags_edx
& FLAC__CPUINFO_IA32_CPUID_SSE2
)? true : false;
175 info
->ia32
.sse3
= (flags_ecx
& FLAC__CPUINFO_IA32_CPUID_SSE3
)? true : false;
176 info
->ia32
.ssse3
= (flags_ecx
& FLAC__CPUINFO_IA32_CPUID_SSSE3
)? true : false;
177 info
->ia32
.sse41
= (flags_ecx
& FLAC__CPUINFO_IA32_CPUID_SSE41
)? true : false;
178 info
->ia32
.sse42
= (flags_ecx
& FLAC__CPUINFO_IA32_CPUID_SSE42
)? true : false;
179 #if defined FLAC__HAS_X86INTRIN && defined FLAC__AVX_SUPPORTED
180 ia32_osxsave
= (flags_ecx
& FLAC__CPUINFO_IA32_CPUID_OSXSAVE
)? true : false;
181 info
->ia32
.avx
= (flags_ecx
& FLAC__CPUINFO_IA32_CPUID_AVX
)? true : false;
182 info
->ia32
.fma
= (flags_ecx
& FLAC__CPUINFO_IA32_CPUID_FMA
)? true : false;
183 FLAC__cpu_info_x86(7, &flags_eax
, &flags_ebx
, &flags_ecx
, &flags_edx
);
184 info
->ia32
.avx2
= (flags_ebx
& FLAC__CPUINFO_IA32_CPUID_AVX2
)? true : false;
189 fprintf(stderr
, "CPU info (IA-32):\n");
190 fprintf(stderr
, " CMOV ....... %c\n", info
->ia32
.cmov
? 'Y' : 'n');
191 fprintf(stderr
, " MMX ........ %c\n", info
->ia32
.mmx
? 'Y' : 'n');
192 fprintf(stderr
, " SSE ........ %c\n", info
->ia32
.sse
? 'Y' : 'n');
193 fprintf(stderr
, " SSE2 ....... %c\n", info
->ia32
.sse2
? 'Y' : 'n');
194 fprintf(stderr
, " SSE3 ....... %c\n", info
->ia32
.sse3
? 'Y' : 'n');
195 fprintf(stderr
, " SSSE3 ...... %c\n", info
->ia32
.ssse3
? 'Y' : 'n');
196 fprintf(stderr
, " SSE41 ...... %c\n", info
->ia32
.sse41
? 'Y' : 'n');
197 fprintf(stderr
, " SSE42 ...... %c\n", info
->ia32
.sse42
? 'Y' : 'n');
198 # if defined FLAC__HAS_X86INTRIN && defined FLAC__AVX_SUPPORTED
199 fprintf(stderr
, " AVX ........ %c\n", info
->ia32
.avx
? 'Y' : 'n');
200 fprintf(stderr
, " FMA ........ %c\n", info
->ia32
.fma
? 'Y' : 'n');
201 fprintf(stderr
, " AVX2 ....... %c\n", info
->ia32
.avx2
? 'Y' : 'n');
206 * now have to check for OS support of SSE instructions
210 /* assume user knows better than us; turn it off */
212 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__) || defined(__APPLE__)
215 /* at least one of these must work: */
216 len
= sizeof(sse
); sse
= sse
|| (sysctlbyname("hw.instruction_sse", &sse
, &len
, NULL
, 0) == 0 && sse
);
217 len
= sizeof(sse
); sse
= sse
|| (sysctlbyname("hw.optional.sse" , &sse
, &len
, NULL
, 0) == 0 && sse
); /* __APPLE__ ? */
220 #elif defined(__NetBSD__) || defined (__OpenBSD__)
221 # if __NetBSD_Version__ >= 105250000 || (defined __OpenBSD__)
222 int val
= 0, mib
[2] = { CTL_MACHDEP
, CPU_SSE
};
223 size_t len
= sizeof(val
);
224 if(sysctl(mib
, 2, &val
, &len
, NULL
, 0) < 0 || !val
)
226 else { /* double-check SSE2 */
229 if(sysctl(mib
, 2, &val
, &len
, NULL
, 0) < 0 || !val
) {
231 info
->ia32
.sse
= true;
237 #elif defined(__ANDROID__) || defined(ANDROID)
238 /* no need to check OS SSE support */
239 #elif defined(__linux__) && !FLAC__SSE_OS
241 struct sigaction sigill_save
;
242 struct sigaction sigill_sse
;
243 sigill_sse
.sa_sigaction
= sigill_handler_sse_os
;
244 sigemptyset(&sigill_sse
.sa_mask
);
245 sigill_sse
.sa_flags
= SA_SIGINFO
| SA_RESETHAND
; /* SA_RESETHAND just in case our SIGILL return jump breaks, so we don't get stuck in a loop */
246 if(0 == sigaction(SIGILL
, &sigill_sse
, &sigill_save
))
248 /* http://www.ibiblio.org/gferg/ldp/GCC-Inline-Assembly-HOWTO.html */
249 /* see sigill_handler_sse_os() for an explanation of the following: */
251 "xorps %%xmm0,%%xmm0\n\t" /* will cause SIGILL if unsupported by OS */
252 "incl %0\n\t" /* SIGILL handler will jump over this */
254 "nop\n\t" /* SIGILL jump lands here if "inc" is 9 bytes */
260 "nop\n\t" /* SIGILL jump lands here if "inc" is 3 bytes (expected) */
262 "nop" /* SIGILL jump lands here if "inc" is 1 byte */
267 sigaction(SIGILL
, &sigill_save
, NULL
);
272 #elif defined(_MSC_VER)
278 __except(EXCEPTION_EXECUTE_HANDLER
) {
279 if (_exception_code() == STATUS_ILLEGAL_INSTRUCTION
)
282 #elif defined(__GNUC__) /* MinGW goes here */
284 /* Based on the idea described in Agner Fog's manual "Optimizing subroutines in assembly language" */
285 /* In theory, not guaranteed to detect lack of OS SSE support on some future Intel CPUs, but in practice works (see the aforementioned manual) */
288 FLAC__uint32 buff
[128];
289 } __attribute__((aligned(16))) fxsr
;
290 FLAC__uint32 old_val
, new_val
;
292 memset(fxsr
.buff
, 0, sizeof (fxsr
.buff
));
294 asm volatile ("fxsave %0" : "=m" (fxsr
) : "m" (fxsr
));
295 old_val
= fxsr
.buff
[50];
296 fxsr
.buff
[50] ^= 0x0013c0de; /* change value in the buffer */
297 asm volatile ("fxrstor %0" : "=m" (fxsr
) : "m" (fxsr
)); /* try to change SSE register */
298 fxsr
.buff
[50] = old_val
; /* restore old value in the buffer */
299 asm volatile ("fxsave %0" : "=m" (fxsr
) : "m" (fxsr
)); /* old value will be overwritten if SSE register was changed */
300 new_val
= fxsr
.buff
[50]; /* == old_val if FXRSTOR didn't change SSE register and (old_val ^ 0x0013c0de) otherwise */
301 fxsr
.buff
[50] = old_val
; /* again restore old value in the buffer */
302 asm volatile ("fxrstor %0" : "=m" (fxsr
) : "m" (fxsr
)); /* restore old values of registers */
304 if ((old_val
^new_val
) == 0x0013c0de)
310 /* no way to test, disable to be safe */
314 fprintf(stderr
, " SSE OS sup . %c\n", info
->ia32
.sse
? 'Y' : 'n');
317 else /* info->ia32.sse == false */
321 * now have to check for OS support of AVX instructions
323 #ifdef FLAC__HAS_X86INTRIN
324 if(info
->ia32
.avx
&& ia32_osxsave
) {
325 FLAC__uint32 ecr
= FLAC__cpu_xgetbv_x86();
326 if ((ecr
& 0x6) != 0x6)
329 fprintf(stderr
, " AVX OS sup . %c\n", info
->ia32
.avx
? 'Y' : 'n');
332 else /* no OS AVX support */
338 info
->use_asm
= false;
344 #elif defined FLAC__CPU_X86_64
345 FLAC__bool x86_osxsave
= false;
346 (void) x86_osxsave
; /* to avoid warnings about unused variables */
347 memset(info
, 0, sizeof(*info
));
348 info
->type
= FLAC__CPUINFO_TYPE_X86_64
;
349 #if !defined FLAC__NO_ASM && defined FLAC__HAS_X86INTRIN
350 info
->use_asm
= true;
352 /* http://www.sandpile.org/x86/cpuid.htm */
353 FLAC__uint32 flags_eax
, flags_ebx
, flags_ecx
, flags_edx
;
354 FLAC__cpu_info_x86(0, &flags_eax
, &flags_ebx
, &flags_ecx
, &flags_edx
);
355 info
->x86
.intel
= (flags_ebx
== 0x756E6547 && flags_edx
== 0x49656E69 && flags_ecx
== 0x6C65746E)? true : false; /* GenuineIntel */
356 FLAC__cpu_info_x86(1, &flags_eax
, &flags_ebx
, &flags_ecx
, &flags_edx
);
357 info
->x86
.sse3
= (flags_ecx
& FLAC__CPUINFO_IA32_CPUID_SSE3
)? true : false;
358 info
->x86
.ssse3
= (flags_ecx
& FLAC__CPUINFO_IA32_CPUID_SSSE3
)? true : false;
359 info
->x86
.sse41
= (flags_ecx
& FLAC__CPUINFO_IA32_CPUID_SSE41
)? true : false;
360 info
->x86
.sse42
= (flags_ecx
& FLAC__CPUINFO_IA32_CPUID_SSE42
)? true : false;
361 #if defined FLAC__AVX_SUPPORTED
362 x86_osxsave
= (flags_ecx
& FLAC__CPUINFO_IA32_CPUID_OSXSAVE
)? true : false;
363 info
->x86
.avx
= (flags_ecx
& FLAC__CPUINFO_IA32_CPUID_AVX
)? true : false;
364 info
->x86
.fma
= (flags_ecx
& FLAC__CPUINFO_IA32_CPUID_FMA
)? true : false;
365 FLAC__cpu_info_x86(7, &flags_eax
, &flags_ebx
, &flags_ecx
, &flags_edx
);
366 info
->x86
.avx2
= (flags_ebx
& FLAC__CPUINFO_IA32_CPUID_AVX2
)? true : false;
370 fprintf(stderr
, "CPU info (x86-64):\n");
371 fprintf(stderr
, " SSE3 ....... %c\n", info
->x86
.sse3
? 'Y' : 'n');
372 fprintf(stderr
, " SSSE3 ...... %c\n", info
->x86
.ssse3
? 'Y' : 'n');
373 fprintf(stderr
, " SSE41 ...... %c\n", info
->x86
.sse41
? 'Y' : 'n');
374 fprintf(stderr
, " SSE42 ...... %c\n", info
->x86
.sse42
? 'Y' : 'n');
375 # if defined FLAC__AVX_SUPPORTED
376 fprintf(stderr
, " AVX ........ %c\n", info
->x86
.avx
? 'Y' : 'n');
377 fprintf(stderr
, " FMA ........ %c\n", info
->x86
.fma
? 'Y' : 'n');
378 fprintf(stderr
, " AVX2 ....... %c\n", info
->x86
.avx2
? 'Y' : 'n');
383 * now have to check for OS support of AVX instructions
385 if(info
->x86
.avx
&& x86_osxsave
) {
386 FLAC__uint32 ecr
= FLAC__cpu_xgetbv_x86();
387 if ((ecr
& 0x6) != 0x6)
390 fprintf(stderr
, " AVX OS sup . %c\n", info
->x86
.avx
? 'Y' : 'n');
393 else /* no OS AVX support */
396 info
->use_asm
= false;
403 info
->type
= FLAC__CPUINFO_TYPE_UNKNOWN
;
404 info
->use_asm
= false;
408 #if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
411 #include <intrin.h> /* for __cpuid() and _xgetbv() */
412 #elif defined __GNUC__ && defined HAVE_CPUID_H
413 #include <cpuid.h> /* for __get_cpuid() and __get_cpuid_max() */
416 FLAC__uint32
FLAC__cpu_have_cpuid_x86(void)
418 #ifdef FLAC__CPU_X86_64
421 # if defined _MSC_VER || defined __INTEL_COMPILER /* Do they support CPUs w/o CPUID support (or OSes that work on those CPUs)? */
422 FLAC__uint32 flags1
, flags2
;
436 if (((flags1
^flags2
) & 0x200000) != 0)
440 # elif defined __GNUC__ && defined HAVE_CPUID_H
441 if (__get_cpuid_max(0, 0) != 0)
451 void FLAC__cpu_info_x86(FLAC__uint32 level
, FLAC__uint32
*eax
, FLAC__uint32
*ebx
, FLAC__uint32
*ecx
, FLAC__uint32
*edx
)
453 #if defined _MSC_VER || defined __INTEL_COMPILER
455 int ext
= level
& 0x80000000;
456 __cpuid(cpuinfo
, ext
);
457 if((unsigned)cpuinfo
[0] < level
) {
458 *eax
= *ebx
= *ecx
= *edx
= 0;
461 #if defined FLAC__AVX_SUPPORTED
462 __cpuidex(cpuinfo
, level
, 0); /* for AVX2 detection */
464 __cpuid(cpuinfo
, level
); /* some old compilers don't support __cpuidex */
466 *eax
= cpuinfo
[0]; *ebx
= cpuinfo
[1]; *ecx
= cpuinfo
[2]; *edx
= cpuinfo
[3];
467 #elif defined __GNUC__ && defined HAVE_CPUID_H
468 FLAC__uint32 ext
= level
& 0x80000000;
469 __cpuid(ext
, *eax
, *ebx
, *ecx
, *edx
);
471 *eax
= *ebx
= *ecx
= *edx
= 0;
474 __cpuid_count(level
, 0, *eax
, *ebx
, *ecx
, *edx
);
476 *eax
= *ebx
= *ecx
= *edx
= 0;
480 FLAC__uint32
FLAC__cpu_xgetbv_x86(void)
482 #if (defined _MSC_VER || defined __INTEL_COMPILER) && defined FLAC__AVX_SUPPORTED
483 return (FLAC__uint32
)_xgetbv(0);
484 #elif defined __GNUC__
486 asm volatile (".byte 0x0f, 0x01, 0xd0" : "=a"(lo
), "=d"(hi
) : "c" (0));
493 #endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */