1 /*****************************************************************************
2 * cpu.c: CPU detection code
3 *****************************************************************************
4 * Copyright (C) 1998-2004 VLC authors and VideoLAN
7 * Authors: Samuel Hocevar <sam@zoy.org>
8 * Christophe Massiot <massiot@via.ecp.fr>
9 * Eugenio Jarosiewicz <ej0@cise.ufl.edu>
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU Lesser General Public License as published by
13 * the Free Software Foundation; either version 2.1 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public License
22 * along with this program; if not, write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
24 *****************************************************************************/
26 /*****************************************************************************
28 *****************************************************************************/
33 #include <vlc_common.h>
40 #include <sys/types.h>
50 #include <sys/sysctl.h>
53 #if defined(__OpenBSD__) && defined(__powerpc__)
54 #include <sys/param.h>
55 #include <sys/sysctl.h>
56 #include <machine/cpu.h>
/* Capability bitmask of the running CPU; vlc_CPU_init() stores its result here. */
59 static uint32_t cpu_flags
;
61 #if defined (__i386__) || defined (__x86_64__) || defined (__powerpc__) \
62 || defined (__ppc__) || defined (__ppc64__) || defined (__powerpc64__)
63 # if !defined (WIN32) && !defined (__OS2__)
/*
 * Runtime check used by vlc_CPU_init() for one instruction-set extension.
 *   name: label of the extension, printed in the warning below.
 *   func: probe function for that extension (one of the *_test functions).
 * NOTE(review): several lines of this function are not visible in this
 * excerpt (how `pid` is obtained, where `func` is called, and every return
 * statement). Presumably `func` runs in a child process and the result
 * reflects whether that child exited cleanly -- confirm against the full file.
 */
64 static bool vlc_CPU_check (const char *name
, void (*func
) (void))
/* Select the default action for SIGILL (illegal instruction). */
71 signal (SIGILL
, SIG_DFL
);
73 //__asm__ __volatile__ ( code : : input );
78 //i_capabilities |= (flag);
/* Keep calling waitpid() on `pid` for as long as it reports failure (-1). */
81 while( waitpid( pid
, &status
, 0 ) == -1 );
/* True when the waited-for process terminated normally with exit code 0.
 * NOTE(review): the statement governed by this condition is not visible. */
83 if( WIFEXITED( status
) && WEXITSTATUS( status
) == 0 )
/* Warn on stderr that the CPU offers `name` instructions but the OS does not. */
86 fprintf (stderr
, "Warning: your CPU has %s instructions, but not your "
87 "operating system.\n", name
);
88 fprintf( stderr
, " some optimizations will be disabled unless "
89 "you upgrade your OS\n" );
93 #if defined (CAN_COMPILE_SSE) && !defined (__SSE__)
/* Probe passed to vlc_CPU_check() for "SSE": executes one xorps instruction
 * on xmm0 (xmm0 and xmm1 are declared as clobbered). */
94 VLC_SSE
static void SSE_test (void)
96 asm volatile ("xorps %%xmm0,%%xmm0\n" : : : "xmm0", "xmm1");
99 #if defined (CAN_COMPILE_SSE2) && !defined (__SSE2__)
/* Probe passed to vlc_CPU_check() for "SSE2": executes one movupd instruction
 * on xmm0 (xmm0 and xmm1 are declared as clobbered). */
100 VLC_SSE
static void SSE2_test (void)
102 asm volatile ("movupd %%xmm0, %%xmm0\n" : : : "xmm0", "xmm1");
105 #if defined (CAN_COMPILE_SSE3) && !defined (__SSE3__)
/* Probe passed to vlc_CPU_check() for "SSE3": executes one movsldup
 * instruction from xmm1 into xmm0 (both declared as clobbered). */
106 VLC_SSE
static void SSE3_test (void)
108 asm volatile ("movsldup %%xmm1, %%xmm0\n" : : : "xmm0", "xmm1");
111 #if defined (CAN_COMPILE_SSSE3) && !defined (__SSSE3__)
/* Probe passed to vlc_CPU_check() for "SSSE3": executes one pabsw instruction
 * from xmm1 into xmm0 (both declared as clobbered). */
112 VLC_SSE
static void SSSE3_test (void)
114 asm volatile ("pabsw %%xmm1, %%xmm0\n" : : : "xmm0", "xmm1");
117 #if defined (CAN_COMPILE_SSE4_1) && !defined (__SSE4_1__)
/* Probe passed to vlc_CPU_check() for "SSE4.1": executes one pmaxsb
 * instruction from xmm1 into xmm0 (both declared as clobbered). */
118 VLC_SSE
static void SSE4_1_test (void)
120 asm volatile ("pmaxsb %%xmm1, %%xmm0\n" : : : "xmm0", "xmm1");
123 #if defined (CAN_COMPILE_SSE4_2) && !defined (__SSE4_2__)
/* Probe passed to vlc_CPU_check() for "SSE4.2": executes one pcmpgtq
 * instruction from xmm1 into xmm0 (both declared as clobbered). */
124 VLC_SSE
static void SSE4_2_test (void)
126 asm volatile ("pcmpgtq %%xmm1, %%xmm0\n" : : : "xmm0", "xmm1");
129 #if defined (CAN_COMPILE_3DNOW) && !defined (__3dNOW__)
/* Probe passed to vlc_CPU_check() for "3D Now!": executes pfadd on mm0
 * followed by femms (mm0 is declared as clobbered). */
130 VLC_MMX
static void ThreeD_Now_test (void)
132 asm volatile ("pfadd %%mm0,%%mm0\n" "femms\n" : : : "mm0");
136 #if defined (CAN_COMPILE_ALTIVEC)
/* Probe passed to vlc_CPU_check() for "Altivec": writes -1 to special-purpose
 * register 256 with mtspr, then executes one vand vector instruction on v0.
 * NOTE(review): SPR 256 is presumably VRSAVE -- confirm against the PowerPC ISA. */
137 static void Altivec_test (void)
139 asm volatile ("mtspr 256, %0\n" "vand %%v0, %%v0, %%v0\n" : : "r" (-1));
143 #else /* WIN32 || __OS2__ */
/* Windows / OS/2 variant: no runtime probe is performed; every check
 * evaluates to 1 (true) and `func` is never referenced. */
144 # define vlc_CPU_check(name, func) (1)
149 * Determines the CPU capabilities and stores them in cpu_flags.
150 * The result can be retrieved with vlc_CPU().
/*
 * Builds the capability bitmask in i_capabilities and finally stores it in
 * cpu_flags. Sources of information, as far as visible here:
 *   - compile-time feature macros (__SSE__, __SSE2__, ...), which set the
 *     corresponding flag unconditionally;
 *   - on x86, bits of the registers filled in by the cpuid() macro, combined
 *     with a vlc_CPU_check() probe;
 *   - on PowerPC, sysctl() (Apple/OpenBSD) or a vlc_CPU_check() probe.
 * NOTE(review): many lines of this function are not visible in this excerpt
 * (braces, the cpuid() invocations, the declaration of b_amd, early exits,
 * #else/#endif directives). Comments below describe only what is shown.
 */
152 void vlc_CPU_init (void)
154 uint32_t i_capabilities
= 0;
156 #if defined( __i386__ ) || defined( __x86_64__ )
/* Receive the EAX/EBX/ECX/EDX outputs of the cpuid() macro. */
157 unsigned int i_eax
, i_ebx
, i_ecx
, i_edx
;
160 /* Needed for x86 CPU capabilities detection */
/* i386 PIC variant: EBX is not named as an output; it is exchanged with a
 * generic register operand ("=r") via xchgl instead. Presumably because EBX
 * is reserved in position-independent code -- TODO confirm. */
161 # if defined (__i386__) && defined (__PIC__)
162 # define cpuid(reg) \
163 asm volatile ("xchgl %%ebx,%1\n\t" \
165 "xchgl %%ebx,%1\n\t" \
166 : "=a" (i_eax), "=r" (i_ebx), "=c" (i_ecx), "=d" (i_edx) \
170 # define cpuid(reg) \
171 asm volatile ("cpuid\n\t" \
172 : "=a" (i_eax), "=b" (i_ebx), "=c" (i_ecx), "=d" (i_edx) \
176 /* Check if the OS really supports the requested instructions */
177 # if defined (__i386__) && !defined (__i486__) && !defined (__i586__) \
178 && !defined (__i686__) && !defined (__pentium4__) \
179 && !defined (__k6__) && !defined (__athlon__) && !defined (__k8__)
180 /* check if cpuid instruction is supported */
181 asm volatile ( "push %%ebx\n\t"
184 "movl %%eax, %%ebx\n\t"
185 "xorl $0x200000, %%eax\n\t"
201 /* the CPU supports the CPUID instruction - get its level */
204 # if defined (__i386__) && !defined (__i586__) \
205 && !defined (__i686__) && !defined (__pentium4__) \
206 && !defined (__k6__) && !defined (__athlon__) && !defined (__k8__)
211 /* borrowed from mpeg2dec */
/* The three constants are the little-endian ASCII words "Auth", "cAMD" and
 * "enti", i.e. the pieces of the vendor string "AuthenticAMD". */
212 b_amd
= ( i_ebx
== 0x68747541 ) && ( i_ecx
== 0x444d4163 )
213 && ( i_edx
== 0x69746e65 );
215 /* test for the MMX flag */
217 # if !defined (__MMX__)
/* 0x00800000 is bit 23 of EDX. NOTE(review): the statement executed when the
 * bit is clear is not visible in this excerpt. */
218 if( ! (i_edx
& 0x00800000) )
221 i_capabilities
|= CPU_CAPABILITY_MMX
;
/* Built with SSE enabled: MMXEXT and SSE are set without a runtime test. */
223 # if defined (__SSE__)
224 i_capabilities
|= CPU_CAPABILITY_MMXEXT
| CPU_CAPABILITY_SSE
;
/* 0x02000000 is bit 25 of EDX; when set, MMXEXT is reported and SSE is
 * additionally subject to the vlc_CPU_check() probe below. */
226 if( i_edx
& 0x02000000 )
228 i_capabilities
|= CPU_CAPABILITY_MMXEXT
;
230 # ifdef CAN_COMPILE_SSE
231 if (vlc_CPU_check ("SSE", SSE_test
))
232 i_capabilities
|= CPU_CAPABILITY_SSE
;
/* SSE2 .. SSE4.2 share one pattern: if the compiler already targets the
 * extension the flag is set unconditionally; otherwise both the CPUID bit
 * and the vlc_CPU_check() probe must succeed. */
237 # if defined (__SSE2__)
238 i_capabilities
|= CPU_CAPABILITY_SSE2
;
239 # elif defined (CAN_COMPILE_SSE2)
/* 0x04000000 is bit 26 of EDX. */
240 if ((i_edx
& 0x04000000) && vlc_CPU_check ("SSE2", SSE2_test
))
241 i_capabilities
|= CPU_CAPABILITY_SSE2
;
244 # if defined (__SSE3__)
245 i_capabilities
|= CPU_CAPABILITY_SSE3
;
246 # elif defined (CAN_COMPILE_SSE3)
/* 0x00000001 is bit 0 of ECX. */
247 if ((i_ecx
& 0x00000001) && vlc_CPU_check ("SSE3", SSE3_test
))
248 i_capabilities
|= CPU_CAPABILITY_SSE3
;
251 # if defined (__SSSE3__)
252 i_capabilities
|= CPU_CAPABILITY_SSSE3
;
253 # elif defined (CAN_COMPILE_SSSE3)
/* 0x00000200 is bit 9 of ECX. */
254 if ((i_ecx
& 0x00000200) && vlc_CPU_check ("SSSE3", SSSE3_test
))
255 i_capabilities
|= CPU_CAPABILITY_SSSE3
;
258 # if defined (__SSE4_1__)
259 i_capabilities
|= CPU_CAPABILITY_SSE4_1
;
260 # elif defined (CAN_COMPILE_SSE4_1)
/* 0x00080000 is bit 19 of ECX. */
261 if ((i_ecx
& 0x00080000) && vlc_CPU_check ("SSE4.1", SSE4_1_test
))
262 i_capabilities
|= CPU_CAPABILITY_SSE4_1
;
265 # if defined (__SSE4_2__)
266 i_capabilities
|= CPU_CAPABILITY_SSE4_2
;
267 # elif defined (CAN_COMPILE_SSE4_2)
/* 0x00100000 is bit 20 of ECX. */
268 if ((i_ecx
& 0x00100000) && vlc_CPU_check ("SSE4.2", SSE4_2_test
))
269 i_capabilities
|= CPU_CAPABILITY_SSE4_2
;
272 /* test for additional capabilities */
/* NOTE(review): i_eax presumably holds the highest supported extended CPUID
 * leaf at this point (the cpuid() call is not visible); the action taken
 * when it is below 0x80000001 is not visible either. */
275 if( i_eax
< 0x80000001 )
278 /* list these additional capabilities */
281 # if defined (__3dNOW__)
282 i_capabilities
|= CPU_CAPABILITY_3DNOW
;
283 # elif defined (CAN_COMPILE_3DNOW)
/* 0x80000000 is bit 31 of EDX. */
284 if ((i_edx
& 0x80000000) && vlc_CPU_check ("3D Now!", ThreeD_Now_test
))
285 i_capabilities
|= CPU_CAPABILITY_3DNOW
;
/* 0x00400000 is bit 22 of EDX; it is only taken to mean MMXEXT when the
 * vendor check above identified an AMD CPU. */
288 if( b_amd
&& ( i_edx
& 0x00400000 ) )
290 i_capabilities
|= CPU_CAPABILITY_MMXEXT
;
294 #elif defined( __powerpc__ ) || defined( __ppc__ ) || defined( __powerpc64__ ) \
295 || defined( __ppc64__ )
/* Apple and OpenBSD: query the kernel through sysctl() instead of probing. */
297 # if defined(__APPLE__) || defined(__OpenBSD__)
298 # if defined(__OpenBSD__)
299 int selectors
[2] = { CTL_MACHDEP
, CPU_ALTIVEC
};
/* NOTE(review): presumably the non-OpenBSD alternative; the #else directive
 * separating the two declarations is not visible in this excerpt. */
301 int selectors
[2] = { CTL_HW
, HW_VECTORUNIT
};
303 int i_has_altivec
= 0;
304 size_t i_length
= sizeof( i_has_altivec
);
/* Read-only query: no new value is supplied (NULL, 0). */
305 int i_error
= sysctl( selectors
, 2, &i_has_altivec
, &i_length
, NULL
, 0);
/* AltiVec is reported only if sysctl() succeeded and returned non-zero. */
307 if( i_error
== 0 && i_has_altivec
!= 0 )
308 i_capabilities
|= CPU_CAPABILITY_ALTIVEC
;
/* Other systems: rely on the vlc_CPU_check() probe running Altivec_test. */
310 # elif defined( CAN_COMPILE_ALTIVEC )
311 if (vlc_CPU_check ("Altivec", Altivec_test
))
312 i_capabilities
|= CPU_CAPABILITY_ALTIVEC
;
/* Publish the collected flags in the file-scope cpu_flags variable. */
318 cpu_flags
= i_capabilities
;
322 * Retrieves pre-computed CPU capability flags
/*
 * Accessor for the CPU capability flags (returned as an unsigned bitmask).
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably it returns cpu_flags.
 */
324 unsigned vlc_CPU (void)
326 /* On Windows and OS/2,
327 * initialized from DllMain() and _DLL_InitTerm() respectively, instead */
328 #if !defined(WIN32) && !defined(__OS2__)
/* Run vlc_CPU_init() through pthread_once(), so detection happens a single
 * time no matter how often vlc_CPU() is called. */
329 static pthread_once_t once
= PTHREAD_ONCE_INIT
;
330 pthread_once (&once
, vlc_CPU_init
);
/*
 * Emits a debug message on `obj` listing, as space-separated labels, the
 * capabilities present in the bitmask returned by vlc_CPU().
 * NOTE(review): some preprocessor directives and possibly other lines of this
 * function are not visible in this excerpt.
 */
336 void vlc_CPU_dump (vlc_object_t
*obj
)
338 const unsigned flags
= vlc_CPU();
/* p is advanced by each sprintf() return value, so it always points just
 * past the text written to buf so far. The labels visible below add up to
 * at most 62 characters on x86, well within the 200-byte buffer. */
339 char buf
[200], *p
= buf
;
/* Appends "<string> " to buf when `capability` is set in flags. The
 * expansion is a bare `if` without braces, so it must only be used as a
 * stand-alone statement. */
341 #define PRINT_CAPABILITY( capability, string ) \
342 if (flags & (capability)) \
343 p += sprintf (p, "%s ", (string) )
345 #if defined (__i386__) || defined (__x86_64__)
346 PRINT_CAPABILITY(CPU_CAPABILITY_MMX
, "MMX");
347 PRINT_CAPABILITY(CPU_CAPABILITY_3DNOW
, "3DNow!");
348 PRINT_CAPABILITY(CPU_CAPABILITY_MMXEXT
, "MMXEXT");
349 PRINT_CAPABILITY(CPU_CAPABILITY_SSE
, "SSE");
350 PRINT_CAPABILITY(CPU_CAPABILITY_SSE2
, "SSE2");
351 PRINT_CAPABILITY(CPU_CAPABILITY_SSE3
, "SSE3");
352 PRINT_CAPABILITY(CPU_CAPABILITY_SSSE3
, "SSSE3");
353 PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_1
, "SSE4.1");
354 PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_2
, "SSE4.2");
355 PRINT_CAPABILITY(CPU_CAPABILITY_SSE4A
, "SSE4A");
357 #elif defined (__powerpc__) || defined (__ppc__) || defined (__ppc64__)
358 PRINT_CAPABILITY(CPU_CAPABILITY_ALTIVEC
, "AltiVec");
360 #elif defined (__arm__)
361 PRINT_CAPABILITY(CPU_CAPABILITY_NEON
, "NEONv1");
/* NOTE(review): any condition guarding the FPU label is not visible here. */
366 p
+= sprintf (p
, "FPU ");
370 msg_Dbg (obj
, "CPU has capabilities %s", buf
);
/* Copy routine that vlc_memcpy() forwards to; initialised to the C library memcpy. */
374 static vlc_memcpy_t pf_vlc_memcpy
= memcpy
;
/*
 * Registers `cpy` as a memory-copy implementation; `cpy` must not be NULL
 * (enforced by the assertion).
 * NOTE(review): the statement that stores `cpy` is not visible in this
 * excerpt; presumably it is assigned to pf_vlc_memcpy.
 */
376 void vlc_fastmem_register (vlc_memcpy_t cpy
)
378 assert (cpy
!= NULL
);
383 * vlc_memcpy: fast CPU-dependent memcpy
/*
 * Copies `n` bytes from `src` to `tgt` by forwarding to the routine currently
 * held in pf_vlc_memcpy, and returns that routine's result unchanged.
 */
385 void *vlc_memcpy (void *tgt
, const void *src
, size_t n
)
387 return pf_vlc_memcpy (tgt
, src
, n
);