Win32: use IsProcessorFeaturePresent() to detect available instructions
[vlc/solaris.git] / src / misc / cpu.c
blob3640865e9ccaa6d4c8619054c6cf0883fcdc0934
1 /*****************************************************************************
2 * cpu.c: CPU detection code
3 *****************************************************************************
4 * Copyright (C) 1998-2004 the VideoLAN team
5 * $Id$
7 * Authors: Samuel Hocevar <sam@zoy.org>
8 * Christophe Massiot <massiot@via.ecp.fr>
9 * Eugenio Jarosiewicz <ej0@cise.ufl.edu>
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
24 *****************************************************************************/
26 /*****************************************************************************
27 * Preamble
28 *****************************************************************************/
29 #ifdef HAVE_CONFIG_H
30 # include "config.h"
31 #endif
33 #include <vlc_common.h>
34 #include <vlc_cpu.h>
#include <sys/types.h>
#include <errno.h>
#ifndef WIN32
#include <unistd.h>
#include <sys/wait.h>
#include <signal.h>
#else
#include <windows.h>
#include <winbase.h>
#define PF_SSE3_INSTRUCTIONS_AVAILABLE 13
#endif
48 #include "libvlc.h"
50 #if defined(__APPLE__) && (defined(__ppc__) || defined(__ppc64__))
51 #include <sys/sysctl.h>
52 #endif
54 #if defined( __i386__ ) || defined( __x86_64__ ) || defined( __powerpc__ ) \
55 || defined( __ppc__ ) || defined( __ppc64__ ) || defined( __powerpc64__ )
56 # ifndef WIN32
/**
 * Waits for a probe child process and reports whether it exited cleanly.
 *
 * The child is expected to execute an instruction of the probed extension
 * and then call _exit(0); any other outcome is treated as "not usable".
 *
 * @param psz_capability human-readable extension name, used in the warning
 * @param pid value returned by fork() in the parent (-1 if fork() failed)
 *
 * @return true if the child exited normally with status 0; false if fork()
 * failed, if the child could not be waited for, or if it terminated in any
 * other way (in which case a warning is printed on stderr).
 */
static bool check_OS_capability( const char *psz_capability, pid_t pid )
{
    int status;

    if( pid == -1 )
        return false; /* fail safe :-/ */

    /* Retry only when the wait was interrupted by a signal. Any other error
     * (e.g. ECHILD when the child has already been reaped) is permanent, and
     * retrying it would spin forever. */
    while( waitpid( pid, &status, 0 ) == -1 )
    {
        if( errno != EINTR )
            return false;
    }

    if( WIFEXITED( status ) && WEXITSTATUS( status ) == 0 )
        return true;

    fprintf( stderr, "warning: your CPU has %s instructions, but not your "
                     "operating system.\n", psz_capability );
    fprintf( stderr, " some optimizations will be disabled unless "
                     "you upgrade your OS\n" );
    return false;
}
/* Executes `code` (inline assembly using the probed extension) in a forked
 * child whose SIGILL disposition is reset to the default, then sets `flag`
 * in the caller's i_capabilities if check_OS_capability() reports that the
 * child exited cleanly. A failed fork() is handled by check_OS_capability(). */
#   define check_capability(name, flag, code)          \
    do {                                                \
        const pid_t probe = fork();                     \
        if( !probe )                                    \
        {                                               \
            signal( SIGILL, SIG_DFL );                  \
            __asm__ __volatile__ ( code : : );          \
            _exit( 0 );                                 \
        }                                               \
        if( check_OS_capability( (name), probe ) )      \
            i_capabilities |= (flag);                   \
    } while( 0 )
89 # else /* WIN32 */
/* Win32 variant: no child process is used; `flag` is set unconditionally in
 * the caller's i_capabilities and the `name` and `code` arguments are
 * ignored. */
#   define check_capability(name, flag, code)          \
    do {                                                \
        i_capabilities = i_capabilities | (flag);       \
    } while( 0 )
94 # endif
95 #endif
97 /*****************************************************************************
98 * CPUCapabilities: get the CPU capabilities
99 *****************************************************************************
100 * This function is called to list extensions the CPU may have.
101 *****************************************************************************/
102 uint32_t CPUCapabilities( void )
104 uint32_t i_capabilities = 0;
106 #if defined( __i386__ ) || defined( __x86_64__ )
107 unsigned int i_eax, i_ebx, i_ecx, i_edx;
108 bool b_amd;
110 /* Needed for x86 CPU capabilities detection */
111 # if defined( __x86_64__ )
112 # define cpuid( reg ) \
113 asm volatile ( "cpuid\n\t" \
114 "movl %%ebx,%1\n\t" \
115 : "=a" ( i_eax ), \
116 "=b" ( i_ebx ), \
117 "=c" ( i_ecx ), \
118 "=d" ( i_edx ) \
119 : "a" ( reg ) \
120 : "cc" );
121 # else
122 # define cpuid( reg ) \
123 asm volatile ( "push %%ebx\n\t" \
124 "cpuid\n\t" \
125 "movl %%ebx,%1\n\t" \
126 "pop %%ebx\n\t" \
127 : "=a" ( i_eax ), \
128 "=r" ( i_ebx ), \
129 "=c" ( i_ecx ), \
130 "=d" ( i_edx ) \
131 : "a" ( reg ) \
132 : "cc" );
133 # endif
134 /* Check if the OS really supports the requested instructions */
135 # if defined (__i386__) && !defined (__i486__) && !defined (__i586__) \
136 && !defined (__i686__) && !defined (__pentium4__) \
137 && !defined (__k6__) && !defined (__athlon__) && !defined (__k8__)
138 /* check if cpuid instruction is supported */
139 asm volatile ( "push %%ebx\n\t"
140 "pushf\n\t"
141 "pop %%eax\n\t"
142 "movl %%eax, %%ebx\n\t"
143 "xorl $0x200000, %%eax\n\t"
144 "push %%eax\n\t"
145 "popf\n\t"
146 "pushf\n\t"
147 "pop %%eax\n\t"
148 "movl %%ebx,%1\n\t"
149 "pop %%ebx\n\t"
150 : "=a" ( i_eax ),
151 "=r" ( i_ebx )
153 : "cc" );
155 if( i_eax == i_ebx )
156 goto out;
157 # endif
159 /* the CPU supports the CPUID instruction - get its level */
160 cpuid( 0x00000000 );
162 # if defined (__i386__) && !defined (__i586__) \
163 && !defined (__i686__) && !defined (__pentium4__) \
164 && !defined (__k6__) && !defined (__athlon__) && !defined (__k8__)
165 if( !i_eax )
166 goto out;
167 #endif
169 /* borrowed from mpeg2dec */
170 b_amd = ( i_ebx == 0x68747541 ) && ( i_ecx == 0x444d4163 )
171 && ( i_edx == 0x69746e65 );
173 /* test for the MMX flag */
174 cpuid( 0x00000001 );
176 # if !defined (__MMX__)
177 if( ! (i_edx & 0x00800000) )
178 goto out;
179 # endif
180 i_capabilities |= CPU_CAPABILITY_MMX;
181 # if defined (__SSE__)
182 i_capabilities |= CPU_CAPABILITY_MMXEXT | CPU_CAPABILITY_SSE;
183 # else
184 if( i_edx & 0x02000000 )
186 i_capabilities |= CPU_CAPABILITY_MMXEXT;
188 # ifdef CAN_COMPILE_SSE
189 # ifdef WIN32
190 if( IsProcessorFeaturePresent( PF_XMMI_INSTRUCTIONS_AVAILABLE ) )
191 i_capabilities |= CPU_CAPABILITY_SSE;
192 # else
193 check_capability( "SSE", CPU_CAPABILITY_SSE,
194 "xorps %%xmm0,%%xmm0\n" );
195 # endif
196 # endif
198 # endif
200 # if defined (__SSE2__)
201 i_capabilities |= CPU_CAPABILITY_SSE2;
202 # elif defined (CAN_COMPILE_SSE2)
203 if( i_edx & 0x04000000 )
205 # ifdef WIN32
206 if( IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE ) )
207 i_capabilities |= CPU_CAPABILITY_SSE2;
208 # else
209 check_capability( "SSE2", CPU_CAPABILITY_SSE2,
210 "movupd %%xmm0, %%xmm0\n" );
211 # endif
213 # endif
215 # if defined (__SSE3__)
216 i_capabilities |= CPU_CAPABILITY_SSE3;
217 # elif defined (CAN_COMPILE_SSE3)
218 if( i_ecx & 0x00000001 )
220 # ifdef WIN32
221 if( IsProcessorFeaturePresent( PF_SSE3_INSTRUCTIONS_AVAILABLE ) )
222 i_capabilities |= CPU_CAPABILITY_SSE3;
223 # else
224 check_capability( "SSE3", CPU_CAPABILITY_SSE3,
225 "movsldup %%xmm1, %%xmm0\n" );
226 # endif
228 # endif
230 # if defined (__SSSE3__)
231 i_capabilities |= CPU_CAPABILITY_SSSE3;
232 # elif defined (CAN_COMPILE_SSSE3)
233 # ifdef WIN32
234 /* FIXME: IsProcessorFeaturePresent can't check for SSSE3 */
235 # else
236 if( i_ecx & 0x00000200 )
237 check_capability( "SSSE3", CPU_CAPABILITY_SSSE3,
238 "pabsw %%xmm1, %%xmm0\n" );
239 # endif
240 # endif
242 # if defined (__SSE4_1__)
243 i_capabilities |= CPU_CAPABILITY_SSE4_1;
244 # elif defined (CAN_COMPILE_SSE4_1)
245 # ifdef WIN32
246 /* FIXME: IsProcessorFeaturePresent can't check for SSE4.1 */
247 # else
248 if( i_ecx & 0x00080000 )
249 check_capability( "SSE4.1", CPU_CAPABILITY_SSE4_1,
250 "pmaxsb %%xmm1, %%xmm0\n" );
251 # endif
252 # endif
254 # if defined (__SSE4_2__)
255 i_capabilities |= CPU_CAPABILITY_SSE4_2;
256 # elif defined (CAN_COMPILE_SSE4_2)
257 # ifdef WIN32
258 /* FIXME: IsProcessorFeaturePresent can't check for SSE4.2 */
259 # else
260 if( i_ecx & 0x00100000 )
261 check_capability( "SSE4.2", CPU_CAPABILITY_SSE4_2,
262 "pcmpgtq %%xmm1, %%xmm0\n" );
263 # endif
264 # endif
266 /* test for additional capabilities */
267 cpuid( 0x80000000 );
269 if( i_eax < 0x80000001 )
270 goto out;
272 /* list these additional capabilities */
273 cpuid( 0x80000001 );
275 # if defined (__3dNOW__)
276 i_capabilities |= CPU_CAPABILITY_3DNOW;
277 # elif defined (CAN_COMPILE_3DNOW)
279 if( i_edx & 0x80000000 )
281 # ifdef WIN32
282 if( IsProcessorFeaturePresent( PF_3DNOW_INSTRUCTIONS_AVAILABLE ) )
283 i_capabilities |= CPU_CAPABILITY_3DNOW;
284 # else
285 check_capability( "3D Now!", CPU_CAPABILITY_3DNOW,
286 "pfadd %%mm0,%%mm0\n" "femms\n" );
287 # endif
289 # endif
291 if( b_amd && ( i_edx & 0x00400000 ) )
293 i_capabilities |= CPU_CAPABILITY_MMXEXT;
295 out:
297 #elif defined( __arm__ )
298 # if defined( __ARM_NEON__ )
299 i_capabilities |= CPU_CAPABILITY_NEON;
300 # endif
302 #elif defined( __powerpc__ ) || defined( __ppc__ ) || defined( __powerpc64__ ) \
303 || defined( __ppc64__ )
305 # if defined(__APPLE__)
306 int selectors[2] = { CTL_HW, HW_VECTORUNIT };
307 int i_has_altivec = 0;
308 size_t i_length = sizeof( i_has_altivec );
309 int i_error = sysctl( selectors, 2, &i_has_altivec, &i_length, NULL, 0);
311 if( i_error == 0 && i_has_altivec != 0 )
312 i_capabilities |= CPU_CAPABILITY_ALTIVEC;
314 # elif defined( CAN_COMPILE_ALTIVEC )
315 pid_t pid = fork();
316 if( pid == 0 )
318 signal(SIGILL, SIG_DFL);
319 asm volatile ("mtspr 256, %0\n\t"
320 "vand %%v0, %%v0, %%v0"
322 : "r" (-1));
323 _exit(0);
326 if( check_OS_capability( "Altivec", pid ) )
327 i_capabilities |= CPU_CAPABILITY_ALTIVEC;
329 # endif
331 #endif
332 return i_capabilities;
/* Capability bitmask handed out by vlc_CPU(). It starts at zero; nothing in
 * this part of the file writes to it. */
uint32_t cpu_flags = 0;

/*****************************************************************************
 * vlc_CPU: get pre-computed CPU capability flags
 *****************************************************************************
 * Returns the current value of cpu_flags without recomputing anything.
 *****************************************************************************/
unsigned vlc_CPU (void)
{
    const unsigned flags = cpu_flags;

    return flags;
}
346 const struct
348 uint32_t value;
349 char name[12];
350 } cap_dirs[] = {
351 #if defined ( __i386__ ) || defined ( __x86_64__ )
352 { CPU_CAPABILITY_MMX, "mmx" },
353 { CPU_CAPABILITY_MMXEXT, "mmxext" },
354 { CPU_CAPABILITY_3DNOW, "3dnow" },
355 { CPU_CAPABILITY_SSE, "sse" },
356 #endif
357 #if defined (__ppc__) || defined (__ppc64__) || defined (__powerpc__)
358 { CPU_CAPABILITY_ALTIVEC, "altivec" },
359 #endif
360 #if defined (__arm__)
361 { CPU_CAPABILITY_NEON, "arm_neon" },
362 #endif
366 * Check if a directory name contains usable plugins w.r.t. the hardware
367 * capabilities. Loading a plugin when the hardware has insufficient
368 * capabilities may lead to illegal instructions (SIGILL) and must be avoided.
370 * @param name the name of the directory (<b>not</b> the path)
372 * @return true if the hardware has sufficient capabilities or the directory
373 * does not require any special capability; false if the running hardware has
374 * insufficient capabilities.
376 bool vlc_CPU_CheckPluginDir (const char *name)
378 const unsigned flags = vlc_CPU ();
379 for (size_t i = 0; i < sizeof (cap_dirs) / sizeof (cap_dirs[0]); i++)
381 if (strcmp (name, cap_dirs[i].name))
382 continue;
383 return (flags & cap_dirs[i].value) != 0;
385 return true;
388 static vlc_memcpy_t pf_vlc_memcpy = memcpy;
389 static vlc_memset_t pf_vlc_memset = memset;
391 void vlc_fastmem_register (vlc_memcpy_t cpy, vlc_memset_t set)
393 if (cpy)
394 pf_vlc_memcpy = cpy;
395 if (set)
396 pf_vlc_memset = set;
/**
 * vlc_memcpy: fast CPU-dependent memcpy
 *
 * Forwards the call to the implementation currently stored in pf_vlc_memcpy
 * and returns that implementation's return value.
 */
void *vlc_memcpy (void *tgt, const void *src, size_t n)
{
    void *const result = pf_vlc_memcpy (tgt, src, n);

    return result;
}
/**
 * vlc_memset: fast CPU-dependent memset
 *
 * Forwards the call to the implementation currently stored in pf_vlc_memset
 * and returns that implementation's return value.
 */
void *vlc_memset (void *tgt, int c, size_t n)
{
    void *const result = pf_vlc_memset (tgt, c, n);

    return result;
}