1 /*****************************************************************************
2 * cpu.c: CPU detection code
3 *****************************************************************************
4 * Copyright (C) 1998-2004 the VideoLAN team
7 * Authors: Samuel Hocevar <sam@zoy.org>
8 * Christophe Massiot <massiot@via.ecp.fr>
9 * Eugenio Jarosiewicz <ej0@cise.ufl.eduEujenio>
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
24 *****************************************************************************/
26 /*****************************************************************************
28 *****************************************************************************/
33 #include <vlc_common.h>
36 #include <sys/types.h>
48 #if defined(__APPLE__)
49 #include <sys/sysctl.h>
52 #if defined(__OpenBSD__)
53 #include <sys/param.h>
54 #include <sys/sysctl.h>
55 #include <machine/cpu.h>
60 #include <sys/types.h>
61 #include <sys/processor.h>
65 #if defined( __i386__ ) || defined( __x86_64__ ) || defined( __powerpc__ ) \
66 || defined( __ppc__ ) || defined( __ppc64__ ) || defined( __powerpc64__ )
/**
 * Waits for a probe child process and reports whether it ran to completion.
 *
 * The child is expected to have executed one instruction from the instruction
 * set named by psz_capability; a clean exit (status 0) is taken to mean that
 * the operating system lets the process use those instructions.
 *
 * \param psz_capability human-readable name of the instruction set; only used
 *                       in the warning printed to stderr
 * \param pid process ID of the probe child, or -1 if it could not be created
 * \return true if the child exited normally with exit status 0, false if the
 *         child could not be created, was killed, or exited with an error
 */
static bool check_OS_capability( const char *psz_capability, pid_t pid )
{
    int status;

    if( pid == -1 )
        return false; /* fail safe :-/ */

    /* NOTE(review): this retries on every waitpid() failure, not only on
     * EINTR; a permanent error such as ECHILD would spin forever. */
    while( waitpid( pid, &status, 0 ) == -1 );

    if( WIFEXITED( status ) && WEXITSTATUS( status ) == 0 )
        return true;

    fprintf( stderr, "warning: your CPU has %s instructions, but not your "
                     "operating system.\n", psz_capability );
    fprintf( stderr, " some optimizations will be disabled unless "
                     "you upgrade your OS\n" );
    return false;
}
87 # define check_capability(name, flag, code) \
92 signal(SIGILL, SIG_DFL); \
93 __asm__ __volatile__ ( code : : ); \
96 if( check_OS_capability((name), pid )) \
97 i_capabilities |= (flag); \
101 # define check_capability(name, flag, code) \
102 i_capabilities |= (flag);
106 /*****************************************************************************
107 * CPUCapabilities: get the CPU capabilities
108 *****************************************************************************
109 * This function is called to list extensions the CPU may have.
110 *****************************************************************************/
111 uint32_t CPUCapabilities( void )
113 uint32_t i_capabilities
= 0;
115 #if defined( __i386__ ) || defined( __x86_64__ )
116 unsigned int i_eax
, i_ebx
, i_ecx
, i_edx
;
119 /* Needed for x86 CPU capabilities detection */
120 # if defined( __x86_64__ )
121 # define cpuid( reg ) \
122 asm volatile ( "cpuid\n\t" \
123 "movl %%ebx,%1\n\t" \
131 # define cpuid( reg ) \
132 asm volatile ( "push %%ebx\n\t" \
134 "movl %%ebx,%1\n\t" \
143 /* Check if the OS really supports the requested instructions */
144 # if defined (__i386__) && !defined (__i486__) && !defined (__i586__) \
145 && !defined (__i686__) && !defined (__pentium4__) \
146 && !defined (__k6__) && !defined (__athlon__) && !defined (__k8__)
147 /* check if cpuid instruction is supported */
148 asm volatile ( "push %%ebx\n\t"
151 "movl %%eax, %%ebx\n\t"
152 "xorl $0x200000, %%eax\n\t"
168 /* the CPU supports the CPUID instruction - get its level */
171 # if defined (__i386__) && !defined (__i586__) \
172 && !defined (__i686__) && !defined (__pentium4__) \
173 && !defined (__k6__) && !defined (__athlon__) && !defined (__k8__)
178 /* borrowed from mpeg2dec */
179 b_amd
= ( i_ebx
== 0x68747541 ) && ( i_ecx
== 0x444d4163 )
180 && ( i_edx
== 0x69746e65 );
182 /* test for the MMX flag */
184 # if !defined (__MMX__)
185 if( ! (i_edx
& 0x00800000) )
188 i_capabilities
|= CPU_CAPABILITY_MMX
;
190 # if defined (__SSE__)
191 i_capabilities
|= CPU_CAPABILITY_MMXEXT
| CPU_CAPABILITY_SSE
;
193 if( i_edx
& 0x02000000 )
195 i_capabilities
|= CPU_CAPABILITY_MMXEXT
;
197 # ifdef CAN_COMPILE_SSE
198 check_capability( "SSE", CPU_CAPABILITY_SSE
,
199 "xorps %%xmm0,%%xmm0\n" );
204 # if defined (__SSE2__)
205 i_capabilities
|= CPU_CAPABILITY_SSE2
;
206 # elif defined (CAN_COMPILE_SSE2)
207 if( i_edx
& 0x04000000 )
208 check_capability( "SSE2", CPU_CAPABILITY_SSE2
,
209 "movupd %%xmm0, %%xmm0\n" );
212 # if defined (__SSE3__)
213 i_capabilities
|= CPU_CAPABILITY_SSE3
;
214 # elif defined (CAN_COMPILE_SSE3)
215 if( i_ecx
& 0x00000001 )
216 check_capability( "SSE3", CPU_CAPABILITY_SSE3
,
217 "movsldup %%xmm1, %%xmm0\n" );
220 # if defined (__SSSE3__)
221 i_capabilities
|= CPU_CAPABILITY_SSSE3
;
222 # elif defined (CAN_COMPILE_SSSE3)
223 if( i_ecx
& 0x00000200 )
224 check_capability( "SSSE3", CPU_CAPABILITY_SSSE3
,
225 "pabsw %%xmm1, %%xmm0\n" );
228 # if defined (__SSE4_1__)
229 i_capabilities
|= CPU_CAPABILITY_SSE4_1
;
230 # elif defined (CAN_COMPILE_SSE4_1)
231 if( i_ecx
& 0x00080000 )
232 check_capability( "SSE4.1", CPU_CAPABILITY_SSE4_1
,
233 "pmaxsb %%xmm1, %%xmm0\n" );
236 # if defined (__SSE4_2__)
237 i_capabilities
|= CPU_CAPABILITY_SSE4_2
;
238 # elif defined (CAN_COMPILE_SSE4_2)
239 if( i_ecx
& 0x00100000 )
240 check_capability( "SSE4.2", CPU_CAPABILITY_SSE4_2
,
241 "pcmpgtq %%xmm1, %%xmm0\n" );
244 /* test for additional capabilities */
247 if( i_eax
< 0x80000001 )
250 /* list these additional capabilities */
253 # if defined (__3dNOW__)
254 i_capabilities
|= CPU_CAPABILITY_3DNOW
;
255 # elif defined (CAN_COMPILE_3DNOW)
256 if( i_edx
& 0x80000000 )
257 check_capability( "3D Now!", CPU_CAPABILITY_3DNOW
,
258 "pfadd %%mm0,%%mm0\n" "femms\n" );
261 if( b_amd
&& ( i_edx
& 0x00400000 ) )
263 i_capabilities
|= CPU_CAPABILITY_MMXEXT
;
267 #elif defined( __arm__ )
268 # if defined( __ARM_NEON__ )
269 i_capabilities
|= CPU_CAPABILITY_NEON
;
272 #elif defined( __powerpc__ ) || defined( __ppc__ ) || defined( __powerpc64__ ) \
273 || defined( __ppc64__ )
275 # if defined(__APPLE__) || defined(__OpenBSD__)
276 # if defined(__OpenBSD__)
277 int selectors
[2] = { CTL_MACHDEP
, CPU_ALTIVEC
};
279 int selectors
[2] = { CTL_HW
, HW_VECTORUNIT
};
281 int i_has_altivec
= 0;
282 size_t i_length
= sizeof( i_has_altivec
);
283 int i_error
= sysctl( selectors
, 2, &i_has_altivec
, &i_length
, NULL
, 0);
285 if( i_error
== 0 && i_has_altivec
!= 0 )
286 i_capabilities
|= CPU_CAPABILITY_ALTIVEC
;
288 # elif defined( CAN_COMPILE_ALTIVEC )
292 signal(SIGILL
, SIG_DFL
);
293 asm volatile ("mtspr 256, %0\n\t"
294 "vand %%v0, %%v0, %%v0"
300 if( check_OS_capability( "Altivec", pid
) )
301 i_capabilities
|= CPU_CAPABILITY_ALTIVEC
;
306 return i_capabilities
;
309 uint32_t cpu_flags
= 0;
312 /*****************************************************************************
313 * vlc_CPU: get pre-computed CPU capability flags
314 ****************************************************************************/
315 unsigned vlc_CPU (void)
325 #if defined ( __i386__ ) || defined ( __x86_64__ )
326 { CPU_CAPABILITY_MMX
, "mmx" },
327 { CPU_CAPABILITY_MMXEXT
, "mmxext" },
328 { CPU_CAPABILITY_3DNOW
, "3dnow" },
329 { CPU_CAPABILITY_SSE
, "sse" },
331 #if defined (__ppc__) || defined (__ppc64__) || defined (__powerpc__)
332 { CPU_CAPABILITY_ALTIVEC
, "altivec" },
334 #if defined (__arm__)
335 { CPU_CAPABILITY_NEON
, "arm_neon" },
340 * Return the number of available logical CPUs.
342 unsigned vlc_GetCPUCount(void)
344 #if defined(WIN32) && !defined(UNDER_CE)
347 if (!GetProcessAffinityMask(GetCurrentProcess(), &process_mask
, &system_mask
))
351 while (system_mask
) {
356 #elif defined(HAVE_SCHED_GETAFFINITY)
359 if (sched_getaffinity(0, sizeof(cpu
), &cpu
) < 0)
362 for (unsigned i
= 0; i
< CPU_SETSIZE
; i
++)
363 count
+= CPU_ISSET(i
, &cpu
) != 0;
365 #elif defined(__APPLE__)
367 size_t size
= sizeof(count
) ;
368 if (sysctlbyname("hw.ncpu", &count
, &size
, NULL
, 0))
369 return 1; /* Failure */
371 #elif defined(__OpenBSD__)
372 int selectors
[2] = { CTL_HW
, HW_NCPU
};
374 size_t size
= sizeof(count
) ;
375 if (sysctl(selectors
, 2, &count
, &size
, NULL
, 0))
376 return 1; /* Failure */
378 #elif defined(__SunOS)
382 processorid_t
*cpulist
;
383 processor_info_t cpuinfo
;
384 cpulist
= malloc(sizeof(processorid_t
) * sysconf(_SC_NPROCESSORS_MAX
));
385 if (!cpulist
) return 1;
386 if (pset_info(PS_MYID
, &type
, &numcpus
, cpulist
)==0)
388 for (u_int i
= 0; i
< numcpus
; i
++)
390 if (!processor_info(cpulist
[i
], &cpuinfo
))
391 count
+= (cpuinfo
.pi_state
== P_ONLINE
)?1:0;
394 count
= sysconf(_SC_NPROCESSORS_ONLN
);
397 return (count
>0)?count
:1;
399 # warning "vlc_GetCPUCount is not implemented for your platform"
405 * Check if a directory name contains usable plugins w.r.t. the hardware
406 * capabilities. Loading a plugin when the hardware has insufficient
407 * capabilities may lead to illegal instructions (SIGILL) and must be avoided.
409 * @param name the name of the directory (<b>not</b> the path)
411 * @return true if the hardware has sufficient capabilities or the directory
412 * does not require any special capability; false if the running hardware has
413 * insufficient capabilities.
415 bool vlc_CPU_CheckPluginDir (const char *name
)
417 const unsigned flags
= vlc_CPU ();
418 for (size_t i
= 0; i
< sizeof (cap_dirs
) / sizeof (cap_dirs
[0]); i
++)
420 if (strcmp (name
, cap_dirs
[i
].name
))
422 return (flags
& cap_dirs
[i
].value
) != 0;
427 static vlc_memcpy_t pf_vlc_memcpy
= memcpy
;
428 static vlc_memset_t pf_vlc_memset
= memset
;
430 void vlc_fastmem_register (vlc_memcpy_t cpy
, vlc_memset_t set
)
/**
 * vlc_memcpy: fast CPU-dependent memcpy
 *
 * Forwards the call to the routine currently stored in pf_vlc_memcpy.
 *
 * \param tgt destination buffer
 * \param src source buffer
 * \param n number of bytes to copy
 * \return whatever the underlying copy routine returns
 */
void *vlc_memcpy (void *tgt, const void *src, size_t n)
{
    return pf_vlc_memcpy (tgt, src, n);
}
/**
 * vlc_memset: fast CPU-dependent memset
 *
 * Forwards the call to the routine currently stored in pf_vlc_memset.
 *
 * \param tgt buffer to fill
 * \param c fill value, passed through unchanged to the underlying routine
 * \param n number of bytes to fill
 * \return whatever the underlying fill routine returns
 */
void *vlc_memset (void *tgt, int c, size_t n)
{
    return pf_vlc_memset (tgt, c, n);
}
/**
 * Returns an aligned pointer to newly allocated memory.
 * \param base receives the pointer that must later be passed to free();
 *             it is set to NULL when the allocation fails
 * \param alignment must be a power of 2 and a multiple of sizeof(void*)
 * \param size is the size of the usable memory returned.
 * \return the aligned pointer, or NULL on allocation failure.
 *
 * It must not be freed directly, *base must.
 */
void *vlc_memalign(void **base, size_t alignment, size_t size)
{
    assert(alignment >= sizeof(void*));
    /* Every bit below the top set bit must be clear, i.e. a power of 2. */
    for (size_t t = alignment; t > 1; t >>= 1)
        assert((t & 1) == 0);

#if defined(HAVE_POSIX_MEMALIGN)
    if (posix_memalign(base, alignment, size)) {
        *base = NULL;
        return NULL;
    }
    return *base;
#elif defined(HAVE_MEMALIGN)
    return *base = memalign(alignment, size);
#else
    /* Over-allocate by alignment - 1 bytes so that an aligned address is
     * guaranteed to exist inside the block; refuse sizes that would wrap. */
    if (size > SIZE_MAX - (alignment - 1)) {
        *base = NULL;
        return NULL;
    }

    unsigned char *p = *base = malloc(size + alignment - 1);
    if (p == NULL)
        return NULL;

    /* Round the start address up to the next multiple of alignment. */
    return (void*)((uintptr_t)(p + alignment - 1) & ~(alignment - 1));
#endif
}