Fix various typos
[vlc/asuraparaju-public.git] / src / misc / cpu.c
blobe630e9b60263f6069ea075b8403d491779bfb043
1 /*****************************************************************************
2 * cpu.c: CPU detection code
3 *****************************************************************************
4 * Copyright (C) 1998-2004 the VideoLAN team
5 * $Id$
7 * Authors: Samuel Hocevar <sam@zoy.org>
8 * Christophe Massiot <massiot@via.ecp.fr>
9 * Eugenio Jarosiewicz <ej0@cise.ufl.edu>
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
24 *****************************************************************************/
26 /*****************************************************************************
27 * Preamble
28 *****************************************************************************/
29 #ifdef HAVE_CONFIG_H
30 # include "config.h"
31 #endif
33 #include <vlc_common.h>
34 #include <vlc_cpu.h>
#include <sys/types.h>
#include <errno.h>
#ifndef WIN32
#include <unistd.h>
#include <sys/wait.h>
#include <signal.h>
#endif
#include <assert.h>
46 #include "libvlc.h"
48 #if defined(__APPLE__)
49 #include <sys/sysctl.h>
50 #endif
52 #if defined(__OpenBSD__)
53 #include <sys/param.h>
54 #include <sys/sysctl.h>
55 #include <machine/cpu.h>
56 #endif
58 #if defined(__SunOS)
59 #include <unistd.h>
60 #include <sys/types.h>
61 #include <sys/processor.h>
62 #include <sys/pset.h>
63 #endif
65 #if defined( __i386__ ) || defined( __x86_64__ ) || defined( __powerpc__ ) \
66 || defined( __ppc__ ) || defined( __ppc64__ ) || defined( __powerpc64__ )
67 # ifndef WIN32
/**
 * Wait for a forked instruction probe and report whether it succeeded.
 *
 * @param psz_capability human-readable name of the instruction set; only
 *                       used in the warning printed when the probe failed
 * @param pid            process ID of the probe as returned by fork(),
 *                       or -1 if fork() failed
 *
 * @return true if the child exited normally with status 0, false in every
 *         other case (fork failure, wait failure, crash, non-zero exit).
 */
static bool check_OS_capability( const char *psz_capability, pid_t pid )
{
    int status;

    if( pid == -1 )
        return false; /* fail safe :-/ */

    /* Only an interrupted wait is worth retrying. Any other error (e.g.
     * ECHILD because the child was already reaped) is permanent, and
     * retrying it would spin forever. */
    while( waitpid( pid, &status, 0 ) == -1 )
    {
        if( errno != EINTR )
            return false; /* outcome unknown: fail safe */
    }

    if( WIFEXITED( status ) && WEXITSTATUS( status ) == 0 )
        return true;

    fprintf( stderr, "warning: your CPU has %s instructions, but not your "
                     "operating system.\n", psz_capability );
    fprintf( stderr, " some optimizations will be disabled unless "
                     "you upgrade your OS\n" );
    return false;
}
/* check_capability( name, flag, code ): execute the assembly string `code`
 * in a forked child with the default SIGILL disposition restored, then OR
 * `flag` into the caller's local `i_capabilities` if check_OS_capability()
 * reports that the child exited cleanly. `name` is only forwarded for the
 * warning message. */
# define check_capability(name, flag, code)                 \
    do {                                                    \
        const pid_t probe_pid = fork();                     \
        if( probe_pid == 0 )                                \
        {   /* child: run the probe and leave */            \
            signal( SIGILL, SIG_DFL );                      \
            __asm__ __volatile__ ( code : : );              \
            _exit( 0 );                                     \
        }                                                   \
        if( check_OS_capability( (name), probe_pid ) )      \
            i_capabilities |= (flag);                       \
    } while(0)
100 # else /* WIN32 */
/* WIN32 variant of check_capability(): no probe is run, `flag` is ORed into
 * the caller's local `i_capabilities` unconditionally; `name` and `code` are
 * ignored. Wrapped in do/while(0) with no trailing semicolon so that it
 * behaves as a single statement, including in front of an `else`. */
# define check_capability(name, flag, code) \
    do { i_capabilities |= (flag); } while(0)
103 # endif
104 #endif
106 /*****************************************************************************
107 * CPUCapabilities: get the CPU capabilities
108 *****************************************************************************
109 * This function is called to list extensions the CPU may have.
110 *****************************************************************************/
/* CPUCapabilities: build the CPU_CAPABILITY_* bitmask.
 * Starts from 0, ORs in one flag per detected extension and returns the
 * result. The detection path is selected at compile time by the architecture
 * macros tested below (x86, ARM, PowerPC); when none of them is defined no
 * flag is set and 0 is returned. */
111 uint32_t CPUCapabilities( void )
113 uint32_t i_capabilities = 0;
115 #if defined( __i386__ ) || defined( __x86_64__ )
116 unsigned int i_eax, i_ebx, i_ecx, i_edx;
117 bool b_amd;
119 /* Needed for x86 CPU capabilities detection */
/* cpuid( reg ): run the CPUID instruction with `reg` in EAX and store the
 * resulting EAX/EBX/ECX/EDX in i_eax/i_ebx/i_ecx/i_edx. The non-x86_64
 * variant pushes and pops EBX around the instruction and hands its value
 * back through a scratch register instead of binding EBX directly
 * (NOTE(review): presumably because EBX may be reserved on i386, e.g. for
 * PIC -- confirm). */
120 # if defined( __x86_64__ )
121 # define cpuid( reg ) \
122 asm volatile ( "cpuid\n\t" \
123 "movl %%ebx,%1\n\t" \
124 : "=a" ( i_eax ), \
125 "=b" ( i_ebx ), \
126 "=c" ( i_ecx ), \
127 "=d" ( i_edx ) \
128 : "a" ( reg ) \
129 : "cc" );
130 # else
131 # define cpuid( reg ) \
132 asm volatile ( "push %%ebx\n\t" \
133 "cpuid\n\t" \
134 "movl %%ebx,%1\n\t" \
135 "pop %%ebx\n\t" \
136 : "=a" ( i_eax ), \
137 "=r" ( i_ebx ), \
138 "=c" ( i_ecx ), \
139 "=d" ( i_edx ) \
140 : "a" ( reg ) \
141 : "cc" );
142 # endif
143 /* Check if the OS really supports the requested instructions */
144 # if defined (__i386__) && !defined (__i486__) && !defined (__i586__) \
145 && !defined (__i686__) && !defined (__pentium4__) \
146 && !defined (__k6__) && !defined (__athlon__) && !defined (__k8__)
147 /* check if cpuid instruction is supported */
/* The sequence below copies EFLAGS into EAX, keeps that original value in
 * EBX, flips bit 0x200000, writes the result back to EFLAGS and reads EFLAGS
 * again. On exit i_eax holds the value read back and i_ebx the original. */
148 asm volatile ( "push %%ebx\n\t"
149 "pushf\n\t"
150 "pop %%eax\n\t"
151 "movl %%eax, %%ebx\n\t"
152 "xorl $0x200000, %%eax\n\t"
153 "push %%eax\n\t"
154 "popf\n\t"
155 "pushf\n\t"
156 "pop %%eax\n\t"
157 "movl %%ebx,%1\n\t"
158 "pop %%ebx\n\t"
159 : "=a" ( i_eax ),
160 "=r" ( i_ebx )
162 : "cc" );
/* The bit could not be toggled: skip all CPUID-based detection */
164 if( i_eax == i_ebx )
165 goto out;
166 # endif
168 /* the CPU supports the CPUID instruction - get its level */
169 cpuid( 0x00000000 );
171 # if defined (__i386__) && !defined (__i586__) \
172 && !defined (__i686__) && !defined (__pentium4__) \
173 && !defined (__k6__) && !defined (__athlon__) && !defined (__k8__)
/* cpuid( 0 ) left i_eax at 0: skip the remaining queries */
174 if( !i_eax )
175 goto out;
176 #endif
178 /* borrowed from mpeg2dec */
/* The three constants are the little-endian ASCII encodings of "Auth"
 * (i_ebx), "cAMD" (i_ecx) and "enti" (i_edx), i.e. the vendor string
 * "AuthenticAMD" as left by cpuid( 0 ). */
179 b_amd = ( i_ebx == 0x68747541 ) && ( i_ecx == 0x444d4163 )
180 && ( i_edx == 0x69746e65 );
182 /* test for the MMX flag */
183 cpuid( 0x00000001 );
/* Unless the compiler already targets MMX (__MMX__), a clear bit 0x00800000
 * in i_edx ends detection here, before any capability has been recorded. */
184 # if !defined (__MMX__)
185 if( ! (i_edx & 0x00800000) )
186 goto out;
187 # endif
188 i_capabilities |= CPU_CAPABILITY_MMX;
/* Pattern for every extension below: if the compiler already targets it
 * (__SSE__, __SSE2__, ...), the flag is set unconditionally. Otherwise, when
 * the matching CAN_COMPILE_* macro is defined and the feature bit from
 * cpuid( 1 ) is set, check_capability() decides whether the flag is set.
 * MMXEXT is the exception: it only needs bit 0x02000000 of i_edx. */
190 # if defined (__SSE__)
191 i_capabilities |= CPU_CAPABILITY_MMXEXT | CPU_CAPABILITY_SSE;
192 # else
193 if( i_edx & 0x02000000 )
195 i_capabilities |= CPU_CAPABILITY_MMXEXT;
197 # ifdef CAN_COMPILE_SSE
198 check_capability( "SSE", CPU_CAPABILITY_SSE,
199 "xorps %%xmm0,%%xmm0\n" );
200 # endif
202 # endif
204 # if defined (__SSE2__)
205 i_capabilities |= CPU_CAPABILITY_SSE2;
206 # elif defined (CAN_COMPILE_SSE2)
207 if( i_edx & 0x04000000 )
208 check_capability( "SSE2", CPU_CAPABILITY_SSE2,
209 "movupd %%xmm0, %%xmm0\n" );
210 # endif
212 # if defined (__SSE3__)
213 i_capabilities |= CPU_CAPABILITY_SSE3;
214 # elif defined (CAN_COMPILE_SSE3)
215 if( i_ecx & 0x00000001 )
216 check_capability( "SSE3", CPU_CAPABILITY_SSE3,
217 "movsldup %%xmm1, %%xmm0\n" );
218 # endif
220 # if defined (__SSSE3__)
221 i_capabilities |= CPU_CAPABILITY_SSSE3;
222 # elif defined (CAN_COMPILE_SSSE3)
223 if( i_ecx & 0x00000200 )
224 check_capability( "SSSE3", CPU_CAPABILITY_SSSE3,
225 "pabsw %%xmm1, %%xmm0\n" );
226 # endif
228 # if defined (__SSE4_1__)
229 i_capabilities |= CPU_CAPABILITY_SSE4_1;
230 # elif defined (CAN_COMPILE_SSE4_1)
231 if( i_ecx & 0x00080000 )
232 check_capability( "SSE4.1", CPU_CAPABILITY_SSE4_1,
233 "pmaxsb %%xmm1, %%xmm0\n" );
234 # endif
236 # if defined (__SSE4_2__)
237 i_capabilities |= CPU_CAPABILITY_SSE4_2;
238 # elif defined (CAN_COMPILE_SSE4_2)
239 if( i_ecx & 0x00100000 )
240 check_capability( "SSE4.2", CPU_CAPABILITY_SSE4_2,
241 "pcmpgtq %%xmm1, %%xmm0\n" );
242 # endif
244 /* test for additional capabilities */
245 cpuid( 0x80000000 );
/* i_eax below 0x80000001: the 0x80000001 query is skipped */
247 if( i_eax < 0x80000001 )
248 goto out;
250 /* list these additional capabilities */
251 cpuid( 0x80000001 );
253 # if defined (__3dNOW__)
254 i_capabilities |= CPU_CAPABILITY_3DNOW;
255 # elif defined (CAN_COMPILE_3DNOW)
256 if( i_edx & 0x80000000 )
257 check_capability( "3D Now!", CPU_CAPABILITY_3DNOW,
258 "pfadd %%mm0,%%mm0\n" "femms\n" );
259 # endif
/* Bit 0x00400000 of i_edx from query 0x80000001 also grants MMXEXT, but
 * only when the vendor comparison above matched. */
261 if( b_amd && ( i_edx & 0x00400000 ) )
263 i_capabilities |= CPU_CAPABILITY_MMXEXT;
265 out:
267 #elif defined( __arm__ )
/* ARM: NEON is reported only when the compiler targets it (__ARM_NEON__);
 * no runtime test is performed. */
268 # if defined( __ARM_NEON__ )
269 i_capabilities |= CPU_CAPABILITY_NEON;
270 # endif
272 #elif defined( __powerpc__ ) || defined( __ppc__ ) || defined( __powerpc64__ ) \
273 || defined( __ppc64__ )
/* PowerPC: on Apple and OpenBSD the kernel is asked through sysctl().
 * Elsewhere, if CAN_COMPILE_ALTIVEC is defined, a vector instruction is run
 * in a forked child and check_OS_capability() judges the outcome. */
275 # if defined(__APPLE__) || defined(__OpenBSD__)
276 # if defined(__OpenBSD__)
277 int selectors[2] = { CTL_MACHDEP, CPU_ALTIVEC };
278 # else
279 int selectors[2] = { CTL_HW, HW_VECTORUNIT };
280 # endif
281 int i_has_altivec = 0;
282 size_t i_length = sizeof( i_has_altivec );
283 int i_error = sysctl( selectors, 2, &i_has_altivec, &i_length, NULL, 0);
285 if( i_error == 0 && i_has_altivec != 0 )
286 i_capabilities |= CPU_CAPABILITY_ALTIVEC;
288 # elif defined( CAN_COMPILE_ALTIVEC )
289 pid_t pid = fork();
290 if( pid == 0 )
292 signal(SIGILL, SIG_DFL);
293 asm volatile ("mtspr 256, %0\n\t"
294 "vand %%v0, %%v0, %%v0"
296 : "r" (-1));
297 _exit(0);
300 if( check_OS_capability( "Altivec", pid ) )
301 i_capabilities |= CPU_CAPABILITY_ALTIVEC;
303 # endif
305 #endif
306 return i_capabilities;
/* Capability bitmask handed out by vlc_CPU(); empty until something stores
 * a value in it. */
uint32_t cpu_flags = 0;

/*****************************************************************************
 * vlc_CPU: get pre-computed CPU capability flags
 ****************************************************************************/
unsigned vlc_CPU (void)
{
    const unsigned i_flags = cpu_flags;

    return i_flags;
}
320 const struct
322 uint32_t value;
323 char name[12];
324 } cap_dirs[] = {
325 #if defined ( __i386__ ) || defined ( __x86_64__ )
326 { CPU_CAPABILITY_MMX, "mmx" },
327 { CPU_CAPABILITY_MMXEXT, "mmxext" },
328 { CPU_CAPABILITY_3DNOW, "3dnow" },
329 { CPU_CAPABILITY_SSE, "sse" },
330 #endif
331 #if defined (__ppc__) || defined (__ppc64__) || defined (__powerpc__)
332 { CPU_CAPABILITY_ALTIVEC, "altivec" },
333 #endif
334 #if defined (__arm__)
335 { CPU_CAPABILITY_NEON, "arm_neon" },
336 #endif
/**
 * Return the number of available logical CPUs.
 *
 * The query used depends on the platform selected at compile time. Whenever
 * the query fails, reports a non-positive value, or no implementation exists
 * for the platform, 1 is returned, so the result is always at least 1.
 */
unsigned vlc_GetCPUCount(void)
{
#if defined(WIN32) && !defined(UNDER_CE)
    /* GetProcessAffinityMask() writes pointer-sized masks: DWORD_PTR, not
     * DWORD, or the call overruns the variables on 64-bit builds. */
    DWORD_PTR process_mask;
    DWORD_PTR system_mask;
    if (!GetProcessAffinityMask(GetCurrentProcess(), &process_mask, &system_mask))
        return 1;

    /* One bit per processor: count the bits that are set rather than the
     * position of the highest one. */
    unsigned count = 0;
    for (; system_mask != 0; system_mask >>= 1)
        count += (unsigned)(system_mask & 1);
    return (count > 0) ? count : 1;
#elif defined(HAVE_SCHED_GETAFFINITY)
    cpu_set_t cpu;
    CPU_ZERO(&cpu);
    if (sched_getaffinity(0, sizeof(cpu), &cpu) < 0)
        return 1;
    unsigned count = 0;
    for (unsigned i = 0; i < CPU_SETSIZE; i++)
        count += CPU_ISSET(i, &cpu) != 0;
    return (count > 0) ? count : 1;
#elif defined(__APPLE__)
    int count;
    size_t size = sizeof(count);
    if (sysctlbyname("hw.ncpu", &count, &size, NULL, 0))
        return 1; /* Failure */
    return (count > 0) ? (unsigned)count : 1;
#elif defined(__OpenBSD__)
    int selectors[2] = { CTL_HW, HW_NCPU };
    int count;
    size_t size = sizeof(count);
    if (sysctl(selectors, 2, &count, &size, NULL, 0))
        return 1; /* Failure */
    return (count > 0) ? (unsigned)count : 1;
#elif defined(__SunOS)
    unsigned count = 0;
    int type;
    u_int numcpus;
    processor_info_t cpuinfo;

    /* sysconf() returns -1 on error: never feed that to malloc() */
    const long max_cpus = sysconf(_SC_NPROCESSORS_MAX);
    if (max_cpus <= 0)
        return 1;

    processorid_t *cpulist = malloc(sizeof(*cpulist) * (size_t)max_cpus);
    if (!cpulist)
        return 1;

    if (pset_info(PS_MYID, &type, &numcpus, cpulist) == 0)
    {
        /* Count the processors of our set that are online */
        for (u_int i = 0; i < numcpus; i++)
        {
            if (!processor_info(cpulist[i], &cpuinfo))
                count += (cpuinfo.pi_state == P_ONLINE) ? 1 : 0;
        }
    }
    else
    {
        const long online = sysconf(_SC_NPROCESSORS_ONLN);
        if (online > 0)
            count = (unsigned)online;
    }
    free(cpulist);
    return (count > 0) ? count : 1;
#else
# warning "vlc_GetCPUCount is not implemented for your platform"
    return 1;
#endif
}
405 * Check if a directory name contains usable plugins w.r.t. the hardware
406 * capabilities. Loading a plugin when the hardware has insufficient
407 * capabilities may lead to illegal instructions (SIGILL) and must be avoided.
409 * @param name the name of the directory (<b>not</b> the path)
411 * @return true if the hardware has sufficient capabilities or the directory
412 * does not require any special capability; false if the running hardware has
413 * insufficient capabilities.
415 bool vlc_CPU_CheckPluginDir (const char *name)
417 const unsigned flags = vlc_CPU ();
418 for (size_t i = 0; i < sizeof (cap_dirs) / sizeof (cap_dirs[0]); i++)
420 if (strcmp (name, cap_dirs[i].name))
421 continue;
422 return (flags & cap_dirs[i].value) != 0;
424 return true;
427 static vlc_memcpy_t pf_vlc_memcpy = memcpy;
428 static vlc_memset_t pf_vlc_memset = memset;
430 void vlc_fastmem_register (vlc_memcpy_t cpy, vlc_memset_t set)
432 if (cpy)
433 pf_vlc_memcpy = cpy;
434 if (set)
435 pf_vlc_memset = set;
439 * vlc_memcpy: fast CPU-dependent memcpy
441 void *vlc_memcpy (void *tgt, const void *src, size_t n)
443 return pf_vlc_memcpy (tgt, src, n);
447 * vlc_memset: fast CPU-dependent memset
449 void *vlc_memset (void *tgt, int c, size_t n)
451 return pf_vlc_memset (tgt, c, n);
/**
 * Return an aligned pointer to newly allocated memory.
 *
 * \param base receives the pointer that owns the allocation (NULL on
 *             failure); this is the pointer to release
 * \param alignment must be a power of 2 and a multiple of sizeof(void*)
 * \param size is the size of the usable memory returned.
 *
 * \return the aligned pointer, or NULL if the allocation failed.
 * It must not be freed directly, *base must.
 */
void *vlc_memalign(void **base, size_t alignment, size_t size)
{
    assert(alignment >= sizeof(void*));
    /* a power of two has exactly one bit set */
    assert((alignment & (alignment - 1)) == 0);
#if defined(HAVE_POSIX_MEMALIGN)
    if (posix_memalign(base, alignment, size)) {
        *base = NULL;
        return NULL;
    }
    return *base;
#elif defined(HAVE_MEMALIGN)
    return *base = memalign(alignment, size);
#else
    /* Over-allocate by alignment - 1 bytes, then round the address up */
    const size_t slack = alignment - 1;

    /* size + slack must not wrap around, or the allocation would be far
     * smaller than what the caller is told it can use */
    if (size > SIZE_MAX - slack) {
        *base = NULL;
        return NULL;
    }

    unsigned char *p = *base = malloc(size + slack);
    if (!p)
        return NULL;
    return (void *)(((uintptr_t)p + slack) & ~(uintptr_t)slack);
#endif
}