Merge svn changes up to r30484
[mplayer/kovensky.git] / libmpeg2 / cpu_accel.c
blobf2e99f754a60159d7ecb8caddde5c62dd75857e7
/*
 * cpu_accel.c
 * Copyright (C) 2000-2004 Michel Lespinasse <walken@zoy.org>
 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
 *
 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
 * See http://libmpeg2.sourceforge.net/ for updates.
 *
 * mpeg2dec is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * mpeg2dec is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * Modified for use with MPlayer, see libmpeg2_changes.diff for the exact changes.
 * detailed changelog at http://svn.mplayerhq.hu/mplayer/trunk/
 * $Id$
 */
28 #include "config.h"
30 #include <inttypes.h>
32 #include "mpeg2.h"
33 #include "attributes.h"
34 #include "mpeg2_internal.h"
36 #include "cpudetect.h"
#if ARCH_X86 || ARCH_X86_64
/*
 * Build the x86 acceleration mask.
 *
 * The libmpeg2 cpuid-based detection is compiled out with "#if 0" and kept
 * only for reference.  The active path discards the incoming accel value
 * and rebuilds the mask from the gCpuCaps flags (hasMMX, hasSSE2, hasMMX2,
 * has3DNow).
 *
 * Returns the resulting MPEG2_ACCEL_X86_* bit mask.
 */
static inline uint32_t arch_accel (uint32_t accel)
{
/* Use MPlayer CPU detection instead of libmpeg2 variant. */
#if 0
    /* Each requested extension implies the ones it builds on. */
    if (accel & (MPEG2_ACCEL_X86_3DNOW | MPEG2_ACCEL_X86_MMXEXT))
        accel |= MPEG2_ACCEL_X86_MMX;

    if (accel & (MPEG2_ACCEL_X86_SSE2 | MPEG2_ACCEL_X86_SSE3))
        accel |= MPEG2_ACCEL_X86_MMXEXT;

    if (accel & (MPEG2_ACCEL_X86_SSE3))
        accel |= MPEG2_ACCEL_X86_SSE2;

#ifdef ACCEL_DETECT
    if (accel & MPEG2_ACCEL_DETECT) {
        uint32_t eax, ebx, ecx, edx;
        int AMD;

#if defined(__x86_64__) || (!defined(PIC) && !defined(__PIC__))
#define cpuid(op,eax,ebx,ecx,edx)       \
    __asm__ ("cpuid"                    \
             : "=a" (eax),              \
               "=b" (ebx),              \
               "=c" (ecx),              \
               "=d" (edx)               \
             : "a" (op)                 \
             : "cc")
#else   /* PIC version : save ebx (not needed on x86_64) */
#define cpuid(op,eax,ebx,ecx,edx)       \
    __asm__ ("pushl %%ebx\n\t"          \
             "cpuid\n\t"                \
             "movl %%ebx,%1\n\t"        \
             "popl %%ebx"               \
             : "=a" (eax),              \
               "=r" (ebx),              \
               "=c" (ecx),              \
               "=d" (edx)               \
             : "a" (op)                 \
             : "cc")
#endif

#ifndef __x86_64__ /* x86_64 supports the cpuid op */
        /*
         * Flip bit 0x200000 of the flags register and read it back:
         * eax receives the re-read flags, ebx the original value.
         * The original flags are restored by the final popf.
         */
        __asm__ ("pushf\n\t"
                 "pushf\n\t"
                 "pop %0\n\t"
                 "movl %0,%1\n\t"
                 "xorl $0x200000,%0\n\t"
                 "push %0\n\t"
                 "popf\n\t"
                 "pushf\n\t"
                 "pop %0\n\t"
                 "popf"
                 : "=r" (eax),
                   "=r" (ebx)
                 :
                 : "cc");

        if (eax == ebx)                 /* no cpuid */
            return accel;
#endif

        cpuid (0x00000000, eax, ebx, ecx, edx);
        if (!eax)                       /* vendor string only */
            return accel;

        /* ebx/edx/ecx spell "Auth" "enti" "cAMD" in little-endian ASCII. */
        AMD = (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65);

        cpuid (0x00000001, eax, ebx, ecx, edx);
        if (! (edx & 0x00800000))       /* no MMX */
            return accel;

        accel |= MPEG2_ACCEL_X86_MMX;
        if (edx & 0x02000000)   /* SSE - identical to AMD MMX ext. */
            accel |= MPEG2_ACCEL_X86_MMXEXT;

        if (edx & 0x04000000)   /* SSE2 */
            accel |= MPEG2_ACCEL_X86_SSE2;

        if (ecx & 0x00000001)   /* SSE3 */
            accel |= MPEG2_ACCEL_X86_SSE3;

        cpuid (0x80000000, eax, ebx, ecx, edx);
        if (eax < 0x80000001)           /* no extended capabilities */
            return accel;

        cpuid (0x80000001, eax, ebx, ecx, edx);

        if (edx & 0x80000000)
            accel |= MPEG2_ACCEL_X86_3DNOW;

        if (AMD && (edx & 0x00400000))  /* AMD MMX extensions */
            accel |= MPEG2_ACCEL_X86_MMXEXT;
    }
#endif /* ACCEL_DETECT */

    return accel;

#else /* 0 */
    /* The caller's request is ignored; only gCpuCaps decides. */
    accel = 0;
    if (gCpuCaps.hasMMX)
        accel |= MPEG2_ACCEL_X86_MMX;
    if (gCpuCaps.hasSSE2)
        accel |= MPEG2_ACCEL_X86_SSE2;
    if (gCpuCaps.hasMMX2)
        accel |= MPEG2_ACCEL_X86_MMXEXT;
    if (gCpuCaps.has3DNow)
        accel |= MPEG2_ACCEL_X86_3DNOW;

    return accel;

#endif /* 0 */
}
#endif /* ARCH_X86 || ARCH_X86_64 */
#if defined(ACCEL_DETECT) && (ARCH_PPC || ARCH_SPARC)
#include <signal.h>
#include <setjmp.h>

/* Jump target armed by the probing code via sigsetjmp(). */
static sigjmp_buf jmpbuf;
/* Non-zero only while a probe instruction is about to be executed. */
static volatile sig_atomic_t canjump = 0;

/*
 * SIGILL handler used while probing for optional instructions.
 *
 * When a probe is in progress (canjump != 0) the flag is cleared and
 * control returns to the matching sigsetjmp() with value 1.
 *
 * Otherwise the signal did not come from a probe: the default action is
 * restored and the signal is raised again.  The handler must then simply
 * return instead of jumping; if the signal is blocked while the handler
 * runs, raise() only marks it pending, and falling through to
 * siglongjmp() would jump through a jmpbuf that has no live sigsetjmp()
 * context.
 */
static RETSIGTYPE sigill_handler (int sig)
{
    if (!canjump) {
        signal (sig, SIG_DFL);
        raise (sig);
    } else {
        canjump = 0;
        siglongjmp (jmpbuf, 1);
    }
}
#endif /* ACCEL_DETECT && (ARCH_PPC || ARCH_SPARC) */
#if ARCH_PPC
/*
 * Build the PowerPC acceleration mask.
 *
 * With ACCEL_DETECT defined, and when the caller set MPEG2_ACCEL_DETECT
 * without already requesting MPEG2_ACCEL_PPC_ALTIVEC, a "vand" instruction
 * is executed under a temporary SIGILL handler.  If it completes,
 * MPEG2_ACCEL_PPC_ALTIVEC is added; if SIGILL is raised, accel is returned
 * unchanged.  The previous SIGILL disposition is restored on both paths.
 *
 * Returns the (possibly extended) accel mask.
 */
static uint32_t arch_accel (uint32_t accel)
{
#ifdef ACCEL_DETECT
    if ((accel & (MPEG2_ACCEL_PPC_ALTIVEC | MPEG2_ACCEL_DETECT)) ==
        MPEG2_ACCEL_DETECT) {
        static RETSIGTYPE (* oldsig) (int);

        oldsig = signal (SIGILL, sigill_handler);
        if (sigsetjmp (jmpbuf, 1)) {
            /* Reached via siglongjmp() from sigill_handler. */
            signal (SIGILL, oldsig);
            return accel;
        }

        canjump = 1;

#if defined(__APPLE_CC__)       /* apple */
#define VAND(a,b,c) "vand v" #a ",v" #b ",v" #c "\n\t"
#else                           /* gnu */
#define VAND(a,b,c) "vand " #a "," #b "," #c "\n\t"
#endif
        /*
         * __asm__/__volatile__ rather than asm/volatile so the file also
         * builds in strict ISO C modes, matching the other probes here.
         */
        __asm__ __volatile__ ("mtspr 256, %0\n\t"
                              VAND (0, 0, 0)
                              :
                              : "r" (-1));

        canjump = 0;
        accel |= MPEG2_ACCEL_PPC_ALTIVEC;

        signal (SIGILL, oldsig);
    }
#endif /* ACCEL_DETECT */

    return accel;
}
#endif /* ARCH_PPC */
#if ARCH_SPARC
/*
 * Build the SPARC acceleration mask.
 *
 * A request for MPEG2_ACCEL_SPARC_VIS2 always implies
 * MPEG2_ACCEL_SPARC_VIS.  With ACCEL_DETECT defined, and when the caller
 * set MPEG2_ACCEL_DETECT without already requesting VIS2, two probe
 * instructions are executed in turn under a temporary SIGILL handler:
 * the first adds MPEG2_ACCEL_SPARC_VIS on success, the second adds
 * MPEG2_ACCEL_SPARC_VIS2.  A SIGILL on either probe restores the previous
 * handler and returns the flags gathered so far.
 *
 * Returns the (possibly extended) accel mask.
 */
static uint32_t arch_accel (uint32_t accel)
{
    if (accel & MPEG2_ACCEL_SPARC_VIS2)
        accel |= MPEG2_ACCEL_SPARC_VIS;

#ifdef ACCEL_DETECT
    if ((accel & (MPEG2_ACCEL_SPARC_VIS2 | MPEG2_ACCEL_DETECT)) ==
        MPEG2_ACCEL_DETECT) {
        static RETSIGTYPE (* oldsig) (int);

        oldsig = signal (SIGILL, sigill_handler);
        if (sigsetjmp (jmpbuf, 1)) {
            /* First probe trapped: no flag is added. */
            signal (SIGILL, oldsig);
            return accel;
        }

        canjump = 1;

        /* pdist %f0, %f0, %f0 */
        __asm__ __volatile__(".word\t0x81b007c0");

        canjump = 0;
        accel |= MPEG2_ACCEL_SPARC_VIS;

        if (sigsetjmp (jmpbuf, 1)) {
            /* Second probe trapped: keep VIS, skip VIS2. */
            signal (SIGILL, oldsig);
            return accel;
        }

        canjump = 1;

        /* edge8n %g0, %g0, %g0 */
        __asm__ __volatile__(".word\t0x81b00020");

        canjump = 0;
        accel |= MPEG2_ACCEL_SPARC_VIS2;

        signal (SIGILL, oldsig);
    }
#endif /* ACCEL_DETECT */

    return accel;
}
#endif /* ARCH_SPARC */
#if ARCH_ALPHA
/*
 * Build the Alpha acceleration mask.
 *
 * A request for MPEG2_ACCEL_ALPHA_MVI always implies MPEG2_ACCEL_ALPHA.
 * With ACCEL_DETECT defined and MPEG2_ACCEL_DETECT set, the "amask"
 * instruction is queried with 256 (AMASK_MVI): a zero result adds both
 * MPEG2_ACCEL_ALPHA and MPEG2_ACCEL_ALPHA_MVI, a non-zero result adds
 * MPEG2_ACCEL_ALPHA only.
 *
 * Returns the (possibly extended) accel mask.
 */
static inline uint32_t arch_accel (uint32_t accel)
{
    if (accel & MPEG2_ACCEL_ALPHA_MVI)
        accel |= MPEG2_ACCEL_ALPHA;

#ifdef ACCEL_DETECT
    if (accel & MPEG2_ACCEL_DETECT) {
        uint64_t no_mvi;

        /*
         * __asm__/__volatile__ rather than asm/volatile so the file also
         * builds in strict ISO C modes.
         */
        __asm__ __volatile__ ("amask %1, %0"
                              : "=r" (no_mvi)
                              : "rI" (256));    /* AMASK_MVI */
        accel |= no_mvi ? MPEG2_ACCEL_ALPHA : (MPEG2_ACCEL_ALPHA |
                                               MPEG2_ACCEL_ALPHA_MVI);
    }
#endif /* ACCEL_DETECT */

    return accel;
}
#endif /* ARCH_ALPHA */
/*
 * Public entry point for acceleration detection.
 *
 * On x86, x86-64, PPC, Alpha and SPARC builds the request is passed to the
 * architecture-specific arch_accel() and its result is returned.  On any
 * other architecture accel is returned unchanged.
 */
uint32_t mpeg2_detect_accel (uint32_t accel)
{
#if ARCH_X86 || ARCH_X86_64 || ARCH_PPC || ARCH_ALPHA || ARCH_SPARC
    accel = arch_accel (accel);
#endif
    return accel;
}