in24/in32/fl32 little/big-endian QuickTime PCM audio support
[mplayer/glamo.git] / mp3lib / decod386.c
blob5d0397935973b12e186ad037a4aafa2c9b684958
/*
 * Modified for use with MPlayer, for details see the CVS changelog at
 * http://www.mplayerhq.hu/cgi-bin/cvsweb.cgi/main/
 * $Id$
 */

/*
 * Mpeg Layer-1,2,3 audio decoder
 * ------------------------------
 * copyright (c) 1995,1996,1997 by Michael Hipp, All rights reserved.
 * See also 'README'
 *
 * slightly optimized for machines without autoincrement/decrement.
 * The performance is highly compiler dependent. Maybe
 * the decode.c version for 'normal' processor may be faster
 * even for Intel processors.
 */
20 #include "config.h"
#if 0
/* old WRITE_SAMPLE */
/* is portable */
#define WRITE_SAMPLE(samples,sum,clip) do { \
    if ((sum) > 32767.0) { *(samples) = 0x7fff; (clip)++; } \
    else if ((sum) < -32768.0) { *(samples) = -0x8000; (clip)++; } \
    else { *(samples) = (sum); } \
} while (0)
#else
/* new WRITE_SAMPLE */

/*
 * should be the same as the "old WRITE_SAMPLE" macro above, but uses
 * some tricks to avoid double->int conversions and floating point compares.
 *
 * Here's how it works:
 * ((((65536.0 * 65536.0 * 16)+(65536.0 * 0.5))* 65536.0)) is
 * 0x0010000080000000LL in hex. It computes 0x0010000080000000LL + sum
 * as a double IEEE fp value and extracts the low-order 32-bits from the
 * IEEE fp representation stored in memory. The 2^52 bit in the constant
 * is intended to force the bits of "sum" into the least significant bits
 * of the double mantissa. After an integer subtraction of 0x80000000
 * we have the original double value "sum" converted to a 32-bit int value.
 *
 * (Is that really faster than the clean and simple old version of the macro?)
 */

/*
 * On a SPARC cpu, we fetch the low-order 32-bit from the second 32-bit
 * word of the double fp value stored in memory. On an x86 cpu, we fetch it
 * from the first 32-bit word.
 * I'm not sure if the WORDS_BIGENDIAN feature test covers all possible memory
 * layouts of double floating point values on all cpu architectures. If
 * it doesn't work for you, just enable the "old WRITE_SAMPLE" macro.
 */
#if WORDS_BIGENDIAN
#define MANTISSA_OFFSET 1
#else
#define MANTISSA_OFFSET 0
#endif

/*
 * WRITE_SAMPLE(samples, sum, clip)
 *   samples - pointer to the 16-bit output sample to store
 *   sum     - floating point sample value (evaluated once)
 *   clip    - integer lvalue, incremented when the value had to be clamped
 *
 * Requires sizeof(int) == 4. The macro is a single statement
 * (do { } while (0)), so it must be followed by a semicolon.
 */
#define WRITE_SAMPLE(samples,sum,clip) do { \
    union { double dtemp; int itemp[2]; } ws_pun; int ws_val; \
    ws_pun.dtemp = ((((65536.0 * 65536.0 * 16)+(65536.0 * 0.5))* 65536.0)) + (sum); \
    /* unsigned subtraction, then reinterpret as a signed 32-bit value */ \
    ws_val = (int)((unsigned int)ws_pun.itemp[MANTISSA_OFFSET] - 0x80000000u); \
    if (ws_val > 32767) { *(samples) = 0x7fff; (clip)++; } \
    else if (ws_val < -32768) { *(samples) = -0x8000; (clip)++; } \
    else { *(samples) = ws_val; } \
} while (0)
#endif
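/*
 * Disabled reference variant of WRITE_SAMPLE. It reads the first 32-bit
 * word of the double through a pointer cast, i.e. it only matches the x86
 * memory layout described above. Keep it commented out: enabling it would
 * redefine WRITE_SAMPLE.
 *
#define WRITE_SAMPLE(samples,sum,clip) { \
  double dtemp; int v; \
  dtemp = ((((65536.0 * 65536.0 * 16)+(65536.0 * 0.5))* 65536.0)) + (sum);\
  v = ((*(int *)&dtemp) - 0x80000000); \
  if( v > 32767) { *(samples) = 0x7fff; (clip)++; } \
  else if( v < -32768) { *(samples) = -0x8000; (clip)++; } \
  else { *(samples) = v; } \
}
*/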
86 static int synth_1to1(real *bandPtr,int channel,unsigned char *out,int *pnt);
88 static int synth_1to1_mono(real *bandPtr,unsigned char *samples,int *pnt)
90 short samples_tmp[64];
91 short *tmp1 = samples_tmp;
92 int i,ret;
93 int pnt1 = 0;
95 ret = synth_1to1(bandPtr,0,(unsigned char *) samples_tmp,&pnt1);
96 samples += *pnt;
98 for(i=0;i<32;i++) {
99 *( (short *) samples) = *tmp1;
100 samples += 2;
101 tmp1 += 2;
103 *pnt += 64;
105 return ret;
109 static int synth_1to1_mono2stereo(real *bandPtr,unsigned char *samples,int *pnt)
111 int i,ret;
113 ret = synth_1to1(bandPtr,0,samples,pnt);
114 samples = samples + *pnt - 128;
116 for(i=0;i<32;i++) {
117 ((short *)samples)[1] = ((short *)samples)[0];
118 samples+=4;
121 return ret;
124 static synth_func_t synth_func;
126 #if defined(CAN_COMPILE_X86_ASM)
127 int synth_1to1_MMX( real *bandPtr,int channel,short * samples)
129 static short buffs[2][2][0x110];
130 static int bo = 1;
131 synth_1to1_MMX_s(bandPtr, channel, samples, (short *) buffs, &bo);
132 return 0;
134 #endif
/*
 * dct64_base(a,b,c): run the DCT, using dct64_altivec() when the CPU reports
 * AltiVec support at run time and dct64() otherwise.
 *
 * The AltiVec variant is wrapped in do { } while (0) so that the macro is a
 * single statement and its inner if/else cannot pair with a caller's else.
 */
#ifdef HAVE_ALTIVEC
#define dct64_base(a,b,c) do { \
    if(gCpuCaps.hasAltiVec) dct64_altivec(a,b,c); \
    else dct64(a,b,c); \
} while (0)
#else /* HAVE_ALTIVEC */
#define dct64_base(a,b,c) dct64(a,b,c)
#endif /* HAVE_ALTIVEC */
142 static int synth_1to1(real *bandPtr,int channel,unsigned char *out,int *pnt)
144 static real buffs[2][2][0x110];
145 static const int step = 2;
146 static int bo = 1;
147 short *samples = (short *) (out + *pnt);
148 real *b0,(*buf)[0x110];
149 int clip = 0;
150 int bo1;
152 *pnt += 128;
154 /* optimized for x86 */
155 #if defined(CAN_COMPILE_X86_ASM)
156 if ( synth_func )
158 // printf("Calling %p, bandPtr=%p channel=%d samples=%p\n",synth_func,bandPtr,channel,samples);
159 // FIXME: synth_func() may destroy EBP, don't rely on stack contents!!!
160 return (*synth_func)( bandPtr,channel,samples);
162 #endif
163 if(!channel) { /* channel=0 */
164 bo--;
165 bo &= 0xf;
166 buf = buffs[0];
168 else {
169 samples++;
170 buf = buffs[1];
173 if(bo & 0x1) {
174 b0 = buf[0];
175 bo1 = bo;
176 dct64_base(buf[1]+((bo+1)&0xf),buf[0]+bo,bandPtr);
178 else {
179 b0 = buf[1];
180 bo1 = bo+1;
181 dct64_base(buf[0]+bo,buf[1]+bo+1,bandPtr);
185 register int j;
186 real *window = mp3lib_decwin + 16 - bo1;
188 for (j=16;j;j--,b0+=0x10,window+=0x20,samples+=step)
190 real sum;
191 sum = window[0x0] * b0[0x0];
192 sum -= window[0x1] * b0[0x1];
193 sum += window[0x2] * b0[0x2];
194 sum -= window[0x3] * b0[0x3];
195 sum += window[0x4] * b0[0x4];
196 sum -= window[0x5] * b0[0x5];
197 sum += window[0x6] * b0[0x6];
198 sum -= window[0x7] * b0[0x7];
199 sum += window[0x8] * b0[0x8];
200 sum -= window[0x9] * b0[0x9];
201 sum += window[0xA] * b0[0xA];
202 sum -= window[0xB] * b0[0xB];
203 sum += window[0xC] * b0[0xC];
204 sum -= window[0xD] * b0[0xD];
205 sum += window[0xE] * b0[0xE];
206 sum -= window[0xF] * b0[0xF];
208 WRITE_SAMPLE(samples,sum,clip);
212 real sum;
213 sum = window[0x0] * b0[0x0];
214 sum += window[0x2] * b0[0x2];
215 sum += window[0x4] * b0[0x4];
216 sum += window[0x6] * b0[0x6];
217 sum += window[0x8] * b0[0x8];
218 sum += window[0xA] * b0[0xA];
219 sum += window[0xC] * b0[0xC];
220 sum += window[0xE] * b0[0xE];
221 WRITE_SAMPLE(samples,sum,clip);
222 b0-=0x10,window-=0x20,samples+=step;
224 window += bo1<<1;
226 for (j=15;j;j--,b0-=0x10,window-=0x20,samples+=step)
228 real sum;
229 sum = -window[-0x1] * b0[0x0];
230 sum -= window[-0x2] * b0[0x1];
231 sum -= window[-0x3] * b0[0x2];
232 sum -= window[-0x4] * b0[0x3];
233 sum -= window[-0x5] * b0[0x4];
234 sum -= window[-0x6] * b0[0x5];
235 sum -= window[-0x7] * b0[0x6];
236 sum -= window[-0x8] * b0[0x7];
237 sum -= window[-0x9] * b0[0x8];
238 sum -= window[-0xA] * b0[0x9];
239 sum -= window[-0xB] * b0[0xA];
240 sum -= window[-0xC] * b0[0xB];
241 sum -= window[-0xD] * b0[0xC];
242 sum -= window[-0xE] * b0[0xD];
243 sum -= window[-0xF] * b0[0xE];
244 sum -= window[-0x0] * b0[0xF];
246 WRITE_SAMPLE(samples,sum,clip);
250 return clip;
#ifdef USE_FAKE_MONO
/*
 * Run synth_1to1() for the given channel, then overwrite the second slot of
 * each of the 32 four-byte sample pairs just written with the first slot.
 *
 * synth_1to1() advances *pnt by 128 bytes, hence the rewind by 128.
 * Returns whatever synth_1to1() returned.
 */
static int synth_1to1_l(real *bandPtr,int channel,unsigned char *out,int *pnt)
{
  int i,ret;

  ret = synth_1to1(bandPtr,channel,out,pnt);
  out = out + *pnt - 128;

  for(i=0;i<32;i++) {
    ((short *)out)[1] = ((short *)out)[0];
    out+=4;
  }

  return ret;
}

/*
 * Run synth_1to1() for the given channel, then overwrite the first slot of
 * each of the 32 four-byte sample pairs just written with the second slot.
 *
 * synth_1to1() advances *pnt by 128 bytes, hence the rewind by 128.
 * Returns whatever synth_1to1() returned.
 */
static int synth_1to1_r(real *bandPtr,int channel,unsigned char *out,int *pnt)
{
  int i,ret;

  ret = synth_1to1(bandPtr,channel,out,pnt);
  out = out + *pnt - 128;

  for(i=0;i<32;i++) {
    ((short *)out)[0] = ((short *)out)[1];
    out+=4;
  }

  return ret;
}
#endif