Real rtsp:// streaming support, ported from xine
[mplayer/greg.git] / mp3lib / decod386.c
blob7739cdb8d16ac12bb5f9d10d2fdb85689eee087b
1 /*
2 * Mpeg Layer-1,2,3 audio decoder
3 * ------------------------------
4 * copyright (c) 1995,1996,1997 by Michael Hipp, All rights reserved.
5 * See also 'README'
7 * slightly optimized for machines without autoincrement/decrement.
8 * The performance is highly compiler dependent. Maybe
9 * the decode.c version for 'normal' processors may be faster
10 * even for Intel processors.
14 #include "../config.h"
/*
 * WRITE_SAMPLE(samples,sum,clip)
 *
 * Stores the floating point value "sum" as a 16-bit sample through the
 * pointer "samples".  Values above 32767 are stored as 0x7fff and values
 * below -32768 as -0x8000; in both cases "clip" is incremented.
 *
 * Every variant is a single statement (do { } while (0)), so it can be used
 * as the body of an unbraced if/else.
 */
#if 0
/* old WRITE_SAMPLE */
/* is portable */
#define WRITE_SAMPLE(samples,sum,clip) do { \
  if( (sum) > 32767.0) { *(samples) = 0x7fff; (clip)++; } \
  else if( (sum) < -32768.0) { *(samples) = -0x8000; (clip)++; } \
  else { *(samples) = sum; } \
} while (0)
#else
/* new WRITE_SAMPLE */

/*
 * should clip the same way as the "old WRITE_SAMPLE" macro above, but uses
 * some tricks to avoid double->int conversions and floating point compares.
 * (In-range values are rounded by the floating point addition instead of
 * being truncated by a double->short conversion.)
 *
 * Here's how it works:
 * ((((65536.0 * 65536.0 * 16)+(65536.0 * 0.5))* 65536.0)) is
 * 0x0010000080000000LL in hex, i.e. 2^52 + 2^31.  The macro computes
 * 0x0010000080000000LL + sum as a double IEEE fp value and extracts the
 * low-order 32 bits from the IEEE fp representation stored in memory.
 * The 2^52 bit in the constant is intended to force the bits of "sum" into
 * the least significant bits of the double mantissa.  After an integer
 * subtraction of 0x80000000 we have the original double value "sum"
 * converted to a 32-bit int value.
 *
 * Only the low 32 bits are looked at, so the result is meaningful only
 * while the rounded "sum" fits into a 32-bit int.
 *
 * (Is that really faster than the clean and simple old version of the macro?)
 *
 * On a SPARC cpu, we fetch the low-order 32 bits from the second 32-bit
 * word of the double fp value stored in memory.  On an x86 cpu, we fetch it
 * from the first 32-bit word.
 * I'm not sure if the WORDS_BIGENDIAN feature test covers all possible memory
 * layouts of double floating point values on all cpu architectures.  If
 * it doesn't work for you, just enable the "old WRITE_SAMPLE" macro.
 */
#if WORDS_BIGENDIAN
#define MANTISSA_OFFSET 1
#else
#define MANTISSA_OFFSET 0
#endif

/* sizeof(int) == 4 */
/*
 * The mantissa word is read through a union as an unsigned value and the
 * bias is removed in long long arithmetic, so the narrowing to int is always
 * in range.  The temporaries carry a ws_ prefix so they cannot capture an
 * identifier used in the macro arguments.
 */
#define WRITE_SAMPLE(samples,sum,clip) do { \
  union { double d; unsigned int w[2]; } ws_bits_; \
  int ws_val_; \
  ws_bits_.d = ((((65536.0 * 65536.0 * 16)+(65536.0 * 0.5))* 65536.0)) + (sum); \
  ws_val_ = (int)((long long)ws_bits_.w[MANTISSA_OFFSET] - 0x80000000LL); \
  if( ws_val_ > 32767) { *(samples) = 0x7fff; (clip)++; } \
  else if( ws_val_ < -32768) { *(samples) = -0x8000; (clip)++; } \
  else { *(samples) = ws_val_; } \
} while (0)

#endif

#if 0
/*
 * Earlier pointer-cast variant, kept for reference only.  It must stay
 * disabled: it would redefine WRITE_SAMPLE, it reads the double through an
 * int pointer (strict-aliasing violation), and it always takes the first
 * 32-bit word, which is the wrong one when WORDS_BIGENDIAN is set.
 */
#define WRITE_SAMPLE(samples,sum,clip) { \
  double dtemp; int v; \
  dtemp = ((((65536.0 * 65536.0 * 16)+(65536.0 * 0.5))* 65536.0)) + (sum);\
  v = ((*(int *)&dtemp) - 0x80000000); \
  if( v > 32767) { *(samples) = 0x7fff; (clip)++; } \
  else if( v < -32768) { *(samples) = -0x8000; (clip)++; } \
  else { *(samples) = v; } \
}
#endif
80 static int synth_1to1_mono(real *bandPtr,unsigned char *samples,int *pnt)
82 short samples_tmp[64];
83 short *tmp1 = samples_tmp;
84 int i,ret;
85 int pnt1 = 0;
87 ret = synth_1to1(bandPtr,0,(unsigned char *) samples_tmp,&pnt1);
88 samples += *pnt;
90 for(i=0;i<32;i++) {
91 *( (short *) samples) = *tmp1;
92 samples += 2;
93 tmp1 += 2;
95 *pnt += 64;
97 return ret;
101 static int synth_1to1_mono2stereo(real *bandPtr,unsigned char *samples,int *pnt)
103 int i,ret;
105 ret = synth_1to1(bandPtr,0,samples,pnt);
106 samples = samples + *pnt - 128;
108 for(i=0;i<32;i++) {
109 ((short *)samples)[1] = ((short *)samples)[0];
110 samples+=4;
113 return ret;
#ifdef USE_FAKE_MONO
/*
 * Runs synth_1to1() for the given channel, then overwrites the second short
 * of each of the 32 four-byte frames just written (the 128 bytes ending at
 * the updated *pnt) with the first short of that frame.
 *
 * Returns whatever synth_1to1() returned.
 */
static int synth_1to1_l(real *bandPtr,int channel,unsigned char *out,int *pnt)
{
  int result = synth_1to1(bandPtr, channel, out, pnt);
  unsigned char *frame = out + *pnt - 128;
  int left;

  for (left = 32; left > 0; left--, frame += 4)
    ((short *) frame)[1] = ((short *) frame)[0];

  return result;
}

/*
 * Counterpart of synth_1to1_l(): after synth_1to1() has run, the first short
 * of each of the 32 four-byte frames is overwritten with the second one.
 *
 * Returns whatever synth_1to1() returned.
 */
static int synth_1to1_r(real *bandPtr,int channel,unsigned char *out,int *pnt)
{
  int result = synth_1to1(bandPtr, channel, out, pnt);
  unsigned char *frame = out + *pnt - 128;
  int left;

  for (left = 32; left > 0; left--, frame += 4)
    ((short *) frame)[0] = ((short *) frame)[1];

  return result;
}
#endif
/*
 * Optional replacement synthesis routine.  When CAN_COMPILE_X86_ASM is
 * defined and this is non-null, synth_1to1() hands the call over to it
 * with (bandPtr, channel, samples).  Nothing in the visible code assigns
 * it; presumably it is set during decoder setup elsewhere -- TODO confirm.
 */
150 synth_func_t synth_func;
#ifdef CAN_COMPILE_X86_ASM
/*
 * Thin wrapper around synth_1to1_MMX_s(): supplies the function-local static
 * buffer and offset that routine works with and forwards the caller's
 * arguments unchanged.  Takes the same three arguments synth_1to1() passes
 * to synth_func.
 *
 * Always returns 0.
 */
int synth_1to1_MMX( real *bandPtr,int channel,short * samples)
{
    static short mmx_store[2][2][0x110];
    static int mmx_offs = 1;

    synth_1to1_MMX_s(bandPtr, channel, samples, (short *) mmx_store, &mmx_offs);

    return 0;
}
#endif
/*
 * dct64_base(a,b,c): forwards its three arguments to dct64_altivec() when
 * built with HAVE_ALTIVEC and gCpuCaps.hasAltiVec is non-zero at run time,
 * and to dct64() in every other case.
 *
 * The AltiVec variant is wrapped in do { } while (0) so the macro is one
 * statement: a bare "if ... else ..." expansion would swallow the else of an
 * enclosing unbraced if.  Use it as a statement, followed by ';'.
 */
#ifdef HAVE_ALTIVEC
#define dct64_base(a,b,c) \
  do { \
    if(gCpuCaps.hasAltiVec) dct64_altivec(a,b,c); \
    else dct64(a,b,c); \
  } while (0)
#else /* HAVE_ALTIVEC */
#define dct64_base(a,b,c) dct64(a,b,c)
#endif /* HAVE_ALTIVEC */
168 static int synth_1to1(real *bandPtr,int channel,unsigned char *out,int *pnt)
170 static real buffs[2][2][0x110];
171 static const int step = 2;
172 static int bo = 1;
173 short *samples = (short *) (out + *pnt);
174 real *b0,(*buf)[0x110];
175 int clip = 0;
176 int bo1;
178 *pnt += 128;
180 /* optimized for x86 */
181 #if defined(CAN_COMPILE_X86_ASM)
182 if ( synth_func )
184 // printf("Calling %p, bandPtr=%p channel=%d samples=%p\n",synth_func,bandPtr,channel,samples);
185 // FIXME: synth_func() may destroy EBP, don't rely on stack contents!!!
186 return (*synth_func)( bandPtr,channel,samples);
188 #endif
189 if(!channel) { /* channel=0 */
190 bo--;
191 bo &= 0xf;
192 buf = buffs[0];
194 else {
195 samples++;
196 buf = buffs[1];
199 if(bo & 0x1) {
200 b0 = buf[0];
201 bo1 = bo;
202 dct64_base(buf[1]+((bo+1)&0xf),buf[0]+bo,bandPtr);
204 else {
205 b0 = buf[1];
206 bo1 = bo+1;
207 dct64_base(buf[0]+bo,buf[1]+bo+1,bandPtr);
211 register int j;
212 real *window = mp3lib_decwin + 16 - bo1;
214 for (j=16;j;j--,b0+=0x10,window+=0x20,samples+=step)
216 real sum;
217 sum = window[0x0] * b0[0x0];
218 sum -= window[0x1] * b0[0x1];
219 sum += window[0x2] * b0[0x2];
220 sum -= window[0x3] * b0[0x3];
221 sum += window[0x4] * b0[0x4];
222 sum -= window[0x5] * b0[0x5];
223 sum += window[0x6] * b0[0x6];
224 sum -= window[0x7] * b0[0x7];
225 sum += window[0x8] * b0[0x8];
226 sum -= window[0x9] * b0[0x9];
227 sum += window[0xA] * b0[0xA];
228 sum -= window[0xB] * b0[0xB];
229 sum += window[0xC] * b0[0xC];
230 sum -= window[0xD] * b0[0xD];
231 sum += window[0xE] * b0[0xE];
232 sum -= window[0xF] * b0[0xF];
234 WRITE_SAMPLE(samples,sum,clip);
238 real sum;
239 sum = window[0x0] * b0[0x0];
240 sum += window[0x2] * b0[0x2];
241 sum += window[0x4] * b0[0x4];
242 sum += window[0x6] * b0[0x6];
243 sum += window[0x8] * b0[0x8];
244 sum += window[0xA] * b0[0xA];
245 sum += window[0xC] * b0[0xC];
246 sum += window[0xE] * b0[0xE];
247 WRITE_SAMPLE(samples,sum,clip);
248 b0-=0x10,window-=0x20,samples+=step;
250 window += bo1<<1;
252 for (j=15;j;j--,b0-=0x10,window-=0x20,samples+=step)
254 real sum;
255 sum = -window[-0x1] * b0[0x0];
256 sum -= window[-0x2] * b0[0x1];
257 sum -= window[-0x3] * b0[0x2];
258 sum -= window[-0x4] * b0[0x3];
259 sum -= window[-0x5] * b0[0x4];
260 sum -= window[-0x6] * b0[0x5];
261 sum -= window[-0x7] * b0[0x6];
262 sum -= window[-0x8] * b0[0x7];
263 sum -= window[-0x9] * b0[0x8];
264 sum -= window[-0xA] * b0[0x9];
265 sum -= window[-0xB] * b0[0xA];
266 sum -= window[-0xC] * b0[0xB];
267 sum -= window[-0xD] * b0[0xC];
268 sum -= window[-0xE] * b0[0xD];
269 sum -= window[-0xF] * b0[0xE];
270 sum -= window[-0x0] * b0[0xF];
272 WRITE_SAMPLE(samples,sum,clip);
276 return clip;