2 * Modified for use with MPlayer, for details see the changelog at
3 * http://svn.mplayerhq.hu/mplayer/trunk/
8 * mpg123_synth_1to1 works the same way as the c version of this
9 * file. only two types of changes have been made:
10 * - reordered floating point instructions to
11 * prevent pipeline stalls
12 * - made WRITE_SAMPLE use integer instead of
13 * (slower) floating point
14 * all kinds of x86 processors should benefit from these
17 * useful sources of information on optimizing x86 code include:
19 * Intel Architecture Optimization Manual
20 * http://www.intel.com/design/pentium/manuals/242816.htm
22 * Cyrix 6x86 Instruction Set Summary
23 * ftp://ftp.cyrix.com/6x86/6x-dbch6.pdf
25 * AMD-K5 Processor Software Development
26 * http://www.amd.com/products/cpg/techdocs/appnotes/20007e.pdf
28 * Stefan Bieschewski <stb@acm.org>
/* Sample type used by this file: `real` is an alias for single-precision float. */
34 #define real float /* ugly - but only way */
/*
 * File-scope work buffer of 1088 ints, zero-initialized. The inline asm in
 * synth_1to1_pent addresses it by symbol name at byte offsets 0 and +2176
 * (presumably two halves of 544 four-byte entries -- confirm).
 * NOTE(review): attribute_used looks necessary because the only references
 * are inside asm strings, which the compiler cannot see -- confirm.
 */
36 static int attribute_used buffs
[1088]={0};
/*
 * Persistent counter, initially 1. The asm loads it into %ebp near the start
 * of synth_1to1_pent and stores %ebp back to it, so its value carries over
 * between calls.
 */
37 static int attribute_used bo
=1;
/*
 * Scratch slot for the caller's %ebp. The asm stores %ebp here on entry and
 * reloads it at the end; %ebp is used as a general register in between and
 * is not named in the asm clobber list.
 * NOTE(review): being a single file-scope static, this slot is shared by all
 * calls -- confirm that concurrent use is not expected.
 */
38 static int attribute_used saved_ebp
=0;
40 int synth_1to1_pent(real
*bandPtr
, int channel
, short *samples
)
45 " movl %%ebp,"MANGLE(saved_ebp
)"\n\t"
46 " movl %1,%%eax\n\t"/*bandPtr*/
48 " xorl %%edi,%%edi\n\t"
49 " movl "MANGLE(bo
)",%%ebp\n\t"
54 " movl %%ebp,"MANGLE(bo
)"\n\t"
55 " movl $"MANGLE(buffs
)",%%ecx\n\t"
59 " movl $"MANGLE(buffs
)"+2176,%%ecx\n\t"
63 " movl %%ecx,%%ebx\n\t"
66 " movl 4+%4,%%edx\n\t"
67 " leal (%%ebx,%%edx,4),%%eax\n\t"
69 " movl 8+%4,%%eax\n\t"
72 " leal 1088(,%%eax,4),%%eax\n\t"
73 " addl %%ebx,%%eax\n\t"
76 " leal 1088(%%ecx),%%ebx\n\t"
77 " leal 1(%%ebp),%%edx\n\t"
80 " leal 1092(%%ecx,%%ebp,4),%%eax\n\t"
82 " leal (%%ecx,%%ebp,4),%%eax\n\t"
85 " call "MANGLE(mp3lib_dct64
)"\n\t"
88 " leal 0(,%%edx,4),%%edx\n\t"
89 " movl $"MANGLE(mp3lib_decwin
)"+64,%%eax\n\t"
90 " movl %%eax,%%ecx\n\t"
91 " subl %%edx,%%ecx\n\t"
100 " fmuls 8(%%ebx)\n\t"
102 " fsubrp %%st,%%st(1)\n\t"
103 " flds 12(%%ecx)\n\t"
104 " fmuls 12(%%ebx)\n\t"
106 " faddp %%st,%%st(1)\n\t"
107 " flds 16(%%ecx)\n\t"
108 " fmuls 16(%%ebx)\n\t"
110 " fsubrp %%st,%%st(1)\n\t"
111 " flds 20(%%ecx)\n\t"
112 " fmuls 20(%%ebx)\n\t"
114 " faddp %%st,%%st(1)\n\t"
115 " flds 24(%%ecx)\n\t"
116 " fmuls 24(%%ebx)\n\t"
118 " fsubrp %%st,%%st(1)\n\t"
119 " flds 28(%%ecx)\n\t"
120 " fmuls 28(%%ebx)\n\t"
122 " faddp %%st,%%st(1)\n\t"
123 " flds 32(%%ecx)\n\t"
124 " fmuls 32(%%ebx)\n\t"
126 " fsubrp %%st,%%st(1)\n\t"
127 " flds 36(%%ecx)\n\t"
128 " fmuls 36(%%ebx)\n\t"
130 " faddp %%st,%%st(1)\n\t"
131 " flds 40(%%ecx)\n\t"
132 " fmuls 40(%%ebx)\n\t"
134 " fsubrp %%st,%%st(1)\n\t"
135 " flds 44(%%ecx)\n\t"
136 " fmuls 44(%%ebx)\n\t"
138 " faddp %%st,%%st(1)\n\t"
139 " flds 48(%%ecx)\n\t"
140 " fmuls 48(%%ebx)\n\t"
142 " fsubrp %%st,%%st(1)\n\t"
143 " flds 52(%%ecx)\n\t"
144 " fmuls 52(%%ebx)\n\t"
146 " faddp %%st,%%st(1)\n\t"
147 " flds 56(%%ecx)\n\t"
148 " fmuls 56(%%ebx)\n\t"
150 " fsubrp %%st,%%st(1)\n\t"
151 " flds 60(%%ecx)\n\t"
152 " fmuls 60(%%ebx)\n\t"
155 " faddp %%st,%%st(1)\n\t"
157 " fsubrp %%st,%%st(1)\n\t"
158 " fistpl (%%esp)\n\t"
160 " cmpl $32767,%%eax\n\t"
162 " cmpl $-32768,%%eax\n\t"
164 " movw %%ax,(%%esi)\n\t"
166 "1: movw $32767,(%%esi)\n\t"
168 "2: movw $-32768,(%%esi)\n\t"
172 " addl $64,%%ebx\n\t"
173 " subl $-128,%%ecx\n\t"
180 " fmuls 8(%%ebx)\n\t"
181 " flds 16(%%ecx)\n\t"
182 " fmuls 16(%%ebx)\n\t"
184 " faddp %%st,%%st(1)\n\t"
185 " flds 24(%%ecx)\n\t"
186 " fmuls 24(%%ebx)\n\t"
188 " faddp %%st,%%st(1)\n\t"
189 " flds 32(%%ecx)\n\t"
190 " fmuls 32(%%ebx)\n\t"
192 " faddp %%st,%%st(1)\n\t"
193 " flds 40(%%ecx)\n\t"
194 " fmuls 40(%%ebx)\n\t"
196 " faddp %%st,%%st(1)\n\t"
197 " flds 48(%%ecx)\n\t"
198 " fmuls 48(%%ebx)\n\t"
200 " faddp %%st,%%st(1)\n\t"
201 " flds 56(%%ecx)\n\t"
202 " fmuls 56(%%ebx)\n\t"
205 " faddp %%st,%%st(1)\n\t"
207 " faddp %%st,%%st(1)\n\t"
208 " fistpl (%%esp)\n\t"
210 " cmpl $32767,%%eax\n\t"
212 " cmpl $-32768,%%eax\n\t"
214 " movw %%ax,(%%esi)\n\t"
216 "1: movw $32767,(%%esi)\n\t"
218 "2: movw $-32768,(%%esi)\n\t"
222 " addl $-64,%%ebx\n\t"
225 " leal -128(%%ecx,%%edx,8),%%ecx\n\t"
226 " movl $15,%%ebp\n\t"
228 " flds -4(%%ecx)\n\t"
231 " flds -8(%%ecx)\n\t"
232 " fmuls 4(%%ebx)\n\t"
234 " flds -12(%%ecx)\n\t"
235 " fmuls 8(%%ebx)\n\t"
237 " fsubrp %%st,%%st(1)\n\t"
238 " flds -16(%%ecx)\n\t"
239 " fmuls 12(%%ebx)\n\t"
241 " fsubrp %%st,%%st(1)\n\t"
242 " flds -20(%%ecx)\n\t"
243 " fmuls 16(%%ebx)\n\t"
245 " fsubrp %%st,%%st(1)\n\t"
246 " flds -24(%%ecx)\n\t"
247 " fmuls 20(%%ebx)\n\t"
249 " fsubrp %%st,%%st(1)\n\t"
250 " flds -28(%%ecx)\n\t"
251 " fmuls 24(%%ebx)\n\t"
253 " fsubrp %%st,%%st(1)\n\t"
254 " flds -32(%%ecx)\n\t"
255 " fmuls 28(%%ebx)\n\t"
257 " fsubrp %%st,%%st(1)\n\t"
258 " flds -36(%%ecx)\n\t"
259 " fmuls 32(%%ebx)\n\t"
261 " fsubrp %%st,%%st(1)\n\t"
262 " flds -40(%%ecx)\n\t"
263 " fmuls 36(%%ebx)\n\t"
265 " fsubrp %%st,%%st(1)\n\t"
266 " flds -44(%%ecx)\n\t"
267 " fmuls 40(%%ebx)\n\t"
269 " fsubrp %%st,%%st(1)\n\t"
270 " flds -48(%%ecx)\n\t"
271 " fmuls 44(%%ebx)\n\t"
273 " fsubrp %%st,%%st(1)\n\t"
274 " flds -52(%%ecx)\n\t"
275 " fmuls 48(%%ebx)\n\t"
277 " fsubrp %%st,%%st(1)\n\t"
278 " flds -56(%%ecx)\n\t"
279 " fmuls 52(%%ebx)\n\t"
281 " fsubrp %%st,%%st(1)\n\t"
282 " flds -60(%%ecx)\n\t"
283 " fmuls 56(%%ebx)\n\t"
285 " fsubrp %%st,%%st(1)\n\t"
287 " fmuls 60(%%ebx)\n\t"
290 " fsubrp %%st,%%st(1)\n\t"
292 " fsubrp %%st,%%st(1)\n\t"
293 " fistpl (%%esp)\n\t"
295 " cmpl $32767,%%eax\n\t"
297 " cmpl $-32768,%%eax\n\t"
299 " movw %%ax,(%%esi)\n\t"
301 "1: movw $32767,(%%esi)\n\t"
303 "2: movw $-32768,(%%esi)\n\t"
307 " addl $-64,%%ebx\n\t"
308 " addl $-128,%%ecx\n\t"
312 " movl %%edi,%%eax\n\t"
313 " movl "MANGLE(saved_ebp
)",%%ebp\n\t"
315 :"m"(bandPtr
),"m"(channel
),"m"(samples
),"m"(tmp
[0])
316 :"memory","%edi","%esi","%ebx","%ecx","%edx");