Some people confuse vidix with kernel drivers, so let's add a note about it
[mplayer/glamo.git] / mp3lib / decode_MMX.c
blob990e56305c78930068904ce89411c74e3b3f8c38
1 /*
2 * this code comes under GPL
3 * This code was taken from http://www.mpg123.org
4 * See the ChangeLog of mpg123-0.59s-pre.1 for details
5 * Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
7 * Local ChangeLog:
8 * - Partial loop unrolling and removal of MOVW instructions from loops
9 */
10 #include "../config.h"
11 #include "../mangle.h"
12 #define real float /* ugly - but only way */
14 static unsigned long long attribute_used __attribute__((aligned(8))) null_one = 0x0000ffff0000ffffULL;
15 static unsigned long long attribute_used __attribute__((aligned(8))) one_null = 0xffff0000ffff0000ULL;
16 unsigned long __attribute__((aligned(8))) costab_mmx[] =
18 1056974725,
19 1057056395,
20 1057223771,
21 1057485416,
22 1057855544,
23 1058356026,
24 1059019886,
25 1059897405,
26 1061067246,
27 1062657950,
28 1064892987,
29 1066774581,
30 1069414683,
31 1073984175,
32 1079645762,
33 1092815430,
34 1057005197,
35 1057342072,
36 1058087743,
37 1059427869,
38 1061799040,
39 1065862217,
40 1071413542,
41 1084439708,
42 1057128951,
43 1058664893,
44 1063675095,
45 1076102863,
46 1057655764,
47 1067924853,
48 1060439283,
51 static int temp; // buggy gcc 3.x fails if this is moved into the function :(
52 void synth_1to1_MMX_s(real *bandPtr, int channel, short *samples,
53 short *buffs, int *bo)
56 __asm __volatile(
57 "movl %1,%%ecx\n\t"
58 "movl %2,%%edi\n\t"
59 "movl $15,%%ebx\n\t"
60 "movl %4,%%edx\n\t"
61 "leal (%%edi,%%ecx,2),%%edi\n\t"
62 "decl %%ecx\n\t"
63 "movl %3,%%esi\n\t"
64 "movl (%%edx),%%eax\n\t"
65 "jecxz .L01\n\t"
66 "decl %%eax\n\t"
67 "andl %%ebx,%%eax\n\t"
68 "leal 1088(%%esi),%%esi\n\t"
69 "movl %%eax,(%%edx)\n\t"
70 ".L01:\n\t"
71 "leal (%%esi,%%eax,2),%%edx\n\t"
72 "movl %%eax,%5\n\t"
73 "incl %%eax\n\t"
74 "andl %%ebx,%%eax\n\t"
75 "leal 544(%%esi,%%eax,2),%%ecx\n\t"
76 "incl %%ebx\n\t"
77 "testl $1, %%eax\n\t"
78 "jnz .L02\n\t"
79 "xchgl %%edx,%%ecx\n\t"
80 "incl %5\n\t"
81 "leal 544(%%esi),%%esi\n\t"
82 ".L02:\n\t"
83 "emms\n\t"
84 "pushl %0\n\t"
85 "pushl %%edx\n\t"
86 "pushl %%ecx\n\t"
87 "call *"MANGLE(dct64_MMX_func)"\n\t"
88 "addl $12, %%esp\n\t"
89 "leal 1(%%ebx), %%ecx\n\t"
90 "subl %5,%%ebx\n\t"
91 "pushl %%ecx\n\t"
92 "leal "MANGLE(mp3lib_decwins)"(%%ebx,%%ebx,1), %%edx\n\t"
93 "shrl $1, %%ecx\n\t"
94 ".balign 16\n\t"
95 ".L03:\n\t"
96 "movq (%%edx),%%mm0\n\t"
97 "movq 64(%%edx),%%mm4\n\t"
98 "pmaddwd (%%esi),%%mm0\n\t"
99 "pmaddwd 32(%%esi),%%mm4\n\t"
100 "movq 8(%%edx),%%mm1\n\t"
101 "movq 72(%%edx),%%mm5\n\t"
102 "pmaddwd 8(%%esi),%%mm1\n\t"
103 "pmaddwd 40(%%esi),%%mm5\n\t"
104 "movq 16(%%edx),%%mm2\n\t"
105 "movq 80(%%edx),%%mm6\n\t"
106 "pmaddwd 16(%%esi),%%mm2\n\t"
107 "pmaddwd 48(%%esi),%%mm6\n\t"
108 "movq 24(%%edx),%%mm3\n\t"
109 "movq 88(%%edx),%%mm7\n\t"
110 "pmaddwd 24(%%esi),%%mm3\n\t"
111 "pmaddwd 56(%%esi),%%mm7\n\t"
112 "paddd %%mm1,%%mm0\n\t"
113 "paddd %%mm5,%%mm4\n\t"
114 "paddd %%mm2,%%mm0\n\t"
115 "paddd %%mm6,%%mm4\n\t"
116 "paddd %%mm3,%%mm0\n\t"
117 "paddd %%mm7,%%mm4\n\t"
118 "movq %%mm0,%%mm1\n\t"
119 "movq %%mm4,%%mm5\n\t"
120 "psrlq $32,%%mm1\n\t"
121 "psrlq $32,%%mm5\n\t"
122 "paddd %%mm1,%%mm0\n\t"
123 "paddd %%mm5,%%mm4\n\t"
124 "psrad $13,%%mm0\n\t"
125 "psrad $13,%%mm4\n\t"
126 "packssdw %%mm0,%%mm0\n\t"
127 "packssdw %%mm4,%%mm4\n\t"
129 "movq (%%edi), %%mm1\n\t"
130 "punpckldq %%mm4, %%mm0\n\t"
131 "pand "MANGLE(one_null)", %%mm1\n\t"
132 "pand "MANGLE(null_one)", %%mm0\n\t"
133 "por %%mm0, %%mm1\n\t"
134 "movq %%mm1,(%%edi)\n\t"
136 "leal 64(%%esi),%%esi\n\t"
137 "leal 128(%%edx),%%edx\n\t"
138 "leal 8(%%edi),%%edi\n\t"
140 "decl %%ecx\n\t"
141 "jnz .L03\n\t"
143 "popl %%ecx\n\t"
144 "andl $1, %%ecx\n\t"
145 "jecxz .next_loop\n\t"
147 "movq (%%edx),%%mm0\n\t"
148 "pmaddwd (%%esi),%%mm0\n\t"
149 "movq 8(%%edx),%%mm1\n\t"
150 "pmaddwd 8(%%esi),%%mm1\n\t"
151 "movq 16(%%edx),%%mm2\n\t"
152 "pmaddwd 16(%%esi),%%mm2\n\t"
153 "movq 24(%%edx),%%mm3\n\t"
154 "pmaddwd 24(%%esi),%%mm3\n\t"
155 "paddd %%mm1,%%mm0\n\t"
156 "paddd %%mm2,%%mm0\n\t"
157 "paddd %%mm3,%%mm0\n\t"
158 "movq %%mm0,%%mm1\n\t"
159 "psrlq $32,%%mm1\n\t"
160 "paddd %%mm1,%%mm0\n\t"
161 "psrad $13,%%mm0\n\t"
162 "packssdw %%mm0,%%mm0\n\t"
163 "movd %%mm0,%%eax\n\t"
164 "movw %%ax, (%%edi)\n\t"
165 "leal 32(%%esi),%%esi\n\t"
166 "leal 64(%%edx),%%edx\n\t"
167 "leal 4(%%edi),%%edi\n\t"
169 ".next_loop:\n\t"
170 "subl $64,%%esi\n\t"
171 "movl $7,%%ecx\n\t"
172 ".balign 16\n\t"
173 ".L04:\n\t"
174 "movq (%%edx),%%mm0\n\t"
175 "movq 64(%%edx),%%mm4\n\t"
176 "pmaddwd (%%esi),%%mm0\n\t"
177 "pmaddwd -32(%%esi),%%mm4\n\t"
178 "movq 8(%%edx),%%mm1\n\t"
179 "movq 72(%%edx),%%mm5\n\t"
180 "pmaddwd 8(%%esi),%%mm1\n\t"
181 "pmaddwd -24(%%esi),%%mm5\n\t"
182 "movq 16(%%edx),%%mm2\n\t"
183 "movq 80(%%edx),%%mm6\n\t"
184 "pmaddwd 16(%%esi),%%mm2\n\t"
185 "pmaddwd -16(%%esi),%%mm6\n\t"
186 "movq 24(%%edx),%%mm3\n\t"
187 "movq 88(%%edx),%%mm7\n\t"
188 "pmaddwd 24(%%esi),%%mm3\n\t"
189 "pmaddwd -8(%%esi),%%mm7\n\t"
190 "paddd %%mm1,%%mm0\n\t"
191 "paddd %%mm5,%%mm4\n\t"
192 "paddd %%mm2,%%mm0\n\t"
193 "paddd %%mm6,%%mm4\n\t"
194 "paddd %%mm3,%%mm0\n\t"
195 "paddd %%mm7,%%mm4\n\t"
196 "movq %%mm0,%%mm1\n\t"
197 "movq %%mm4,%%mm5\n\t"
198 "psrlq $32,%%mm1\n\t"
199 "psrlq $32,%%mm5\n\t"
200 "paddd %%mm0,%%mm1\n\t"
201 "paddd %%mm4,%%mm5\n\t"
202 "psrad $13,%%mm1\n\t"
203 "psrad $13,%%mm5\n\t"
204 "packssdw %%mm1,%%mm1\n\t"
205 "packssdw %%mm5,%%mm5\n\t"
206 "psubd %%mm0,%%mm0\n\t"
207 "psubd %%mm4,%%mm4\n\t"
208 "psubsw %%mm1,%%mm0\n\t"
209 "psubsw %%mm5,%%mm4\n\t"
211 "movq (%%edi), %%mm1\n\t"
212 "punpckldq %%mm4, %%mm0\n\t"
213 "pand "MANGLE(one_null)", %%mm1\n\t"
214 "pand "MANGLE(null_one)", %%mm0\n\t"
215 "por %%mm0, %%mm1\n\t"
216 "movq %%mm1,(%%edi)\n\t"
218 "subl $64,%%esi\n\t"
219 "addl $128,%%edx\n\t"
220 "leal 8(%%edi),%%edi\n\t"
221 "decl %%ecx\n\t"
222 "jnz .L04\n\t"
224 "movq (%%edx),%%mm0\n\t"
225 "pmaddwd (%%esi),%%mm0\n\t"
226 "movq 8(%%edx),%%mm1\n\t"
227 "pmaddwd 8(%%esi),%%mm1\n\t"
228 "movq 16(%%edx),%%mm2\n\t"
229 "pmaddwd 16(%%esi),%%mm2\n\t"
230 "movq 24(%%edx),%%mm3\n\t"
231 "pmaddwd 24(%%esi),%%mm3\n\t"
232 "paddd %%mm1,%%mm0\n\t"
233 "paddd %%mm2,%%mm0\n\t"
234 "paddd %%mm3,%%mm0\n\t"
235 "movq %%mm0,%%mm1\n\t"
236 "psrlq $32,%%mm1\n\t"
237 "paddd %%mm0,%%mm1\n\t"
238 "psrad $13,%%mm1\n\t"
239 "packssdw %%mm1,%%mm1\n\t"
240 "psubd %%mm0,%%mm0\n\t"
241 "psubsw %%mm1,%%mm0\n\t"
242 "movd %%mm0,%%eax\n\t"
243 "movw %%ax,(%%edi)\n\t"
244 "emms\n\t"
246 :"m"(bandPtr),"m"(channel),"m"(samples),"m"(buffs),"m"(bo), "m"(temp)
247 :"memory","%edi","%esi","%ebx","%esp");