2 * this code comes under GPL
3 * This code was taken from http://www.mpg123.org
4 * See ChangeLog of mpg123-0.59s-pre.1 for detail
5 * Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
8 * - Partial loop unrolling and removal of the MOVW instruction from loops
/* Sample type alias: `real` expands to float. In this file it is the element
 * type of the bandPtr argument of synth_1to1_MMX and of the third parameter
 * of dct64_MMX_func. */
12 #define real float /* ugly - but only way */
/* Function pointer defined in another translation unit (extern).
 * synth_1to1_MMX invokes it as dct64_MMX_func(a, b, bandPtr): two short*
 * buffers followed by the real* input.
 * NOTE(review): presumably the two short buffers receive the transform
 * output - confirm against the definition of the pointed-to routine. */
14 extern void (*dct64_MMX_func
)(short*, short*, real
*);
/* 64-bit mask, 8-byte aligned: 16-bit words 0 and 2 (counting from the least
 * significant word) are all ones, words 1 and 3 are zero.
 * The inline asm in synth_1to1_MMX references it through MANGLE() and ANDs
 * the freshly computed result register (%%mm0) with it before OR-ing that
 * into the quadword previously loaded from the samples pointer.
 * NOTE(review): attribute_used is presumably needed because the only
 * references are inside asm strings - confirm against the macro definition. */
15 static const unsigned long long attribute_used
__attribute__((aligned(8))) null_one
= 0x0000ffff0000ffffULL
;
/* 64-bit mask, 8-byte aligned: 16-bit words 1 and 3 (counting from the least
 * significant word) are all ones, words 0 and 2 are zero - the bitwise
 * complement of 0x0000ffff0000ffff.
 * The inline asm in synth_1to1_MMX references it through MANGLE() and ANDs
 * the quadword loaded from the samples pointer (%%mm1) with it, so those
 * words survive when the new results are OR-ed in and stored back.
 * NOTE(review): attribute_used is presumably needed because the only
 * references are inside asm strings - confirm against the macro definition. */
16 static const unsigned long long attribute_used
__attribute__((aligned(8))) one_null
= 0xffff0000ffff0000ULL
;
17 const unsigned int __attribute__((aligned(16))) costab_mmx
[] =
53 This array of magic numbers was calculated by the pure function
54 make_decode_tables_MMX(32768), which was implemented in tabinit_MMX.c
55 (deleted since r23383).
/* Table of signed 16-bit coefficients, 8-byte aligned.
 * As listed here it holds 128 rows of 8 values (1024 entries). Each pair of
 * rows (16 values) is immediately followed by an identical copy of itself.
 * synth_1to1_MMX starts reading at mp3lib_decwins + 16 - bo1; the asm loads
 * quadwords from that pointer with movq and combines them with the b0 buffer
 * via pmaddwd.
 * NOTE(review): the duplication presumably lets that start offset slide
 * across a 16-value group without wrapping - confirm against the removed
 * table generator. */
57 static const short __attribute__((aligned(8))) mp3lib_decwins
[] =
59 0, 7, 54, 114, 510, 1288, 1644, 9372,
60 18760, -9373, 1644, -1289, 510, -115, 54, -8,
61 0, 7, 54, 114, 510, 1288, 1644, 9372,
62 18760, -9373, 1644, -1289, 510, -115, 54, -8,
63 0, 7, 55, 129, 500, 1379, 1490, 9834,
64 18748, -8910, 1784, -1197, 516, -101, 52, -7,
65 0, 7, 55, 129, 500, 1379, 1490, 9834,
66 18748, -8910, 1784, -1197, 516, -101, 52, -7,
67 0, 8, 56, 145, 488, 1469, 1322, 10294,
68 18714, -8448, 1910, -1107, 520, -87, 51, -6,
69 0, 8, 56, 145, 488, 1469, 1322, 10294,
70 18714, -8448, 1910, -1107, 520, -87, 51, -6,
71 0, 9, 57, 161, 474, 1559, 1141, 10751,
72 18658, -7987, 2023, -1016, 522, -74, 49, -6,
73 0, 9, 57, 161, 474, 1559, 1141, 10751,
74 18658, -7987, 2023, -1016, 522, -74, 49, -6,
75 0, 10, 57, 177, 456, 1647, 944, 11205,
76 18579, -7528, 2123, -927, 522, -61, 48, -5,
77 0, 10, 57, 177, 456, 1647, 944, 11205,
78 18579, -7528, 2123, -927, 522, -61, 48, -5,
79 0, 11, 57, 194, 435, 1733, 734, 11654,
80 18477, -7073, 2210, -838, 519, -50, 46, -5,
81 0, 11, 57, 194, 435, 1733, 734, 11654,
82 18477, -7073, 2210, -838, 519, -50, 46, -5,
83 0, 12, 57, 212, 411, 1817, 510, 12097,
84 18354, -6621, 2285, -751, 515, -39, 44, -4,
85 0, 12, 57, 212, 411, 1817, 510, 12097,
86 18354, -6621, 2285, -751, 515, -39, 44, -4,
87 0, 13, 57, 229, 384, 1899, 271, 12534,
88 18209, -6174, 2348, -666, 508, -28, 43, -4,
89 0, 13, 57, 229, 384, 1899, 271, 12534,
90 18209, -6174, 2348, -666, 508, -28, 43, -4,
91 0, 14, 56, 247, 354, 1977, 18, 12963,
92 18043, -5733, 2398, -583, 501, -18, 41, -4,
93 0, 14, 56, 247, 354, 1977, 18, 12963,
94 18043, -5733, 2398, -583, 501, -18, 41, -4,
95 0, 15, 56, 266, 320, 2052, -249, 13383,
96 17855, -5298, 2438, -502, 491, -9, 39, -3,
97 0, 15, 56, 266, 320, 2052, -249, 13383,
98 17855, -5298, 2438, -502, 491, -9, 39, -3,
99 0, 17, 54, 284, 283, 2122, -530, 13794,
100 17648, -4870, 2466, -423, 480, -1, 37, -3,
101 0, 17, 54, 284, 283, 2122, -530, 13794,
102 17648, -4870, 2466, -423, 480, -1, 37, -3,
103 0, 18, 52, 302, 243, 2188, -825, 14194,
104 17420, -4450, 2484, -347, 468, 7, 35, -3,
105 0, 18, 52, 302, 243, 2188, -825, 14194,
106 17420, -4450, 2484, -347, 468, 7, 35, -3,
107 0, 19, 50, 320, 199, 2249, -1133, 14583,
108 17173, -4039, 2492, -274, 455, 14, 33, -2,
109 0, 19, 50, 320, 199, 2249, -1133, 14583,
110 17173, -4039, 2492, -274, 455, 14, 33, -2,
111 -1, 21, 48, 339, 152, 2304, -1454, 14959,
112 16908, -3637, 2490, -204, 440, 20, 32, -2,
113 -1, 21, 48, 339, 152, 2304, -1454, 14959,
114 16908, -3637, 2490, -204, 440, 20, 32, -2,
115 -1, 22, 45, 357, 101, 2354, -1788, 15322,
116 16624, -3245, 2479, -137, 425, 26, 30, -2,
117 -1, 22, 45, 357, 101, 2354, -1788, 15322,
118 16624, -3245, 2479, -137, 425, 26, 30, -2,
119 -1, 24, 41, 374, 47, 2396, -2135, 15671,
120 16323, -2864, 2460, -72, 409, 31, 28, -2,
121 -1, 24, 41, 374, 47, 2396, -2135, 15671,
122 16323, -2864, 2460, -72, 409, 31, 28, -2,
123 -1, 26, 37, 391, -11, 2431, -2493, 16004,
124 16005, -2494, 2432, -12, 392, 36, 26, -2,
125 -1, 26, 37, 391, -11, 2431, -2493, 16004,
126 16005, -2494, 2432, -12, 392, 36, 26, -2,
127 -2, -28, 31, -409, -72, -2460, -2864, -16323,
128 15671, 2135, 2396, -47, 374, -41, 24, 1,
129 -2, -28, 31, -409, -72, -2460, -2864, -16323,
130 15671, 2135, 2396, -47, 374, -41, 24, 1,
131 -2, -30, 26, -425, -137, -2479, -3245, -16624,
132 15322, 1788, 2354, -101, 357, -45, 22, 1,
133 -2, -30, 26, -425, -137, -2479, -3245, -16624,
134 15322, 1788, 2354, -101, 357, -45, 22, 1,
135 -2, -32, 20, -440, -204, -2490, -3637, -16908,
136 14959, 1454, 2304, -152, 339, -48, 21, 1,
137 -2, -32, 20, -440, -204, -2490, -3637, -16908,
138 14959, 1454, 2304, -152, 339, -48, 21, 1,
139 -2, -33, 14, -455, -274, -2492, -4039, -17173,
140 14583, 1133, 2249, -199, 320, -50, 19, 0,
141 -2, -33, 14, -455, -274, -2492, -4039, -17173,
142 14583, 1133, 2249, -199, 320, -50, 19, 0,
143 -3, -35, 7, -468, -347, -2484, -4450, -17420,
144 14194, 825, 2188, -243, 302, -52, 18, 0,
145 -3, -35, 7, -468, -347, -2484, -4450, -17420,
146 14194, 825, 2188, -243, 302, -52, 18, 0,
147 -3, -37, -1, -480, -423, -2466, -4870, -17648,
148 13794, 530, 2122, -283, 284, -54, 17, 0,
149 -3, -37, -1, -480, -423, -2466, -4870, -17648,
150 13794, 530, 2122, -283, 284, -54, 17, 0,
151 -3, -39, -9, -491, -502, -2438, -5298, -17855,
152 13383, 249, 2052, -320, 266, -56, 15, 0,
153 -3, -39, -9, -491, -502, -2438, -5298, -17855,
154 13383, 249, 2052, -320, 266, -56, 15, 0,
155 -4, -41, -18, -501, -583, -2398, -5733, -18043,
156 12963, -18, 1977, -354, 247, -56, 14, 0,
157 -4, -41, -18, -501, -583, -2398, -5733, -18043,
158 12963, -18, 1977, -354, 247, -56, 14, 0,
159 -4, -43, -28, -508, -666, -2348, -6174, -18209,
160 12534, -271, 1899, -384, 229, -57, 13, 0,
161 -4, -43, -28, -508, -666, -2348, -6174, -18209,
162 12534, -271, 1899, -384, 229, -57, 13, 0,
163 -4, -44, -39, -515, -751, -2285, -6621, -18354,
164 12097, -510, 1817, -411, 212, -57, 12, 0,
165 -4, -44, -39, -515, -751, -2285, -6621, -18354,
166 12097, -510, 1817, -411, 212, -57, 12, 0,
167 -5, -46, -50, -519, -838, -2210, -7073, -18477,
168 11654, -734, 1733, -435, 194, -57, 11, 0,
169 -5, -46, -50, -519, -838, -2210, -7073, -18477,
170 11654, -734, 1733, -435, 194, -57, 11, 0,
171 -5, -48, -61, -522, -927, -2123, -7528, -18579,
172 11205, -944, 1647, -456, 177, -57, 10, 0,
173 -5, -48, -61, -522, -927, -2123, -7528, -18579,
174 11205, -944, 1647, -456, 177, -57, 10, 0,
175 -6, -49, -74, -522, -1016, -2023, -7987, -18658,
176 10751, -1141, 1559, -474, 161, -57, 9, 0,
177 -6, -49, -74, -522, -1016, -2023, -7987, -18658,
178 10751, -1141, 1559, -474, 161, -57, 9, 0,
179 -6, -51, -87, -520, -1107, -1910, -8448, -18714,
180 10294, -1322, 1469, -488, 145, -56, 8, 0,
181 -6, -51, -87, -520, -1107, -1910, -8448, -18714,
182 10294, -1322, 1469, -488, 145, -56, 8, 0,
183 -7, -52, -101, -516, -1197, -1784, -8910, -18748,
184 9834, -1490, 1379, -500, 129, -55, 7, 0,
185 -7, -52, -101, -516, -1197, -1784, -8910, -18748,
186 9834, -1490, 1379, -500, 129, -55, 7, 0,
189 int synth_1to1_MMX(real
*bandPtr
, int channel
, short *samples
)
191 static short buffs
[2][2][0x110] __attribute__((aligned(8)));
193 short *b0
, (*buf
)[0x110], *a
, *b
;
209 b
= buf
[1] + ((bo
+ 1) & 0xf);
214 a
= buf
[1] + ((bo
+ 1) & 0xf);
217 dct64_MMX_func(a
, b
, bandPtr
);
218 window
= mp3lib_decwins
+ 16 - bo1
;
219 //printf("DEBUG: channel %d, bo %d, off %d\n", channel, bo, 16 - bo1);
223 "movq (%1),%%mm0\n\t"
224 "movq 64(%1),%%mm4\n\t"
225 "pmaddwd (%2),%%mm0\n\t"
226 "pmaddwd 32(%2),%%mm4\n\t"
227 "movq 8(%1),%%mm1\n\t"
228 "movq 72(%1),%%mm5\n\t"
229 "pmaddwd 8(%2),%%mm1\n\t"
230 "pmaddwd 40(%2),%%mm5\n\t"
231 "movq 16(%1),%%mm2\n\t"
232 "movq 80(%1),%%mm6\n\t"
233 "pmaddwd 16(%2),%%mm2\n\t"
234 "pmaddwd 48(%2),%%mm6\n\t"
235 "movq 24(%1),%%mm3\n\t"
236 "movq 88(%1),%%mm7\n\t"
237 "pmaddwd 24(%2),%%mm3\n\t"
238 "pmaddwd 56(%2),%%mm7\n\t"
239 "paddd %%mm1,%%mm0\n\t"
240 "paddd %%mm5,%%mm4\n\t"
241 "paddd %%mm2,%%mm0\n\t"
242 "paddd %%mm6,%%mm4\n\t"
243 "paddd %%mm3,%%mm0\n\t"
244 "paddd %%mm7,%%mm4\n\t"
245 "movq %%mm0,%%mm1\n\t"
246 "movq %%mm4,%%mm5\n\t"
247 "psrlq $32,%%mm1\n\t"
248 "psrlq $32,%%mm5\n\t"
249 "paddd %%mm1,%%mm0\n\t"
250 "paddd %%mm5,%%mm4\n\t"
251 "psrad $13,%%mm0\n\t"
252 "psrad $13,%%mm4\n\t"
253 "packssdw %%mm0,%%mm0\n\t"
254 "packssdw %%mm4,%%mm4\n\t"
256 "movq (%3), %%mm1\n\t"
257 "punpckldq %%mm4, %%mm0\n\t"
258 "pand "MANGLE(one_null
)", %%mm1\n\t"
259 "pand "MANGLE(null_one
)", %%mm0\n\t"
260 "por %%mm0, %%mm1\n\t"
261 "movq %%mm1,(%3)\n\t"
270 "movq (%1),%%mm0\n\t"
271 "pmaddwd (%2),%%mm0\n\t"
272 "movq 8(%1),%%mm1\n\t"
273 "pmaddwd 8(%2),%%mm1\n\t"
274 "movq 16(%1),%%mm2\n\t"
275 "pmaddwd 16(%2),%%mm2\n\t"
276 "movq 24(%1),%%mm3\n\t"
277 "pmaddwd 24(%2),%%mm3\n\t"
278 "paddd %%mm1,%%mm0\n\t"
279 "paddd %%mm2,%%mm0\n\t"
280 "paddd %%mm3,%%mm0\n\t"
281 "movq %%mm0,%%mm1\n\t"
282 "psrlq $32,%%mm1\n\t"
283 "paddd %%mm1,%%mm0\n\t"
284 "psrad $13,%%mm0\n\t"
285 "packssdw %%mm0,%%mm0\n\t"
286 "movd %%mm0,%%eax\n\t"
287 "movw %%ax, (%3)\n\t"
295 "movq (%1),%%mm0\n\t"
296 "movq 64(%1),%%mm4\n\t"
297 "pmaddwd (%2),%%mm0\n\t"
298 "pmaddwd -32(%2),%%mm4\n\t"
299 "movq 8(%1),%%mm1\n\t"
300 "movq 72(%1),%%mm5\n\t"
301 "pmaddwd 8(%2),%%mm1\n\t"
302 "pmaddwd -24(%2),%%mm5\n\t"
303 "movq 16(%1),%%mm2\n\t"
304 "movq 80(%1),%%mm6\n\t"
305 "pmaddwd 16(%2),%%mm2\n\t"
306 "pmaddwd -16(%2),%%mm6\n\t"
307 "movq 24(%1),%%mm3\n\t"
308 "movq 88(%1),%%mm7\n\t"
309 "pmaddwd 24(%2),%%mm3\n\t"
310 "pmaddwd -8(%2),%%mm7\n\t"
311 "paddd %%mm1,%%mm0\n\t"
312 "paddd %%mm5,%%mm4\n\t"
313 "paddd %%mm2,%%mm0\n\t"
314 "paddd %%mm6,%%mm4\n\t"
315 "paddd %%mm3,%%mm0\n\t"
316 "paddd %%mm7,%%mm4\n\t"
317 "movq %%mm0,%%mm1\n\t"
318 "movq %%mm4,%%mm5\n\t"
319 "psrlq $32,%%mm1\n\t"
320 "psrlq $32,%%mm5\n\t"
321 "paddd %%mm0,%%mm1\n\t"
322 "paddd %%mm4,%%mm5\n\t"
323 "psrad $13,%%mm1\n\t"
324 "psrad $13,%%mm5\n\t"
325 "packssdw %%mm1,%%mm1\n\t"
326 "packssdw %%mm5,%%mm5\n\t"
327 "psubd %%mm0,%%mm0\n\t"
328 "psubd %%mm4,%%mm4\n\t"
329 "psubsw %%mm1,%%mm0\n\t"
330 "psubsw %%mm5,%%mm4\n\t"
332 "movq (%3), %%mm1\n\t"
333 "punpckldq %%mm4, %%mm0\n\t"
334 "pand "MANGLE(one_null
)", %%mm1\n\t"
335 "pand "MANGLE(null_one
)", %%mm0\n\t"
336 "por %%mm0, %%mm1\n\t"
337 "movq %%mm1,(%3)\n\t"
345 "movq (%1),%%mm0\n\t"
346 "pmaddwd (%2),%%mm0\n\t"
347 "movq 8(%1),%%mm1\n\t"
348 "pmaddwd 8(%2),%%mm1\n\t"
349 "movq 16(%1),%%mm2\n\t"
350 "pmaddwd 16(%2),%%mm2\n\t"
351 "movq 24(%1),%%mm3\n\t"
352 "pmaddwd 24(%2),%%mm3\n\t"
353 "paddd %%mm1,%%mm0\n\t"
354 "paddd %%mm2,%%mm0\n\t"
355 "paddd %%mm3,%%mm0\n\t"
356 "movq %%mm0,%%mm1\n\t"
357 "psrlq $32,%%mm1\n\t"
358 "paddd %%mm0,%%mm1\n\t"
359 "psrad $13,%%mm1\n\t"
360 "packssdw %%mm1,%%mm1\n\t"
361 "psubd %%mm0,%%mm0\n\t"
362 "psubsw %%mm1,%%mm0\n\t"
363 "movd %%mm0,%%eax\n\t"
366 :"+r"(i
), "+r"(window
), "+r"(b0
), "+r"(samples
)