1 /***************************************************************************
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 * Copyright (C) 2007 by Tomasz Malesinski
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
20 ****************************************************************************/
23 /* Codecs should not normally do this, but we need to check a macro, and
24 * codecs.h would confuse the assembler. */
/* Fixed-point multiplier constants. Numerically, value / 2^31 gives:
 *   cPI3_8 = 0x30fbc54d -> ~0.382683 (matches cos(3*pi/8))
 *   cPI2_8 = 0x5a82799a -> ~0.707107 (matches cos(2*pi/8))
 *   cPI1_8 = 0x7641af3d -> ~0.923880 (matches cos(1*pi/8))
 * NOTE(review): the instructions that load these into registers are not
 * visible in this excerpt. */
26 #define cPI3_8 (0x30fbc54d)
27 #define cPI2_8 (0x5a82799a)
28 #define cPI1_8 (0x7641af3d)
31 .section .icode,"ax",%progbits
37 .global mdct_butterfly_32
38 .global mdct_butterfly_generic_loop
41 @ inputs: r0,r1,r2,r3,r4,r5,r6,r10,r11 &lr
@ r0 is the store pointer; r1,r2,r3,r4,r5,r6,r10,r11 carry x0..x7 in that
@ order (see the per-instruction comments below).
42 @ uses: r7,r8,r9,r12(scratch)
43 @ modifies: r0,r1,r2,r3,r4,r5,r6,r10,r11. increments r0 by #8*4
@ NOTE(review): the entry label and the return instruction are not visible
@ in this excerpt; comments elsewhere in the file refer to a routine
@ mdct_butterfly_8 that increments r0 by #8*4, which matches this code.
@
@ First stage: each sum goes to a scratch register (r9, r7, r8, r12) and
@ each difference overwrites the register that held the upper input.
44 add r9, r5, r1 @ x4 + x0
45 sub r5, r5, r1 @ x4 - x0
46 add r7, r6, r2 @ x5 + x1
47 sub r6, r6, r2 @ x5 - x1
48 add r8, r10, r3 @ x6 + x2
49 sub r10, r10, r3 @ x6 - x2
50 add r12, r11, r4 @ x7 + x3
51 sub r11, r11, r4 @ x7 - x3
@ Second stage: combine the first-stage results into y0..y7. y0..y3 are
@ built from the differences, y4..y7 from the sums.
53 add r1, r10, r6 @ y0 = (x6 - x2) + (x5 - x1)
54 sub r2, r11, r5 @ y1 = (x7 - x3) - (x4 - x0)
55 sub r3, r10, r6 @ y2 = (x6 - x2) - (x5 - x1)
56 add r4, r11, r5 @ y3 = (x7 - x3) + (x4 - x0)
57 sub r5, r8, r9 @ y4 = (x6 + x2) - (x4 + x0)
58 sub r6, r12, r7 @ y5 = (x7 + x3) - (x5 + x1)
59 add r10, r8, r9 @ y6 = (x6 + x2) + (x4 + x0)
60 add r11, r12, r7 @ y7 = (x7 + x3) + (x5 + x1)
@ Store y0..y7 at r0; the writeback (!) advances r0 by 8 words.
61 stmia r0!, {r1, r2, r3, r4, r5, r6, r10, r11}
67 @ uses: r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12
68 @ modifies: r0. increments r0 by #16*4
69 @ calls mdct_butterfly_8 via bl so need to stack lr for return address
@ NOTE(review): several instructions of this routine are not visible in
@ this excerpt (entry label, the setup of r1 and r12, the bl instructions,
@ the return). The comments added below describe only what is shown.
@
@ Idiom used throughout: after "sum = hi + lo" has been written over hi,
@   rsb rd, sum, lo, asl #1   gives 2*lo - sum = lo - hi
@   sub rd, sum, lo, asl #1   gives sum - 2*lo = hi - lo
@ so the difference is derived from the sum that was just computed.
@
@ Group 1: words at r0 are x0..x3, words at r1 are x8..x11 (per the
@ comments below).
73 ldmia r0, {r2, r3, r4, r5}
74 ldmia r1, {r6, r7, r8, r9}
75 add r6, r6, r2 @ y8 = x8 + x0
76 rsb r2, r6, r2, asl #1 @ x0 - x8
77 add r7, r7, r3 @ y9 = x9 + x1
78 rsb r3, r7, r3, asl #1 @ x1 - x9
79 add r8, r8, r4 @ y10 = x10 + x2
80 sub r11, r8, r4, asl #1 @ x10 - x2
81 add r9, r9, r5 @ y11 = x11 + x3
82 rsb r10, r9, r5, asl #1 @ x3 - x11
@ Write y8..y11 back through r1; writeback advances r1 by 4 words.
84 stmia r1!, {r6, r7, r8, r9}
86 add r2, r2, r3 @ (x0 - x8) + (x1 - x9)
87 rsb r3, r2, r3, asl #1 @ (x1 - x9) - (x0 - x8)
@ NOTE(review): no visible instruction assigns the r5/r6 values stored
@ here; they are presumably produced by instructions missing from this
@ excerpt (between the rsb above and this store).
95 stmia r0!, {r5, r6, r10, r11}
@ Group 2: words at r0 are x4..x7, words at r1 are x12..x15 (per the
@ comments below).
97 ldmia r0, {r2, r3, r4, r5}
98 ldmia r1, {r6, r7, r8, r9}
99 add r6, r6, r2 @ y12 = x12 + x4
100 sub r2, r6, r2, asl #1 @ x12 - x4
101 add r7, r7, r3 @ y13 = x13 + x5
102 sub r3, r7, r3, asl #1 @ x13 - x5
103 add r8, r8, r4 @ y14 = x14 + x6
104 sub r10, r8, r4, asl #1 @ x14 - x6
105 add r9, r9, r5 @ y15 = x15 + x7
106 sub r11, r9, r5, asl #1 @ x15 - x7
@ Store y12..y15 through r1; no writeback this time, r1 is unchanged.
108 stmia r1, {r6, r7, r8, r9}
110 sub r2, r2, r3 @ (x12 - x4) - (x13 - x5)
111 add r3, r2, r3, asl #1 @ (x12 - x4) + (x13 - x5)
@ smull puts the low word of the signed 64-bit product in r8 and the high
@ word in r5 / r6. r8 is overwritten by the second smull, so the first
@ low word is discarded.
@ NOTE(review): the value held in r12 is set outside the visible lines.
113 smull r8, r5, r12, r2
114 smull r8, r6, r12, r3
117 @ no stmia here, r5, r6, r10, r11 are passed to mdct_butterfly_8
@ Load the remaining four mdct_butterfly_8 inputs (r1..r4) from r0.
120 ldmia r0, {r1, r2, r3, r4}
123 @ mdct_butterfly_8 will have incremented r0 by #8*4 already
@ Load all eight mdct_butterfly_8 inputs for the second call.
124 ldmia r0, {r1, r2, r3, r4, r5, r6, r10, r11}
127 @ mdct_butterfly_8 increments r0 by another #8*4 here
128 @ at end, r0 has been incremented by #16*4
@ NOTE(review): the entry label is not visible in this excerpt; the
@ ".global mdct_butterfly_32" directive and the closing comments about
@ calling mdct_butterfly_16 suggest this is mdct_butterfly_32. Other
@ instructions (setup of r1, r11, r12 and lr, the calls, the return) are
@ also not visible; comments below describe only what is shown.
@
@ Idiom used throughout: after "sum = hi + lo" has been written over hi,
@   rsb rd, sum, lo, asl #1   gives 2*lo - sum = lo - hi
@   sub rd, sum, lo, asl #1   gives sum - 2*lo = hi - lo
@
@ Push r4-r11 and lr; sp is decremented before the store (writeback).
133 stmdb sp!, {r4-r11, lr}
@ Group 1: words at r0 are x0..x3, words at r1 are x16..x19 (per the
@ comments below).
137 ldmia r0, {r2, r3, r4, r5}
138 ldmia r1, {r6, r7, r8, r9}
139 add r6, r6, r2 @ y16 = x16 + x0
140 rsb r2, r6, r2, asl #1 @ x0 - x16
141 add r7, r7, r3 @ y17 = x17 + x1
142 rsb r3, r7, r3, asl #1 @ x1 - x17
143 add r8, r8, r4 @ y18 = x18 + x2
144 rsb r4, r8, r4, asl #1 @ x2 - x18
145 add r9, r9, r5 @ y19 = x19 + x3
146 rsb r5, r9, r5, asl #1 @ x3 - x19
@ Write y16..y19 back through r1; writeback advances r1 by 4 words.
148 stmia r1!, {r6, r7, r8, r9}
@ Two-term products: smull starts a signed 64-bit product in r10 (low)
@ and r6 / r7 (high); smlal adds a second product into the same pair.
@ NOTE(review): the multipliers in r12 and lr are set outside the
@ visible lines.
152 smull r10, r6, r12, r2
154 smlal r10, r6, lr, r3
155 smull r10, r7, r12, r3
156 smlal r10, r7, lr, r2
160 add r4, r4, r5 @ (x3 - x19) + (x2 - x18)
161 rsb r5, r4, r5, asl #1 @ (x3 - x19) - (x2 - x18)
@ High words of the products land in r8 and r9; r10 receives the low
@ words. NOTE(review): the multiplier in r11 is set outside the visible
@ lines.
164 smull r10, r8, r4, r11
165 smull r10, r9, r5, r11
@ Store four results at r0; writeback advances r0 by 4 words.
169 stmia r0!, {r6, r7, r8, r9}
@ Group 2: words at r0 are x4..x7, words at r1 are x20..x23.
171 ldmia r0, {r2, r3, r4, r5}
172 ldmia r1, {r6, r7, r8, r9}
173 add r6, r6, r2 @ y20 = x20 + x4
174 rsb r2, r6, r2, asl #1 @ x4 - x20
175 add r7, r7, r3 @ y21 = x21 + x5
176 rsb r3, r7, r3, asl #1 @ x5 - x21
177 add r8, r8, r4 @ y22 = x22 + x6
178 sub r11, r8, r4, asl #1 @ x22 - x6
179 add r9, r9, r5 @ y23 = x23 + x7
180 rsb r10, r9, r5, asl #1 @ x7 - x23
181 stmia r1!, {r6, r7, r8, r9}
183 @r4,r5,r6,r7,r8,r9 now free
184 @ we don't use r5, r8, r9 below
@ NOTE(review): these smlal instructions accumulate into r4:r6 and r4:r7;
@ the instructions that initialise those accumulators are presumably
@ among the lines missing from this excerpt.
188 smlal r4, r6, r12, r3
190 smlal r4, r7, r12, r2
194 stmia r0!, {r6, r7, r10, r11}
@ Group 3: words at r0 are x8..x11, words at r1 are x24..x27.
196 ldmia r0, {r2, r3, r4, r5}
197 ldmia r1, {r6, r7, r8, r9}
198 add r6, r6, r2 @ y24 = x24 + x8
199 sub r2, r6, r2, asl #1 @ x24 - x8
200 add r7, r7, r3 @ y25 = x25 + x9
201 sub r3, r7, r3, asl #1 @ x25 - x9
202 add r8, r8, r4 @ y26 = x26 + x10
203 sub r4, r8, r4, asl #1 @ x26 - x10
204 add r9, r9, r5 @ y27 = x27 + x11
205 sub r5, r9, r5, asl #1 @ x27 - x11
207 stmia r1!, {r6, r7, r8, r9}
@ Same smull/smlal pairing as in group 1, with results in r7 and r6.
209 smull r10, r7, lr, r3
211 smlal r10, r7, r12, r2
212 smull r10, r6, r12, r3
213 smlal r10, r6, lr, r2
217 sub r4, r4, r5 @ (x26 - x10) - (x27 - x11)
218 add r5, r4, r5, asl #1 @ (x26 - x10) + (x27 - x11)
221 smull r10, r8, r11, r4
222 smull r10, r9, r11, r5
226 stmia r0!, {r6, r7, r8, r9}
@ Group 4: words at r0 are x12..x15, words at r1 are x28..x31.
228 ldmia r0, {r2, r3, r4, r5}
229 ldmia r1, {r6, r7, r8, r9}
230 add r6, r6, r2 @ y28 = x28 + x12
231 sub r2, r6, r2, asl #1 @ x28 - x12
232 add r7, r7, r3 @ y29 = x29 + x13
233 sub r3, r7, r3, asl #1 @ x29 - x13
234 add r8, r8, r4 @ y30 = x30 + x14
235 sub r10, r8, r4, asl #1 @ x30 - x14
236 add r9, r9, r5 @ y31 = x31 + x15
237 sub r11, r9, r5, asl #1 @ x31 - x15
@ Store y28..y31 through r1; no writeback, r1 is unchanged.
238 stmia r1, {r6, r7, r8, r9}
240 @ r4,r5,r6,r7,r8,r9 now free
241 @ we don't use r5,r8,r9 below
@ NOTE(review): the smlal below accumulates into r4:r6, but no visible
@ instruction initialises r6 for it; the missing lines presumably do.
243 smull r4, r7, r12, r3
247 smlal r4, r6, r12, r2
@ Store four results at r0; no writeback, r0 is unchanged by this store.
251 stmia r0, {r6, r7, r10, r11}
256 @ we know mdct_butterfly_16 increments r0 by #16*4
257 @ and we wanted to advance by #16*4 anyway, so just call again
262 @ mdct_butterfly_generic_loop(x1, x2, T0, step, Ttop)
263 mdct_butterfly_generic_loop:
264 stmdb sp!, {r4-r11, lr}
268 ldmdb r0, {r6, r7, r8, r9}
269 ldmdb r1, {r10, r11, r12, r14}
272 sub r10, r6, r10, asl #1
274 rsb r11, r7, r11, asl #1
276 sub r12, r8, r12, asl #1
278 rsb r14, r9, r14, asl #1
280 stmdb r0!, {r6, r7, r8, r9}
283 smull r5, r8, r6, r14
285 smlal r5, r8, r7, r12
286 smull r5, r9, r6, r12
287 smlal r5, r9, r7, r14
291 add r2, r2, r3, asl #2
294 smull r5, r6, r12, r11
296 smlal r5, r6, r14, r10
297 smull r5, r7, r12, r10
298 smlal r5, r7, r14, r11
302 stmdb r1!, {r6, r7, r8, r9}
303 add r2, r2, r3, asl #2
310 ldmdb r0, {r6, r7, r8, r9}
311 ldmdb r1, {r10, r11, r12, r14}
314 sub r10, r6, r10, asl #1
316 sub r11, r7, r11, asl #1
318 sub r12, r8, r12, asl #1
320 sub r14, r9, r14, asl #1
322 stmdb r0!, {r6, r7, r8, r9}
325 smull r5, r9, r6, r14
327 smlal r5, r9, r7, r12
328 smull r5, r8, r6, r12
329 smlal r5, r8, r7, r14
334 sub r2, r2, r3, asl #2
337 smull r5, r7, r12, r11
339 smlal r5, r7, r14, r10
340 smull r5, r6, r12, r10
341 smlal r5, r6, r14, r11
345 stmdb r1!, {r6, r7, r8, r9}
346 sub r2, r2, r3, asl #2
353 ldmdb r0, {r6, r7, r8, r9}
354 ldmdb r1, {r10, r11, r12, r14}
357 rsb r10, r6, r10, asl #1
359 rsb r11, r7, r11, asl #1
361 rsb r12, r8, r12, asl #1
363 rsb r14, r9, r14, asl #1
365 stmdb r0!, {r6, r7, r8, r9}
368 smull r5, r8, r6, r12
370 smlal r5, r8, r7, r14
371 smull r5, r9, r6, r14
372 smlal r5, r9, r7, r12
377 add r2, r2, r3, asl #2
380 smull r5, r6, r12, r10
382 smlal r5, r6, r14, r11
383 smull r5, r7, r12, r11
384 smlal r5, r7, r14, r10
388 stmdb r1!, {r6, r7, r8, r9}
389 add r2, r2, r3, asl #2
396 ldmdb r0, {r6, r7, r8, r9}
397 ldmdb r1, {r10, r11, r12, r14}
400 sub r10, r6, r10, asl #1
402 rsb r11, r7, r11, asl #1
404 sub r12, r8, r12, asl #1
406 rsb r14, r9, r14, asl #1
408 stmdb r0!, {r6, r7, r8, r9}
411 smull r5, r9, r6, r12
412 smlal r5, r9, r7, r14
414 smull r5, r8, r6, r14
415 smlal r5, r8, r7, r12
419 sub r2, r2, r3, asl #2
422 smull r5, r7, r12, r10
424 smlal r5, r7, r14, r11
425 smull r5, r6, r12, r11
426 smlal r5, r6, r14, r10
430 stmdb r1!, {r6, r7, r8, r9}
431 sub r2, r2, r3, asl #2