1 /***************************************************************************
\r
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
\r
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
\r
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
\r
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
\r
10 * Copyright (C) 2008 by Andree Buschmann
\r
12 * All files in this archive are subject to the GNU General Public License.
\r
13 * See the file COPYING in the source tree root for full license agreement.
\r
15 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
\r
16 * KIND, either express or implied.
\r
18 ****************************************************************************/
\r
20 #include "mpc_config.h"
\r
22 .section .text, "ax", %progbits
\r
/****************************************************************************
 * void mpc_decoder_windowing_D(...)
 *
 * 2nd step within synthesis filter. Does the dewindowing.
 * 32=32x32 multiplies (OPTIMIZE_FOR_SPEED)
 * Uses pre-shifted V[] and D[] values. D[] will always be the second operand
 * of mul/mla to achieve higher speed as D[] has lower amplitude than V[].
 *
 * In:     r0 = Data[]  output pointer, 32 words are written
 *         r1 = V[]     input samples, words V[0] .. V[1023] are read
 *         r2 = D[]     window coefficients, 32 * 16 words are read
 * Out:    Data[k] = (sum over 16 taps of V[k + tap] * D[16*k + j]) >> 1
 *         with taps 0, 96, 128, 224, ... 896, 992 (low 32 bits of the sum)
 * Regs:   r3-r10 = D[] values, r11 = V[] value, r12 = accumulator,
 *         lr = loop counter
 * Clobb:  r0-r3, flags (r4-r12 and lr are saved/restored on the stack)
 ****************************************************************************/
#if defined(OPTIMIZE_FOR_SPEED)
    .align  2
    .global mpc_decoder_windowing_D
    .type   mpc_decoder_windowing_D, %function
mpc_decoder_windowing_D:
    stmfd   sp!, {r4-r12, lr}

    mov     lr, #32                 /* lr = number of output words left */
.loop32:
    ldmia   r2!, { r3-r10 }         /* load D[00..07] */
    ldr     r11, [r1]               /*  0 */
    mul     r12, r11, r3            /* first tap initialises the accumulator */
    ldr     r11, [r1,  #96*4]       /*  1 */
    mla     r12, r11, r4, r12
    ldr     r11, [r1, #128*4]       /*  2 */
    mla     r12, r11, r5, r12
    ldr     r11, [r1, #224*4]       /*  3 */
    mla     r12, r11, r6, r12
    ldr     r11, [r1, #256*4]       /*  4 */
    mla     r12, r11, r7, r12
    ldr     r11, [r1, #352*4]       /*  5 */
    mla     r12, r11, r8, r12
    ldr     r11, [r1, #384*4]       /*  6 */
    mla     r12, r11, r9, r12
    ldr     r11, [r1, #480*4]       /*  7 */
    mla     r12, r11, r10, r12
    ldmia   r2!, { r3-r10 }         /* load D[08..15] */
    ldr     r11, [r1, #512*4]       /*  8 */
    mla     r12, r11, r3, r12
    ldr     r11, [r1, #608*4]       /*  9 */
    mla     r12, r11, r4, r12
    ldr     r11, [r1, #640*4]       /* 10 */
    mla     r12, r11, r5, r12
    ldr     r11, [r1, #736*4]       /* 11 */
    mla     r12, r11, r6, r12
    ldr     r11, [r1, #768*4]       /* 12 */
    mla     r12, r11, r7, r12
    ldr     r11, [r1, #864*4]       /* 13 */
    mla     r12, r11, r8, r12
    ldr     r11, [r1, #896*4]       /* 14 */
    mla     r12, r11, r9, r12
    ldr     r11, [r1, #992*4]       /* 15 */
    mla     r12, r11, r10, r12
    mov     r12, r12, asr #1        /* post shift to compensate for pre-shifting */
    str     r12, [r0], #4           /* store Data */
    add     r1, r1, #4              /* V++ */

    subs    lr, lr, #1
    bne     .loop32

    ldmfd   sp!, {r4-r12, pc}
.mpc_dewindowing_end:
    .size   mpc_decoder_windowing_D,.mpc_dewindowing_end-mpc_decoder_windowing_D
#endif /* OPTIMIZE_FOR_SPEED */
\r
/****************************************************************************
 * void mpc_decoder_windowing_D(...)
 *
 * 2nd step within synthesis filter. Does the dewindowing.
 * 64=32x32 multiplies
 * Uses un-shifted D[]-values. D[] will always be the second operand of
 * smull/smlal to achieve higher speed as D[] has lower amplitude than V[].
 *
 * In:     r0 = Data[]  output pointer, 32 words are written
 *         r1 = V[]     input samples, words V[0] .. V[1023] are read
 *         r2 = D[]     window coefficients, 32 * 16 words are read
 * Out:    Data[k] = bits 47..16 of the signed 64-bit sum over 16 taps of
 *         V[k + tap] * D[16*k + j], taps 0, 96, 128, 224, ... 896, 992
 * Regs:   r3-r6 = D[] values, r7 = V[] value, r8/r9 = accumulator lo/hi,
 *         lr = loop counter
 * Clobb:  r0-r3, flags (r4-r9 and lr are saved/restored on the stack)
 ****************************************************************************/
#if !defined(OPTIMIZE_FOR_SPEED)
    .align  2
    .global mpc_decoder_windowing_D
    .type   mpc_decoder_windowing_D, %function
mpc_decoder_windowing_D:
    stmfd   sp!, {r4-r9, lr}

    mov     lr, #32                 /* lr = number of output words left */
.loop32:
    ldmia   r2!, { r3-r6 }          /* load D[00..03] */
    ldr     r7, [r1]                /*  0 */
    smull   r8, r9, r7, r3          /* first tap initialises the accumulator */
    ldr     r7, [r1,  #96*4]        /*  1 */
    smlal   r8, r9, r7, r4
    ldr     r7, [r1, #128*4]        /*  2 */
    smlal   r8, r9, r7, r5
    ldr     r7, [r1, #224*4]        /*  3 */
    smlal   r8, r9, r7, r6
    ldmia   r2!, { r3-r6 }          /* load D[04..07] */
    ldr     r7, [r1, #256*4]        /*  4 */
    smlal   r8, r9, r7, r3
    ldr     r7, [r1, #352*4]        /*  5 */
    smlal   r8, r9, r7, r4
    ldr     r7, [r1, #384*4]        /*  6 */
    smlal   r8, r9, r7, r5
    ldr     r7, [r1, #480*4]        /*  7 */
    smlal   r8, r9, r7, r6
    ldmia   r2!, { r3-r6 }          /* load D[08..11] */
    ldr     r7, [r1, #512*4]        /*  8 */
    smlal   r8, r9, r7, r3
    ldr     r7, [r1, #608*4]        /*  9 */
    smlal   r8, r9, r7, r4
    ldr     r7, [r1, #640*4]        /* 10 */
    smlal   r8, r9, r7, r5
    ldr     r7, [r1, #736*4]        /* 11 */
    smlal   r8, r9, r7, r6
    ldmia   r2!, { r3-r6 }          /* load D[12..15] */
    ldr     r7, [r1, #768*4]        /* 12 */
    smlal   r8, r9, r7, r3
    ldr     r7, [r1, #864*4]        /* 13 */
    smlal   r8, r9, r7, r4
    ldr     r7, [r1, #896*4]        /* 14 */
    smlal   r8, r9, r7, r5
    ldr     r7, [r1, #992*4]        /* 15 */
    smlal   r8, r9, r7, r6
    mov     r8, r8, lsr #16
    orr     r8, r8, r9, lsl #16     /* (lo>>16) || (hi<<16) */
    str     r8, [r0], #4            /* store Data */
    add     r1, r1, #4              /* V++ */

    subs    lr, lr, #1
    bne     .loop32

    ldmfd   sp!, {r4-r9, pc}
.mpc_dewindowing_end:
    .size   mpc_decoder_windowing_D,.mpc_dewindowing_end-mpc_decoder_windowing_D
#endif /* !OPTIMIZE_FOR_SPEED */
\r