1 /***************************************************************************
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 * Copyright (C) 2008 by Andree Buschmann
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
20 ****************************************************************************/
22 #include "mpc_config.h"
24 .section .text, "ax", %progbits
/****************************************************************************
 * void mpc_decoder_windowing_D(...)
 *
 * 2nd step within the synthesis filter: performs the dewindowing.
 *
 * Uses un-shifted D[] values. D[] is always the second operand of
 * smull/smlal to achieve higher speed, as D[] has lower amplitude than V[].
 ****************************************************************************/
/*
 * mpc_decoder_windowing_D -- dewindowing step of the synthesis filter.
 *
 * In:   r0 = Data[]  output pointer, 32 words are written
 *       r1 = V[]     input pointer; row k reads its 16 taps at word offsets
 *                    0, 96, 128, 224, 256, 352, 384, 480,
 *                    512, 608, 640, 736, 768, 864, 896, 992 from &V[k]
 *       r2 = D[]     window coefficients, 16 words per row
 * Out:  Data[k] = bits 16..47 of the 64-bit sum of V-tap * D products
 * Uses: r3-r6 (four D values), r7 (V tap), r8-r12, lr (row counter).
 *       Every register other than r0-r3/r12 that the body touches is saved
 *       on entry and restored on exit. Leaf function: no calls are made.
 *
 * Two alternative bodies exist for the same symbol. Exactly one of them may
 * be assembled, hence the #if/#else around them.
 */
    .align  2
    .global mpc_decoder_windowing_D
    .type   mpc_decoder_windowing_D, %function

#if 0
mpc_decoder_windowing_D:
    /************************************************************************
     * Reference implementation (disabled, kept as documentation).
     * Straight 16-tap multiply-accumulate for each of the 32 output rows.
     * r8 = lo, r12 = hi of the 64-bit accumulator.
     ***********************************************************************/
    stmfd   sp!, {r4-r8, lr}

    mov     lr, #32                 /* 32 output samples */
.Lloop32:
    ldmia   r2!, { r3-r6 }          /* load D[00..03] */
    ldr     r7, [r1]                /*  0 */
    smull   r8, r12, r7, r3         /* smull starts a fresh accumulator */
    ldr     r7, [r1, #96*4]         /*  1 */
    smlal   r8, r12, r7, r4
    ldr     r7, [r1, #128*4]        /*  2 */
    smlal   r8, r12, r7, r5
    ldr     r7, [r1, #224*4]        /*  3 */
    smlal   r8, r12, r7, r6
    ldmia   r2!, { r3-r6 }          /* load D[04..07] */
    ldr     r7, [r1, #256*4]        /*  4 */
    smlal   r8, r12, r7, r3
    ldr     r7, [r1, #352*4]        /*  5 */
    smlal   r8, r12, r7, r4
    ldr     r7, [r1, #384*4]        /*  6 */
    smlal   r8, r12, r7, r5
    ldr     r7, [r1, #480*4]        /*  7 */
    smlal   r8, r12, r7, r6
    ldmia   r2!, { r3-r6 }          /* load D[08..11] */
    ldr     r7, [r1, #512*4]        /*  8 */
    smlal   r8, r12, r7, r3
    ldr     r7, [r1, #608*4]        /*  9 */
    smlal   r8, r12, r7, r4
    ldr     r7, [r1, #640*4]        /* 10 */
    smlal   r8, r12, r7, r5
    ldr     r7, [r1, #736*4]        /* 11 */
    smlal   r8, r12, r7, r6
    ldmia   r2!, { r3-r6 }          /* load D[12..15] */
    ldr     r7, [r1, #768*4]        /* 12 */
    smlal   r8, r12, r7, r3
    ldr     r7, [r1, #864*4]        /* 13 */
    smlal   r8, r12, r7, r4
    ldr     r7, [r1, #896*4]        /* 14 */
    smlal   r8, r12, r7, r5
    ldr     r7, [r1, #992*4]        /* 15 */
    smlal   r8, r12, r7, r6
    mov     r8, r8, lsr #16
    orr     r8, r8, r12, lsl #16    /* (lo>>16) || (hi<<16) */
    str     r8, [r0], #4            /* store Data */
    add     r1, r1, #4              /* V++ */

    subs    lr, lr, #1
    bgt     .Lloop32

    ldmfd   sp!, {r4-r8, pc}
#else
mpc_decoder_windowing_D:
    /************************************************************************
     * Further speed up through making use of symmetries within D[]-window.
     * The row V[00] can be extracted as it has symmetries within this single
     * row. 8 smull/smlal and 8 ldr's can be saved at the cost of 2 add's.
     * The rows V[01..15] are symmetric to V[31..17]. 15 x 16 ldr's can be
     * saved at the cost of 15 x 4 + 1 add's.
     * The row V[16] can be extracted as it has symmetries within this single
     * row. 8 smull/smlal and 8 ldr's can be saved.
     ***********************************************************************/
    stmfd   sp!, {r4-r11, lr}

    /******************************************
     * row 0 with internal symmetry
     * r8 = lo, r9 = hi of the accumulator
     *****************************************/
    add     r2, r2, #4              /* D+=1, r2 = D[01] as D[00] = 0 */
    ldmia   r2!, { r3-r6 }          /* load D[01..04] */
    ldr     r7 , [r1, #96*4]        /*  1 */
    ldr     r10, [r1, #992*4]       /* 15 */
    rsb     r10, r10, r7            /* V[01] - V[15] */
    smull   r8, r9, r10, r3
    ldr     r7 , [r1, #128*4]       /*  2 */
    ldr     r10, [r1, #896*4]       /* 14 */
    add     r10, r10, r7            /* V[02] + V[14] */
    smlal   r8, r9, r10, r4
    ldr     r7 , [r1, #224*4]       /*  3 */
    ldr     r10, [r1, #864*4]       /* 13 */
    rsb     r10, r10, r7            /* V[03] - V[13] */
    smlal   r8, r9, r10, r5
    ldr     r7 , [r1, #256*4]       /*  4 */
    ldr     r10, [r1, #768*4]       /* 12 */
    add     r10, r10, r7            /* V[04] + V[12] */
    smlal   r8, r9, r10, r6
    ldmia   r2!, { r3-r6 }          /* load D[05..08] */
    ldr     r7 , [r1, #352*4]       /*  5 */
    ldr     r10, [r1, #736*4]       /* 11 */
    rsb     r10, r10, r7            /* V[05] - V[11] */
    smlal   r8, r9, r10, r3
    ldr     r7 , [r1, #384*4]       /*  6 */
    ldr     r10, [r1, #640*4]       /* 10 */
    add     r10, r10, r7            /* V[06] + V[10] */
    smlal   r8, r9, r10, r4
    ldr     r7 , [r1, #480*4]       /*  7 */
    ldr     r10, [r1, #608*4]       /*  9 */
    rsb     r10, r10, r7            /* V[07] - V[09] */
    smlal   r8, r9, r10, r5
    ldr     r10, [r1, #512*4]       /*  8 (centre tap, has no partner) */
    smlal   r8, r9, r10, r6
    mov     r8, r8, lsr #16
    orr     r8, r8, r9, lsl #16     /* (lo>>16) || (hi<<16) */
    str     r8, [r0], #4            /* store Data */
    add     r1, r1, #4              /* V+=1, r1 = V[01] */
    add     r2, r2, #7*4            /* D+=7, r2 = D[16] */

    /******************************************
     * rows 01..15 are symmetric to rows 31..17
     * r8  = lo, r9  = hi of 01..15
     * r1  = V[01..15]
     * r10 = lo, r11 = hi of 31..17
     * r12 = V[31..17]
     * lr  = remaining row pairs (15..1)
     *
     * Both rows share one set of 16 D values: the forward row pairs V tap i
     * with D[i], the mirrored row pairs V tap i with D[15-i] and its result
     * is negated before it is stored.
     *****************************************/
    mov     lr, #15
    add     r12, r1, #30*4          /* r12 = V[31] */
.Lloop15:
    ldmia   r2!, { r3-r6 }          /* load D[00..03] */
    ldr     r7, [r12, #768*4]       /* 12 */
    smull   r10, r11, r7, r6
    ldr     r7, [r12, #864*4]       /* 13 */
    smlal   r10, r11, r7, r5
    ldr     r7, [r12, #896*4]       /* 14 */
    smlal   r10, r11, r7, r4
    ldr     r7, [r12, #992*4]       /* 15 */
    smlal   r10, r11, r7, r3
    ldr     r7, [r1]                /*  0 */
    smull   r8, r9, r7, r3
    ldr     r7, [r1, #96*4]         /*  1 */
    smlal   r8, r9, r7, r4
    ldr     r7, [r1, #128*4]        /*  2 */
    smlal   r8, r9, r7, r5
    ldr     r7, [r1, #224*4]        /*  3 */
    smlal   r8, r9, r7, r6
    ldmia   r2!, { r3-r6 }          /* load D[04..07] */
    ldr     r7, [r1, #256*4]        /*  4 */
    smlal   r8, r9, r7, r3
    ldr     r7, [r1, #352*4]        /*  5 */
    smlal   r8, r9, r7, r4
    ldr     r7, [r1, #384*4]        /*  6 */
    smlal   r8, r9, r7, r5
    ldr     r7, [r1, #480*4]        /*  7 */
    smlal   r8, r9, r7, r6
    ldr     r7, [r12, #512*4]       /*  8 */
    smlal   r10, r11, r7, r6
    ldr     r7, [r12, #608*4]       /*  9 */
    smlal   r10, r11, r7, r5
    ldr     r7, [r12, #640*4]       /* 10 */
    smlal   r10, r11, r7, r4
    ldr     r7, [r12, #736*4]       /* 11 */
    smlal   r10, r11, r7, r3
    ldmia   r2!, { r3-r6 }          /* load D[08..11] */
    ldr     r7, [r12, #256*4]       /*  4 */
    smlal   r10, r11, r7, r6
    ldr     r7, [r12, #352*4]       /*  5 */
    smlal   r10, r11, r7, r5
    ldr     r7, [r12, #384*4]       /*  6 */
    smlal   r10, r11, r7, r4
    ldr     r7, [r12, #480*4]       /*  7 */
    smlal   r10, r11, r7, r3
    ldr     r7, [r1, #512*4]        /*  8 */
    smlal   r8, r9, r7, r3
    ldr     r7, [r1, #608*4]        /*  9 */
    smlal   r8, r9, r7, r4
    ldr     r7, [r1, #640*4]        /* 10 */
    smlal   r8, r9, r7, r5
    ldr     r7, [r1, #736*4]        /* 11 */
    smlal   r8, r9, r7, r6
    ldmia   r2!, { r3-r6 }          /* load D[12..15] */
    ldr     r7, [r1, #768*4]        /* 12 */
    smlal   r8, r9, r7, r3
    ldr     r7, [r1, #864*4]        /* 13 */
    smlal   r8, r9, r7, r4
    ldr     r7, [r1, #896*4]        /* 14 */
    smlal   r8, r9, r7, r5
    ldr     r7, [r1, #992*4]        /* 15 */
    smlal   r8, r9, r7, r6
    ldr     r7, [r12]               /*  0 */
    smlal   r10, r11, r7, r6
    ldr     r7, [r12, #96*4]        /*  1 */
    smlal   r10, r11, r7, r5
    ldr     r7, [r12, #128*4]       /*  2 */
    smlal   r10, r11, r7, r4
    ldr     r7, [r12, #224*4]       /*  3 */
    smlal   r10, r11, r7, r3
    /* store Data[01..15] */
    mov     r8, r8, lsr #16
    orr     r8, r8, r9, lsl #16     /* (lo>>16) || (hi<<16) */
    str     r8, [r0]                /* store Data */
    /* store Data[31..17]: the mirrored slot lies 2*lr words ahead */
    add     r0, r0, lr, asl #3      /* r0 = r0 + 2*lr [words] */
    mov     r10, r10, lsr #16
    orr     r10, r10, r11, lsl #16  /* (lo>>16) || (hi<<16) */
    rsb     r10, r10, #0            /* r10 = -r10 */
    str     r10, [r0], #4           /* store Data */
    sub     r0, r0, lr, asl #3      /* r0 = r0 - 2*lr [words] */
    /* correct addresses for next loop */
    sub     r12, r12, #4            /* r12 = V-- */
    add     r1, r1, #4              /* r1  = V++ */
    /* next loop */
    subs    lr, lr, #1
    bgt     .Lloop15

    /******************************************
     * V[16] with internal symmetry
     * here: r0 = &Data[16], r1 = &V[16], r2 = &D[16*16]
     *****************************************/
    ldmia   r2!, { r3-r6 }          /* load D[00..03] */
    ldr     r7 , [r1]               /*  0 */
    ldr     r10, [r1, #992*4]       /* 15 */
    rsb     r10, r10, r7            /* V[00] - V[15] */
    smull   r8, r9, r10, r3
    ldr     r7 , [r1, #96*4]        /*  1 */
    ldr     r10, [r1, #896*4]       /* 14 */
    rsb     r10, r10, r7            /* V[01] - V[14] */
    smlal   r8, r9, r10, r4
    ldr     r7 , [r1, #128*4]       /*  2 */
    ldr     r10, [r1, #864*4]       /* 13 */
    rsb     r10, r10, r7            /* V[02] - V[13] */
    smlal   r8, r9, r10, r5
    ldr     r7 , [r1, #224*4]       /*  3 */
    ldr     r10, [r1, #768*4]       /* 12 */
    rsb     r10, r10, r7            /* V[03] - V[12] */
    smlal   r8, r9, r10, r6
    ldmia   r2!, { r3-r6 }          /* load D[04..07] */
    ldr     r7 , [r1, #256*4]       /*  4 */
    ldr     r10, [r1, #736*4]       /* 11 */
    rsb     r10, r10, r7            /* V[04] - V[11] */
    smlal   r8, r9, r10, r3
    ldr     r7 , [r1, #352*4]       /*  5 */
    ldr     r10, [r1, #640*4]       /* 10 */
    rsb     r10, r10, r7            /* V[05] - V[10] */
    smlal   r8, r9, r10, r4
    ldr     r7 , [r1, #384*4]       /*  6 */
    ldr     r10, [r1, #608*4]       /*  9 */
    rsb     r10, r10, r7            /* V[06] - V[09] */
    smlal   r8, r9, r10, r5
    ldr     r7 , [r1, #480*4]       /*  7 */
    ldr     r10, [r1, #512*4]       /*  8 */
    rsb     r10, r10, r7            /* V[07] - V[08] */
    smlal   r8, r9, r10, r6
    mov     r8, r8, lsr #16
    orr     r8, r8, r9, lsl #16     /* (lo>>16) || (hi<<16) */
    str     r8, [r0], #4            /* store Data */
    add     r1, r1, #4              /* V++ */

    ldmfd   sp!, {r4-r11, pc}
#endif
.mpc_dewindowing_end:
    .size   mpc_decoder_windowing_D,.mpc_dewindowing_end-mpc_decoder_windowing_D