1 /***************************************************************************
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 * Copyright (C) 2006 by Thom Johansen
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
20 ****************************************************************************/
24 /* The following is an assembler optimised version of the LPC filtering
25 routines needed for FLAC decoding. It is optimised for use with ARM processors.
27 All LPC filtering up to order 9 is done in specially optimised unrolled
28 loops, while every order above this is handled by a slower default routine.
@ ---------------------------------------------------------------------------
@ lpc_decode_arm: LPC filtering for FLAC decoding, one code path per order.
@ Orders up to 9 use unrolled loops; higher orders use the default routine.
@ NOTE(review): this excerpt has gaps. The function label, the branch target
@ labels (.loop9, .default, .exit and the per-order ones) and a number of
@ instructions are not visible here, so the notes below describe only the
@ lines that are shown. Confirm against the complete file before editing.
@ ---------------------------------------------------------------------------
31 .section .icode,"ax",%progbits
35 .global lpc_decode_arm
@ save r4-r11 and lr; the ldmia at the very end reloads the saved lr into pc
37 stmdb sp!, { r4-r11, lr }
@ NOTE(review): the instruction that places coeffs in r4 is not visible in
@ this excerpt - confirm where r4 is loaded.
39 /* r0 = blocksize, r1 = qlevel, r2 = pred_order
40 r3 = data, r4 = coeffs
43 /* the data pointer always lags behind history pointer by 'pred_order'
44 samples. since we have one loop for each order, we can hard code this
45 and free a register by not saving data pointer.
@ r3 = data - pred_order * 4 (one word per sample)
47 sub r3, r3, r2, lsl #2 @ r3 = history
@ NOTE(review): the instruction that acts on this compare is not visible here
48 cmp r0, #0 @ no samples to process
50 cmp r2, #9 @ check if order is too high for unrolled loops
@ taken only when pred_order <= 9 (unsigned); pc is written with
@ pc + pred_order * 4, so the branch table below is indexed by the order
51 addls pc, pc, r2, lsl #2 @ jump to our unrolled decode loop if it exists
@ branch table; NOTE(review): only two entries are visible in this excerpt
53 b .default @ order too high, go to default routine
54 b .exit @ zero order filter isn't possible, exit function
64 @ last jump table entry coincides with target, so leave it out
@ --- order 9 ---------------------------------------------------------------
@ nine coefs are held in r5-r12 and r14 for the whole loop
66 ldmia r4, { r5-r12, r14 } @ fetch coefs
@ per output sample: r2 accumulates history[i] * coef products, with r3
@ advancing one word per history load
68 ldr r4, [r3], #4 @ load first history sample
69 mul r2, r4, r14 @ multiply with last coef
70 ldr r4, [r3], #4 @ rinse and repeat while accumulating sum in r2
@ NOTE(review): the remaining multiply-accumulate steps are not visible here
86 ldr r4, [r3] @ r4 = residual
87 add r2, r4, r2, asr r1 @ shift sum by qlevel bits and add residual
@ the post-index of -8*4 moves r3 back eight words after the store
88 str r2, [r3], #-8*4 @ save result and wrap history pointer back
89 subs r0, r0, #1 @ check if we're done
90 bne .loop9 @ nope, jump back
@ --- lower orders ----------------------------------------------------------
@ NOTE(review): the labels, coef loads, multiplies, stores and loop branches
@ of the following sections are not visible in this excerpt. The order named
@ for each section is inferred from the number of history words the visible
@ ldmia instructions load - confirm against the complete file.
@ presumably order 8: four two-word loads of history through r3
96 @ we have more registers to spare here, so start block reading
97 ldmia r3!, { r4, r14 }
100 ldmia r3!, { r4, r14 }
103 ldmia r3!, { r4, r14 }
106 ldmia r3!, { r4, r14 }
@ r2 = residual in r4 + (sum in r2 >> qlevel), same step as in the order 9 loop
110 add r2, r4, r2, asr r1
@ presumably order 7: two three-word history loads are visible here
119 ldmia r3!, { r4, r12, r14 }
123 ldmia r3!, { r4, r12, r14 }
130 add r2, r4, r2, asr r1
@ presumably order 6: a four-word and a two-word history load
139 ldmia r3!, { r4, r11-r12, r14 }
144 ldmia r3!, { r4, r11 }
148 add r2, r4, r2, asr r1
@ presumably order 5: one five-word history load
157 ldmia r3!, { r4, r10-r12, r14 }
164 add r2, r4, r2, asr r1
@ presumably order 4: one four-word history load
173 ldmia r3!, { r4, r11-r12, r14 }
179 add r2, r4, r2, asr r1
@ presumably order 3: one three-word history load
188 ldmia r3!, { r4, r12, r14 }
193 add r2, r4, r2, asr r1
@ presumably order 2: one two-word history load
202 ldmia r3!, { r4, r14 }
206 add r2, r4, r2, asr r1
@ --- order 1 ---------------------------------------------------------------
@ the single coef stays in r5; r4 carries the previous output sample from one
@ iteration to the next, so history is read from memory only once
213 ldr r5, [r4] @ load the one coef we need
214 ldr r4, [r3], #4 @ load one history sample, r3 now points to residual
216 mul r2, r4, r5 @ multiply coef by history sample
217 ldr r4, [r3] @ load residual
218 add r4, r4, r2, asr r1 @ add result to residual
219 str r4, [r3], #4 @ place r3 at next residual, we already have
220 subs r0, r0, #1 @ the current sample in r4 for the next iteration
@ NOTE(review): the loop branch for this section is not visible here
@ --- default routine (order above 9) ---------------------------------------
225 /* we do the filtering in an unrolled by 4 loop as far as we can, and then
226 do the rest by jump table. */
@ r5 = coeffs + pred_order * 4, i.e. just past the last coef; coefs are then
@ read downwards with ldmdb while history is read upwards through r3
227 add r5, r4, r2, lsl #2 @ need to start in the other end of coefs
228 mov r7, r2, lsr #2 @ r7 = coefs/4
229 mov r14, #0 @ init accumulator
@ by-4 body: four coefs into r8-r11 (r11 comes from the highest address),
@ then history samples two at a time into r6 and r12
231 ldmdb r5!, { r8-r11 }
232 ldmia r3!, { r6, r12 }
233 mla r14, r6, r11, r14
234 mla r14, r12, r10, r14
235 ldmia r3!, { r6, r12 }
@ NOTE(review): one mla and the loop counter/branch are not visible here
237 mla r14, r12, r8, r14
241 and r7, r2, #3 @ r7 = pred_order mod 4 = coefs left over after the by-4 loop
242 add pc, pc, r7, lsl #2 @ jump into accumulator chain
@ NOTE(review): the table entries and the loads feeding r12/r8 in the chain
@ below are not visible in this excerpt
248 @ implicit .threeleft
251 mla r14, r12, r8, r14
255 mla r14, r12, r8, r14
259 mla r14, r12, r8, r14
262 ldr r12, [r3] @ load residual
263 add r14, r12, r14, asr r1 @ shift sum by qlevel bits and add residual
264 str r14, [r3], #4 @ store result
@ r3 was advanced one word by the store, then goes back pred_order words
265 sub r3, r3, r2, lsl #2 @ and wrap history pointer back to next first pos
266 subs r0, r0, #1 @ are we done?
267 bne .default @ no, prepare for next sample
@ restore r4-r11 and return by loading the saved lr into pc
270 ldmia sp!, { r4-r11, pc }