1 /* Copyright (C) 2007 Thom Johansen */
4 @brief Various analysis/synthesis filters (ARMv4 version)
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
11 - Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
14 - Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
18 - Neither the name of the Xiph.org Foundation nor the names of its
19 contributors may be used to endorse or promote products derived from
20 this software without specific prior written permission.
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
26 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
27 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
28 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* Code placement: when CONFIG_CPU equals PP5002 the assembler switches to the
   .icode section, flagged allocatable ("a") and executable ("x").
   NOTE(review): the matching #else/#endif is not visible in this listing --
   confirm the conditional is closed before the first function. */
36 #if CONFIG_CPU == PP5002
37 .section .icode,"ax",%progbits
/*
 * iir_mem16 -- filters input samples into y[], with the filter state read
 * from mem[] and written back to mem[] before the function exits.
 *
 * Entry: r0 = x, r1 = den, r2 = y, r3 = N.  After the 9-register push
 * (36 bytes) the stack arguments are ord at [sp, #36] and mem at [sp, #40].
 *
 * Per output sample, as described by the inline notes:
 *   y[i]   = clip(x[i] + ((mem[0] + 4096) >> 13)) to [-32767, 32767]
 *   mem[k] = mem[k+1] - den[k]*y[i]; the last state word is -den[k]*y[i]
 *
 * Two variants are visible: one keeping mem[0..7] in r5-r12 and one working
 * on mem[0..9].  The first return is taken for any other order.
 *
 * NOTE(review): labels, the load of x[i] into r14, the multiplies, the
 * compares that set the flags for the clipping, and the loop branches are
 * not visible in this listing; the notes below describe only what is shown.
 * ldmpc is a macro defined elsewhere -- presumably it restores the listed
 * registers and returns; confirm against its definition.
 */
42 /* void iir_mem16(const spx_word16_t *x, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack) */
45 stmdb sp!, { r4-r11, lr } @ Push 9 registers (36 bytes)
46 ldr r5, [sp, #36] @ r5 = ord (first stack argument); r0 = x, r1 = den, r2 = y, r3 = N
47 ldr r4, [sp, #40] @ r4 = mem (second stack argument)
@ NOTE(review): the order checks that branch around this return are not shown.
52 ldmpc regs=r4-r11 @ Non-supported order, return
54 @ TODO: try using direct form 1 filtering
@ ---- Variant with eight state words: mem[0..7] live in r5-r12 ----
56 ldmia r4, { r5-r12 } @ r5-r12 = mem[0..7]
58 add r5, r5, #4096 @ mem[0] + 4096: rounding constant for the >> 13 below
60 add r14, r14, r5, asr #13 @ r14 = x[i] + ((mem[0] + 4096) >> 13)
62 orr r5, r5, #0xff @ Set the low byte; r5 = 32767 (clip limit)
64 movgt r14, r5 @ Clip positive: result = 32767
66 rsblt r14, r5, #0 @ Clip negative: result = 0 - 32767 = -32767
67 strh r14, [r2], #2 @ Write 16-bit result to y[i], then advance y
71 sub r5, r6, r5 @ mem[0] = mem[1] - den[0]*y[i]
74 sub r6, r7, r6 @ mem[1] = mem[2] - den[1]*y[i]
77 sub r7, r8, r7 @ mem[2] = mem[3] - den[2]*y[i]
80 sub r8, r9, r8 @ mem[3] = mem[4] - den[3]*y[i]
83 sub r9, r10, r9 @ mem[4] = mem[5] - den[4]*y[i]
86 sub r10, r11, r10 @ mem[5] = mem[6] - den[5]*y[i]
89 sub r11, r12, r11 @ mem[6] = mem[7] - den[6]*y[i]
92 rsb r12, r12, #0 @ mem[7] = -den[7]*y[i]
95 ldr r4, [sp, #40] @ Reload r4 = mem from the stack argument
96 stmia r4, { r5-r12 } @ Save back mem[]
97 ldmpc regs=r4-r11 @ Exit
@ ---- Variant with ten state words: mem[0..9] handled in two groups ----
@ Each 32-bit word loaded from den carries two 16-bit coefficients:
@ "lsl #16" followed by "asr #16" sign-extends the lower half, a lone
@ "asr #16" sign-extends the upper half.  The den[k] tags below follow the
@ original mem[] notes; the multiplies that consume them are not shown.
100 ldmia r4, { r5-r9 } @ r5-r9 = mem[0..4]
101 add r5, r5, #4096 @ mem[0] + 4096: rounding constant for the >> 13 below
103 add r14, r14, r5, asr #13 @ r14 = x[i] + ((mem[0] + 4096) >> 13)
105 orr r5, r5, #0xff @ Set the low byte; r5 = 32767 (clip limit)
107 movgt r14, r5 @ Clip positive: result = 32767
109 rsblt r14, r5, #0 @ Clip negative: result = 0 - 32767 = -32767
110 strh r14, [r2], #2 @ Write 16-bit result to y[i], then advance y
112 ldmia r1!, { r10-r12 } @ r10-r12 = den[0..5], two coefficients per word
116 sub r5, r6, r5 @ mem[0] = mem[1] - den[0]*y[i]
117 mov r10, r10, asr #16 @ Upper half of r10, sign-extended (den[1])
119 sub r6, r7, r6 @ mem[1] = mem[2] - den[1]*y[i]
120 mov r10, r11, lsl #16 @ Lower half of r11 moved to the top...
121 mov r10, r10, asr #16 @ ...and sign-extended (den[2])
123 sub r7, r8, r7 @ mem[2] = mem[3] - den[2]*y[i]
124 mov r10, r11, asr #16 @ Upper half of r11, sign-extended (den[3])
126 sub r8, r9, r8 @ mem[3] = mem[4] - den[3]*y[i]
127 stmia r4!, { r5-r8 } @ Write back mem[0..3], r4 = &mem[4]
128 mov r10, r12, lsl #16 @ Lower half of r12 moved to the top...
129 mov r10, r10, asr #16 @ ...and sign-extended (den[4])
132 ldmib r4, { r6-r10 } @ r6-r10 = mem[5..9] (increment-before skips mem[4])
133 sub r5, r6, r5 @ mem[4] = mem[5] - den[4]*y[i]
134 mov r12, r12, asr #16 @ Upper half of r12, sign-extended (den[5])
136 sub r6, r7, r6 @ mem[5] = mem[6] - den[5]*y[i]
137 ldmia r1!, { r11-r12 } @ r11-r12 = den[6..9]
141 sub r7, r8, r7 @ mem[6] = mem[7] - den[6]*y[i]
142 mov r11, r11, asr #16 @ Upper half of r11, sign-extended (den[7])
144 sub r8, r9, r8 @ mem[7] = mem[8] - den[7]*y[i]
145 mov r11, r12, lsl #16 @ Lower half of r12 moved to the top...
146 mov r11, r11, asr #16 @ ...and sign-extended (den[8])
148 sub r9, r10, r9 @ mem[8] = mem[9] - den[8]*y[i]
149 mov r12, r12, asr #16 @ Upper half of r12, sign-extended (den[9])
151 rsb r10, r10, #0 @ mem[9] = -den[9]*y[i]
152 stmia r4!, { r5-r10 } @ Write back mem[4..9]
157 ldmpc regs=r4-r11 @ Exit
/*
 * qmf_synth -- as described by the inline notes: x1 and x2 are copied in
 * reverse order into two scratch arrays (xx1, xx2) placed below the incoming
 * sp, entries of mem1/mem2 are appended to them, and the main loop then
 * builds four outputs per pass in four accumulators (acc0..acc3).  Each
 * accumulator starts from a rounding constant, is shifted right by 15 and
 * clipped before being stored to y.
 *
 * Entry: r0 = x1, r1 = x2, r2 = a, r3 = y.  N, M, mem1 and mem2 are read from
 * the stack starting at [sp, #36], i.e. just above the 9 pushed registers.
 *
 * The caller's sp is stored immediately below xx1 and is restored by the
 * final "ldmia sp, { r5-r6, sp }".
 *
 * NOTE(review): labels, loop branches, the computation of r8 before the
 * scratch arrays are carved out, and the compares feeding the conditional
 * instructions are not visible in this listing.  ldmpc is a macro defined
 * elsewhere -- presumably it restores the listed registers and returns.
 */
160 /* void qmf_synth(const spx_word16_t *x1, const spx_word16_t *x2, const spx_word16_t *a, spx_word16_t *y, int N, int M, spx_word32_t *mem1, spx_word32_t *mem2, char *stack) */
163 stmdb sp!, { r4-r11, lr } @ Push 9 registers (36 bytes)
164 add r7, sp, #36 @ r7 = address of first stack argument; r0 = x1, r1 = x2, r2 = a, r3 = y
165 ldmia r7, { r4-r7 } @ r4 = N, r5 = M, r6 = mem1, r7 = mem2
168 sub r9, sp, r8 @ r9 = sp - (N + M >> 1) = xx2
169 sub r8, r9, r8 @ r8 = r9 - (N + M >> 1) = xx1
170 str sp, [r8, #-4] @ Stack old sp (one word below xx1)
171 sub sp, r8, #4 @ Update sp: it now points at the saved old sp
173 add r0, r0, r4 @ x1 += N >> 1
174 add r1, r1, r4 @ x2 += N >> 1
175 mov r14, r4 @ Loop counter is N
177 @ Backwards copy x1 and x2 arrays to xx1 and xx2, assume N2 is power of two
178 @ N should always be a multiple of four, so this should be OK
@ Each step reads two words (four 16-bit samples) walking the source
@ downwards, swaps the halfwords inside each word with "ror #16" and stores
@ the two words in exchanged order, so the four samples land reversed.
179 ldmdb r0!, { r10-r11 } @ r10 = lower word, r11 = higher word; x1 moves back 8 bytes
180 mov r12, r10, ror #16 @ r12 = r10 with halfwords swapped
181 mov r11, r11, ror #16 @ r11 = r11 with halfwords swapped
182 stmia r8!, { r11-r12 } @ Store in exchanged order, advance xx1
183 ldmdb r1!, { r10-r11 } @ Same for x2
184 mov r12, r10, ror #16
185 mov r11, r11, ror #16
186 stmia r9!, { r11-r12 } @ Store in exchanged order, advance xx2
190 @ Copy alternate members of mem1 and mem2 to last part of xx1 and xx2
191 mov r14, r5 @ Loop counter is M
194 stmdb sp!, { r6-r7 } @ Stack &mem1[1], &mem2[1]
199 orr r10, r10, r11, lsl #16 @ r10 |= r11 << 16 (presumably packs two 16-bit entries per word)
201 orr r11, r12, r11, lsl #16 @ r11 = r12 | (r11 << 16)
207 sub r0, r8, r5 @ r0 = &xx1[N2]
208 sub r1, r9, r5 @ r1 = &xx2[N2]
209 str r4, [sp, #-4] @ Stack N
211 str r4, [sp, #-8] @ Stack M (r4 presumably reloaded with M by a line not shown)
212 @ sp doesn't point to the end of the stack frame from here on, but we're not
213 @ calling anything so it shouldn't matter
214 @ Main loop, register usage:
215 @ r0 = xx1, r1 = xx2, r2 = a, r3 = y, r4 = M, r5 = x10, r6 = x11, r7 = x20
216 @ r8 = x21, r9 = [a1, a0], r10 = acc0, r11 = acc1, r12 = acc2, r14 = acc3
218 mov r10, #16384 @ Init accumulators to rounding const
223 ldrsh r5, [r0, #-4]! @ r5 = x10, r0 = &xx1[N2 - 2]
224 ldrsh r7, [r1, #-4]! @ r7 = x20, r1 = &xx2[N2 - 2]
226 ldrsh r9, [r2], #2 @ r9 = a0
227 ldrsh r6, [r0, #2]! @ r6 = x11
228 ldrsh r8, [r1, #2]! @ r8 = x21
229 sub r5, r5, r7 @ r5 = x10 - x20
230 add r7, r5, r7, asl #1 @ r7 = (x10 - x20) + 2*x20 = x10 + x20
231 mla r12, r9, r5, r12 @ acc2 += a0*(x10 - x20)
232 sub r5, r6, r8 @ r5 = x11 - x21
233 mla r10, r9, r5, r10 @ acc0 += a0*(x11 - x21)
234 ldrsh r9, [r2], #2 @ r9 = a1
235 add r5, r6, r8 @ r5 = x11 + x21
236 mla r14, r9, r7, r14 @ acc3 += a1*(x10 + x20)
237 mla r11, r9, r5, r11 @ acc1 += a1*(x11 + x21)
239 ldrsh r9, [r2], #2 @ r9 = a0 (next coefficient; r2 post-incremented)
240 ldrsh r5, [r0, #2]! @ r5 = x10
241 ldrsh r7, [r1, #2]! @ r7 = x20
242 sub r6, r6, r8 @ r6 = x11 - x21
243 add r8, r6, r8, asl #1 @ r8 = (x11 - x21) + 2*x21 = x11 + x21
244 mla r12, r9, r6, r12 @ acc2 += a0*(x11 - x21)
245 sub r6, r5, r7 @ r6 = x10 - x20
246 mla r10, r9, r6, r10 @ acc0 += a0*(x10 - x20)
247 ldrsh r9, [r2], #2 @ r9 = a1
248 add r6, r5, r7 @ r6 = x10 + x20
249 mla r14, r9, r8, r14 @ acc3 += a1*(x11 + x21)
250 mla r11, r9, r6, r11 @ acc1 += a1*(x10 + x20)
254 ldr r4, [sp, #-8] @ r4 = M
255 sub r2, r2, r4, lsl #1 @ r2 = &a[0]
256 sub r0, r0, r4 @ r0 = &xx1[N2 - 2 - i]
257 sub r1, r1, r4 @ r1 = &xx2[N2 - 2 - i]
259 mov r10, r10, asr #15 @ Shift outputs down
260 mov r11, r11, asr #15
261 mov r12, r12, asr #15
262 mov r14, r14, asr #15
264 @ Clip output to -32768..32767 range, which works fine despite not being
265 @ Speex' usual clipping range.
@ Each eorne below replaces an accumulator with r9 ^ (r5 >> 31) when the
@ preceding test (not shown) reports NE.  NOTE(review): r9 and r5 are set by
@ lines outside this listing -- presumably the limit and the tested value.
269 eorne r10, r9, r5, asr #31 @ Saturate acc0
272 eorne r11, r9, r5, asr #31 @ Saturate acc1
275 eorne r12, r9, r5, asr #31 @ Saturate acc2
278 eorne r14, r9, r5, asr #31 @ Saturate acc3
280 strh r10, [r3], #2 @ Write outputs
284 ldr r10, [sp, #-4] @ Load N
285 subs r10, r10, #4 @ Are we done?
289 @ Copy start of xx1 and xx2 back to alternate mem1 and mem2 entries
290 @ r0 and r1 are &xx1[0] and &xx2[0] at this point
291 ldmia sp, { r5-r6, sp } @ Fetch &mem1[1], &mem2[1], restore sp
303 ldmpc regs=r4-r11 @ Exit
/*
 * signal_mul -- scales 32-bit samples: each output is the 64-bit signed
 * product scale * x[i] shifted right by 14 and truncated to 32 bits.
 *
 * Entry: r0 = x, r1 = y, r2 = scale, r3 = len.
 * Four samples are processed per pass and len is decremented by 4, so len is
 * presumably expected to be a multiple of four -- confirm against callers.
 * r12 is used as scratch for the high half of each product.
 *
 * NOTE(review): the loop label and the branch that follows the "subs" are not
 * visible in this listing.  ldmpc is a macro defined elsewhere -- presumably
 * it restores the listed registers and returns.
 */
306 /* void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len) */
309 stmdb sp!, { r4-r8, lr } @ Save the registers used below plus the return address
311 ldmia r0!, { r5-r8 } @ Load four input samples
312 smull r5, r12, r2, r5 @ r12:r5 = scale * x[0] (64-bit signed product)
313 mov r12, r12, lsl #18 @ Recombine upper and lower parts
314 orr r5, r12, r5, lsr #14 @ r5 = (hi << 18) | (lo >> 14) = product >> 14
315 smull r6, r12, r2, r6 @ r12:r6 = scale * x[1]
316 mov r12, r12, lsl #18
317 orr r6, r12, r6, lsr #14 @ r6 = product >> 14
318 smull r7, r12, r2, r7 @ r12:r7 = scale * x[2]
319 mov r12, r12, lsl #18
320 orr r7, r12, r7, lsr #14 @ r7 = product >> 14
321 smull r8, r12, r2, r8 @ r12:r8 = scale * x[3]
322 mov r12, r12, lsl #18
323 orr r8, r12, r8, lsr #14 @ r8 = product >> 14
324 stmia r1!, { r5-r8 } @ Store four output samples
325 subs r3, r3, #4 @ Are we done?
328 ldmpc regs=r4-r8 @ Exit