1 /***************************************************************************
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 * Copyright (C) 2006-2007 Thom Johansen
12 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement.
15 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 * KIND, either express or implied.
18 ****************************************************************************/
20 /****************************************************************************
21 * void channels_process_sound_chan_mono(int count, int32_t *buf[])
23 * NOTE: The following code processes two samples at once. When count is odd,
24 * one extra sample is processed as well; its result is not used by the
25 * calling functions. */
27 .section .icode, "ax", %progbits
29 .global channels_process_sound_chan_mono
30 .type channels_process_sound_chan_mono, %function
31 channels_process_sound_chan_mono:
32 @ input: r0 = count, r1 = buf
@ Mono downmix: every result is (x >> 1) + (y >> 1). Both operands are halved
@ with an arithmetic shift before the add, presumably so that the sum stays
@ within 32 bits. Two results are produced per pass (r4 and r5).
@ NOTE(review): the loads that fill r4, r5, r6 and lr, the stores of the
@ results, the loop control and the .monoend label used by .size are not
@ visible in this listing - confirm against the complete file.
33 stmfd sp!, {r4-r6, lr}
34 ldmia r1, {r2-r3} @ r2 = buf[0], r3 = buf[1]
39 mov r4, r4, asr #1 @ r4 = r4/2
40 add r4, r4, r6, asr #1 @ r4 = r4 + r6/2 = (buf[0]+buf[1])/2
41 mov r5, r5, asr #1 @ r5 = r5/2
42 add r5, r5, lr, asr #1 @ r5 = r5 + lr/2 = (buf[0]+buf[1])/2
48 ldmfd sp!, {r4-r6, pc} @ restore r4-r6 and return (saved lr is popped into pc)
50 .size channels_process_sound_chan_mono,.monoend-channels_process_sound_chan_mono
52 /****************************************************************************
53 * void channels_process_sound_chan_karaoke(int count, int32_t *buf[])
54 * NOTE: The following code processes two samples at once. When count is odd,
55 * one extra sample is processed as well; its result is not used by the
56 * calling functions. */
58 .section .icode, "ax", %progbits
60 .global channels_process_sound_chan_karaoke
61 .type channels_process_sound_chan_karaoke, %function
62 channels_process_sound_chan_karaoke:
63 @ input: r0 = count, r1 = buf
@ Karaoke processing: for each sample the half-difference d = x/2 - y/2 is
@ computed together with its negation -d. Two samples are handled per pass:
@ r4/r6 hold d/-d for the first one, r5/lr hold d/-d for the second one.
@ NOTE(review): the loads that fill r4, r5, r6 and lr, the stores of the
@ results (and therefore which buffer receives d and which -d), the loop
@ control and the .karaokeend label used by .size are not visible in this
@ listing - confirm against the complete file.
64 stmfd sp!, {r4-r6, lr}
65 ldmia r1, {r2-r3} @ r2 = buf[0], r3 = buf[1]
70 mov r6, r6, asr #1 @ r6 = r6/2
71 rsb r4, r6, r4, asr #1 @ r4 = -r6 + r4/2 = (buf[0]-buf[1])/2
72 rsb r6, r4, #0 @ r6 = -r4
73 mov lr, lr, asr #1 @ lr = lr/2
74 rsb r5, lr, r5, asr #1 @ r5 = -lr + r5/2 = (buf[0]-buf[1])/2
75 rsb lr, r5, #0 @ lr = -r5
81 ldmfd sp!, {r4-r6, pc} @ restore r4-r6 and return (saved lr is popped into pc)
83 .size channels_process_sound_chan_karaoke,.karaokeend-channels_process_sound_chan_karaoke
85 /****************************************************************************
86 * void sample_output_mono(int count, struct dsp_data *data,
87 int32_t *src[], int16_t *dst)
88 * NOTE: The following code processes two samples at once. When count is odd,
89 * one extra sample is processed as well; its result is not used by the
90 * calling functions. */
92 .section .icode, "ax", %progbits
94 .global sample_output_mono
95 .type sample_output_mono, %function
97 @ input: r0 = count, r1 = data, r2 = src, r3 = dst
@ Converts 32-bit samples to 16-bit output words: each sample is shifted
@ right by data->output_scale (with a rounding term of 1 << (scale-1)),
@ clipped to -32768...+32767, and then OR-ed with itself shifted left by 16,
@ so that both halfwords of the output word carry the same sample
@ (presumably after masking with r8 - the masking step is not shown).
@ NOTE(review): the function label, the sample loads and stores, the
@ rounding adds, the tests that set the flags for the conditional eorne,
@ the initial values of r2 and r8, the loop control and the .somend label
@ used by .size are not visible in this listing - confirm against the
@ complete file.
98 stmfd sp!, {r4-r9, lr}
100 ldr r4, [r2] @ r4 = src[0]
101 ldr r5, [r1] @ r5 = data->output_scale (first word of *data)
102 sub r1, r5, #1 @ r1 = r5-1
104 mov r2, r2, asl r1 @ r2 <<= r1; gives 1 << (scale-1) assuming r2 was set to 1 by a line not shown
105 mvn r1, #0x8000 @ r1 = 0xffff7fff, needed for clipping
107 orr r8, r8, #0xff @ r8 needed for masking (its other bits are set by a line not shown)
112 mov r6, r6, asr r5 @ r6 = (r6 + 1<<(scale-1)) >> scale
115 eorne r6, r1, lr, asr #31 @ Clip (-32768...+32767): low halfword = 0x7fff if lr >= 0, 0x8000 if lr < 0
117 mov r7, r7, asr r5 @ r7 = (r7 + 1<<(scale-1)) >> scale
120 eorne r7, r1, lr, asr #31 @ Clip (-32768...+32767): low halfword = 0x7fff if lr >= 0, 0x8000 if lr < 0
123 orr r6, r6, r6, asl #16 @ pack first 2 halfwords into 1 word
125 orr r7, r7, r7, asl #16 @ pack last 2 halfwords into 1 word
131 ldmfd sp!, {r4-r9, pc} @ restore r4-r9 and return (saved lr is popped into pc)
133 .size sample_output_mono,.somend-sample_output_mono
135 /****************************************************************************
136 * void sample_output_stereo(int count, struct dsp_data *data,
137 int32_t *src[], int16_t *dst)
138 * NOTE: The following code processes two samples at once. When count is odd,
139 * one extra sample is processed as well; its result is not used by the
140 * calling functions. */
142 .section .icode, "ax", %progbits
144 .global sample_output_stereo
145 .type sample_output_stereo, %function
146 sample_output_stereo:
147 @ input: r0 = count, r1 = data, r2 = src, r3 = dst
@ Converts 32-bit stereo samples to 16-bit output words: each of the four
@ values handled per pass (r7, r8, r9, r10) is shifted right by
@ data->output_scale (with a rounding term of 1 << (scale-1)) and clipped to
@ -32768...+32767. r7/r9 are then combined into one word and r8/r10 into
@ another, with r7 resp. r8 in the low halfword.
@ NOTE(review): the sample loads and stores, the rounding adds, the tests
@ that set the flags for the conditional eorne, the masking that uses r11,
@ the initial values of r2 and r11, the loop control and the .sosend label
@ used by .size are not visible in this listing - confirm against the
@ complete file.
148 stmfd sp!, {r4-r11, lr}
150 ldmia r2, {r4-r5} @ r4 = src[0], r5 = src[1]
151 ldr r6, [r1] @ r6 = data->output_scale
152 sub r1, r6, #1 @ r1 = r6-1
154 mov r2, r2, asl r1 @ r2 <<= r1; gives 1 << (scale-1) assuming r2 was set to 1 by a line not shown
155 mvn r1, #0x8000 @ r1 = 0xffff7fff, needed for clipping
157 orr r11, r11, #0xff @ r11 needed for masking (its other bits are set by a line not shown)
162 mov r7, r7, asr r6 @ r7 = (r7 + 1<<(scale-1)) >> scale
165 eorne r7, r1, lr, asr #31 @ Clip (-32768...+32767): low halfword = 0x7fff if lr >= 0, 0x8000 if lr < 0
167 mov r8, r8, asr r6 @ r8 = (r8 + 1<<(scale-1)) >> scale
170 eorne r8, r1, lr, asr #31 @ Clip (-32768...+32767)
174 mov r9, r9, asr r6 @ r9 = (r9 + 1<<(scale-1)) >> scale
177 eorne r9, r1, lr, asr #31 @ Clip (-32768...+32767)
179 mov r10, r10, asr r6 @ r10 = (r10 + 1<<(scale-1)) >> scale
182 eorne r10, r1, lr, asr #31 @ Clip (-32768...+32767)
185 orr r9, r7, r9, asl #16 @ pack first 2 halfwords into 1 word (r7 low, r9 high)
187 orr r10, r8, r10, asl #16 @ pack last 2 halfwords into 1 word (r8 low, r10 high)
193 ldmfd sp!, {r4-r11, pc} @ restore r4-r11 and return (saved lr is popped into pc)
195 .size sample_output_stereo,.sosend-sample_output_stereo
197 /****************************************************************************
198 * void apply_crossfeed(int count, int32_t* src[]) */
201 .global apply_crossfeed
@ Crossfeed filter, applied in place to src[0] (left) and src[1] (right).
@ Per sample and per channel the output is
@   gain*x[n] + b0*d[n] + b1*d[n-1] + a1*y[n-1]
@ where d[] is the delayed input of the opposite channel taken from the delay
@ line in crossfeed_data and y[] is the filter history. Gain, coefficients,
@ filter history and the delay line pointer are read from crossfeed_data on
@ entry; history and delay line pointer are written back on exit.
@ input (as used below): r0 = count, r1 = src
@ NOTE(review): the function label, the loop label, the branch that closes
@ the loop and the .cfend label used by .size are not visible in this
@ listing - confirm against the complete file.
203 @ unfortunately, we ended up in a bit of a register squeeze here, and need
204 @ to keep the count on the stack :/
205 stmdb sp!, { r4-r11, lr } @ stack modified regs
206 ldmia r1, { r2-r3 } @ r2 = src[0], r3 = src[1]
208 ldr r1, =crossfeed_data
209 ldmia r1!, { r4-r11 } @ load direct gain and filter data; r1 now points past these 8 words
210 add r12, r1, #13*4*2 @ calculate end of delay (104 bytes past r1; 8 bytes are stored per sample)
211 stmdb sp!, { r0, r12 } @ stack count and end of delay adr: [sp] = count, [sp, #4] = end
212 ldr r0, [r1, #13*4*2] @ fetch current delay line address (word stored right after the delay line)
@ NOTE(review): the block comment below has no closing delimiter in this
@ listing; it must be terminated before the file can be assembled.
214 /* Register usage in loop:
215 * r0 = &delay[index][0], r1 = accumulator high, r2 = src[0], r3 = src[1],
216 * r4 = direct gain, r5-r7 = b0, b1, a1 (filter coefs),
217 * r8-r11 = filter history, r12 = temp, r14 = accumulator low
220 smull r14, r1, r6, r8 @ acc = b1*dr[n - 1]
221 smlal r14, r1, r7, r9 @ acc += a1*y_l[n - 1]
222 ldr r8, [r0, #4] @ r8 = dr[n]
223 smlal r14, r1, r5, r8 @ acc += b0*dr[n]
224 mov r9, r1, lsl #1 @ fix format for filter history (r9 = new y_l[n - 1])
225 ldr r12, [r2] @ load left input
226 smlal r14, r1, r4, r12 @ acc += gain*x_l[n]
227 mov r1, r1, lsl #1 @ fix format
228 str r1, [r2], #4 @ save result
230 smull r14, r1, r6, r10 @ acc = b1*dl[n - 1]
231 smlal r14, r1, r7, r11 @ acc += a1*y_r[n - 1]
232 ldr r10, [r0] @ r10 = dl[n]
233 str r12, [r0], #4 @ save left input to delay line
234 smlal r14, r1, r5, r10 @ acc += b0*dl[n]
235 mov r11, r1, lsl #1 @ fix format for filter history (r11 = new y_r[n - 1])
236 ldr r12, [r3] @ load right input
237 smlal r14, r1, r4, r12 @ acc += gain*x_r[n]
238 str r12, [r0], #4 @ save right input to delay line
239 mov r1, r1, lsl #1 @ fix format
240 str r1, [r3], #4 @ save result
242 ldr r12, [sp, #4] @ fetch delay line end addr from stack
243 cmp r0, r12 @ need to wrap to start of delay?
244 subeq r0, r0, #13*4*2 @ wrap back delay line ptr to start
246 ldr r1, [sp] @ fetch count from stack
247 subs r1, r1, #1 @ are we finished?
248 strne r1, [sp] @ nope, save count back to stack
251 @ save data back to struct
252 ldr r12, =crossfeed_data + 4*4 @ skip gain and the three coefs (r4-r7 above)
253 stmia r12, { r8-r11 } @ save filter history
254 str r0, [r12, #30*4] @ save delay line index (same word that was read into r0 on entry)
255 add sp, sp, #8 @ remove temp variables from stack
256 ldmia sp!, { r4-r11, pc }
258 .size apply_crossfeed,.cfend-apply_crossfeed
260 /****************************************************************************
261 * int dsp_downsample(int count, struct dsp_data *data,
262 * int32_t *src[], int32_t *dst[]) */
265 .global dsp_downsample
@ Linear-interpolating resampler. For every channel, output samples are
@ produced as out = s[pos - 1] + frac*(s[pos] - s[pos - 1]), where
@ pos = phase >> 16 and frac = phase & 0xffff, and phase advances by delta
@ per output sample until pos reaches count. The last input sample of each
@ channel is remembered in last_sample[] to serve as s[-1] of the next call.
@ input (as used below): r0 = count, r1 = data, r2 = src, r3 = dst
@ output: r0 = number of 32-bit samples written (derived from the final write
@         pointer of the last processed channel and dst[0])
@ NOTE(review): the function label, the labels .dschannel_loop, .dsloop,
@ .dsuse_last_start and .dsend, the initial values of r11 and r12, the
@ branch after the "pos >= count" compare and the channel counter decrement
@ are not visible in this listing - confirm against the complete file.
267 stmdb sp!, { r4-r11, lr } @ stack modified regs
268 ldmib r1, { r5-r6 } @ r5 = num_channels,r6 = resample_data.delta (words at r1+4, r1+8)
269 sub r5, r5, #1 @ pre-decrement num_channels for use
270 add r4, r1, #12 @ r4 = &resample_data.phase
272 orr r12, r12, #0xff00 @ r12 = 0xffff (assuming r12 was set to 0xff by a line not shown)
274 ldr r1, [r4] @ r1 = resample_data.phase
275 ldr r7, [r2, r5, lsl #2] @ r7 = s = src[ch - 1]
276 ldr r8, [r3, r5, lsl #2] @ r8 = d = dst[ch - 1]
277 add r9, r4, #4 @ r9 = &last_sample[0]
278 ldr r10, [r9, r5, lsl #2] @ r10 = last_sample[ch - 1]
280 ldr r14, [r7, r11, lsl #2] @ load last sample in s[] ... (r11 = its index, set by a line not shown)
281 str r14, [r9, r5, lsl #2] @ and write as next frame's last_sample
282 movs r9, r1, lsr #16 @ r9 = pos = phase >> 16
283 ldreq r11, [r7] @ if pos = 0, load src[0] and jump into loop
284 beq .dsuse_last_start @ ... with r10 still holding last_sample as s[pos - 1]
285 cmp r9, r0 @ if pos >= count, we're already done
288 @ Register usage in loop:
289 @ r0 = count, r1 = phase, r4 = &resample_data.phase, r5 = cur_channel,
290 @ r6 = delta, r7 = s, r8 = d, r9 = pos, r10 = s[pos - 1], r11 = s[pos]
292 add r9, r7, r9, lsl #2 @ r9 = &s[pos]
293 ldmda r9, { r10, r11 } @ r10 = s[pos - 1], r11 = s[pos]
295 sub r11, r11, r10 @ r11 = diff = s[pos] - s[pos - 1]
296 @ keep frac in lower bits to take advantage of multiplier early termination
297 and r9, r1, r12 @ frac = phase & 0xffff
298 smull r9, r14, r11, r9 @ r14:r9 = 64-bit product diff*frac
299 add r10, r10, r14, lsl #16 @ add bits 32..47 of the product ...
300 add r10, r10, r9, lsr #16 @ r10 = out = s[pos - 1] + frac*diff
301 str r10, [r8], #4 @ *d++ = out
302 add r1, r1, r6 @ phase += delta
303 mov r9, r1, lsr #16 @ pos = phase >> 16
304 cmp r9, r0 @ pos < count?
305 blt .dsloop @ yup, do more samples
308 bpl .dschannel_loop @ if (--ch) >= 0, do another channel
309 sub r1, r1, r0, lsl #16 @ wrap phase back to start (phase -= count << 16)
310 str r1, [r4] @ store back
311 ldr r1, [r3] @ r1 = dst[0] (start of the first channel's output buffer)
312 sub r8, r8, r1 @ d - dst[0] = bytes written
313 mov r0, r8, lsr #2 @ convert bytes->samples
314 ldmia sp!, { r4-r11, pc } @ ... and we're out
316 .size dsp_downsample,.dsend-dsp_downsample
318 /****************************************************************************
319 * int dsp_upsample(int count, struct dsp_data *dsp,
320 * int32_t *src[], int32_t *dst[]) */
@ Linear-interpolating upsampler. For every channel, output samples are
@ produced as out = s[pos - 1] + frac*(s[pos] - s[pos - 1]). The phase is
@ kept with its halfwords swapped (frac in the upper 16 bits), so that adding
@ delta << 16 produces a carry exactly when pos has to advance to the next
@ input sample. The last input sample of each channel is remembered in
@ last_sample[] to serve as s[-1] of the next call.
@ input (as used below): r0 = count, r1 = dsp, r2 = src, r3 = dst
@ output: r0 = number of 32-bit samples written (derived from the final write
@         pointer of the last processed channel and dst[0])
@ NOTE(review): the .global directive, the function label, the labels
@ .uschannel_loop, .usstart_0, .usloop_0 and .usend, the initial value of
@ r11, the branches after the two compares and the channel counter
@ decrement are not visible in this listing - confirm against the complete
@ file.
325 stmdb sp!, { r4-r11, lr } @ stack modified regs
326 ldmib r1, { r5-r6 } @ r5 = num_channels,r6 = resample_data.delta (words at r1+4, r1+8)
327 sub r5, r5, #1 @ pre-decrement num_channels for use
328 add r4, r1, #12 @ r4 = &resample_data.phase
329 stmdb sp!, { r0, r4 } @ stack count and &resample_data.phase (r0 and r4 are reused as temps in the loop)
331 ldr r12, [r4] @ r12 = resample_data.phase
332 mov r1, r12, ror #16 @ swap halfword positions, we'll use carry
333 @ to detect pos increments
334 ldr r7, [r2, r5, lsl #2] @ r7 = s = src[ch - 1]
335 ldr r8, [r3, r5, lsl #2] @ r8 = d = dst[ch - 1]
336 add r9, r4, #4 @ r9 = &last_sample[0]
337 ldr r10, [r9, r5, lsl #2] @ r10 = last_sample[ch - 1]
339 ldr r14, [r7, r11, lsl #2] @ load last sample in s[] ... (r11 = its index, set by a line not shown)
340 str r14, [r9, r5, lsl #2] @ and write as next frame's last_sample
341 add r9, r7, r0, lsl #2 @ r9 = src_end = &src[count]
342 movs r14, r12, lsr #16 @ pos = resample_data.phase >> 16
343 beq .usstart_0 @ pos = 0
344 cmp r14, r0 @ if pos >= count, we're already done
346 add r7, r7, r14, lsl #2 @ r7 = &s[pos]
347 ldr r10, [r7, #-4] @ r10 = s[pos - 1]
350 @ Register usage in loop:
351 @ r0 = count, r1 = phase, r4 = &resample_data.phase, r5 = cur_channel,
352 @ r6 = delta, r7 = s, r8 = d, r9 = src_end, r10 = s[pos - 1], r11 = s[pos]
354 mov r10, r11 @ r10 = previous sample
356 ldr r11, [r7], #4 @ r11 = next sample
357 sub r0, r11, r10 @ r0 = s[pos] - s[pos - 1] (overwrites count; reloaded from the stack below)
359 mov r4, r1, lsr #16 @ r4 = frac = upper halfword of the swapped phase (overwrites r4; reloaded below)
360 smull r12, r14, r4, r0 @ r14:r12 = 64-bit product frac*diff
361 add r14, r10, r14, lsl #16 @ add bits 32..47 of the product ...
362 add r14, r14, r12, lsr #16 @ r14 = out = s[pos - 1] + frac*diff
363 str r14, [r8], #4 @ *d++ = out
364 adds r1, r1, r6, lsl #16 @ phase += delta << 16
365 bcc .usloop_0 @ if carry is set, pos is incremented
366 cmp r7, r9 @ if s < src_end, do another sample
370 ldmia sp, { r0, r4 } @ reload count and &resample_data.phase
371 bpl .uschannel_loop @ if (--ch) >= 0, do another channel
372 mov r1, r1, ror #16 @ wrap phase back to start of next frame (undo the halfword swap)
373 str r1, [r4] @ store back
374 ldr r1, [r3] @ r1 = dst[0] (start of the first channel's output buffer)
375 sub r8, r8, r1 @ d - dst[0] = bytes written
376 mov r0, r8, lsr #2 @ convert bytes->samples
377 add sp, sp, #8 @ adjust stack for temp variables
378 ldmia sp!, { r4-r11, pc } @ ... and we're out
380 .size dsp_upsample,.usend-dsp_upsample