1 /***************************************************************************
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 * Copyright (C) 2006-2007 Thom Johansen
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
20 ****************************************************************************/
/****************************************************************************
 *  void channels_process_sound_chan_mono(int count, int32_t *buf[])
 *
 *  Mixes both channels down to mono in place: every sample of buf[0] and
 *  buf[1] is replaced by (L >> 1) + (R >> 1).
 *
 *  In:     r0 = count (samples per channel), r1 = buf
 *  Clobb:  r0-r3, r12, lr, flags (r4 is saved and restored)
 *
 *  NOTE: The main loop processes two samples per iteration; a trailing odd
 *        sample is handled by the single-sample tail. count is expected to
 *        be >= 1: with count == 0 the main loop is still entered once and
 *        two samples are processed.
 */
    .section .icode, "ax", %progbits
    .align  2
    .global channels_process_sound_chan_mono
    .type   channels_process_sound_chan_mono, %function
channels_process_sound_chan_mono:
    @ input: r0 = count, r1 = buf
    stmfd   sp!, { r4, lr }            @
                                       @
    ldmia   r1, { r1, r2 }             @ r1 = buf[0], r2 = buf[1]
    subs    r0, r0, #1                 @ odd: end at 0; even: end at -1
    beq     .mono_singlesample         @ Zero? Only one sample!
                                       @
.monoloop:                             @
    ldmia   r1, { r3, r4 }             @ r3, r4 = Li0, Li1
    ldmia   r2, { r12, r14 }           @ r12, r14 = Ri0, Ri1
    mov     r3, r3, asr #1             @ Mo0 = Li0 / 2 + Ri0 / 2
    mov     r4, r4, asr #1             @ Mo1 = Li1 / 2 + Ri1 / 2
    add     r12, r3, r12, asr #1       @
    add     r14, r4, r14, asr #1       @
    subs    r0, r0, #2                 @ two samples done; flags survive the
                                       @ stores below and steer bgt/ldmlt
    stmia   r1!, { r12, r14 }          @ store Mo0, Mo1
    stmia   r2!, { r12, r14 }          @ store Mo0, Mo1
    bgt     .monoloop                  @ more than one sample left? loop
                                       @
    ldmltfd sp!, { r4, pc }            @ if count was even, we're done
                                       @
.mono_singlesample:                    @ exactly one sample left
    ldr     r3, [r1]                   @ r3  = Ls
    ldr     r12, [r2]                  @ r12 = Rs
    mov     r3, r3, asr #1             @ Mo = Ls / 2 + Rs / 2
    add     r12, r3, r12, asr #1       @
    str     r12, [r1]                  @ store Mo
    str     r12, [r2]                  @ store Mo
                                       @
    ldmfd   sp!, { r4, pc }            @
    .size   channels_process_sound_chan_mono, \
                .-channels_process_sound_chan_mono
/****************************************************************************
 *  void channels_process_sound_chan_karaoke(int count, int32_t *buf[])
 *
 *  In-place channel subtraction: buf[0] receives (L >> 1) - (R >> 1) and
 *  buf[1] receives the negation of that value.
 *
 *  In:     r0 = count (samples per channel), r1 = buf
 *  Clobb:  r0-r3, r12, lr, flags (r4 is saved and restored)
 *
 *  NOTE: The main loop processes two samples per iteration; a trailing odd
 *        sample is handled by the single-sample tail. count is expected to
 *        be >= 1: with count == 0 the main loop is still entered once and
 *        two samples are processed.
 */
    .section .icode, "ax", %progbits
    .align  2
    .global channels_process_sound_chan_karaoke
    .type   channels_process_sound_chan_karaoke, %function
channels_process_sound_chan_karaoke:
    @ input: r0 = count, r1 = buf
    stmfd   sp!, { r4, lr }            @
                                       @
    ldmia   r1, { r1, r2 }             @ r1 = buf[0], r2 = buf[1]
    subs    r0, r0, #1                 @ odd: end at 0; even: end at -1
    beq     .karaoke_singlesample      @ Zero? Only one sample!
                                       @
.karaokeloop:                          @
    ldmia   r1, { r3, r4 }             @ r3, r4 = Li0, Li1
    ldmia   r2, { r12, r14 }           @ r12, r14 = Ri0, Ri1
    mov     r3, r3, asr #1             @ Lo0 = Li0 / 2 - Ri0 / 2
    mov     r4, r4, asr #1             @ Lo1 = Li1 / 2 - Ri1 / 2
    sub     r3, r3, r12, asr #1        @
    sub     r4, r4, r14, asr #1        @
    rsb     r12, r3, #0                @ Ro0 = -Lo0 = Ri0 / 2 - Li0 / 2
    rsb     r14, r4, #0                @ Ro1 = -Lo1 = Ri1 / 2 - Li1 / 2
    subs    r0, r0, #2                 @ two samples done; flags survive the
                                       @ stores below and steer bgt/ldmlt
    stmia   r1!, { r3, r4 }            @ store Lo0, Lo1
    stmia   r2!, { r12, r14 }          @ store Ro0, Ro1
    bgt     .karaokeloop               @ more than one sample left? loop
                                       @
    ldmltfd sp!, { r4, pc }            @ if count was even, we're done
                                       @
.karaoke_singlesample:                 @ exactly one sample left
    ldr     r3, [r1]                   @ r3  = Li
    ldr     r12, [r2]                  @ r12 = Ri
    mov     r3, r3, asr #1             @ Lo = Li / 2 - Ri / 2
    sub     r3, r3, r12, asr #1        @
    rsb     r12, r3, #0                @ Ro = -Lo = Ri / 2 - Li / 2
    str     r3, [r1]                   @ store Lo
    str     r12, [r2]                  @ store Ro
                                       @
    ldmfd   sp!, { r4, pc }            @
    .size   channels_process_sound_chan_karaoke, \
                .-channels_process_sound_chan_karaoke
#if ARM_ARCH < 6    /* pairs with the "#endif ARM_ARCH < 6" after
                       sample_output_stereo; ARM_ARCH is presumably provided
                       by the project configuration header - confirm */
/****************************************************************************
 *  void sample_output_mono(int count, struct dsp_data *data,
 *                          const int32_t *src[], int16_t *dst)
 *
 *  Converts src[0] to 16-bit output: each sample is rounded,
 *  shifted right by data->output_scale, clipped to -32768...+32767 and
 *  written twice (as both halfwords of one 32-bit word) to dst.
 *
 *  In:     r0 = count, r1 = data (output_scale is read from offset 0),
 *          r2 = src, r3 = dst (written with word stores)
 *  Clobb:  r0-r3, r12, lr, flags (r4-r6 are saved and restored)
 *
 *  NOTE: The main loop processes two samples per iteration; a trailing odd
 *        sample is handled by the single-sample tail. count is expected to
 *        be >= 1.
 */
    .section .icode, "ax", %progbits
    .align  2
    .global sample_output_mono
    .type   sample_output_mono, %function
sample_output_mono:
    @ input: r0 = count, r1 = data, r2 = src, r3 = dst
    stmfd   sp!, { r4-r6, lr }

    ldr     r1, [r1]                   @ r1 = data->output_scale
    ldr     r2, [r2]                   @ r2 = src[0]

    mov     r4, #1
    mov     r4, r4, lsl r1             @ r4 = 1 << scale
    mov     r4, r4, lsr #1             @ r4 = 1 << (scale-1), rounding bias
    mvn     r14, #0x8000               @ r14 = 0xffff7fff, needed for
                                       @ clipping and masking
    subs    r0, r0, #1                 @ odd: end at 0; even: end at -1
    beq     .som_singlesample          @ Zero? Only one sample!

.somloop:
    ldmia   r2!, { r5, r6 }
    add     r5, r5, r4                 @ r5 = (r5 + 1<<(scale-1)) >> scale
    mov     r5, r5, asr r1
    mov     r12, r5, asr #15           @ r12 = 0 or -1 iff r5 fits in 16 bits
    teq     r12, r12, asr #31
    eorne   r5, r14, r5, asr #31       @ Clip (-32768...+32767)
    add     r6, r6, r4
    mov     r6, r6, asr r1             @ r6 = (r6 + 1<<(scale-1)) >> scale
    mov     r12, r6, asr #15
    teq     r12, r12, asr #31
    eorne   r6, r14, r6, asr #31       @ Clip (-32768...+32767)

    and     r5, r5, r14, lsr #16       @ keep low halfword only
    and     r6, r6, r14, lsr #16
    orr     r5, r5, r5, lsl #16        @ pack first 2 halfwords into 1 word
    orr     r6, r6, r6, lsl #16        @ pack last 2 halfwords into 1 word
    stmia   r3!, { r5, r6 }

    subs    r0, r0, #2
    bgt     .somloop

    ldmltfd sp!, { r4-r6, pc }         @ even 'count'? return

.som_singlesample:
    ldr     r5, [r2]                   @ do odd sample
    add     r5, r5, r4
    mov     r5, r5, asr r1
    mov     r12, r5, asr #15
    teq     r12, r12, asr #31
    eorne   r5, r14, r5, asr #31       @ Clip (-32768...+32767)

    and     r5, r5, r14, lsr #16       @ pack 2 halfwords into 1 word
    orr     r5, r5, r5, lsl #16
    str     r5, [r3]

    ldmfd   sp!, { r4-r6, pc }
    .size   sample_output_mono, .-sample_output_mono
/****************************************************************************
 * void sample_output_stereo(int count, struct dsp_data *data,
 *                           const int32_t *src[], int16_t *dst)
 *
 *  Converts src[0] (left) and src[1] (right) to interleaved 16-bit output:
 *  each sample is rounded, shifted right by data->output_scale and clipped
 *  to -32768...+32767; left goes to the low halfword and right to the high
 *  halfword of each 32-bit word stored to dst.
 *
 *  In:     r0 = count, r1 = data (output_scale is read from offset 0),
 *          r2 = src, r3 = dst (written with word stores)
 *  Clobb:  r0-r3, r12, lr, flags (r4-r9 are saved and restored)
 *
 *  NOTE: The main loop processes two samples per iteration; a trailing odd
 *        sample is handled by the single-sample tail. count is expected to
 *        be >= 1.
 */
    .section .icode, "ax", %progbits
    .align  2
    .global sample_output_stereo
    .type   sample_output_stereo, %function
sample_output_stereo:
    @ input: r0 = count, r1 = data, r2 = src, r3 = dst
    stmfd   sp!, { r4-r9, lr }

    ldr     r1, [r1]                   @ r1 = data->output_scale
    ldmia   r2, { r2, r5 }             @ r2 = src[0], r5 = src[1]

    mov     r4, #1
    mov     r4, r4, lsl r1             @ r4 = 1 << scale
    mov     r4, r4, lsr #1             @ r4 = 1 << (scale-1), rounding bias

    mvn     r14, #0x8000               @ r14 = 0xffff7fff, needed for
                                       @ clipping and masking
    subs    r0, r0, #1                 @ odd: end at 0; even: end at -1
    beq     .sos_singlesample          @ Zero? Only one sample!

.sosloop:
    ldmia   r2!, { r6, r7 }            @ 2 left
    ldmia   r5!, { r8, r9 }            @ 2 right

    add     r6, r6, r4                 @ r6 = (r6 + 1<<(scale-1)) >> scale
    mov     r6, r6, asr r1
    mov     r12, r6, asr #15           @ r12 = 0 or -1 iff r6 fits in 16 bits
    teq     r12, r12, asr #31
    eorne   r6, r14, r6, asr #31       @ Clip (-32768...+32767)
    add     r7, r7, r4
    mov     r7, r7, asr r1             @ r7 = (r7 + 1<<(scale-1)) >> scale
    mov     r12, r7, asr #15
    teq     r12, r12, asr #31
    eorne   r7, r14, r7, asr #31       @ Clip (-32768...+32767)

    add     r8, r8, r4                 @ r8 = (r8 + 1<<(scale-1)) >> scale
    mov     r8, r8, asr r1
    mov     r12, r8, asr #15
    teq     r12, r12, asr #31
    eorne   r8, r14, r8, asr #31       @ Clip (-32768...+32767)
    add     r9, r9, r4                 @ r9 = (r9 + 1<<(scale-1)) >> scale
    mov     r9, r9, asr r1
    mov     r12, r9, asr #15
    teq     r12, r12, asr #31
    eorne   r9, r14, r9, asr #31       @ Clip (-32768...+32767)

    and     r6, r6, r14, lsr #16       @ pack first 2 halfwords into 1 word
    orr     r8, r6, r8, asl #16
    and     r7, r7, r14, lsr #16       @ pack last 2 halfwords into 1 word
    orr     r9, r7, r9, asl #16

    stmia   r3!, { r8, r9 }

    subs    r0, r0, #2
    bgt     .sosloop

    ldmltfd sp!, { r4-r9, pc }         @ even 'count'? return

.sos_singlesample:
    ldr     r6, [r2]                   @ left odd sample
    ldr     r8, [r5]                   @ right odd sample

    add     r6, r6, r4                 @ r6 = (r6 + 1<<(scale-1)) >> scale
    mov     r6, r6, asr r1
    mov     r12, r6, asr #15
    teq     r12, r12, asr #31
    eorne   r6, r14, r6, asr #31       @ Clip (-32768...+32767)
    add     r8, r8, r4                 @ r8 = (r8 + 1<<(scale-1)) >> scale
    mov     r8, r8, asr r1
    mov     r12, r8, asr #15
    teq     r12, r12, asr #31
    eorne   r8, r14, r8, asr #31       @ Clip (-32768...+32767)

    and     r6, r6, r14, lsr #16       @ pack 2 halfwords into 1 word
    orr     r8, r6, r8, asl #16

    str     r8, [r3]

    ldmfd   sp!, { r4-r9, pc }
    .size   sample_output_stereo, .-sample_output_stereo
268 #endif /* ARM_ARCH < 6 */
/****************************************************************************
 * void apply_crossfeed(int count, int32_t* src[])
 *
 * Processes src[0] and src[1] in place. Each output sample is the input of
 * its own channel times the direct gain plus a first-order filtered
 * (coefficients b0, b1, a1), delayed copy of the opposite channel's input.
 *
 * State lives in the external word array crossfeed_data, accessed as:
 *   words  0..3   direct gain, b0, b1, a1
 *   words  4..7   filter history (dr[n-1], y_l[n-1], dl[n-1], y_r[n-1])
 *   words  8..33  delay line, 13 {left, right} pairs
 *   word   34     pointer to the current delay line position
 *
 * In:     r0 = count (expected to be >= 1, the loop tests count after the
 *         first iteration), r1 = src
 * Clobb:  r0-r3, r12, lr, flags (r4-r11 are saved and restored)
 */
    .section .text
    .global apply_crossfeed
    .type   apply_crossfeed, %function
apply_crossfeed:
    @ unfortunately, we ended up in a bit of a register squeeze here, and need
    @ to keep the count on the stack :/
    stmdb   sp!, { r4-r11, lr }        @ stack modified regs
    ldmia   r1, { r2-r3 }              @ r2 = src[0], r3 = src[1]

    ldr     r1, =crossfeed_data
    ldmia   r1!, { r4-r11 }            @ load direct gain and filter data
    add     r12, r1, #13*4*2           @ calculate end of delay
    stmdb   sp!, { r0, r12 }           @ stack count and end of delay adr
    ldr     r0, [r1, #13*4*2]          @ fetch current delay line address

    /* Register usage in loop:
     * r0 = &delay[index][0], r1 = accumulator high, r2 = src[0], r3 = src[1],
     * r4 = direct gain, r5-r7 = b0, b1, a1 (filter coefs),
     * r8-r11 = filter history, r12 = temp, r14 = accumulator low
     */
.cfloop:
    smull   r14, r1, r6, r8            @ acc = b1*dr[n - 1]
    smlal   r14, r1, r7, r9            @ acc += a1*y_l[n - 1]
    ldr     r8, [r0, #4]               @ r8 = dr[n]
    smlal   r14, r1, r5, r8            @ acc += b0*dr[n]
    mov     r9, r1, lsl #1             @ fix format for filter history
    ldr     r12, [r2]                  @ load left input
    smlal   r14, r1, r4, r12           @ acc += gain*x_l[n]
    mov     r1, r1, lsl #1             @ fix format
    str     r1, [r2], #4               @ save result

    smull   r14, r1, r6, r10           @ acc = b1*dl[n - 1]
    smlal   r14, r1, r7, r11           @ acc += a1*y_r[n - 1]
    ldr     r10, [r0]                  @ r10 = dl[n]
    str     r12, [r0], #4              @ save left input to delay line
    smlal   r14, r1, r5, r10           @ acc += b0*dl[n]
    mov     r11, r1, lsl #1            @ fix format for filter history
    ldr     r12, [r3]                  @ load right input
    smlal   r14, r1, r4, r12           @ acc += gain*x_r[n]
    str     r12, [r0], #4              @ save right input to delay line
    mov     r1, r1, lsl #1             @ fix format
    str     r1, [r3], #4               @ save result

    ldr     r12, [sp, #4]              @ fetch delay line end addr from stack
    cmp     r0, r12                    @ need to wrap to start of delay?
    subeq   r0, r0, #13*4*2            @ wrap back delay line ptr to start

    ldr     r1, [sp]                   @ fetch count from stack
    subs    r1, r1, #1                 @ are we finished?
    strne   r1, [sp]                   @ nope, save count back to stack
    bne     .cfloop

    @ save data back to struct
    ldr     r12, =crossfeed_data + 4*4
    stmia   r12, { r8-r11 }            @ save filter history
    str     r0, [r12, #30*4]           @ save delay line index
    add     sp, sp, #8                 @ remove temp variables from stack
    ldmia   sp!, { r4-r11, pc }
    .size   apply_crossfeed, .-apply_crossfeed
/****************************************************************************
 * int dsp_downsample(int count, struct dsp_data *data,
 *                    int32_t *src[], int32_t *dst[])
 *
 * Linear-interpolating resampler. For every channel the 16.16 fixed point
 * position "phase" is advanced by "delta" per output sample and
 *     out = s[pos - 1] + frac * (s[pos] - s[pos - 1])
 * is written, where pos = phase >> 16 and frac = phase & 0xffff. For
 * pos == 0 the sample saved from the previous call (last_sample[ch]) is used
 * as s[pos - 1].
 *
 * Fields accessed relative to data: +4 num_channels, +8 resample_data.delta,
 * +12 resample_data.phase, +16 last_sample[].
 *
 * In:     r0 = count (input samples per channel), r1 = data, r2 = src,
 *         r3 = dst
 * Out:    r0 = number of samples written to dst[0]
 * Clobb:  r1-r3, r12, lr, flags (r4-r11 are saved and restored)
 */
    .section .text
    .global dsp_downsample
    .type   dsp_downsample, %function
dsp_downsample:
    stmdb   sp!, { r4-r11, lr }        @ stack modified regs
    ldmib   r1, { r5-r6 }              @ r5 = num_channels,r6 = resample_data.delta
    sub     r5, r5, #1                 @ pre-decrement num_channels for use
    add     r4, r1, #12                @ r4 = &resample_data.phase
    mov     r12, #0xff
    orr     r12, r12, #0xff00          @ r12 = 0xffff
.dschannel_loop:
    ldr     r1, [r4]                   @ r1 = resample_data.phase
    ldr     r7, [r2, r5, lsl #2]       @ r7 = s = src[ch - 1]
    ldr     r8, [r3, r5, lsl #2]       @ r8 = d = dst[ch - 1]
    add     r9, r4, #4                 @ r9 = &last_sample[0]
    ldr     r10, [r9, r5, lsl #2]      @ r10 = last_sample[ch - 1]
    sub     r11, r0, #1                @ r11 = count - 1
    ldr     r14, [r7, r11, lsl #2]     @ load last sample in s[] ...
    str     r14, [r9, r5, lsl #2]      @ and write as next frame's last_sample
    movs    r9, r1, lsr #16            @ r9 = pos = phase >> 16
    ldreq   r11, [r7]                  @ if pos = 0, load src[0] and jump into loop
    beq     .dsuse_last_start
    cmp     r9, r0                     @ if pos >= count, we're already done
    bge     .dsloop_skip

    @ Register usage in loop:
    @ r0 = count, r1 = phase, r4 = &resample_data.phase, r5 = cur_channel,
    @ r6 = delta, r7 = s, r8 = d, r9 = pos, r10 = s[pos - 1], r11 = s[pos]
.dsloop:
    add     r9, r7, r9, lsl #2         @ r9 = &s[pos]
    ldmda   r9, { r10, r11 }           @ r10 = s[pos - 1], r11 = s[pos]
.dsuse_last_start:
    sub     r11, r11, r10              @ r11 = diff = s[pos] - s[pos - 1]
    @ keep frac in lower bits to take advantage of multiplier early termination
    and     r9, r1, r12                @ frac = phase & 0xffff
    smull   r9, r14, r11, r9           @ r14:r9 = diff*frac (64 bit)
    add     r10, r10, r14, lsl #16
    add     r10, r10, r9, lsr #16      @ r10 = out = s[pos - 1] + frac*diff
    str     r10, [r8], #4              @ *d++ = out
    add     r1, r1, r6                 @ phase += delta
    mov     r9, r1, lsr #16            @ pos = phase >> 16
    cmp     r9, r0                     @ pos < count?
    blt     .dsloop                    @ yup, do more samples
.dsloop_skip:
    subs    r5, r5, #1
    bpl     .dschannel_loop            @ if (--ch) >= 0, do another channel
    sub     r1, r1, r0, lsl #16        @ wrap phase back to start
    str     r1, [r4]                   @ store back
    ldr     r1, [r3]                   @ r1 = dst[0]
    sub     r8, r8, r1                 @ bytes written to dst[0]
    mov     r0, r8, lsr #2             @ convert bytes->samples
    ldmia   sp!, { r4-r11, pc }        @ ... and we're out
    .size   dsp_downsample, .-dsp_downsample
/****************************************************************************
 * int dsp_upsample(int count, struct dsp_data *dsp,
 *                  int32_t *src[], int32_t *dst[])
 *
 * Linear-interpolating resampler for delta < 1.0 (16.16 fixed point): only
 * the low 16 bits of delta and phase are kept, shifted into the top half of
 * a register, so that the carry out of "phase += delta" signals that the
 * input position advanced by one sample. For every output sample
 *     out = s[pos - 1] + frac * (s[pos] - s[pos - 1])
 * with last_sample[ch] from the previous call standing in for s[-1].
 *
 * Fields accessed relative to dsp: +4 num_channels, +8 resample_data.delta,
 * +12 resample_data.phase, +16 last_sample[].
 *
 * In:     r0 = count (input samples per channel), r1 = dsp, r2 = src,
 *         r3 = dst
 * Out:    r0 = number of samples written to dst[0]
 * Clobb:  r1-r3, r12, lr, flags (r4-r11 are saved and restored)
 */
    .section .text
    .global dsp_upsample
    .type   dsp_upsample, %function
dsp_upsample:
    stmfd   sp!, { r4-r11, lr }        @ stack modified regs
    ldmib   r1, { r5-r6 }              @ r5 = num_channels,r6 = resample_data.delta
    sub     r5, r5, #1                 @ pre-decrement num_channels for use
    add     r4, r1, #12                @ r4 = &resample_data.phase
    mov     r6, r6, lsl #16            @ we'll use carry to detect pos increments
    stmfd   sp!, { r0, r4 }            @ stack count and &resample_data.phase
.uschannel_loop:
    ldr     r12, [r4]                  @ r12 = resample_data.phase
    ldr     r7, [r2, r5, lsl #2]       @ r7 = s = src[ch - 1]
    ldr     r8, [r3, r5, lsl #2]       @ r8 = d = dst[ch - 1]
    add     r9, r4, #4                 @ r9 = &last_sample[0]
    mov     r1, r12, lsl #16           @ we'll use carry to detect pos increments
    sub     r11, r0, #1                @ r11 = count - 1
    ldr     r14, [r7, r11, lsl #2]     @ load last sample in s[] ...
    ldr     r10, [r9, r5, lsl #2]      @ r10 = last_sample[ch - 1]
    str     r14, [r9, r5, lsl #2]      @ and write as next frame's last_sample
    movs    r14, r12, lsr #16          @ pos = resample_data.phase >> 16
    beq     .usstart_0                 @ pos = 0
    cmp     r14, r0                    @ if pos >= count, we're already done
    bge     .usloop_skip
    add     r7, r7, r14, lsl #2        @ r7 = &s[pos]
    ldr     r10, [r7, #-4]             @ r10 = s[pos - 1]
    b       .usstart_0

    @ Register usage in loop:
    @ r0 = count, r1 = phase, r4 = frac (the stacked &resample_data.phase is
    @ reloaded into r4 after the loop), r5 = cur_channel,
    @ r6 = delta, r7 = s, r8 = d, r9 = diff, r10 = s[pos - 1], r11 = s[pos]
.usloop_1:
    mov     r10, r11                   @ r10 = previous sample
.usstart_0:
    ldr     r11, [r7], #4              @ r11 = next sample
    mov     r4, r1, lsr #16            @ r4 = frac = phase >> 16
    sub     r9, r11, r10               @ r9 = diff = s[pos] - s[pos - 1]
.usloop_0:
    smull   r12, r14, r4, r9           @ r14:r12 = frac*diff (64 bit)
    adds    r1, r1, r6                 @ phase += delta << 16
    mov     r4, r1, lsr #16            @ r4 = frac = phase >> 16
    add     r14, r10, r14, lsl #16
    add     r14, r14, r12, lsr #16     @ r14 = out = s[pos - 1] + frac*diff
    str     r14, [r8], #4              @ *d++ = out
    bcc     .usloop_0                  @ if carry is set, pos is incremented
    subs    r0, r0, #1                 @ if count > 0, do another sample
    bne     .usloop_1
.usloop_skip:
    subs    r5, r5, #1
    ldmfd   sp, { r0, r4 }             @ reload count and &resample_data.phase
    bpl     .uschannel_loop            @ if (--ch) >= 0, do another channel
    mov     r1, r1, lsr #16            @ wrap phase back to start of next frame
    ldr     r2, [r3]                   @ r2 = dst[0]
    str     r1, [r4]                   @ store phase
    sub     r8, r8, r2                 @ bytes written to dst[0]
    mov     r0, r8, lsr #2             @ convert bytes->samples
    add     sp, sp, #8                 @ adjust stack for temp variables
    ldmfd   sp!, { r4-r11, pc }        @ ... and we're out
    .size   dsp_upsample, .-dsp_upsample
/****************************************************************************
 *  void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[])
 *
 *  Scales every sample of every channel in place:
 *      sample = (int32_t)(((int64_t)sample * gain) >> 23)
 *  i.e. FRACMUL_SHL(sample, gain, 8).
 *
 *  Fields accessed relative to data: +4 num_channels, +32 gain.
 *
 *  In:     r0 = count (samples per channel, expected to be >= 1),
 *          r1 = data, r2 = buf[]
 *  Clobb:  r1-r3, r12, lr, flags (r4-r8 are saved and restored)
 */
    .section .icode, "ax", %progbits
    .align  2
    .global dsp_apply_gain
    .type   dsp_apply_gain, %function
dsp_apply_gain:
    @ input: r0 = count, r1 = data, r2 = buf[]
    stmfd   sp!, { r4-r8, lr }

    ldr     r3, [r1, #4]               @ r3 = data->num_channels
    ldr     r4, [r1, #32]              @ r4 = data->gain

.dag_outerloop:
    ldr     r1, [r2], #4               @ r1 = buf[0] and increment index of buf[]
    subs    r12, r0, #1                @ r12 = r0 = count - 1
    beq     .dag_singlesample          @ Zero? Only one sample!

.dag_innerloop:
    ldmia   r1, { r5, r6 }             @ load r5, r6 from r1
    smull   r7, r8, r5, r4             @ r7 = FRACMUL_SHL(r5, r4, 8)
    smull   r14, r5, r6, r4            @ r14 = FRACMUL_SHL(r6, r4, 8)
    subs    r12, r12, #2               @ flags survive until bgt/blt below
    mov     r7, r7, lsr #23            @ low word bits 23..31 ...
    mov     r14, r14, lsr #23
    orr     r7, r7, r8, asl #9         @ ... joined with high word bits 0..22
    orr     r14, r14, r5, asl #9
    stmia   r1!, { r7, r14 }           @ save r7, r14 to [r1] and increment r1
    bgt     .dag_innerloop             @ end of inner loop

    blt     .dag_evencount             @ < 0? even count

.dag_singlesample:
    ldr     r5, [r1]                   @ handle odd sample
    smull   r7, r8, r5, r4             @ r7 = FRACMUL_SHL(r5, r4, 8)
    mov     r7, r7, lsr #23
    orr     r7, r7, r8, asl #9
    str     r7, [r1]

.dag_evencount:
    subs    r3, r3, #1
    bgt     .dag_outerloop             @ end of outer loop

    ldmfd   sp!, { r4-r8, pc }
    .size   dsp_apply_gain, .-dsp_apply_gain