1 /***************************************************************************
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 * Copyright (C) 2006 Thom Johansen
11 * Portions Copyright (C) 2007 Michael Sevakis
13 * All files in this archive are subject to the GNU General Public License.
14 * See the file COPYING in the source tree root for full license agreement.
16 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
17 * KIND, either express or implied.
19 ****************************************************************************/
21 /****************************************************************************
22 * void apply_crossfeed(int count, int32_t *src[])
 *
 * For each of count samples, computes per channel
 *   out = direct_gain * x[n] + (b0*d[n] + b1*d[n-1] + a1*y[n-1])
 * where d is read from the delay line of the opposite channel (the left
 * output uses dr, the right output uses dl) and y[n-1] is the previous
 * filtered value, captured before the direct term is added. Results
 * overwrite src[0] and src[1] in place and the unprocessed inputs are
 * stored into the delay line.
 *
 * Gain, coefficients, filter history and the delay line pointer are read
 * from crossfeed_data; history and delay pointer are written back on exit.
25 .global apply_crossfeed
28 movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs
29 movem.l 48(%sp), %d7/%a4 | %d7 = count, %a4 = src
30 movem.l (%a4), %a4-%a5 | %a4 = src[0], %a5 = src[1]
31 lea.l crossfeed_data, %a1 | %a1 = &crossfeed_data
32 move.l (%a1)+, %a6 | %a6 = direct gain, %a1 = crossfeed_data+4
33 movem.l 12(%a1), %d0-%d3 | fetch filter history samples (crossfeed_data+16)
34 move.l 132(%a1), %a0 | fetch delay line address (crossfeed_data+136)
35 movem.l (%a1), %a1-%a3 | load filter coefs
36 /* Register usage in loop:
37 * %a0 = delay_p, %a1..%a3 = b0, b1, a1 (filter coefs),
38 * %a4 = src[0], %a5 = src[1], %a6 = direct gain,
 * %d0..%d3 = filter history, %d4..%d5 = left/right input samples,
 * %d6 = result being written, %d7 = count
44 mac.l %a2, %d0, 4(%a0), %d0, %acc0 | acc = b1*dr[n - 1] d0 = dr[n]
45 mac.l %a1, %d0 , %acc0 | acc += b0*dr[n]
46 mac.l %a3, %d1, (%a4), %d4, %acc0 | acc += a1*y_l[n - 1], load L
47 move.l %acc0, %d1 | get filtered delayed sample
48 mac.l %a6, %d4, %acc0 | acc += gain*x_l[n]
50 move.l %d6, (%a4)+ | write result
52 mac.l %a2, %d2, (%a0), %d2, %acc0 | acc = b1*dl[n - 1], d2 = dl[n]
53 mac.l %a1, %d2 , %acc0 | acc += b0*dl[n]
54 mac.l %a3, %d3, (%a5), %d5, %acc0 | acc += a1*y_r[n - 1], load R
55 movem.l %d4-%d5, (%a0) | save left & right inputs to delay line
56 move.l %acc0, %d3 | get filtered delayed sample
57 mac.l %a6, %d5, %acc0 | acc += gain*x_r[n]
58 lea.l 8(%a0), %a0 | increment delay pointer
60 move.l %d6, (%a5)+ | write result
62 cmpa.l #crossfeed_data+136, %a0| wrap a0 if passed end
64 .word 0x51fb | tpf.l - trap the buffer wrap
66 lea.l -104(%a0), %a0 | wrap (back 104 bytes to crossfeed_data+32)
67 subq.l #1, %d7 | --count < 0 ?
69 lea.l crossfeed_data+16, %a1 | save data back to struct
70 movem.l %d0-%d3, (%a1) | ...history
71 move.l %a0, 120(%a1) | ...delay_p (crossfeed_data+136)
72 movem.l (%sp), %d2-%d7/%a2-%a6 | restore all regs
76 .size apply_crossfeed,.cfend-apply_crossfeed
79 /****************************************************************************
80 * int dsp_downsample(int count, struct dsp_data *data,
81 * int32_t *src[], int32_t *dst[])
 *
 * Linear-interpolation resampler. For each channel (processed from
 * ch-1 down), writes s[pos-1] + frac * (s[pos] - s[pos-1]) to the
 * destination and advances the 16.16 fixed point phase by delta while
 * pos < count. When pos is 0 the last sample saved from the previous
 * frame is used in place of s[pos-1].
 *
 * On exit data->resample_data.phase is set to phase - (count << 16) and
 * last_sample[] holds s[count-1] of each channel. The return value is
 * derived from the final destination pointer, converted from bytes to
 * samples.
84 .global dsp_downsample
86 lea.l -40(%sp), %sp | save non-clobberables
87 movem.l %d2-%d7/%a2-%a5, (%sp) |
88 movem.l 44(%sp), %d2/%a0-%a2 | %d2 = count, %a0 = data, %a1 = src, %a2 = dst
92 movem.l 4(%a0), %d3-%d4 | %d3 = ch = data->num_channels
93 | %d4 = delta = data->resample_data.delta
94 moveq.l #16, %d7 | %d7 = shift
96 move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase
97 move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1]
98 move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1]
99 lea.l 12(%a0, %d3.l*4), %a5 | %a5 = &data->resample_data.last_sample[ch-1]
100 move.l (%a5), %d0 | %d0 = last = data->resample_data.last_sample[ch-1]
101 move.l -4(%a3, %d2.l*4), (%a5) | data->resample_data.last_sample[ch-1] = s[count-1]
102 move.l %d5, %d6 | %d6 = pos = phase >> 16
104 cmp.l %d2, %d6 | past end of samples?
105 bge.b .dsloop_skip | yes? skip loop
106 tst.l %d6 | need last sample of prev. frame?
107 bne.b .dsloop | no? start main loop
108 move.l (%a3, %d6.l*4), %d1 | %d1 = s[pos]
109 bra.b .dsuse_last_start | start with last (last in %d0)
111 lea.l -4(%a3, %d6.l*4), %a5 | load s[pos-1] and s[pos]
112 movem.l (%a5), %d0-%d1 | %d0 = s[pos-1], %d1 = s[pos]
114 sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
115 move.l %d0, %acc0 | %acc0 = previous sample
116 move.l %d5, %d0 | frac = (phase << 16) >> 1
119 mac.l %d0, %d1, %acc0 | %acc0 += frac * diff
120 add.l %d4, %d5 | phase += delta
121 move.l %d5, %d6 | pos = phase >> 16
123 movclr.l %acc0, %d0 | %d0 = interpolated sample, clear %acc0
124 move.l %d0, (%a4)+ | *d++ = %d0
125 cmp.l %d2, %d6 | pos < count?
126 blt.b .dsloop | yes? continue resampling
128 subq.l #1, %d3 | ch > 0?
129 bgt.b .dschannel_loop | yes? process next channel
130 asl.l %d7, %d2 | wrap phase to start of next frame
131 sub.l %d2, %d5 | data->resample_data.phase =
132 move.l %d5, 12(%a0) | ... phase - (count << 16)
133 move.l %a4, %d0 | return d - d[0]
135 asr.l #2, %d0 | convert bytes->samples
136 movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables
137 lea.l 40(%sp), %sp | cleanup stack
140 .size dsp_downsample,.dsend-dsp_downsample
142 /****************************************************************************
143 * int dsp_upsample(int count, struct dsp_data *dsp,
144 * int32_t *src[], int32_t *dst[])
 *
 * Linear-interpolation resampler. Phase and delta are kept word-swapped
 * so that the fractional part sits in the high word: adding delta then
 * sets the carry flag exactly when the position moves on to the next
 * source sample. For each channel (processed from ch-1 down), writes
 * last + frac * (s[pos] - s[pos-1]) to the destination until the source
 * pointer passes the last sample of the frame.
 *
 * count is the number of source samples per channel (%d2); the starting
 * position is checked against it before the loop is entered. On exit the
 * phase is swapped back and stored in data->resample_data.phase and
 * last_sample[] holds s[count-1] of each channel. The return value is
 * derived from the final destination pointer, converted from bytes to
 * samples.
149 lea.l -40(%sp), %sp | save non-clobberables
150 movem.l %d2-%d7/%a2-%a5, (%sp) |
151 movem.l 44(%sp), %d2/%a0-%a2 | %d2 = count, %a0 = data, %a1 = src, %a2 = dst
155 movem.l 4(%a0), %d3-%d4 | %d3 = ch = channels
156 | %d4 = delta = data->resample_data.delta
157 swap %d4 | swap delta to high word to use
158 | carries to increment position
160 move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase
161 move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1]
162 lea.l 12(%a0, %d3.l*4), %a4 | %a4 = &data->resample_data.last_sample[ch-1]
163 lea.l (%a3, %d2.l*4), %a5 | %a5 = src_end = &s[count]
164 move.l (%a4), %d0 | %d0 = last = data->resample_data.last_sample[ch-1]
165 move.l -(%a5), (%a4) | data->resample_data.last_sample[ch-1] = s[count-1]
 | (predecrement leaves %a5 = &s[count-1])
166 move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1]
167 swap %d5 | swap phase to high word to use
168 | carries to increment position
169 move.l %d5, %d6 | %d6 = pos = phase >> 16
171 eor.l %d5, %d6 | pos == 0?
172 beq.b .usstart_0 | no? transition from down
173 cmp.l %d2, %d6 | past end of samples? (pos >= count)
174 bge.b .usloop_skip | yes? skip loop
175 lea.l -4(%a3, %d6.l*4), %a3 | %a3 = s = &s[pos-1] (previous)
176 move.l (%a3)+, %d0 | %d0 = *s++
177 .word 0x51fa | tpf.w - trap next instruction
179 move.l %d6, %d0 | move previous sample to %d0
181 move.l (%a3)+, %d1 | fetch next sample
182 move.l %d1, %d6 | save sample value
183 sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
185 lsr.l #1, %d5 | make phase into frac
186 mac.l %d1, %d5, %acc0 | %acc0 = diff * frac
187 lsl.l #1, %d5 | restore frac to phase
188 movclr.l %acc0, %d7 | %d7 = product
189 add.l %d0, %d7 | %d7 = last + product
190 move.l %d7, (%a4)+ | *d++ = %d7
191 add.l %d4, %d5 | phase += delta
192 bcc.b .usloop_0 | load next values?
193 cmp.l %a5, %a3 | src <= src_end?
194 ble.b .usloop_1 | yes? continue resampling
196 subq.l #1, %d3 | ch > 0?
197 bgt.b .uschannel_loop | yes? process next channel
198 swap %d5 | wrap phase to start of next frame
199 move.l %d5, 12(%a0) | ...and save in data->resample_data.phase
200 move.l %a4, %d0 | return d - d[0]
202 movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables
203 asr.l #2, %d0 | convert bytes->samples
204 lea.l 40(%sp), %sp | cleanup stack
207 .size dsp_upsample,.usend-dsp_upsample
209 /* These routines might benefit from burst transfers but we'll keep them
210 * small for now since they're rather lightweight.
213 /****************************************************************************
214 * void channels_process_sound_chan_mono(int count, int32_t *buf[])
216 * Mix left and right channels 50/50 into a center channel.
 *
 * count - number of samples, loaded into %d0
 * buf   - buf[0] and buf[1] are the two channel buffers; each sample
 *         is replaced by l/2 + r/2 computed with the emac unit
219 .global channels_process_sound_chan_mono
220 channels_process_sound_chan_mono:
221 movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf
222 lea.l -12(%sp), %sp | save registers
224 movem.l %d1-%d3, (%sp) |
225 move.l #0xb0, %macsr | put emac in rounding fractional mode
226 movem.l (%a0), %a0-%a1 | get channel pointers: %a0 = buf[0], %a1 = buf[1]
227 move.l #0x40000000, %d3 | %d3 = 0.5
229 move.l (%a0), %d1 | L = R = l/2 + r/2
230 mac.l %d1, %d3, (%a1), %d2, %acc0 | acc = l * 0.5, load r into %d2
231 mac.l %d2, %d3, %acc0 | acc += r * 0.5
232 movclr.l %acc0, %d1 | %d1 = mixed sample, clear %acc0
233 move.l %d1, (%a0)+ | output to original buffer
237 movem.l (%sp), %d1-%d3 | restore registers
239 lea.l 12(%sp), %sp | cleanup
242 .size channels_process_sound_chan_mono, .cpmono_end-channels_process_sound_chan_mono
245 /****************************************************************************
246 * void channels_process_sound_chan_custom(int count, int32_t *buf[])
248 * Apply stereo width (narrowing/expanding) effect.
 *
 * count - number of samples, loaded into %d0
 * buf   - buf[0] and buf[1] are the two channel buffers
 *
 * Each output is a weighted sum of both inputs using the globals
 * dsp_sw_gain (same-channel weight) and dsp_sw_cross (opposite-channel
 * weight): L = l*gain + r*cross, R = r*gain + l*cross.
251 .global channels_process_sound_chan_custom
252 channels_process_sound_chan_custom:
253 movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf
254 lea.l -16(%sp), %sp | save registers
256 movem.l %d1-%d4, (%sp) |
257 move.l #0xb0, %macsr | put emac in rounding fractional mode
258 movem.l (%a0), %a0-%a1 | get channel pointers: %a0 = buf[0], %a1 = buf[1]
259 move.l dsp_sw_gain, %d3 | load straight (mid) gain
260 move.l dsp_sw_cross, %d4 | load cross (side) gain
263 mac.l %d1, %d3, (%a1), %d2, %acc0 | L = l*gain + r*cross
264 mac.l %d1, %d4 , %acc1 | R = r*gain + l*cross
265 mac.l %d2, %d4 , %acc0 | acc0 += r*cross
266 mac.l %d2, %d3 , %acc1 | acc1 += r*gain
267 movclr.l %acc0, %d1 | %d1 = L result, clear %acc0
268 movclr.l %acc1, %d2 | %d2 = R result, clear %acc1
273 movem.l (%sp), %d1-%d4 | restore registers
275 lea.l 16(%sp), %sp | cleanup
278 .size channels_process_sound_chan_custom, .cpcustom_end-channels_process_sound_chan_custom
280 /****************************************************************************
281 * void channels_process_sound_chan_karaoke(int count, int32_t *buf[])
283 * Separate channels into side channels.
 *
 * count - number of samples, loaded into %d0
 * buf   - buf[0] and buf[1] are the two channel buffers
 *
 * Computes R = r/2 - l/2 with the emac unit and L as its negation,
 * l/2 - r/2.
286 .global channels_process_sound_chan_karaoke
287 channels_process_sound_chan_karaoke:
288 movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf
289 lea.l -16(%sp), %sp | save registers
291 movem.l %d1-%d4, (%sp) |
292 move.l #0xb0, %macsr | put emac in rounding fractional mode
293 movem.l (%a0), %a0-%a1 | get channel pointers: %a0 = buf[0], %a1 = buf[1]
294 move.l #0x40000000, %d4 | %d4 = 0.5
297 msac.l %d1, %d4, (%a1), %d2, %acc0 | R = r/2 - l/2
298 mac.l %d2, %d4 , %acc0 | acc += r * 0.5
299 movclr.l %acc0, %d1 | %d1 = R result, clear %acc0
301 neg.l %d1 | L = -R = -(r/2 - l/2) = l/2 - r/2
305 movem.l (%sp), %d1-%d4 | restore registers
307 lea.l 16(%sp), %sp | cleanup
310 .size channels_process_sound_chan_karaoke, .cpkaraoke_end-channels_process_sound_chan_karaoke
311 /****************************************************************************
312 * void sample_output_stereo(int count, struct dsp_data *data,
313 * int32_t *src[], int16_t *dst)
315 * Framework based on the ubiquitous Rockbox line transfer logic for
318 * Does emac clamping and scaling (which proved faster than the usual
319 * checks and branches - even single test clamping) and writes using
320 * line burst transfers. Also better than writing a single L-R pair per
321 * loop but a good deal more code.
323 * Attempting bursting during reads is rather futile since the source and
324 * destination alignments rarely agree and too much complication will
325 * slow us up. The parallel loads seem to do a bit better at least until
326 * a pcm buffer can always give line aligned chunk and then aligning the
327 * dest can then imply the source is aligned if the source buffers are.
328 * For now longword alignment is assumed of both the source and dest.
 *
 * count - number of stereo frames; the end address is dst + count*4
 * data  - the first longword of *data is read to derive the multiplier
 * src   - src[0] and src[1] are the left and right 32-bit sample buffers
 * dst   - interleaved 16-bit output, one longword per L/R pair
 *
 * Output is written in three phases: single longwords up to the first
 * 16-byte line bound, whole lines of four frames, then single trailing
 * longwords. The line phase is entered only when the second line bound
 * does not lie beyond the end address; otherwise everything is written by
 * the trailing longword loop so that nothing is stored past the end.
332 .global sample_output_stereo
333 sample_output_stereo:
334 lea.l -44(%sp), %sp | save registers
335 move.l %macsr, %d1 | do it now as at many lines will
336 movem.l %d1-%d7/%a2-%a5, (%sp) | be the far more common condition
337 move.l #0x80, %macsr | put emac unit in signed int mode
338 movem.l 48(%sp), %a0-%a2/%a4 | %a0 = count, %a1 = data, %a2 = src, %a4 = dst
339 lea.l (%a4, %a0.l*4), %a0 | %a0 = end address
340 move.l (%a1), %d1 | %a1 = multiplier: (1 << (16 - scale))
346 movem.l (%a2), %a2-%a3 | get L/R channel pointers
347 moveq.l #28, %d0 | %d0 = second line bound
349 and.l #0xfffffff0, %d0 |
350 cmp.l %d0, %a0 | at least a full line before the end address?
351 blo.w .sos_longloop_1_start | no? jump to trailing longword
352 sub.l #16, %d0 | %d0 = first line bound
353 cmp.l %a4, %d0 | any leading longwords?
354 bls.b .sos_lineloop_start | no? jump to line loop
356 move.l (%a2)+, %d1 | read longword from L and R
357 mac.l %d1, %a1, (%a3)+, %d2, %acc0 | shift L to high word
358 mac.l %d2, %a1, %acc1 | shift R to high word
359 movclr.l %acc0, %d1 | get possibly saturated results
360 movclr.l %acc1, %d2 |
361 swap %d2 | move R to low word
362 move.w %d2, %d1 | interleave MS 16 bits of each
363 move.l %d1, (%a4)+ | ...and write both
365 bhi.b .sos_longloop_0 |
367 lea.l -12(%a0), %a5 | %a5 = at or just before last line bound
369 move.l (%a3)+, %d4 | get next 4 R samples and scale
370 mac.l %d4, %a1, (%a3)+, %d5, %acc0 | with saturation
371 mac.l %d5, %a1, (%a3)+, %d6, %acc1 |
372 mac.l %d6, %a1, (%a3)+, %d7, %acc2 |
373 mac.l %d7, %a1, (%a2)+, %d0, %acc3 |
374 lea.l 16(%a4), %a4 | increment dest here, mitigate stalls
375 movclr.l %acc0, %d4 | obtain R results
376 movclr.l %acc1, %d5 |
377 movclr.l %acc2, %d6 |
378 movclr.l %acc3, %d7 |
379 mac.l %d0, %a1, (%a2)+, %d1, %acc0 | get next 4 L samples and scale
380 mac.l %d1, %a1, (%a2)+, %d2, %acc1 | with saturation
381 mac.l %d2, %a1, (%a2)+, %d3, %acc2 |
382 mac.l %d3, %a1 , %acc3 |
383 swap %d4 | a) interleave most significant...
387 movclr.l %acc0, %d0 | obtain L results
388 movclr.l %acc1, %d1 |
389 movclr.l %acc2, %d2 |
390 movclr.l %acc3, %d3 |
391 move.w %d4, %d0 | a) ... 16 bits of L and R
395 movem.l %d0-%d3, -16(%a4) | write four stereo samples
397 bhi.b .sos_lineloop |
398 .sos_longloop_1_start:
399 cmp.l %a4, %a0 | any longwords left?
400 bls.b .sos_done | no? finished.
402 move.l (%a2)+, %d1 | handle trailing longwords
403 mac.l %d1, %a1, (%a3)+, %d2, %acc0 | the same way as leading ones
404 mac.l %d2, %a1, %acc1 |
405 movclr.l %acc0, %d1 |
406 movclr.l %acc1, %d2 |
411 bhi.b .sos_longloop_1 |
413 movem.l (%sp), %d1-%d7/%a2-%a5 | restore registers
415 lea.l 44(%sp), %sp | cleanup
418 .size sample_output_stereo, .sos_end-sample_output_stereo
420 /****************************************************************************
421 * void sample_output_mono(int count, struct dsp_data *data,
422 * int32_t *src[], int16_t *dst)
424 * Same treatment as sample_output_stereo but for one channel.
 *
 * count - number of output frames; the end address is dst + count*4
 * data  - the first longword of *data is read to derive the multiplier
 * src   - src[0] is the single 32-bit source channel
 * dst   - 16-bit output; each source sample is duplicated into both
 *         halves of one output longword
 *
 * Output is written in three phases: single longwords up to the first
 * 16-byte line bound, whole lines of four frames, then single trailing
 * longwords. The line phase is entered only when the second line bound
 * does not lie beyond the end address; otherwise everything is written by
 * the trailing longword loop so that nothing is stored past the end.
427 .global sample_output_mono
429 lea.l -28(%sp), %sp | save registers
430 move.l %macsr, %d1 | do it now as at many lines will
431 movem.l %d1-%d5/%a2-%a3, (%sp) | be the far more common condition
432 move.l #0x80, %macsr | put emac unit in signed int mode
433 movem.l 32(%sp), %a0-%a3 | %a0 = count, %a1 = data, %a2 = src, %a3 = dst
434 lea.l (%a3, %a0.l*4), %a0 | %a0 = end address
435 move.l (%a1), %d1 | %d5 = multiplier: (1 << (16 - scale))
440 movem.l (%a2), %a2 | get source channel pointer
441 moveq.l #28, %d0 | %d0 = second line bound
443 and.l #0xfffffff0, %d0 |
444 cmp.l %d0, %a0 | at least a full line before the end address?
445 blo.w .som_longloop_1_start | no? jump to trailing longword
446 sub.l #16, %d0 | %d0 = first line bound
447 cmp.l %a3, %d0 | any leading longwords?
448 bls.b .som_lineloop_start | no? jump to line loop
450 move.l (%a2)+, %d1 | read longword from source channel
451 mac.l %d1, %d5, %acc0 | shift sample to high word
452 movclr.l %acc0, %d1 | get possibly saturated results
454 swap %d2 | move high word of %d2 to low word
455 move.w %d2, %d1 | duplicate single channel into
456 move.l %d1, (%a3)+ | L and R
458 bhi.b .som_longloop_0 |
460 lea.l -12(%a0), %a1 | %a1 = at or just before last line bound
462 move.l (%a2)+, %d0 | get next 4 source samples and scale
463 mac.l %d0, %d5, (%a2)+, %d1, %acc0 | with saturation
464 mac.l %d1, %d5, (%a2)+, %d2, %acc1 |
465 mac.l %d2, %d5, (%a2)+, %d3, %acc2 |
466 mac.l %d3, %d5 , %acc3 |
467 lea.l 16(%a3), %a3 | increment dest here, mitigate stalls
468 movclr.l %acc0, %d0 | obtain results
469 movclr.l %acc1, %d1 |
470 movclr.l %acc2, %d2 |
471 movclr.l %acc3, %d3 |
472 move.l %d0, %d4 | duplicate single channel
473 swap %d4 | into L and R
484 movem.l %d0-%d3, -16(%a3) | write four stereo samples
486 bhi.b .som_lineloop |
487 .som_longloop_1_start:
488 cmp.l %a3, %a0 | any longwords left?
489 bls.b .som_done | no? finished.
491 move.l (%a2)+, %d1 | handle trailing longwords
492 mac.l %d1, %d5, %acc0 | the same way as leading ones
493 movclr.l %acc0, %d1 |
499 bhi.b .som_longloop_1 |
501 movem.l (%sp), %d1-%d5/%a2-%a3 | restore registers
503 lea.l 28(%sp), %sp | cleanup
506 .size sample_output_mono, .som_end-sample_output_mono