1 /***************************************************************************
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 * Copyright (C) 2006 Thom Johansen
11 * Portions Copyright (C) 2007 Michael Sevakis
13 * All files in this archive are subject to the GNU General Public License.
14 * See the file COPYING in the source tree root for full license agreement.
16 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
17 * KIND, either express or implied.
19 ****************************************************************************/
21 /****************************************************************************
22 * void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[])
/*
 * Multiplies the samples of every channel by data->gain using the EMAC
 * unit.
 *
 * After the 20-byte register save the arguments are read from the stack:
 * count from 24(%sp), data from 28(%sp) and buf from 32(%sp).  Channels
 * are handled from buf[num_channels-1] downwards (%d1 counts them).
 * Each mac.l multiplies the current sample by the gain while loading the
 * next one, so the read pointer %a2 runs ahead of %a3, which is set up
 * as the destination pointer.  The value taken from %acc0 is shifted
 * left by 8 (the gain is in s8.23 format).
 * %d2-%d4/%a2-%a3 are saved on entry and restored before returning.
 */
26 .global dsp_apply_gain
28 lea.l -20(%sp), %sp | save registers
29 movem.l %d2-%d4/%a2-%a3, (%sp) |
30 movem.l 28(%sp), %a0-%a1 | %a0 = data, %a1 = buf
32 move.l 4(%a0), %d1 | %d1 = data->num_channels
33 move.l 32(%a0), %a0 | %a0 = data->gain (in s8.23)
35 move.l 24(%sp), %d0 | %d0 = count
36 move.l -4(%a1, %d1.l*4), %a2 | %a2 = s = buf[ch-1]
37 move.l %a2, %a3 | %a3 = d = s
38 move.l (%a2)+, %d2 | %d2 = *s++,
39 mac.l %a0, %d2, (%a2)+, %d2, %acc0 | %acc0 = S(n)*gain, load S(n+1)
40 subq.l #1, %d0 | --count > 0 ? : effectively n++
41 ble.b 30f | loop done | no? finish up
43 move.l %accext01, %d4 | fetch S(n-1)[7:0]
44 movclr.l %acc0, %d3 | fetch S(n-1)[40:8] in %d3[31:0]
45 asl.l #8, %d3 | *s++ = (S(n-1)[40:8] << 8) | S(n-1)[7:0]
46 mac.l %a0, %d2, (%a2)+, %d2, %acc0 | %acc0 = S(n)*gain, load S(n+1)
49 subq.l #1, %d0 | --count > 0 ? : effectively n++
50 bgt.b 20b | loop | yes? do more samples
52 move.l %accext01, %d4 | fetch S(n-1)[7:0]
53 movclr.l %acc0, %d3 | fetch S(n-1)[40:8] in %d3[31:0]
54 asl.l #8, %d3 | *s = (S(n-1)[40:8] << 8) | S(n-1)[7:0]
57 subq.l #1, %d1 | next channel
58 bgt.b 10b | channel loop |
59 movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers
60 lea.l 20(%sp), %sp | cleanup stack
62 .size dsp_apply_gain,.-dsp_apply_gain
64 /****************************************************************************
65 * void apply_crossfeed(int count, int32_t *buf[])
/*
 * Applies the crossfeed effect to a stereo pair in place: the samples
 * are read through %a4/%a5 and the results are written back through the
 * same pointers.
 *
 * State is read from crossfeed_data at these byte offsets:
 *   +0          direct gain               -> %d6
 *   +4  .. +12  three filter coefficients -> %a1-%a3
 *   +16 .. +28  four history samples      -> %d0-%d3
 *   +136        current delay line ptr    -> %a0
 * crossfeed_data+136 is also the wrap limit held in %a6: once %a0
 * reaches it, the pointer is moved back by 104 bytes.  The .word 0x51fb
 * is annotated as tpf.l and sits directly in front of the wrapping lea;
 * presumably it swallows that lea when the bhs is not taken - confirm
 * against the ColdFire reference.
 * On exit the history samples and the delay line pointer are stored
 * back to the offsets they were loaded from.
 */
69 .global apply_crossfeed
72 movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs
73 movem.l 48(%sp), %d7/%a4 | %d7 = count, %a4 = src
74 movem.l (%a4), %a4-%a5 | %a4 = src[0], %a5 = src[1]
75 lea.l crossfeed_data, %a1 | %a1 = &crossfeed_data
76 move.l (%a1)+, %d6 | %d6 = direct gain
77 movem.l 12(%a1), %d0-%d3 | fetch filter history samples
78 move.l 132(%a1), %a0 | fetch delay line address
79 movem.l (%a1), %a1-%a3 | load filter coefs
80 lea.l crossfeed_data+136, %a6 | %a6 = delay line wrap limit
81 bra.b 20f | loop start | go to loop start point
82 /* Register usage in loop:
83 * %a0 = delay_p, %a1..%a3 = b0, b1, a1 (filter coefs),
84 * %a4 = buf[0], %a5 = buf[1],
85 * %a6 = delay line pointer wrap limit,
92 movclr.l %acc0, %d4 | write outputs
93 move.l %d4, (%a4)+ | .
94 movclr.l %acc1, %d5 | .
95 move.l %d5, (%a5)+ | .
97 mac.l %a2, %d0, (%a0)+, %d0, %acc0 | %acc0 = b1*dl[n - 1], %d0 = dl[n]
98 mac.l %a1, %d0 , %acc0 | %acc0 += b0*dl[n]
99 mac.l %a3, %d1, (%a5), %d5, %acc0 | %acc0 += a1*y_r[n - 1], load R
100 mac.l %a2, %d2, (%a0)+, %d2, %acc1 | %acc1 = b1*dr[n - 1], %d2 = dr[n]
101 mac.l %a1, %d2 , %acc1 | %acc1 += b0*dr[n]
102 mac.l %a3, %d3, (%a4), %d4, %acc1 | %acc1 += a1*y_l[n - 1], load L
103 movem.l %d4-%d5, -8(%a0) | save left & right inputs to delay line
104 move.l %acc0, %d3 | get filtered delayed left sample (y_l[n])
105 move.l %acc1, %d1 | get filtered delayed right sample (y_r[n])
106 mac.l %d6, %d4, %acc0 | %acc0 += gain*x_l[n]
107 mac.l %d6, %d5, %acc1 | %acc1 += gain*x_r[n]
108 cmp.l %a6, %a0 | wrap %a0 if passed end
109 bhs.b 30f | wrap buffer |
110 .word 0x51fb | tpf.l | trap the buffer wrap
111 30: | wrap buffer | ...fwd taken branches more costly
112 lea.l -104(%a0), %a0 | wrap it up
113 subq.l #1, %d7 | --count > 0 ?
114 bgt.b 10b | loop | yes? do more
115 movclr.l %acc0, %d4 | write last outputs
116 move.l %d4, (%a4) | .
117 movclr.l %acc1, %d5 | .
118 move.l %d5, (%a5) | .
119 lea.l crossfeed_data+16, %a1 | save data back to struct
120 movem.l %d0-%d3, (%a1) | ...history
121 move.l %a0, 120(%a1) | ...delay_p
122 movem.l (%sp), %d2-%d7/%a2-%a6 | restore all regs
125 .size apply_crossfeed,.-apply_crossfeed
127 /****************************************************************************
128 * int dsp_downsample(int count, struct dsp_data *data,
129 * int32_t *src[], int32_t *dst[])
/*
 * Resamples each channel with linear interpolation between s[pos-1] and
 * s[pos], advancing the phase by delta for every output sample.
 *
 * Fields read from data: num_channels at +4, resample delta at +8,
 * resample phase at +12 and the per-channel last_sample[] array from
 * +16.  For every channel (processed from the last one downwards) the
 * stored last sample is fetched into %d0 and then replaced by
 * s[count-1]; it stands in for s[pos-1] when pos is 0.
 * After the channel loop the phase has (count << 16) subtracted and is
 * written back to data.  The return value in %d0 is derived from the
 * final destination pointer %a4 and converted from bytes to samples.
 * %d2-%d7/%a2-%a5 are saved on entry and restored before returning.
 */
133 .global dsp_downsample
135 lea.l -40(%sp), %sp | save non-clobberables
136 movem.l %d2-%d7/%a2-%a5, (%sp) |
137 movem.l 44(%sp), %d2/%a0-%a2 | %d2 = count, %a0 = data, %a1 = src, %a2 = dst
141 movem.l 4(%a0), %d3-%d4 | %d3 = ch = data->num_channels
142 | %d4 = delta = data->resample_data.delta
143 moveq.l #16, %d7 | %d7 = shift
145 move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase
146 move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1]
147 move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1]
148 lea.l 12(%a0, %d3.l*4), %a5 | %a5 = &data->resample_data.last_sample[ch-1]
149 move.l (%a5), %d0 | %d0 = last = data->resample_data.last_sample[ch-1]
150 move.l -4(%a3, %d2.l*4), (%a5) | data->resample_data.last_sample[ch-1] = s[count-1]
151 move.l %d5, %d6 | %d6 = pos = phase >> 16
153 cmp.l %d2, %d6 | past end of samples?
154 bge.b 40f | skip resample loop| yes? skip loop
155 tst.l %d6 | need last sample of prev. frame?
156 bne.b 20f | resample loop | no? start main loop
157 move.l (%a3, %d6.l*4), %d1 | %d1 = s[pos]
158 bra.b 30f | resample start last | start with last (last in %d0)
159 20: | resample loop |
160 lea.l -4(%a3, %d6.l*4), %a5 | load s[pos-1] and s[pos]
161 movem.l (%a5), %d0-%d1 |
162 30: | resample start last |
163 sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
164 move.l %d0, %acc0 | %acc0 = previous sample
165 move.l %d5, %d0 | frac = (phase << 16) >> 1
168 mac.l %d0, %d1, %acc0 | %acc0 += frac * diff
169 add.l %d4, %d5 | phase += delta
170 move.l %d5, %d6 | pos = phase >> 16
172 movclr.l %acc0, %d0 |
173 move.l %d0, (%a4)+ | *d++ = %d0
174 cmp.l %d2, %d6 | pos < count?
175 blt.b 20b | resample loop | yes? continue resampling
176 40: | skip resample loop |
177 subq.l #1, %d3 | ch > 0?
178 bgt.b 10b | channel loop | yes? process next channel
179 lsl.l %d7, %d2 | wrap phase to start of next frame
180 sub.l %d2, %d5 | data->resample_data.phase =
181 move.l %d5, 12(%a0) | ... phase - (count << 16)
182 move.l %a4, %d0 | return d - d[0]
184 asr.l #2, %d0 | convert bytes->samples
185 movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables
186 lea.l 40(%sp), %sp | cleanup stack
188 .size dsp_downsample,.-dsp_downsample
190 /****************************************************************************
191 * int dsp_upsample(int count, struct dsp_data *dsp,
192 * int32_t *src[], int32_t *dst[])
/*
 * Resamples each channel with linear interpolation, producing one or
 * more output samples per input sample.
 *
 * Fields read from data: num_channels at +4, resample delta at +8,
 * resample phase at +12 and the per-channel last_sample[] array from
 * +16.  Both delta and phase are word-swapped so that "add.l %d4, %d5"
 * sets the carry flag when the position moves on to the next input
 * sample: bcc stays on the current input pair, otherwise the next
 * sample is fetched while %a3 has not passed src_end (%a5).
 * %d6 keeps the most recently fetched sample so it can become the
 * "previous" sample of the next pair.  The stored last sample of the
 * previous frame is used as s[-1] and is replaced by s[count-1].
 * The phase is swapped back and written to data after the channel
 * loop.  The return value in %d0 is derived from the final destination
 * pointer %a4 and converted from bytes to samples.
 * %d2-%d7/%a2-%a5 are saved on entry and restored before returning.
 */
198 lea.l -40(%sp), %sp | save non-clobberables
199 movem.l %d2-%d7/%a2-%a5, (%sp) |
200 movem.l 44(%sp), %d2/%a0-%a2 | %d2 = count
204 movem.l 4(%a0), %d3-%d4 | %d3 = ch = channels
205 | %d4 = delta = data->resample_data.delta
206 swap %d4 | swap delta to high word to use...
207 | ...carries to increment position
209 move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase
210 move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1]
211 lea.l 12(%a0, %d3.l*4), %a4 | %a4 = &data->resample_data.last_sample[ch-1]
212 lea.l -4(%a3, %d2.l*4), %a5 | %a5 = src_end = &s[count-1]
213 move.l (%a4), %d0 | %d0 = last = data->resample_data.last_sample[ch-1]
214 move.l (%a5), (%a4) | data->resample_data.last_sample[ch-1] = s[count-1]
215 move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1]
216 move.l (%a3)+, %d1 | fetch first sample - might throw this...
217 | ...away later but we'll be preincremented
218 move.l %d1, %d6 | save sample value
219 sub.l %d0, %d1 | %d1 = diff = s[0] - last
220 swap %d5 | swap phase to high word to use
221 | carries to increment position
222 move.l %d5, %d7 | %d7 = pos = phase >> 16
224 eor.l %d5, %d7 | pos == 0?
225 beq.b 40f | loop start | yes? start loop
226 cmp.l %d2, %d7 | past end of samples?
227 bge.b 50f | skip resample loop| yes? go to next channel and collect info
228 lea.l (%a3, %d7.l*4), %a3 | %a3 = s = &s[pos+1]
229 movem.l -8(%a3), %d0-%d1 | %d0 = s[pos-1], %d1 = s[pos]
230 move.l %d1, %d6 | save sample value
231 sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
232 bra.b 40f | loop start |
233 20: | next sample loop |
234 move.l %d6, %d0 | move previous sample to %d0
235 move.l (%a3)+, %d1 | fetch next sample
236 move.l %d1, %d6 | save sample value
237 sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
238 30: | same sample loop |
239 movclr.l %acc0, %d7 | %d7 = result
240 move.l %d7, (%a4)+ | *d++ = %d7
242 lsr.l #1, %d5 | make phase into frac
243 move.l %d0, %acc0 | %acc0 = s[pos-1]
244 mac.l %d1, %d5, %acc0 | %acc0 += diff * frac
245 lsl.l #1, %d5 | restore frac to phase
246 add.l %d4, %d5 | phase += delta
247 bcc.b 30b | same sample loop | load next values?
248 cmp.l %a5, %a3 | src <= src_end?
249 bls.b 20b | next sample loop | yes? continue resampling
250 movclr.l %acc0, %d7 | %d7 = result
251 move.l %d7, (%a4)+ | *d++ = %d7
252 50: | skip resample loop |
253 subq.l #1, %d3 | ch > 0?
254 bgt.b 10b | channel loop | yes? process next channel
255 swap %d5 | wrap phase to start of next frame
256 move.l %d5, 12(%a0) | ...and save in data->resample_data.phase
257 move.l %a4, %d0 | return d - d[0]
259 movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables
260 asr.l #2, %d0 | convert bytes->samples
261 lea.l 40(%sp), %sp | cleanup stack
263 .size dsp_upsample,.-dsp_upsample
265 /****************************************************************************
266 * void channels_process_sound_chan_mono(int count, int32_t *buf[])
268 * Mix left and right channels 50/50 into a center channel.
/*
 * Mixes the two channels of buf into a mono signal.
 *
 * count and buf are fetched from 4(%sp)/8(%sp) before the register save
 * moves the stack pointer.  %a0/%a1 are the read pointers for the two
 * channels; %a2/%a3 are set up as separate write pointers to the same
 * buffers because the reads run one sample ahead of the writes.
 * Both inputs are multiplied by %d3 = 0x40000000 (0.5 per the inline
 * comment) and summed in %acc0, so each result is l/2 + r/2.
 * %d2-%d4/%a2-%a3 are saved on entry and restored before returning.
 */
272 .global channels_process_sound_chan_mono
273 channels_process_sound_chan_mono:
274 movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf
275 lea.l -20(%sp), %sp | save registers
276 movem.l %d2-%d4/%a2-%a3, (%sp) |
277 movem.l (%a0), %a0-%a1 | get channel pointers
278 move.l %a0, %a2 | use separate dst pointers since read
279 move.l %a1, %a3 | pointers run one ahead of write
280 move.l #0x40000000, %d3 | %d3 = 0.5
281 move.l (%a0)+, %d1 | prime the input registers
283 mac.l %d1, %d3, (%a0)+, %d1, %acc0 |
284 mac.l %d2, %d3, (%a1)+, %d2, %acc0 |
286 ble.s 20f | loop done |
288 movclr.l %acc0, %d4 | L = R = l/2 + r/2
289 mac.l %d1, %d3, (%a0)+, %d1, %acc0 |
290 mac.l %d2, %d3, (%a1)+, %d2, %acc0 |
291 move.l %d4, (%a2)+ | output to original buffer
296 movclr.l %acc0, %d4 | output last sample
299 movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers
300 lea.l 20(%sp), %sp | cleanup
302 .size channels_process_sound_chan_mono, \
303 .-channels_process_sound_chan_mono
305 /****************************************************************************
306 * void channels_process_sound_chan_custom(int count, int32_t *buf[])
308 * Apply stereo width (narrowing/expanding) effect.
/*
 * Applies the stereo width effect using the two gains held in the
 * globals dsp_sw_gain (%d3) and dsp_sw_cross (%d4).
 *
 * count and buf are fetched from 4(%sp)/8(%sp) before the register save
 * moves the stack pointer.  %a0/%a1 are the read pointers for the two
 * channels; %a2/%a3 are set up as separate write pointers to the same
 * buffers because the reads run one sample ahead of the writes.
 * %acc0 accumulates l*gain + r*cross and %acc1 accumulates
 * r*gain + l*cross; the results are collected in %d5 and %d6.
 * %d2-%d6/%a2-%a3 are saved on entry and restored before returning.
 */
312 .global channels_process_sound_chan_custom
313 channels_process_sound_chan_custom:
314 movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf
315 lea.l -28(%sp), %sp | save registers
316 movem.l %d2-%d6/%a2-%a3, (%sp) |
317 movem.l (%a0), %a0-%a1 | get channel pointers
318 move.l %a0, %a2 | use separate dst pointers since read
319 move.l %a1, %a3 | pointers run one ahead of write
320 move.l dsp_sw_gain, %d3 | load straight (mid) gain
321 move.l dsp_sw_cross, %d4 | load cross (side) gain
322 move.l (%a0)+, %d1 | prime the input registers
324 mac.l %d1, %d3 , %acc0 | L = l*gain + r*cross
325 mac.l %d1, %d4, (%a0)+, %d1, %acc1 | R = r*gain + l*cross
326 mac.l %d2, %d4 , %acc0 |
327 mac.l %d2, %d3, (%a1)+, %d2, %acc1 |
329 ble.b 20f | loop done |
331 movclr.l %acc0, %d5 |
332 movclr.l %acc1, %d6 |
333 mac.l %d1, %d3 , %acc0 | L = l*gain + r*cross
334 mac.l %d1, %d4, (%a0)+, %d1, %acc1 | R = r*gain + l*cross
335 mac.l %d2, %d4 , %acc0 |
336 mac.l %d2, %d3, (%a1)+, %d2, %acc1 |
342 movclr.l %acc0, %d5 | output last sample
343 movclr.l %acc1, %d6 |
346 movem.l (%sp), %d2-%d6/%a2-%a3 | restore registers
347 lea.l 28(%sp), %sp | cleanup
349 .size channels_process_sound_chan_custom, \
350 .-channels_process_sound_chan_custom
352 /****************************************************************************
353 * void channels_process_sound_chan_karaoke(int count, int32_t *buf[])
355 * Separate channels into side channels.
/*
 * Karaoke effect: computes l/2 - r/2 for one output and its negation
 * for the other.
 *
 * count and buf are fetched from 4(%sp)/8(%sp) before the register save
 * moves the stack pointer.  %a0/%a1 are the read pointers for the two
 * channels; %a2/%a3 are set up as separate write pointers to the same
 * buffers because the reads run one sample ahead of the writes.
 * mac.l adds l * %d3 and msac.l subtracts r * %d3 in %acc0, with
 * %d3 = 0x40000000 (0.5 per the inline comment).  The result is taken
 * into %d4 and then negated with neg.l to form the second output.
 * %d2-%d4/%a2-%a3 are saved on entry and restored before returning.
 */
359 .global channels_process_sound_chan_karaoke
360 channels_process_sound_chan_karaoke:
361 movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf
362 lea.l -20(%sp), %sp | save registers
363 movem.l %d2-%d4/%a2-%a3, (%sp) |
364 movem.l (%a0), %a0-%a1 | get channel src pointers
365 move.l %a0, %a2 | use separate dst pointers since read
366 move.l %a1, %a3 | pointers run one ahead of write
367 move.l #0x40000000, %d3 | %d3 = 0.5
368 move.l (%a0)+, %d1 | prime the input registers
370 mac.l %d1, %d3, (%a0)+, %d1, %acc0 | L = l/2 - r/2
371 msac.l %d2, %d3, (%a1)+, %d2, %acc0 |
373 ble.b 20f | loop done |
375 movclr.l %acc0, %d4 |
376 mac.l %d1, %d3, (%a0)+, %d1, %acc0 | L = l/2 - r/2
377 msac.l %d2, %d3, (%a1)+, %d2, %acc0 |
379 neg.l %d4 | R = -L = -(l/2 - r/2) = r/2 - l/2
384 movclr.l %acc0, %d4 | output last sample
386 neg.l %d4 | R = -L = -(l/2 - r/2) = r/2 - l/2
388 movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers
389 lea.l 20(%sp), %sp | cleanup
391 .size channels_process_sound_chan_karaoke, \
392 .-channels_process_sound_chan_karaoke
394 /****************************************************************************
395 * void sample_output_stereo(int count, struct dsp_data *data,
396 * int32_t *src[], int16_t *dst)
398 * Framework based on the ubiquitous Rockbox line transfer logic for
401 * Does emac clamping and scaling (which proved faster than the usual
402 * checks and branches - even single test clamping) and writes using
403 * line burst transfers. Also better than writing a single L-R pair per
404 * loop but a good deal more code.
406 * Attempting bursting during reads is rather futile since the source and
407 * destination alignments rarely agree and too much complication will
408 * slow us up. The parallel loads seem to do a bit better at least until
409 * a pcm buffer can always give line aligned chunk and then aligning the
410 * dest can then imply the source is aligned if the source buffers are.
411 * For now longword alignment is assumed of both the source and dest.
/*
 * Converts two 32-bit source channels into interleaved 16-bit L/R
 * pairs, one destination longword per stereo sample.
 *
 * After the 44-byte register save the four arguments are loaded from
 * 48(%sp) into %a0 (count), %a1 (data), %a2 (src) and %a4 (dst);
 * %a0 is then turned into the destination end address dst + count*4.
 * %macsr is copied to %d1 and saved with the other registers before the
 * EMAC mode is changed with #0x80.
 * Each sample is multiplied by the scale factor in %a1 and read back
 * with movclr.l; the high words of the left and right results are then
 * combined into one longword.
 * The output is produced in three phases, with the line-bound value
 * kept in %d0 throughout (it is masked with 0xfffffff0, i.e. 16-byte
 * lines):
 *   - leading longwords, one stereo sample at a time,
 *   - the line loop, four stereo samples per iteration written with a
 *     single movem.l,
 *   - trailing longwords, handled like the leading ones.
 */
416 .global sample_output_stereo
417 sample_output_stereo:
418 lea.l -44(%sp), %sp | save registers
419 move.l %macsr, %d1 | do it now as at many lines will
420 movem.l %d1-%d7/%a2-%a5, (%sp) | be the far more common condition
421 move.l #0x80, %macsr | put emac unit in signed int mode
422 movem.l 48(%sp), %a0-%a2/%a4 |
423 lea.l (%a4, %a0.l*4), %a0 | %a0 = end address
424 move.l (%a1), %d1 | %a1 = multiplier: (1 << (16 - scale))
430 movem.l (%a2), %a2-%a3 | get L/R channel pointers
431 moveq.l #28, %d0 | %d0 = second line bound
433 and.l #0xfffffff0, %d0 |
434 cmp.l %a0, %d0 | at least a full line?
435 bhi.w 40f | long loop 1 start | no? do as trailing longwords
436 sub.l #16, %d0 | %d1 = first line bound
437 cmp.l %a4, %d0 | any leading longwords?
438 bls.b 20f | line loop start | no? start line loop
440 move.l (%a2)+, %d1 | read longword from L and R
441 mac.l %d1, %a1, (%a3)+, %d2, %acc0 | shift L to high word
442 mac.l %d2, %a1, %acc1 | shift R to high word
443 movclr.l %acc0, %d1 | get possibly saturated results
444 movclr.l %acc1, %d2 |
445 swap %d2 | move R to low word
446 move.w %d2, %d1 | interleave MS 16 bits of each
447 move.l %d1, (%a4)+ | ...and write both
449 bhi.b 10b | long loop 0 |
450 20: | line loop start |
451 lea.l -12(%a0), %a5 | %a5 = at or just before last line bound
453 move.l (%a3)+, %d4 | get next 4 R samples and scale
454 mac.l %d4, %a1, (%a3)+, %d5, %acc0 | with saturation
455 mac.l %d5, %a1, (%a3)+, %d6, %acc1 |
456 mac.l %d6, %a1, (%a3)+, %d7, %acc2 |
457 mac.l %d7, %a1, (%a2)+, %d0, %acc3 |
458 lea.l 16(%a4), %a4 | increment dest here, mitigate stalls
459 movclr.l %acc0, %d4 | obtain R results
460 movclr.l %acc1, %d5 |
461 movclr.l %acc2, %d6 |
462 movclr.l %acc3, %d7 |
463 mac.l %d0, %a1, (%a2)+, %d1, %acc0 | get next 4 L samples and scale
464 mac.l %d1, %a1, (%a2)+, %d2, %acc1 | with saturation
465 mac.l %d2, %a1, (%a2)+, %d3, %acc2 |
466 mac.l %d3, %a1 , %acc3 |
467 swap %d4 | a) interleave most significant...
471 movclr.l %acc0, %d0 | obtain L results
472 movclr.l %acc1, %d1 |
473 movclr.l %acc2, %d2 |
474 movclr.l %acc3, %d3 |
475 move.w %d4, %d0 | a) ... 16 bits of L and R
479 movem.l %d0-%d3, -16(%a4) | write four stereo samples
481 bhi.b 30b | line loop |
482 40: | long loop 1 start |
483 cmp.l %a4, %a0 | any longwords left?
484 bls.b 60f | output end | no? stop
486 move.l (%a2)+, %d1 | handle trailing longwords
487 mac.l %d1, %a1, (%a3)+, %d2, %acc0 | the same way as leading ones
488 mac.l %d2, %a1, %acc1 |
489 movclr.l %acc0, %d1 |
490 movclr.l %acc1, %d2 |
495 bhi.b 50b | long loop 1
497 movem.l (%sp), %d1-%d7/%a2-%a5 | restore registers
499 lea.l 44(%sp), %sp | cleanup
501 .size sample_output_stereo, .-sample_output_stereo
503 /****************************************************************************
504 * void sample_output_mono(int count, struct dsp_data *data,
505 * int32_t *src[], int16_t *dst)
507 * Same treatment as sample_output_stereo but for one channel.
/*
 * Converts a single 32-bit source channel into 16-bit output where each
 * destination longword carries the same sample in both halves.
 *
 * After the 28-byte register save the four arguments are loaded from
 * 32(%sp) into %a0 (count), %a1 (data), %a2 (src) and %a3 (dst);
 * %a0 is then turned into the destination end address dst + count*4 and
 * %a2 is replaced by the first pointer stored in src.
 * %macsr is copied to %d1 and saved with the other registers before the
 * EMAC mode is changed with #0x80.
 * Each sample is multiplied by the scale factor in %d5 and read back
 * with movclr.l.
 * The output is produced in three phases, with the line-bound value
 * kept in %d0 (masked with 0xfffffff0, i.e. 16-byte lines):
 *   - leading longwords, one sample at a time,
 *   - the line loop, four samples per iteration written with a single
 *     movem.l,
 *   - trailing longwords, handled like the leading ones.
 */
511 .global sample_output_mono
513 lea.l -28(%sp), %sp | save registers
514 move.l %macsr, %d1 | do it now as at many lines will
515 movem.l %d1-%d5/%a2-%a3, (%sp) | be the far more common condition
516 move.l #0x80, %macsr | put emac unit in signed int mode
517 movem.l 32(%sp), %a0-%a3 |
518 lea.l (%a3, %a0.l*4), %a0 | %a0 = end address
519 move.l (%a1), %d1 | %d5 = multiplier: (1 << (16 - scale))
524 movem.l (%a2), %a2 | get source channel pointer
525 moveq.l #28, %d0 | %d0 = second line bound
527 and.l #0xfffffff0, %d0 |
528 cmp.l %a0, %d0 | at least a full line?
529 bhi.w 40f | long loop 1 start | no? do as trailing longwords
530 sub.l #16, %d0 | %d0 = first line bound
531 cmp.l %a3, %d0 | any leading longwords?
532 bls.b 20f | line loop start | no? start line loop
534 move.l (%a2)+, %d1 | read one sample from the source channel
535 mac.l %d1, %d5, %acc0 | shift sample to high word
536 movclr.l %acc0, %d1 | get possibly saturated results
538 swap %d2 | move high word to low word
539 move.w %d2, %d1 | duplicate single channel into
540 move.l %d1, (%a3)+ | L and R
542 bhi.b 10b | long loop 0 |
543 20: | line loop start |
544 lea.l -12(%a0), %a1 | %a1 = at or just before last line bound
546 move.l (%a2)+, %d0 | get next 4 samples and scale
547 mac.l %d0, %d5, (%a2)+, %d1, %acc0 | with saturation
548 mac.l %d1, %d5, (%a2)+, %d2, %acc1 |
549 mac.l %d2, %d5, (%a2)+, %d3, %acc2 |
550 mac.l %d3, %d5 , %acc3 |
551 lea.l 16(%a3), %a3 | increment dest here, mitigate stalls
552 movclr.l %acc0, %d0 | obtain results
553 movclr.l %acc1, %d1 |
554 movclr.l %acc2, %d2 |
555 movclr.l %acc3, %d3 |
556 move.l %d0, %d4 | duplicate single channel
557 swap %d4 | into L and R
568 movem.l %d0-%d3, -16(%a3) | write four stereo samples
570 bhi.b 30b | line loop |
571 40: | long loop 1 start |
572 cmp.l %a3, %a0 | any longwords left?
573 bls.b 60f | output end | no? stop
575 move.l (%a2)+, %d1 | handle trailing longwords
576 mac.l %d1, %d5, %acc0 | the same way as leading ones
577 movclr.l %acc0, %d1 |
583 bhi.b 50b | long loop 1 |
585 movem.l (%sp), %d1-%d5/%a2-%a3 | restore registers
587 lea.l 28(%sp), %sp | cleanup
589 .size sample_output_mono, .-sample_output_mono