RaaA: Improve tagcache search to make the database built.
[maemo-rb.git] / apps / codecs / libspc / spc_dsp.c
blobb4fc57158bd93d6834d8d40330e9eead7b496672
1 /***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
10 * Copyright (C) 2007-2008 Michael Sevakis (jhMikeS)
11 * Copyright (C) 2006-2007 Adam Gashlin (hcs)
12 * Copyright (C) 2004-2007 Shay Green (blargg)
13 * Copyright (C) 2002 Brad Martin
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
21 * KIND, either express or implied.
23 ****************************************************************************/
25 /* The DSP portion (awe!) */
26 #include "codeclib.h"
27 #include "spc_codec.h"
28 #include "spc_profiler.h"
#if defined(CPU_ARM) || defined(CPU_COLDFIRE)
/* FIR sample buffer; only defined on ARM and ColdFire builds.
   Holds FIR_BUF_CNT 32-bit entries, aligned to FIR_BUF_ALIGN and placed
   according to IBSS_ATTR. */
int32_t fir_buf [FIR_BUF_CNT]
    __attribute__ ((aligned (FIR_BUF_ALIGN*1))) IBSS_ATTR;
#endif /* CPU_ARM || CPU_COLDFIRE */
#if SPC_BRRCACHE
/* Storage for pre-decoded BRR samples (filled by decode_brr()).
   BRR_CACHE_SIZE includes a little extra room for samples that run
   past the end of a wave. */
int16_t BRRcache [BRR_CACHE_SIZE] CACHEALIGN_ATTR;
#endif /* SPC_BRRCACHE */
39 void DSP_write( struct Spc_Dsp* this, int i, int data )
41 assert( (unsigned) i < REGISTER_COUNT );
43 this->r.reg [i] = data;
44 int high = i >> 4;
45 int low = i & 0x0F;
46 if ( low < 2 ) /* voice volumes */
48 int left = *(int8_t const*) &this->r.reg [i & ~1];
49 int right = *(int8_t const*) &this->r.reg [i | 1];
50 struct voice_t* v = this->voice_state + high;
51 v->volume [0] = left;
52 v->volume [1] = right;
54 else if ( low == 0x0F ) /* fir coefficients */
56 this->fir_coeff [7 - high] = (int8_t) data; /* sign-extend */
/* CLAMP16( n ): saturate 'n' to the signed 16-bit range, in place.
 *
 *   if ( n < -32768 ) n = -32768;
 *   if ( n >  32767 ) n =  32767;
 *
 * 'n' must be a modifiable integer lvalue: it is assigned to, and it is
 * evaluated more than once, so it must not have side effects. The value of
 * the whole expression is the clamped 'n' (GCC statement expression).
 * The macro parameter is parenthesized everywhere it appears in C code so
 * that any lvalue expression can be passed safely.
 */
#if ARM_ARCH >= 6
/* ARMv6 and later: a single signed-saturate instruction does the job */
#define CLAMP16( n ) \
    ({                                      \
        asm ("ssat %0, #16, %1"             \
             : "=r" ( n ) : "r"( n ) );     \
        (n);                                \
    })
#else
/* Generic version. When 'n' does not fit in 16 bits, (n) >> 31 is expected
   to be 0 for positive and -1 for negative values (assumes a 32-bit 'n' and
   an arithmetic right shift), which turns 0x7FFF into 32767 or -32768. */
#define CLAMP16( n ) \
    ({                                      \
        if ( (int16_t) (n) != (n) )         \
            (n) = 0x7FFF ^ ((n) >> 31);     \
        (n);                                \
    })
#endif
#if SPC_BRRCACHE
/* Pre-decode the BRR wave that starts at RAM + start_addr.
 *
 * The decoded samples are written to BRRcache + start_addr * 2 and the
 * cache entry this->wave_entry [raw_voice->waveform] is filled in
 * (start_addr, samples, end, loop).
 *
 * Before decoding, this->wave_entry_old is searched for an entry with the
 * same start address; if one is found it is copied and nothing is decoded.
 * A freshly decoded entry is appended to this->wave_entry_old.
 *
 * voice->addr is updated while walking the BRR blocks. Decoding stops when
 * a block header has bit 0 set, when the address reaches RAM + 0x10000, or
 * early when the next block's header requests it (see below).
 */
static void decode_brr( struct Spc_Dsp* this, unsigned start_addr,
                        struct voice_t* voice,
                        struct raw_voice_t const* const raw_voice ) ICODE_ATTR;
static void decode_brr( struct Spc_Dsp* this, unsigned start_addr,
                        struct voice_t* voice,
                        struct raw_voice_t const* const raw_voice )
{
    /* setup same variables as where decode_brr() is called from */
    #undef RAM
    #define RAM ram.ram

    struct src_dir const* const sd =
        &ram.sd[this->r.g.wave_page * 0x100/sizeof(struct src_dir)];
    struct cache_entry_t* const wave_entry =
        &this->wave_entry [raw_voice->waveform];

    /* the following block can be put in place of the call to
       decode_brr() below
     */
    {
        DEBUGF( "decode at %08x (wave #%d)\n",
                start_addr, raw_voice->waveform );

        /* see if in cache */
        int i;
        for ( i = 0; i < this->oldsize; i++ )
        {
            struct cache_entry_t* e = &this->wave_entry_old [i];
            if ( e->start_addr == start_addr )
            {
                DEBUGF( "found in wave_entry_old (oldsize=%d)\n",
                        this->oldsize );
                *wave_entry = *e;
                goto wave_in_cache;
            }
        }

        wave_entry->start_addr = start_addr;

        uint8_t const* const loop_ptr =
            RAM + letoh16(sd[raw_voice->waveform].loop);
        short* loop_start = 0; /* set once the loop block is reached */

        short* out = BRRcache + start_addr * 2;
        wave_entry->samples = out;
        *out++ = 0;
        int smp1 = 0; /* most recent decoded sample */
        int smp2 = 0; /* the one before it */

        uint8_t const* addr = RAM + start_addr;
        int block_header;
        do
        {
            if ( addr == loop_ptr )
            {
                loop_start = out;
                DEBUGF( "loop at %08lx (wave #%d)\n",
                        (unsigned long)(addr - RAM), raw_voice->waveform );
            }

            /* header */
            block_header = *addr;
            addr += 9;
            voice->addr = addr;
            /* biased so that the most common mode (0x08) compares to 0 */
            int const filter = (block_header & 0x0C) - 0x08;

            /* scaling
               (invalid scaling gives -4096 for neg nybble, 0 for pos) */
            static unsigned char const right_shifts [16] = {
                5,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 29, 29, 29,
            };
            static unsigned char const left_shifts [16] = {
                0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11, 11, 11
            };
            int const scale = block_header >> 4;
            int const right_shift = right_shifts [scale];
            int const left_shift  = left_shifts  [scale];

            /* output position: 'out' points just past this block and the
               samples are addressed with a negative offset counting up to 0.
               Written as a negated product because left-shifting a negative
               value is undefined behaviour in C. */
            out += BRR_BLOCK_SIZE;
            int offset = -(BRR_BLOCK_SIZE * 4);

            do /* decode and filter 16 samples */
            {
                /* Get nybble, sign-extend, then scale
                   get byte, select which nybble, sign-extend, then shift based
                   on scaling. also handles invalid scaling values. */
                int const nybble =
                    (int8_t) (addr [offset >> 3] << (offset & 4));
                /* multiply instead of '<<': the value may be negative and a
                   left shift of a negative value is undefined; the result is
                   the same */
                int delta = (nybble >> right_shift) * (1 << left_shift);

                out [offset >> 2] = smp2;

                if ( filter == 0 ) /* mode 0x08 (30-90% of the time) */
                {
                    delta -= smp2 >> 1;
                    delta += smp2 >> 5;
                    smp2 = smp1;
                    delta += smp1;
                    delta += (-smp1 - (smp1 >> 1)) >> 5;
                }
                else
                {
                    if ( filter == -4 ) /* mode 0x04 */
                    {
                        delta += smp1 >> 1;
                        delta += (-smp1) >> 5;
                    }
                    else if ( filter > -4 ) /* mode 0x0C */
                    {
                        delta -= smp2 >> 1;
                        delta += (smp2 + (smp2 >> 1)) >> 4;
                        delta += smp1;
                        delta += (-smp1 * 13) >> 7;
                    }
                    /* remaining case (mode 0x00): delta is used unfiltered */
                    smp2 = smp1;
                }

                delta = CLAMP16( delta );
                smp1 = (int16_t) (delta * 2); /* sign-extend */
            }
            while ( (offset += 4) != 0 );

            /* if next block has end flag set, this block ends early */
            /* (verified) */
            if ( (block_header & 3) != 3 && (*addr & 3) == 1 )
            {
                /* skip last 9 samples */
                out -= 9;
                goto early_end;
            }
        }
        while ( !(block_header & 1) && addr < RAM + 0x10000 );

        /* flush the two samples still held in the filter history */
        out [0] = smp2;
        out [1] = smp1;

    early_end:
        wave_entry->end = (out - 1 - wave_entry->samples) << 12;

        wave_entry->loop = 0;
        if ( (block_header & 2) )
        {
            if ( loop_start )
            {
                int loop = out - loop_start;
                wave_entry->loop = loop;
                wave_entry->end += 0x3000;
                /* duplicate the first loop samples after the end */
                out [2] = loop_start [2];
                out [3] = loop_start [3];
                out [4] = loop_start [4];
            }
            else
            {
                DEBUGF( "loop point outside initial wave\n" );
            }
        }

        DEBUGF( "end at %08lx (wave #%d)\n",
                (unsigned long)(addr - RAM), raw_voice->waveform );

        /* add to cache */
        this->wave_entry_old [this->oldsize++] = *wave_entry;
wave_in_cache:;
    }
}
#endif /* SPC_BRRCACHE */
248 static void key_on(struct Spc_Dsp* const this, struct voice_t* const voice,
249 struct src_dir const* const sd,
250 struct raw_voice_t const* const raw_voice,
251 const int key_on_delay, const int vbit) ICODE_ATTR;
252 static void key_on(struct Spc_Dsp* const this, struct voice_t* const voice,
253 struct src_dir const* const sd,
254 struct raw_voice_t const* const raw_voice,
255 const int key_on_delay, const int vbit) {
256 #undef RAM
257 #define RAM ram.ram
258 int const env_rate_init = 0x7800;
259 voice->key_on_delay = key_on_delay;
260 if ( key_on_delay == 0 )
262 this->keys_down |= vbit;
263 voice->envx = 0;
264 voice->env_mode = state_attack;
265 voice->env_timer = env_rate_init; /* TODO: inaccurate? */
266 unsigned start_addr = letoh16(sd[raw_voice->waveform].start);
267 #if !SPC_BRRCACHE
269 voice->addr = RAM + start_addr;
270 /* BRR filter uses previous samples */
271 voice->samples [BRR_BLOCK_SIZE + 1] = 0;
272 voice->samples [BRR_BLOCK_SIZE + 2] = 0;
273 /* decode three samples immediately */
274 voice->position = (BRR_BLOCK_SIZE + 3) * 0x1000 - 1;
275 voice->block_header = 0; /* "previous" BRR header */
277 #else
279 voice->position = 3 * 0x1000 - 1;
280 struct cache_entry_t* const wave_entry =
281 &this->wave_entry [raw_voice->waveform];
283 /* predecode BRR if not already */
284 if ( wave_entry->start_addr != start_addr )
286 /* the following line can be replaced by the indicated block
287 in decode_brr() */
288 decode_brr( this, start_addr, voice, raw_voice );
291 voice->samples = wave_entry->samples;
292 voice->wave_end = wave_entry->end;
293 voice->wave_loop = wave_entry->loop;
295 #endif
299 void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
301 #undef RAM
302 #if defined(CPU_ARM) && !SPC_BRRCACHE
303 uint8_t* const ram_ = ram.ram;
304 #define RAM ram_
305 #else
306 #define RAM ram.ram
307 #endif
308 #if 0
309 EXIT_TIMER(cpu);
310 ENTER_TIMER(dsp);
311 #endif
313 /* Here we check for keys on/off. Docs say that successive writes
314 to KON/KOF must be separated by at least 2 Ts periods or risk
315 being neglected. Therefore DSP only looks at these during an
316 update, and not at the time of the write. Only need to do this
317 once however, since the regs haven't changed over the whole
318 period we need to catch up with. */
321 int key_ons = this->r.g.key_ons;
322 int key_offs = this->r.g.key_offs;
323 /* keying on a voice resets that bit in ENDX */
324 this->r.g.wave_ended &= ~key_ons;
325 /* key_off bits prevent key_on from being acknowledged */
326 this->r.g.key_ons = key_ons & key_offs;
328 /* process key events outside loop, since they won't re-occur */
329 struct voice_t* voice = this->voice_state + 8;
330 int vbit = 0x80;
333 --voice;
334 if ( key_offs & vbit )
336 voice->env_mode = state_release;
337 voice->key_on_delay = 0;
339 else if ( key_ons & vbit )
341 voice->key_on_delay = 8;
344 while ( (vbit >>= 1) != 0 );
347 struct src_dir const* const sd =
348 &ram.sd[this->r.g.wave_page * 0x100/sizeof(struct src_dir)];
350 #ifdef ROCKBOX_BIG_ENDIAN
351 /* Convert endiannesses before entering loops - these
352 get used a lot */
353 const uint32_t rates[VOICE_COUNT] =
355 GET_LE16A( this->r.voice[0].rate ) & 0x3FFF,
356 GET_LE16A( this->r.voice[1].rate ) & 0x3FFF,
357 GET_LE16A( this->r.voice[2].rate ) & 0x3FFF,
358 GET_LE16A( this->r.voice[3].rate ) & 0x3FFF,
359 GET_LE16A( this->r.voice[4].rate ) & 0x3FFF,
360 GET_LE16A( this->r.voice[5].rate ) & 0x3FFF,
361 GET_LE16A( this->r.voice[6].rate ) & 0x3FFF,
362 GET_LE16A( this->r.voice[7].rate ) & 0x3FFF,
364 #define VOICE_RATE(x) *(x)
365 #define IF_RBE(...) __VA_ARGS__
366 #ifdef CPU_COLDFIRE
367 /* Initialize mask register with the buffer address mask */
368 asm volatile ("move.l %[m], %%mask" : : [m]"i"(FIR_BUF_MASK));
369 const int echo_wrap = (this->r.g.echo_delay & 15) * 0x800;
370 const int echo_start = this->r.g.echo_page * 0x100;
371 #endif /* CPU_COLDFIRE */
372 #else
373 #define VOICE_RATE(x) (GET_LE16(raw_voice->rate) & 0x3FFF)
374 #define IF_RBE(...)
375 #endif /* ROCKBOX_BIG_ENDIAN */
377 #if !SPC_NOINTERP
378 int const slow_gaussian = (this->r.g.pitch_mods >> 1) |
379 this->r.g.noise_enables;
380 #endif
381 /* (g.flags & 0x40) ? 30 : 14 */
382 int const global_muting = ((this->r.g.flags & 0x40) >> 2) + 14 - 8;
383 int const global_vol_0 = this->r.g.volume_0;
384 int const global_vol_1 = this->r.g.volume_1;
386 /* each rate divides exactly into 0x7800 without remainder */
387 int const env_rate_init = 0x7800;
388 static unsigned short const env_rates [0x20] ICONST_ATTR =
390 0x0000, 0x000F, 0x0014, 0x0018, 0x001E, 0x0028, 0x0030, 0x003C,
391 0x0050, 0x0060, 0x0078, 0x00A0, 0x00C0, 0x00F0, 0x0140, 0x0180,
392 0x01E0, 0x0280, 0x0300, 0x03C0, 0x0500, 0x0600, 0x0780, 0x0A00,
393 0x0C00, 0x0F00, 0x1400, 0x1800, 0x1E00, 0x2800, 0x3C00, 0x7800
396 do /* one pair of output samples per iteration */
398 /* Noise */
399 if ( this->r.g.noise_enables )
401 if ( (this->noise_count -=
402 env_rates [this->r.g.flags & 0x1F]) <= 0 )
404 this->noise_count = env_rate_init;
405 int feedback = (this->noise << 13) ^ (this->noise << 14);
406 this->noise = (feedback & 0x8000) ^ (this->noise >> 1 & ~1);
410 #if !SPC_NOECHO
411 int echo_0 = 0;
412 int echo_1 = 0;
413 #endif
414 long prev_outx = 0; /* TODO: correct value for first channel? */
415 int chans_0 = 0;
416 int chans_1 = 0;
417 /* TODO: put raw_voice pointer in voice_t? */
418 struct raw_voice_t * raw_voice = this->r.voice;
419 struct voice_t* voice = this->voice_state;
420 int vbit = 1;
421 IF_RBE( const uint32_t* vr = rates; )
422 for ( ; vbit < 0x100; vbit <<= 1, ++voice, ++raw_voice IF_RBE( , ++vr ) )
424 /* pregen involves checking keyon, etc */
425 #if 0
426 ENTER_TIMER(dsp_pregen);
427 #endif
429 /* Key on events are delayed */
430 int key_on_delay = voice->key_on_delay;
432 if ( UNLIKELY ( --key_on_delay >= 0 ) ) /* <1% of the time */
434 key_on(this,voice,sd,raw_voice,key_on_delay,vbit);
437 if ( !(this->keys_down & vbit) ) /* Silent channel */
439 silent_chan:
440 raw_voice->envx = 0;
441 raw_voice->outx = 0;
442 prev_outx = 0;
443 continue;
446 /* Envelope */
448 int const ENV_RANGE = 0x800;
449 int env_mode = voice->env_mode;
450 int adsr0 = raw_voice->adsr [0];
451 int env_timer;
452 if ( LIKELY ( env_mode != state_release ) ) /* 99% of the time */
454 env_timer = voice->env_timer;
455 if ( LIKELY ( adsr0 & 0x80 ) ) /* 79% of the time */
457 int adsr1 = raw_voice->adsr [1];
458 if ( LIKELY ( env_mode == state_sustain ) ) /* 74% of the time */
460 if ( (env_timer -= env_rates [adsr1 & 0x1F]) > 0 )
461 goto write_env_timer;
463 int envx = voice->envx;
464 envx--; /* envx *= 255 / 256 */
465 envx -= envx >> 8;
466 voice->envx = envx;
467 /* TODO: should this be 8? */
468 raw_voice->envx = envx >> 4;
469 goto init_env_timer;
471 else if ( env_mode < 0 ) /* 25% state_decay */
473 int envx = voice->envx;
474 if ( (env_timer -=
475 env_rates [(adsr0 >> 3 & 0x0E) + 0x10]) <= 0 )
477 envx--; /* envx *= 255 / 256 */
478 envx -= envx >> 8;
479 voice->envx = envx;
480 /* TODO: should this be 8? */
481 raw_voice->envx = envx >> 4;
482 env_timer = env_rate_init;
485 int sustain_level = adsr1 >> 5;
486 if ( envx <= (sustain_level + 1) * 0x100 )
487 voice->env_mode = state_sustain;
489 goto write_env_timer;
491 else /* state_attack */
493 int t = adsr0 & 0x0F;
494 if ( (env_timer -= env_rates [t * 2 + 1]) > 0 )
495 goto write_env_timer;
497 int envx = voice->envx;
499 int const step = ENV_RANGE / 64;
500 envx += step;
501 if ( t == 15 )
502 envx += ENV_RANGE / 2 - step;
504 if ( envx >= ENV_RANGE )
506 envx = ENV_RANGE - 1;
507 voice->env_mode = state_decay;
509 voice->envx = envx;
510 /* TODO: should this be 8? */
511 raw_voice->envx = envx >> 4;
512 goto init_env_timer;
515 else /* gain mode */
517 int t = raw_voice->gain;
518 if ( t < 0x80 )
520 raw_voice->envx = t;
521 voice->envx = t << 4;
522 goto env_end;
524 else
526 if ( (env_timer -= env_rates [t & 0x1F]) > 0 )
527 goto write_env_timer;
529 int envx = voice->envx;
530 int mode = t >> 5;
531 if ( mode <= 5 ) /* decay */
533 int step = ENV_RANGE / 64;
534 if ( mode == 5 ) /* exponential */
536 envx--; /* envx *= 255 / 256 */
537 step = envx >> 8;
539 if ( (envx -= step) < 0 )
541 envx = 0;
542 if ( voice->env_mode == state_attack )
543 voice->env_mode = state_decay;
546 else /* attack */
548 int const step = ENV_RANGE / 64;
549 envx += step;
550 if ( mode == 7 &&
551 envx >= ENV_RANGE * 3 / 4 + step )
552 envx += ENV_RANGE / 256 - step;
554 if ( envx >= ENV_RANGE )
555 envx = ENV_RANGE - 1;
557 voice->envx = envx;
558 /* TODO: should this be 8? */
559 raw_voice->envx = envx >> 4;
560 goto init_env_timer;
564 else /* state_release */
566 int envx = voice->envx;
567 if ( (envx -= ENV_RANGE / 256) > 0 )
569 voice->envx = envx;
570 raw_voice->envx = envx >> 8;
571 goto env_end;
573 else
575 /* bit was set, so this clears it */
576 this->keys_down ^= vbit;
577 voice->envx = 0;
578 goto silent_chan;
581 init_env_timer:
582 env_timer = env_rate_init;
583 write_env_timer:
584 voice->env_timer = env_timer;
585 env_end:;
587 #if 0
588 EXIT_TIMER(dsp_pregen);
590 ENTER_TIMER(dsp_gen);
591 #endif
592 #if !SPC_BRRCACHE
593 /* Decode BRR block */
594 if ( voice->position >= BRR_BLOCK_SIZE * 0x1000 )
596 voice->position -= BRR_BLOCK_SIZE * 0x1000;
598 uint8_t const* addr = voice->addr;
599 if ( addr >= RAM + 0x10000 )
600 addr -= 0x10000;
602 /* action based on previous block's header */
603 if ( voice->block_header & 1 )
605 addr = RAM + letoh16(sd[raw_voice->waveform].loop);
606 this->r.g.wave_ended |= vbit;
607 if ( !(voice->block_header & 2) ) /* 1% of the time */
609 /* first block was end block;
610 don't play anything (verified) */
611 /* bit was set, so this clears it */
612 this->keys_down ^= vbit;
614 /* since voice->envx is 0,
615 samples and position don't matter */
616 raw_voice->envx = 0;
617 voice->envx = 0;
618 goto skip_decode;
622 /* header */
623 int const block_header = *addr;
624 addr += 9;
625 voice->addr = addr;
626 voice->block_header = block_header;
628 /* previous samples */
629 int smp2 = voice->samples [BRR_BLOCK_SIZE + 1];
630 int smp1 = voice->samples [BRR_BLOCK_SIZE + 2];
631 voice->samples [0] = voice->samples [BRR_BLOCK_SIZE];
633 /* output position */
634 short* out = voice->samples + (1 + BRR_BLOCK_SIZE);
635 int offset = -BRR_BLOCK_SIZE << 2;
637 /* if next block has end flag set,
638 this block ends early (verified) */
639 if ( (block_header & 3) != 3 && (*addr & 3) == 1 )
641 /* arrange for last 9 samples to be skipped */
642 int const skip = 9;
643 out += (skip & 1);
644 voice->samples [skip] = voice->samples [BRR_BLOCK_SIZE];
645 voice->position += skip * 0x1000;
646 offset = (-BRR_BLOCK_SIZE + (skip & ~1)) << 2;
647 addr -= skip / 2;
648 /* force sample to end on next decode */
649 voice->block_header = 1;
652 int const filter = block_header & 0x0c;
653 int const scale = block_header >> 4;
655 if ( filter == 0x08 ) /* filter 2 (30-90% of the time) */
657 /* y[n] = x[n] + 61/32 * y[n-1] - 15/16 * y[n-2] */
658 do /* decode and filter 16 samples */
660 /* Get nybble, sign-extend, then scale
661 get byte, select which nybble, sign-extend, then shift
662 based on scaling. */
663 int delta = (int8_t)(addr [offset >> 3] << (offset & 4)) >> 4;
664 delta = (delta << scale) >> 1;
666 if (scale > 0xc)
667 delta = (delta >> 17) << 11;
669 out [offset >> 2] = smp2;
671 delta -= smp2 >> 1;
672 delta += smp2 >> 5;
673 delta += smp1;
674 delta += (-smp1 - (smp1 >> 1)) >> 5;
676 delta = CLAMP16( delta );
677 smp2 = smp1;
678 smp1 = (int16_t) (delta * 2); /* sign-extend */
680 while ( (offset += 4) != 0 );
682 else if ( filter == 0x04 ) /* filter 1 */
684 /* y[n] = x[n] + 15/16 * y[n-1] */
685 do /* decode and filter 16 samples */
687 /* Get nybble, sign-extend, then scale
688 get byte, select which nybble, sign-extend, then shift
689 based on scaling. */
690 int delta = (int8_t)(addr [offset >> 3] << (offset & 4)) >> 4;
691 delta = (delta << scale) >> 1;
693 if (scale > 0xc)
694 delta = (delta >> 17) << 11;
696 out [offset >> 2] = smp2;
698 delta += smp1 >> 1;
699 delta += (-smp1) >> 5;
701 delta = CLAMP16( delta );
702 smp2 = smp1;
703 smp1 = (int16_t) (delta * 2); /* sign-extend */
705 while ( (offset += 4) != 0 );
707 else if ( filter == 0x0c ) /* filter 3 */
709 /* y[n] = x[n] + 115/64 * y[n-1] - 13/16 * y[n-2] */
710 do /* decode and filter 16 samples */
712 /* Get nybble, sign-extend, then scale
713 get byte, select which nybble, sign-extend, then shift
714 based on scaling. */
715 int delta = (int8_t)(addr [offset >> 3] << (offset & 4)) >> 4;
716 delta = (delta << scale) >> 1;
718 if (scale > 0xc)
719 delta = (delta >> 17) << 11;
721 out [offset >> 2] = smp2;
723 delta -= smp2 >> 1;
724 delta += (smp2 + (smp2 >> 1)) >> 4;
725 delta += smp1;
726 delta += (-smp1 * 13) >> 7;
728 delta = CLAMP16( delta );
729 smp2 = smp1;
730 smp1 = (int16_t) (delta * 2); /* sign-extend */
732 while ( (offset += 4) != 0 );
734 else /* filter 0 */
736 /* y[n] = x[n] */
737 do /* decode and filter 16 samples */
739 /* Get nybble, sign-extend, then scale
740 get byte, select which nybble, sign-extend, then shift
741 based on scaling. */
742 int delta = (int8_t)(addr [offset >> 3] << (offset & 4)) >> 4;
743 delta = (delta << scale) >> 1;
745 if (scale > 0xc)
746 delta = (delta >> 17) << 11;
748 out [offset >> 2] = smp2;
750 smp2 = smp1;
751 smp1 = delta * 2;
753 while ( (offset += 4) != 0 );
756 out [0] = smp2;
757 out [1] = smp1;
759 skip_decode:;
761 #endif /* !SPC_BRRCACHE */
762 /* Get rate (with possible modulation) */
763 int rate = VOICE_RATE(vr);
764 if ( this->r.g.pitch_mods & vbit )
765 rate = (rate * (prev_outx + 32768)) >> 15;
767 #if !SPC_NOINTERP
768 /* Interleaved gauss table (to improve cache coherency). */
769 /* gauss [i * 2 + j] = normal_gauss [(1 - j) * 256 + i] */
770 static short const gauss [512] =
772 370,1305, 366,1305, 362,1304, 358,1304, 354,1304, 351,1304, 347,1304, 343,1303,
773 339,1303, 336,1303, 332,1302, 328,1302, 325,1301, 321,1300, 318,1300, 314,1299,
774 311,1298, 307,1297, 304,1297, 300,1296, 297,1295, 293,1294, 290,1293, 286,1292,
775 283,1291, 280,1290, 276,1288, 273,1287, 270,1286, 267,1284, 263,1283, 260,1282,
776 257,1280, 254,1279, 251,1277, 248,1275, 245,1274, 242,1272, 239,1270, 236,1269,
777 233,1267, 230,1265, 227,1263, 224,1261, 221,1259, 218,1257, 215,1255, 212,1253,
778 210,1251, 207,1248, 204,1246, 201,1244, 199,1241, 196,1239, 193,1237, 191,1234,
779 188,1232, 186,1229, 183,1227, 180,1224, 178,1221, 175,1219, 173,1216, 171,1213,
780 168,1210, 166,1207, 163,1205, 161,1202, 159,1199, 156,1196, 154,1193, 152,1190,
781 150,1186, 147,1183, 145,1180, 143,1177, 141,1174, 139,1170, 137,1167, 134,1164,
782 132,1160, 130,1157, 128,1153, 126,1150, 124,1146, 122,1143, 120,1139, 118,1136,
783 117,1132, 115,1128, 113,1125, 111,1121, 109,1117, 107,1113, 106,1109, 104,1106,
784 102,1102, 100,1098, 99,1094, 97,1090, 95,1086, 94,1082, 92,1078, 90,1074,
785 89,1070, 87,1066, 86,1061, 84,1057, 83,1053, 81,1049, 80,1045, 78,1040,
786 77,1036, 76,1032, 74,1027, 73,1023, 71,1019, 70,1014, 69,1010, 67,1005,
787 66,1001, 65, 997, 64, 992, 62, 988, 61, 983, 60, 978, 59, 974, 58, 969,
788 56, 965, 55, 960, 54, 955, 53, 951, 52, 946, 51, 941, 50, 937, 49, 932,
789 48, 927, 47, 923, 46, 918, 45, 913, 44, 908, 43, 904, 42, 899, 41, 894,
790 40, 889, 39, 884, 38, 880, 37, 875, 36, 870, 36, 865, 35, 860, 34, 855,
791 33, 851, 32, 846, 32, 841, 31, 836, 30, 831, 29, 826, 29, 821, 28, 816,
792 27, 811, 27, 806, 26, 802, 25, 797, 24, 792, 24, 787, 23, 782, 23, 777,
793 22, 772, 21, 767, 21, 762, 20, 757, 20, 752, 19, 747, 19, 742, 18, 737,
794 17, 732, 17, 728, 16, 723, 16, 718, 15, 713, 15, 708, 15, 703, 14, 698,
795 14, 693, 13, 688, 13, 683, 12, 678, 12, 674, 11, 669, 11, 664, 11, 659,
796 10, 654, 10, 649, 10, 644, 9, 640, 9, 635, 9, 630, 8, 625, 8, 620,
797 8, 615, 7, 611, 7, 606, 7, 601, 6, 596, 6, 592, 6, 587, 6, 582,
798 5, 577, 5, 573, 5, 568, 5, 563, 4, 559, 4, 554, 4, 550, 4, 545,
799 4, 540, 3, 536, 3, 531, 3, 527, 3, 522, 3, 517, 2, 513, 2, 508,
800 2, 504, 2, 499, 2, 495, 2, 491, 2, 486, 1, 482, 1, 477, 1, 473,
801 1, 469, 1, 464, 1, 460, 1, 456, 1, 451, 1, 447, 1, 443, 1, 439,
802 0, 434, 0, 430, 0, 426, 0, 422, 0, 418, 0, 414, 0, 410, 0, 405,
803 0, 401, 0, 397, 0, 393, 0, 389, 0, 385, 0, 381, 0, 378, 0, 374,
805 /* Gaussian interpolation using most recent 4 samples */
806 long position = voice->position;
807 voice->position += rate;
808 short const* interp = voice->samples + (position >> 12);
809 int offset = position >> 4 & 0xFF;
811 /* Only left half of gaussian kernel is in table, so we must mirror
812 for right half */
813 short const* fwd = gauss + offset * 2;
814 short const* rev = gauss + 510 - offset * 2;
816 /* Use faster gaussian interpolation when exact result isn't needed
817 by pitch modulator of next channel */
818 int amp_0, amp_1; /* Also serve as temps _0, and _1 */
819 if ( LIKELY ( !(slow_gaussian & vbit) ) ) /* 99% of the time */
821 /* Main optimization is lack of clamping. Not a problem since
822 output never goes more than +/- 16 outside 16-bit range and
823 things are clamped later anyway. Other optimization is to
824 preserve fractional accuracy, eliminating several masks. */
825 #if defined (CPU_ARM)
826 int output;
827 int _2, _3; /* All-purpose temps */
828 /* Multiple ASM blocks keep regs free and reduce result
829 * latency issues. */
830 #if ARM_ARCH >= 6
831 /* Interpolate */
832 asm volatile (
833 "ldr %[_0], [%[interp]] \r\n" /* _0=i0i1 */
834 "ldr %[_2], [%[fwd]] \r\n" /* _2=f0f1 */
835 "ldr %[_1], [%[interp], #4] \r\n" /* _1=i2i3 */
836 "ldr %[_3], [%[rev]] \r\n" /* _3=r0r1 */
837 "smuad %[out], %[_0], %[_2] \r\n" /* out=f0*i0 + f1*i1 */
838 "smladx %[out], %[_1], %[_3], %[out] \r\n" /* out+=r1*i2 + r0*i3 */
839 : [out]"=r"(output),
840 [_0]"=&r"(amp_0), [_1]"=&r"(amp_1),
841 [_2]"=&r"(_2), [_3]"=r"(_3)
842 : [fwd]"r"(fwd), [rev]"r"(rev),
843 [interp]"r"(interp));
844 /* Apply voice envelope */
845 asm volatile (
846 "mov %[_2], %[out], asr #(11-5) \r\n" /* To do >> 16 later */
847 "mul %[out], %[_2], %[envx] \r\n" /* and avoid exp. shift */
848 : [out]"+r"(output), [_2]"=&r"(_2)
849 : [envx]"r"((int)voice->envx));
850 /* Apply left and right volume */
851 asm volatile (
852 "smulwb %[amp_0], %[out], %[vvol_0] \r\n" /* (32x16->48)[47:16]->[31:0] */
853 "smulwb %[amp_1], %[out], %[vvol_1] \r\n"
854 : [out]"+r"(output),
855 [amp_0]"=&r"(amp_0), [amp_1]"=r"(amp_1)
856 : [vvol_0]"r"(voice->volume[0]),
857 [vvol_1]"r"(voice->volume[1]));
859 raw_voice->outx = output >> (8+5); /* 'output' still 5 bits too big */
860 #else /* ARM_ARCH < 6 */
861 /* Perform gaussian interpolation on four samples */
862 asm volatile (
863 "ldrsh %[_0], [%[interp]] \r\n"
864 "ldrsh %[_2], [%[fwd]] \r\n"
865 "ldrsh %[_1], [%[interp], #2] \r\n"
866 "ldrsh %[_3], [%[fwd], #2] \r\n"
867 "mul %[out], %[_0], %[_2] \r\n" /* out= fwd[0]*interp[0] */
868 "ldrsh %[_0], [%[interp], #4] \r\n"
869 "ldrsh %[_2], [%[rev], #2] \r\n"
870 "mla %[out], %[_1], %[_3], %[out] \r\n" /* out+=fwd[1]*interp[1] */
871 "ldrsh %[_1], [%[interp], #6] \r\n"
872 "ldrsh %[_3], [%[rev]] \r\n"
873 "mla %[out], %[_0], %[_2], %[out] \r\n" /* out+=rev[1]*interp[2] */
874 "mla %[out], %[_1], %[_3], %[out] \r\n" /* out+=rev[0]*interp[3] */
875 : [out]"=&r"(output),
876 [_0]"=&r"(amp_0), [_1]"=&r"(amp_1),
877 [_2]"=&r"(_2), [_3]"=&r"(_3)
878 : [fwd]"r"(fwd), [rev]"r"(rev),
879 [interp]"r"(interp));
880 /* Apply voice envelope */
881 asm volatile (
882 "mov %[_2], %[out], asr #11 \r\n"
883 "mul %[out], %[_2], %[envx] \r\n"
884 : [out]"+r"(output), [_2]"=&r"(_2)
885 : [envx]"r"((int)voice->envx));
886 /* Reduce and apply left and right volume */
887 asm volatile (
888 "mov %[out], %[out], asr #11 \r\n"
889 "mul %[amp_0], %[out], %[vvol_0] \r\n"
890 "mul %[amp_1], %[out], %[vvol_1] \r\n"
891 : [out]"+r"(output),
892 [amp_0]"=&r"(amp_0), [amp_1]"=r"(amp_1)
893 : [vvol_0]"r"((int)voice->volume[0]),
894 [vvol_1]"r"((int)voice->volume[1]));
896 raw_voice->outx = output >> 8;
897 #endif /* ARM_ARCH */
898 #else /* Unoptimized CPU */
899 int output = (((fwd [0] * interp [0] +
900 fwd [1] * interp [1] +
901 rev [1] * interp [2] +
902 rev [0] * interp [3] ) >> 11) * voice->envx) >> 11;
904 /* duplicated here to give compiler more to run in parallel */
905 amp_0 = voice->volume [0] * output;
906 amp_1 = voice->volume [1] * output;
908 raw_voice->outx = output >> 8;
909 #endif /* CPU_* */
911 else /* slow gaussian */
913 #if defined(CPU_ARM)
914 #if ARM_ARCH >= 6
915 int output = *(int16_t*) &this->noise;
917 if ( !(this->r.g.noise_enables & vbit) )
919 /* Interpolate */
920 int _2, _3;
921 asm volatile (
922 /* NOTE: often-unaligned accesses */
923 "ldr %[_0], [%[interp]] \r\n" /* _0=i0i1 */
924 "ldr %[_2], [%[fwd]] \r\n" /* _2=f0f1 */
925 "ldr %[_1], [%[interp], #4] \r\n" /* _1=i2i3 */
926 "ldr %[_3], [%[rev]] \r\n" /* _3=f2f3 */
927 "smulbb %[out], %[_0], %[_2] \r\n" /* out=f0*i0 */
928 "smultt %[_0], %[_0], %[_2] \r\n" /* _0=f1*i1 */
929 "smulbt %[_2], %[_1], %[_3] \r\n" /* _2=r1*i2 */
930 "smultb %[_3], %[_1], %[_3] \r\n" /* _3=r0*i3 */
931 : [out]"=r"(output),
932 [_0]"=&r"(amp_0), [_1]"=&r"(amp_1),
933 [_2]"=&r"(_2), [_3]"=r"(_3)
934 : [fwd]"r"(fwd), [rev]"r"(rev),
935 [interp]"r"(interp));
936 asm volatile (
937 "mov %[out], %[out], asr#12 \r\n"
938 "add %[_0], %[out], %[_0], asr #12 \r\n"
939 "add %[_2], %[_0], %[_2], asr #12 \r\n"
940 "pkhbt %[_0], %[_2], %[_3], asl #4 \r\n" /* _3[31:16], _2[15:0] */
941 "sadd16 %[_0], %[_0], %[_0] \r\n" /* _3[31:16]*2, _2[15:0]*2 */
942 "qsubaddx %[out], %[_0], %[_0] \r\n" /* out[15:0]=
943 * sat16(_3[31:16]+_2[15:0]) */
944 : [out]"+r"(output),
945 [_0]"+r"(amp_0), [_2]"+r"(_2), [_3]"+r"(_3));
947 /* Apply voice envelope */
948 asm volatile (
949 "smulbb %[out], %[out], %[envx] \r\n"
950 : [out]"+r"(output)
951 : [envx]"r"(voice->envx));
952 /* Reduce and apply left and right volume */
953 asm volatile (
954 "mov %[out], %[out], asr #11 \r\n"
955 "bic %[out], %[out], #0x1 \r\n"
956 "mul %[amp_0], %[out], %[vvol_0] \r\n"
957 "mul %[amp_1], %[out], %[vvol_1] \r\n"
958 : [out]"+r"(output),
959 [amp_0]"=&r"(amp_0), [amp_1]"=r"(amp_1)
960 : [vvol_0]"r"((int)voice->volume[0]),
961 [vvol_1]"r"((int)voice->volume[1]));
963 prev_outx = output;
964 raw_voice->outx = output >> 8;
965 #else /* ARM_ARCH < 6 */
966 int output = *(int16_t*) &this->noise;
968 if ( !(this->r.g.noise_enables & vbit) )
970 /* Interpolate */
971 int _2, _3;
972 asm volatile (
973 "ldrsh %[_0], [%[interp]] \r\n"
974 "ldrsh %[_2], [%[fwd]] \r\n"
975 "ldrsh %[_1], [%[interp], #2] \r\n"
976 "ldrsh %[_3], [%[fwd], #2] \r\n"
977 "mul %[out], %[_2], %[_0] \r\n" /* fwd[0]*interp[0] */
978 "ldrsh %[_2], [%[rev], #2] \r\n"
979 "mul %[_0], %[_3], %[_1] \r\n" /* fwd[1]*interp[1] */
980 "ldrsh %[_1], [%[interp], #4] \r\n"
981 "mov %[out], %[out], asr #12 \r\n"
982 "ldrsh %[_3], [%[rev]] \r\n"
983 "mul %[_2], %[_1], %[_2] \r\n" /* rev[1]*interp[2] */
984 "ldrsh %[_1], [%[interp], #6] \r\n"
985 "add %[_0], %[out], %[_0], asr #12 \r\n"
986 "mul %[_3], %[_1], %[_3] \r\n" /* rev[0]*interp[3] */
987 "add %[_2], %[_0], %[_2], asr #12 \r\n"
988 "mov %[_2], %[_2], lsl #17 \r\n"
989 "mov %[_3], %[_3], asr #12 \r\n"
990 "mov %[_3], %[_3], asl #1 \r\n"
991 "add %[out], %[_3], %[_2], asr #16 \r\n"
992 : [out]"=&r"(output),
993 [_0]"=&r"(amp_0), [_1]"=&r"(amp_1),
994 [_2]"=&r"(_2), [_3]"=&r"(_3)
995 : [fwd]"r"(fwd), [rev]"r"(rev),
996 [interp]"r"(interp));
998 output = CLAMP16(output);
1000 /* Apply voice envelope */
1001 asm volatile (
1002 "mul %[_0], %[out], %[envx] \r\n"
1003 : [_0]"=r"(amp_0)
1004 : [out]"r"(output), [envx]"r"((int)voice->envx));
1005 /* Reduce and apply left and right volume */
1006 asm volatile (
1007 "mov %[out], %[amp_0], asr #11 \r\n" /* amp_0 = _0 */
1008 "bic %[out], %[out], #0x1 \r\n"
1009 "mul %[amp_0], %[out], %[vvol_0] \r\n"
1010 "mul %[amp_1], %[out], %[vvol_1] \r\n"
1011 : [out]"+r"(output),
1012 [amp_0]"+r"(amp_0), [amp_1]"=r"(amp_1)
1013 : [vvol_0]"r"((int)voice->volume[0]),
1014 [vvol_1]"r"((int)voice->volume[1]));
1016 prev_outx = output;
1017 raw_voice->outx = output >> 8;
1018 #endif /* ARM_ARCH >= 6 */
1019 #else /* Unoptimized CPU */
1020 int output = *(int16_t*) &this->noise;
1022 if ( !(this->r.g.noise_enables & vbit) )
1024 output = (fwd [0] * interp [0]) & ~0xFFF;
1025 output = (output + fwd [1] * interp [1]) & ~0xFFF;
1026 output = (output + rev [1] * interp [2]) >> 12;
1027 output = (int16_t) (output * 2);
1028 output += ((rev [0] * interp [3]) >> 12) * 2;
1029 output = CLAMP16( output );
1031 output = (output * voice->envx) >> 11 & ~1;
1033 /* duplicated here to give compiler more to run in parallel */
1034 amp_0 = voice->volume [0] * output;
1035 amp_1 = voice->volume [1] * output;
1037 prev_outx = output;
1038 raw_voice->outx = output >> 8;
1039 #endif /* CPU_* */
1041 #else /* SPCNOINTERP */
1042 /* two-point linear interpolation */
1043 #ifdef CPU_COLDFIRE
1044 int amp_0 = (int16_t)this->noise;
1045 int amp_1;
1047 if ( (this->r.g.noise_enables & vbit) == 0 )
1049 uint32_t f = voice->position;
1050 int32_t y0;
1053 * Formula (fastest found so far of MANY):
1054 * output = y0 + f*y1 - f*y0
1056 asm volatile (
1057 /* separate fractional and whole parts */
1058 "move.l %[f], %[y1] \r\n"
1059 "and.l #0xfff, %[f] \r\n"
1060 "lsr.l %[sh], %[y1] \r\n"
1061 /* load samples y0 (upper) & y1 (lower) */
1062 "move.l 2(%[s], %[y1].l*2), %[y1] \r\n"
1063 /* %acc0 = f*y1 */
1064 "mac.w %[f]l, %[y1]l, %%acc0 \r\n"
1065 /* %acc0 -= f*y0 */
1066 "msac.w %[f]l, %[y1]u, %%acc0 \r\n"
1067 /* separate out y0 and sign extend */
1068 "swap %[y1] \r\n"
1069 "movea.w %[y1], %[y0] \r\n"
1070 /* fetch result, scale down and add y0 */
1071 "movclr.l %%acc0, %[y1] \r\n"
1072 /* output = y0 + (result >> 12) */
1073 "asr.l %[sh], %[y1] \r\n"
1074 "add.l %[y0], %[y1] \r\n"
1075 : [f]"+d"(f), [y0]"=&a"(y0), [y1]"=&d"(amp_0)
1076 : [s]"a"(voice->samples), [sh]"d"(12));
1079 /* apply voice envelope to output */
1080 asm volatile (
1081 "mac.w %[out]l, %[envx]l, %%acc0 \r\n"
1083 : [out]"r"(amp_0), [envx]"r"(voice->envx));
1085 /* advance voice position */
1086 voice->position += rate;
1088 /* fetch output, scale and apply left and right
1089 voice volume */
1090 asm volatile (
1091 "movclr.l %%acc0, %[out] \r\n"
1092 "asr.l %[sh], %[out] \r\n"
1093 "mac.l %[vvol_0], %[out], %%acc0 \r\n"
1094 "mac.l %[vvol_1], %[out], %%acc1 \r\n"
1095 : [out]"=&d"(amp_0)
1096 : [vvol_0]"r"((int)voice->volume[0]),
1097 [vvol_1]"r"((int)voice->volume[1]),
1098 [sh]"d"(11));
1100 /* save this output into previous, scale and save in
1101 output register */
1102 prev_outx = amp_0;
1103 raw_voice->outx = amp_0 >> 8;
1105 /* fetch final voice output */
1106 asm volatile (
1107 "movclr.l %%acc0, %[amp_0] \r\n"
1108 "movclr.l %%acc1, %[amp_1] \r\n"
1109 : [amp_0]"=r"(amp_0), [amp_1]"=r"(amp_1));
1110 #elif defined (CPU_ARM)
1111 int amp_0, amp_1;
1113 if ( (this->r.g.noise_enables & vbit) != 0 )
1115 amp_0 = *(int16_t *)&this->noise;
1117 else
1119 uint32_t f = voice->position;
1120 amp_0 = (uint32_t)voice->samples;
1122 asm volatile(
1123 "mov %[y1], %[f], lsr #12 \r\n"
1124 "eor %[f], %[f], %[y1], lsl #12 \r\n"
1125 "add %[y1], %[y0], %[y1], lsl #1 \r\n"
1126 "ldrsh %[y0], [%[y1], #2] \r\n"
1127 "ldrsh %[y1], [%[y1], #4] \r\n"
1128 "sub %[y1], %[y1], %[y0] \r\n"
1129 "mul %[f], %[y1], %[f] \r\n"
1130 "add %[y0], %[y0], %[f], asr #12 \r\n"
1131 : [f]"+r"(f), [y0]"+r"(amp_0), [y1]"=&r"(amp_1));
1134 voice->position += rate;
1136 asm volatile(
1137 "mul %[amp_1], %[amp_0], %[envx] \r\n"
1138 "mov %[amp_0], %[amp_1], asr #11 \r\n"
1139 "mov %[amp_1], %[amp_0], asr #8 \r\n"
1140 : [amp_0]"+r"(amp_0), [amp_1]"=r"(amp_1)
1141 : [envx]"r"(voice->envx));
1143 prev_outx = amp_0;
1144 raw_voice->outx = (int8_t)amp_1;
1146 asm volatile(
1147 "mul %[amp_1], %[amp_0], %[vol_1] \r\n"
1148 "mul %[amp_0], %[vol_0], %[amp_0] \r\n"
1149 : [amp_0]"+r"(amp_0), [amp_1]"=&r"(amp_1)
1150 : [vol_0]"r"((int)voice->volume[0]),
1151 [vol_1]"r"((int)voice->volume[1]));
1152 #else /* Unoptimized CPU */
1153 int output;
1155 if ( (this->r.g.noise_enables & vbit) == 0 )
1157 int const fraction = voice->position & 0xfff;
1158 short const* const pos = (voice->samples + (voice->position >> 12)) + 1;
1159 output = pos[0] + ((fraction * (pos[1] - pos[0])) >> 12);
1160 } else {
1161 output = *(int16_t *)&this->noise;
1164 voice->position += rate;
1166 output = (output * voice->envx) >> 11;
1168 /* duplicated here to give compiler more to run in parallel */
1169 int amp_0 = voice->volume [0] * output;
1170 int amp_1 = voice->volume [1] * output;
1172 prev_outx = output;
1173 raw_voice->outx = (int8_t) (output >> 8);
1174 #endif /* CPU_* */
1175 #endif /* SPCNOINTERP */
1177 #if SPC_BRRCACHE
1178 if ( voice->position >= voice->wave_end )
1180 long loop_len = voice->wave_loop << 12;
1181 voice->position -= loop_len;
1182 this->r.g.wave_ended |= vbit;
1183 if ( !loop_len )
1185 this->keys_down ^= vbit;
1186 raw_voice->envx = 0;
1187 voice->envx = 0;
1190 #endif
1191 #if 0
1192 EXIT_TIMER(dsp_gen);
1194 ENTER_TIMER(dsp_mix);
1195 #endif
1196 chans_0 += amp_0;
1197 chans_1 += amp_1;
1198 #if !SPC_NOECHO
1199 if ( this->r.g.echo_ons & vbit )
1201 echo_0 += amp_0;
1202 echo_1 += amp_1;
1204 #endif
1205 #if 0
1206 EXIT_TIMER(dsp_mix);
1207 #endif
1209 /* end of voice loop */
1211 #if !SPC_NOECHO
1212 #ifdef CPU_COLDFIRE
1213 /* Read feedback from echo buffer */
1214 int echo_pos = this->echo_pos;
1215 uint8_t* const echo_ptr = RAM + ((echo_start + echo_pos) & 0xFFFF);
1216 echo_pos += 4;
1217 if ( echo_pos >= echo_wrap )
1218 echo_pos = 0;
1219 this->echo_pos = echo_pos;
1220 int fb = swap_odd_even32(*(int32_t *)echo_ptr);
1221 int out_0, out_1;
1223 /* Keep last 8 samples */
1224 *this->last_fir_ptr = fb;
1225 this->last_fir_ptr = this->fir_ptr;
1227 /* Apply echo FIR filter to output samples read from echo buffer -
1228 circular buffer is hardware incremented and masked; FIR
1229 coefficients and buffer history are loaded in parallel with
1230 multiply accumulate operations. Shift left by one here and once
1231 again when calculating feedback to have sample values justified
1232 to bit 31 in the output to ease endian swap, interleaving and
1233 clamping before placing result in the program's echo buffer. */
1234 int _0, _1, _2;
1235 asm volatile (
1236 "move.l (%[fir_c]) , %[_2] \r\n"
1237 "mac.w %[fb]u, %[_2]u, <<, (%[fir_p])+&, %[_0], %%acc0 \r\n"
1238 "mac.w %[fb]l, %[_2]u, <<, (%[fir_p])& , %[_1], %%acc1 \r\n"
1239 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1240 "mac.w %[_0]l, %[_2]l, <<, 4(%[fir_c]) , %[_2], %%acc1 \r\n"
1241 "mac.w %[_1]u, %[_2]u, <<, 4(%[fir_p])& , %[_0], %%acc0 \r\n"
1242 "mac.w %[_1]l, %[_2]u, <<, 8(%[fir_p])& , %[_1], %%acc1 \r\n"
1243 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1244 "mac.w %[_0]l, %[_2]l, <<, 8(%[fir_c]) , %[_2], %%acc1 \r\n"
1245 "mac.w %[_1]u, %[_2]u, <<, 12(%[fir_p])& , %[_0], %%acc0 \r\n"
1246 "mac.w %[_1]l, %[_2]u, <<, 16(%[fir_p])& , %[_1], %%acc1 \r\n"
1247 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1248 "mac.w %[_0]l, %[_2]l, <<, 12(%[fir_c]) , %[_2], %%acc1 \r\n"
1249 "mac.w %[_1]u, %[_2]u, <<, 20(%[fir_p])& , %[_0], %%acc0 \r\n"
1250 "mac.w %[_1]l, %[_2]u, << , %%acc1 \r\n"
1251 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1252 "mac.w %[_0]l, %[_2]l, << , %%acc1 \r\n"
1253 : [_0]"=&r"(_0), [_1]"=&r"(_1), [_2]"=&r"(_2),
1254 [fir_p]"+a"(this->fir_ptr)
1255 : [fir_c]"a"(this->fir_coeff), [fb]"r"(fb)
1258 /* Generate output */
1259 asm volatile (
1260 /* fetch filter results _after_ gcc loads asm
1261 block parameters to eliminate emac stalls */
1262 "movclr.l %%acc0, %[out_0] \r\n"
1263 "movclr.l %%acc1, %[out_1] \r\n"
1264 /* apply global volume */
1265 "mac.l %[chans_0], %[gv_0] , %%acc2 \r\n"
1266 "mac.l %[chans_1], %[gv_1] , %%acc3 \r\n"
1267 /* apply echo volume and add to final output */
1268 "mac.l %[ev_0], %[out_0], >>, %%acc2 \r\n"
1269 "mac.l %[ev_1], %[out_1], >>, %%acc3 \r\n"
1270 : [out_0]"=&r"(out_0), [out_1]"=&r"(out_1)
1271 : [chans_0]"r"(chans_0), [gv_0]"r"(global_vol_0),
1272 [ev_0]"r"((int)this->r.g.echo_volume_0),
1273 [chans_1]"r"(chans_1), [gv_1]"r"(global_vol_1),
1274 [ev_1]"r"((int)this->r.g.echo_volume_1)
1277 /* Feedback into echo buffer */
1278 if ( !(this->r.g.flags & 0x20) )
1280 asm volatile (
1281 /* scale echo voices; saturate if overflow */
1282 "mac.l %[sh], %[e1] , %%acc1 \r\n"
1283 "mac.l %[sh], %[e0] , %%acc0 \r\n"
1284 /* add scaled output from FIR filter */
1285 "mac.l %[out_1], %[ef], <<, %%acc1 \r\n"
1286 "mac.l %[out_0], %[ef], <<, %%acc0 \r\n"
1287 /* swap and fetch feedback results - simply
1288 swap_odd_even32 mixed in between macs and
1289 movclrs to mitigate stall issues */
1290 "move.l #0x00ff00ff, %[sh] \r\n"
1291 "movclr.l %%acc1, %[e1] \r\n"
1292 "swap %[e1] \r\n"
1293 "movclr.l %%acc0, %[e0] \r\n"
1294 "move.w %[e1], %[e0] \r\n"
1295 "and.l %[e0], %[sh] \r\n"
1296 "eor.l %[sh], %[e0] \r\n"
1297 "lsl.l #8, %[sh] \r\n"
1298 "lsr.l #8, %[e0] \r\n"
1299 "or.l %[sh], %[e0] \r\n"
1300 /* save final feedback into echo buffer */
1301 "move.l %[e0], (%[echo_ptr]) \r\n"
1302 : [e0]"+d"(echo_0), [e1]"+d"(echo_1)
1303 : [out_0]"r"(out_0), [out_1]"r"(out_1),
1304 [ef]"r"((int)this->r.g.echo_feedback),
1305 [echo_ptr]"a"((int32_t *)echo_ptr),
1306 [sh]"d"(1 << 9)
1310 /* Output final samples */
1311 asm volatile (
1312 /* fetch output saved in %acc2 and %acc3 */
1313 "movclr.l %%acc2, %[out_0] \r\n"
1314 "movclr.l %%acc3, %[out_1] \r\n"
1315 /* scale right by global_muting shift */
1316 "asr.l %[gm], %[out_0] \r\n"
1317 "asr.l %[gm], %[out_1] \r\n"
1318 : [out_0]"=&d"(out_0), [out_1]"=&d"(out_1)
1319 : [gm]"d"(global_muting)
1322 out_buf [ 0] = out_0;
1323 out_buf [WAV_CHUNK_SIZE] = out_1;
1324 out_buf ++;
1325 #elif defined (CPU_ARM)
1326 /* Read feedback from echo buffer */
1327 int echo_pos = this->echo_pos;
1328 uint8_t* const echo_ptr = RAM +
1329 ((this->r.g.echo_page * 0x100 + echo_pos) & 0xFFFF);
1330 echo_pos += 4;
1331 if ( echo_pos >= (this->r.g.echo_delay & 15) * 0x800 )
1332 echo_pos = 0;
1333 this->echo_pos = echo_pos;
1335 #if ARM_ARCH >= 6
1336 int32_t *fir_ptr, *fir_coeff;
1337 int fb_0, fb_1;
1339 /* Apply FIR */
1340 fb_0 = *(uint32_t *)echo_ptr;
1342 /* Keep last 8 samples */
1343 asm volatile (
1344 "add %[fir_p], %[t_fir_p], #4 \r\n"
1345 "bic %[t_fir_p], %[fir_p], %[mask] \r\n"
1346 "str %[fb_0], [%[fir_p], #-4] \r\n"
1347 /* duplicate at +8 eliminates wrap checking below */
1348 "str %[fb_0], [%[fir_p], #28] \r\n"
1349 : [fir_p]"=&r"(fir_ptr), [t_fir_p]"+r"(this->fir_ptr)
1350 : [fb_0]"r"(fb_0), [mask]"i"(~FIR_BUF_MASK));
1352 fir_coeff = (int32_t *)this->fir_coeff;
1354 /* Fugly, but the best version found. */
1355 int _0;
1356 asm volatile ( /* L0R0 = acc0 */
1357 "ldmia %[fir_p]!, { r2-r5 } \r\n" /* L1R1-L4R4 = r2-r5 */
1358 "ldmia %[fir_c]!, { r0-r1 } \r\n" /* C0C1-C2C3 = r0-r1 */
1359 "pkhbt %[_0], %[acc0], r2, asl #16 \r\n" /* L0R0,L1R1->L0L1,R0R1 */
1360 "pkhtb r2, r2, %[acc0], asr #16 \r\n"
1361 "smuad %[acc0], %[_0], r0 \r\n" /* acc0=L0*C0+L1*C1 */
1362 "smuad %[acc1], r2, r0 \r\n" /* acc1=R0*C0+R1*C1 */
1363 "pkhbt %[_0], r3, r4, asl #16 \r\n" /* L2R2,L3R3->L2L3,R2R3 */
1364 "pkhtb r4, r4, r3, asr #16 \r\n"
1365 "smlad %[acc0], %[_0], r1, %[acc0] \r\n" /* acc0+=L2*C2+L3*C3 */
1366 "smlad %[acc1], r4, r1, %[acc1] \r\n" /* acc1+=R2*C2+R3*C3 */
1367 "ldmia %[fir_p], { r2-r4 } \r\n" /* L5R5-L7R7 = r2-r4 */
1368 "ldmia %[fir_c], { r0-r1 } \r\n" /* C4C5-C6C7 = r0-r1 */
1369 "pkhbt %[_0], r5, r2, asl #16 \r\n" /* L4R4,L5R5->L4L5,R4R5 */
1370 "pkhtb r2, r2, r5, asr #16 \r\n"
1371 "smlad %[acc0], %[_0], r0, %[acc0] \r\n" /* acc0+=L4*C4+L5*C5 */
1372 "smlad %[acc1], r2, r0, %[acc1] \r\n" /* acc1+=R4*C4+R5*C5 */
1373 "pkhbt %[_0], r3, r4, asl #16 \r\n" /* L6R6,L7R7->L6L7,R6R7 */
1374 "pkhtb r4, r4, r3, asr #16 \r\n"
1375 "smlad %[acc0], %[_0], r1, %[acc0] \r\n" /* acc0+=L6*C6+L7*C7 */
1376 "smlad %[acc1], r4, r1, %[acc1] \r\n" /* acc1+=R6*C6+R7*C7 */
1377 : [acc0]"+r"(fb_0), [acc1]"=&r"(fb_1), [_0]"=&r"(_0),
1378 [fir_p]"+r"(fir_ptr), [fir_c]"+r"(fir_coeff)
1380 : "r0", "r1", "r2", "r3", "r4", "r5");
1382 /* Generate output */
1383 int amp_0, amp_1;
1385 asm volatile (
1386 "mul %[amp_0], %[gvol_0], %[chans_0] \r\n"
1387 "mul %[amp_1], %[gvol_1], %[chans_1] \r\n"
1388 : [amp_0]"=&r"(amp_0), [amp_1]"=r"(amp_1)
1389 : [gvol_0]"r"(global_vol_0), [gvol_1]"r"(global_vol_1),
1390 [chans_0]"r"(chans_0), [chans_1]"r"(chans_1));
1391 asm volatile (
1392 "mla %[amp_0], %[fb_0], %[ev_0], %[amp_0] \r\n"
1393 "mla %[amp_1], %[fb_1], %[ev_1], %[amp_1] \r\n"
1394 : [amp_0]"+r"(amp_0), [amp_1]"+r"(amp_1)
1395 : [fb_0]"r"(fb_0), [fb_1]"r"(fb_1),
1396 [ev_0]"r"((int)this->r.g.echo_volume_0),
1397 [ev_1]"r"((int)this->r.g.echo_volume_1));
1399 out_buf [ 0] = amp_0 >> global_muting;
1400 out_buf [WAV_CHUNK_SIZE] = amp_1 >> global_muting;
1401 out_buf ++;
1403 if ( !(this->r.g.flags & 0x20) )
1405 /* Feedback into echo buffer */
1406 int e0, e1;
1408 asm volatile (
1409 "mov %[e0], %[echo_0], asl #7 \r\n"
1410 "mov %[e1], %[echo_1], asl #7 \r\n"
1411 "mla %[e0], %[fb_0], %[efb], %[e0] \r\n"
1412 "mla %[e1], %[fb_1], %[efb], %[e1] \r\n"
1413 : [e0]"=&r"(e0), [e1]"=&r"(e1)
1414 : [echo_0]"r"(echo_0), [echo_1]"r"(echo_1),
1415 [fb_0]"r"(fb_0), [fb_1]"r"(fb_1),
1416 [efb]"r"((int)this->r.g.echo_feedback));
1417 asm volatile (
1418 "ssat %[e0], #16, %[e0], asr #14 \r\n"
1419 "ssat %[e1], #16, %[e1], asr #14 \r\n"
1420 "pkhbt %[e0], %[e0], %[e1], lsl #16 \r\n"
1421 "str %[e0], [%[echo_p]] \r\n"
1422 : [e0]"+r"(e0), [e1]"+r"(e1)
1423 : [echo_p]"r"(echo_ptr));
1425 #else /* ARM_ARCH < 6 */
1426 int fb_0 = GET_LE16SA( echo_ptr );
1427 int fb_1 = GET_LE16SA( echo_ptr + 2 );
1428 int32_t *fir_ptr, *fir_coeff;
1430 /* Keep last 8 samples */
1432 /* Apply FIR */
1433 asm volatile (
1434 "add %[fir_p], %[t_fir_p], #8 \r\n"
1435 "bic %[t_fir_p], %[fir_p], %[mask] \r\n"
1436 "str %[fb_0], [%[fir_p], #-8] \r\n"
1437 "str %[fb_1], [%[fir_p], #-4] \r\n"
1438 /* duplicate at +8 eliminates wrap checking below */
1439 "str %[fb_0], [%[fir_p], #56] \r\n"
1440 "str %[fb_1], [%[fir_p], #60] \r\n"
1441 : [fir_p]"=&r"(fir_ptr), [t_fir_p]"+r"(this->fir_ptr)
1442 : [fb_0]"r"(fb_0), [fb_1]"r"(fb_1), [mask]"i"(~FIR_BUF_MASK));
1444 fir_coeff = this->fir_coeff;
1446 asm volatile (
1447 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1448 "ldmia %[fir_p]!, { r4-r5 } \r\n"
1449 "mul %[fb_0], r0, %[fb_0] \r\n"
1450 "mul %[fb_1], r0, %[fb_1] \r\n"
1451 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1452 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1453 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1454 "ldmia %[fir_p]!, { r2-r5 } \r\n"
1455 "mla %[fb_0], r2, r0, %[fb_0] \r\n"
1456 "mla %[fb_1], r3, r0, %[fb_1] \r\n"
1457 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1458 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1459 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1460 "ldmia %[fir_p]!, { r2-r5 } \r\n"
1461 "mla %[fb_0], r2, r0, %[fb_0] \r\n"
1462 "mla %[fb_1], r3, r0, %[fb_1] \r\n"
1463 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1464 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1465 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1466 "ldmia %[fir_p]!, { r2-r5 } \r\n"
1467 "mla %[fb_0], r2, r0, %[fb_0] \r\n"
1468 "mla %[fb_1], r3, r0, %[fb_1] \r\n"
1469 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1470 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1471 : [fb_0]"+r"(fb_0), [fb_1]"+r"(fb_1),
1472 [fir_p]"+r"(fir_ptr), [fir_c]"+r"(fir_coeff)
1474 : "r0", "r1", "r2", "r3", "r4", "r5");
1476 /* Generate output */
1477 int amp_0 = (chans_0 * global_vol_0 + fb_0 * this->r.g.echo_volume_0)
1478 >> global_muting;
1479 int amp_1 = (chans_1 * global_vol_1 + fb_1 * this->r.g.echo_volume_1)
1480 >> global_muting;
1482 out_buf [ 0] = amp_0;
1483 out_buf [WAV_CHUNK_SIZE] = amp_1;
1484 out_buf ++;
1486 if ( !(this->r.g.flags & 0x20) )
1488 /* Feedback into echo buffer */
1489 int e0 = (echo_0 >> 7) + ((fb_0 * this->r.g.echo_feedback) >> 14);
1490 int e1 = (echo_1 >> 7) + ((fb_1 * this->r.g.echo_feedback) >> 14);
1491 e0 = CLAMP16( e0 );
1492 SET_LE16A( echo_ptr , e0 );
1493 e1 = CLAMP16( e1 );
1494 SET_LE16A( echo_ptr + 2, e1 );
1496 #endif /* ARM_ARCH */
1497 #else /* Unoptimized CPU */
1498 /* Read feedback from echo buffer */
1499 int echo_pos = this->echo_pos;
1500 uint8_t* const echo_ptr = RAM +
1501 ((this->r.g.echo_page * 0x100 + echo_pos) & 0xFFFF);
1502 echo_pos += 4;
1503 if ( echo_pos >= (this->r.g.echo_delay & 15) * 0x800 )
1504 echo_pos = 0;
1505 this->echo_pos = echo_pos;
1506 int fb_0 = GET_LE16SA( echo_ptr );
1507 int fb_1 = GET_LE16SA( echo_ptr + 2 );
1509 /* Keep last 8 samples */
1510 int (* const fir_ptr) [2] = this->fir_buf + this->fir_pos;
1511 this->fir_pos = (this->fir_pos + 1) & (FIR_BUF_HALF - 1);
1512 fir_ptr [ 0] [0] = fb_0;
1513 fir_ptr [ 0] [1] = fb_1;
1514 /* duplicate at +8 eliminates wrap checking below */
1515 fir_ptr [FIR_BUF_HALF] [0] = fb_0;
1516 fir_ptr [FIR_BUF_HALF] [1] = fb_1;
1518 /* Apply FIR */
1519 fb_0 *= this->fir_coeff [0];
1520 fb_1 *= this->fir_coeff [0];
1522 #define DO_PT( i )\
1523 fb_0 += fir_ptr [i] [0] * this->fir_coeff [i];\
1524 fb_1 += fir_ptr [i] [1] * this->fir_coeff [i];
1526 DO_PT( 1 )
1527 DO_PT( 2 )
1528 DO_PT( 3 )
1529 DO_PT( 4 )
1530 DO_PT( 5 )
1531 DO_PT( 6 )
1532 DO_PT( 7 )
1534 /* Generate output */
1535 int amp_0 = (chans_0 * global_vol_0 + fb_0 * this->r.g.echo_volume_0)
1536 >> global_muting;
1537 int amp_1 = (chans_1 * global_vol_1 + fb_1 * this->r.g.echo_volume_1)
1538 >> global_muting;
1539 out_buf [ 0] = amp_0;
1540 out_buf [WAV_CHUNK_SIZE] = amp_1;
1541 out_buf ++;
1543 if ( !(this->r.g.flags & 0x20) )
1545 /* Feedback into echo buffer */
1546 int e0 = (echo_0 >> 7) + ((fb_0 * this->r.g.echo_feedback) >> 14);
1547 int e1 = (echo_1 >> 7) + ((fb_1 * this->r.g.echo_feedback) >> 14);
1548 e0 = CLAMP16( e0 );
1549 SET_LE16A( echo_ptr , e0 );
1550 e1 = CLAMP16( e1 );
1551 SET_LE16A( echo_ptr + 2, e1 );
1553 #endif /* CPU_* */
1554 #else /* SPCNOECHO == 1*/
1555 /* Generate output */
1556 int amp_0 = (chans_0 * global_vol_0) >> global_muting;
1557 int amp_1 = (chans_1 * global_vol_1) >> global_muting;
1558 out_buf [ 0] = amp_0;
1559 out_buf [WAV_CHUNK_SIZE] = amp_1;
1560 out_buf ++;
1561 #endif /* SPCNOECHO */
1563 while ( --count );
1564 #if 0
1565 EXIT_TIMER(dsp);
1566 ENTER_TIMER(cpu);
1567 #endif
1570 void DSP_reset( struct Spc_Dsp* this )
1572 this->keys_down = 0;
1573 this->echo_pos = 0;
1574 this->noise_count = 0;
1575 this->noise = 2;
1577 this->r.g.flags = 0xE0; /* reset, mute, echo off */
1578 this->r.g.key_ons = 0;
1580 ci->memset( this->voice_state, 0, sizeof this->voice_state );
1582 int i;
1583 for ( i = VOICE_COUNT; --i >= 0; )
1585 struct voice_t* v = this->voice_state + i;
1586 v->env_mode = state_release;
1587 v->addr = ram.ram;
1590 #if SPC_BRRCACHE
1591 this->oldsize = 0;
1592 for ( i = 0; i < 256; i++ )
1593 this->wave_entry [i].start_addr = -1;
1594 #endif
1596 #if defined(CPU_COLDFIRE)
1597 this->fir_ptr = fir_buf;
1598 this->last_fir_ptr = &fir_buf [7];
1599 ci->memset( fir_buf, 0, sizeof fir_buf );
1600 #elif defined (CPU_ARM)
1601 this->fir_ptr = fir_buf;
1602 ci->memset( fir_buf, 0, sizeof fir_buf );
1603 #else
1604 this->fir_pos = 0;
1605 ci->memset( this->fir_buf, 0, sizeof this->fir_buf );
1606 #endif
1608 assert( offsetof (struct globals_t,unused9 [2]) == REGISTER_COUNT );
1609 assert( sizeof (this->r.voice) == REGISTER_COUNT );