1 /***************************************************************************
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 * Copyright (C) 2007-2008 Michael Sevakis (jhMikeS)
11 * Copyright (C) 2006-2007 Adam Gashlin (hcs)
12 * Copyright (C) 2004-2007 Shay Green (blargg)
13 * Copyright (C) 2002 Brad Martin
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
21 * KIND, either express or implied.
23 ****************************************************************************/
25 /* The DSP portion (awe!) */
27 #include "spc_codec.h"
28 #include "spc_profiler.h"
30 #if defined(CPU_COLDFIRE) || defined (CPU_ARM)
/* Buffer of FIR_BUF_CNT 32-bit words, aligned to FIR_BUF_ALIGN and placed
   with IBSS_ATTR. Only declared for Coldfire and ARM builds.
   NOTE(review): presumably the echo FIR history that this->fir_ptr walks
   in DSP_run_() - confirm against the DSP reset code. */
31 int32_t fir_buf
[FIR_BUF_CNT
]
32 __attribute__ ((aligned (FIR_BUF_ALIGN
*1))) IBSS_ATTR
;
/* Storage for pre-decoded BRR samples: decode_brr() writes its output
   starting at BRRcache + start_addr * 2. */
35 /* a little extra for samples that go past end */
36 int16_t BRRcache
[BRR_CACHE_SIZE
] CACHEALIGN_ATTR
;
39 void DSP_write( struct Spc_Dsp
* this, int i
, int data
)
41 assert( (unsigned) i
< REGISTER_COUNT
);
43 this->r
.reg
[i
] = data
;
46 if ( low
< 2 ) /* voice volumes */
48 int left
= *(int8_t const*) &this->r
.reg
[i
& ~1];
49 int right
= *(int8_t const*) &this->r
.reg
[i
| 1];
50 struct voice_t
* v
= this->voice_state
+ high
;
52 v
->volume
[1] = right
;
54 else if ( low
== 0x0F ) /* fir coefficients */
56 this->fir_coeff
[7 - high
] = (int8_t) data
; /* sign-extend */
/* CLAMP16( n ): saturate the lvalue n to the signed 16-bit range, store the
   result back into n and yield it as the value of the expression:
       if ( n < -32768 ) out = -32768;
       if ( n > 32767 )  out = 32767;
   n must be a modifiable int lvalue without side effects, since it is
   evaluated more than once. Uses a GNU statement expression so the macro
   can appear on the right-hand side of an assignment. */
#if ARM_ARCH >= 6
/* ARMv6 and later: single saturating instruction */
#define CLAMP16( n ) \
    ({ \
        asm ("ssat %0, #16, %1" \
            : "=r" ( n ) : "r"( n ) ); \
        (n); \
    })
#else
/* Generic: when n does not survive truncation to 16 bits, replace it by
   0x7FFF for positive overflow or by ~0x7FFF (-32768) for negative
   overflow, selected by the replicated sign bit (n >> 31). */
#define CLAMP16( n ) \
    ({ \
        if ( (int16_t) (n) != (n) ) \
            (n) = 0x7FFF ^ ((n) >> 31); \
        (n); \
    })
#endif
/* decode_brr: pre-decode the BRR waveform that starts at start_addr in RAM
   into BRRcache and fill in the cache entry this->wave_entry[waveform]
   (start_addr, samples, end, loop).

   this       - DSP state; supplies the source directory page, wave_entry[]
                and the wave_entry_old[] / oldsize list
   start_addr - offset of the first BRR block within RAM
   voice      - voice being keyed on
   raw_voice  - register view of that voice; only ->waveform is read here

   Visible flow: wave_entry_old[] is searched for an entry with the same
   start_addr; blocks are then decoded one at a time (header byte supplies
   filter and scale, nybbles are scaled through the right_shifts/left_shifts
   tables and clamped with CLAMP16) until a header with bit 0 set or the end
   of the 64K RAM is reached; finally end/loop are computed and the entry is
   appended to wave_entry_old[].

   NOTE(review): this listing appears to be missing lines - braces, the
   declarations of i, smp1, smp2 and block_header, the outer 'do', parts of
   the filter arithmetic, the cache-hit exit and the tails of two comments.
   Confirm against an intact copy before changing any code here. */
81 static void decode_brr( struct Spc_Dsp
* this, unsigned start_addr
,
82 struct voice_t
* voice
,
83 struct raw_voice_t
const* const raw_voice
) ICODE_ATTR
;
84 static void decode_brr( struct Spc_Dsp
* this, unsigned start_addr
,
85 struct voice_t
* voice
,
86 struct raw_voice_t
const* const raw_voice
)
88 /* setup same variables as where decode_brr() is called from */
92 struct src_dir
const* const sd
=
93 &ram
.sd
[this->r
.g
.wave_page
* 0x100/sizeof(struct src_dir
)];
94 struct cache_entry_t
* const wave_entry
=
95 &this->wave_entry
[raw_voice
->waveform
];
97 /* the following block can be put in place of the call to
101 DEBUGF( "decode at %08x (wave #%d)\n",
102 start_addr
, raw_voice
->waveform
);
104 /* see if in cache */
106 for ( i
= 0; i
< this->oldsize
; i
++ )
108 struct cache_entry_t
* e
= &this->wave_entry_old
[i
];
109 if ( e
->start_addr
== start_addr
)
111 DEBUGF( "found in wave_entry_old (oldsize=%d)\n",
/* tag the cache entry with the address being decoded */
118 wave_entry
->start_addr
= start_addr
;
/* RAM address of this waveform's loop point, read from the source directory */
120 uint8_t const* const loop_ptr
=
121 RAM
+ letoh16(sd
[raw_voice
->waveform
].loop
);
122 short* loop_start
= 0;
/* decoded samples are written into BRRcache at twice the RAM offset */
124 short* out
= BRRcache
+ start_addr
* 2;
125 wave_entry
->samples
= out
;
130 uint8_t const* addr
= RAM
+ start_addr
;
134 if ( addr
== loop_ptr
)
137 DEBUGF( "loop at %08lx (wave #%d)\n",
138 (unsigned long)(addr
- RAM
), raw_voice
->waveform
);
/* block header byte: bits 2-3 select the filter, the top nibble the scale */
142 block_header
= *addr
;
145 int const filter
= (block_header
& 0x0C) - 0x08;
148 (invalid scaling gives -4096 for neg nybble, 0 for pos) */
149 static unsigned char const right_shifts
[16] = {
150 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 29, 29, 29,
152 static unsigned char const left_shifts
[16] = {
153 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11
155 int const scale
= block_header
>> 4;
156 int const right_shift
= right_shifts
[scale
];
157 int const left_shift
= left_shifts
[scale
];
159 /* output position */
160 out
+= BRR_BLOCK_SIZE
;
161 int offset
= -BRR_BLOCK_SIZE
<< 2;
163 do /* decode and filter 16 samples */
165 /* Get nybble, sign-extend, then scale
166 get byte, select which nybble, sign-extend, then shift based
167 on scaling. also handles invalid scaling values. */
168 int delta
= (int) (int8_t) (addr
[offset
>> 3] << (offset
& 4))
169 >> right_shift
<< left_shift
;
171 out
[offset
>> 2] = smp2
;
173 if ( filter
== 0 ) /* mode 0x08 (30-90% of the time) */
179 delta
+= (-smp1
- (smp1
>> 1)) >> 5;
183 if ( filter
== -4 ) /* mode 0x04 */
186 delta
+= (-smp1
) >> 5;
188 else if ( filter
> -4 ) /* mode 0x0C */
191 delta
+= (smp2
+ (smp2
>> 1)) >> 4;
193 delta
+= (-smp1
* 13) >> 7;
198 delta
= CLAMP16( delta
);
199 smp1
= (int16_t) (delta
* 2); /* sign-extend */
201 while ( (offset
+= 4) != 0 );
203 /* if next block has end flag set, this block ends early */
205 if ( (block_header
& 3) != 3 && (*addr
& 3) == 1 )
207 /* skip last 9 samples */
212 while ( !(block_header
& 1) && addr
< RAM
+ 0x10000 );
/* end position relative to the first cached sample, shifted left by 12
   (the scale of voice->position, which DSP_run_() compares with wave_end) */
218 wave_entry
->end
= (out
- 1 - wave_entry
->samples
) << 12;
220 wave_entry
->loop
= 0;
/* header bit 1 set: record the loop length in samples and copy three
   samples from the loop start to just past the end of the wave */
221 if ( (block_header
& 2) )
225 int loop
= out
- loop_start
;
226 wave_entry
->loop
= loop
;
227 wave_entry
->end
+= 0x3000;
228 out
[2] = loop_start
[2];
229 out
[3] = loop_start
[3];
230 out
[4] = loop_start
[4];
234 DEBUGF( "loop point outside initial wave\n" );
238 DEBUGF( "end at %08lx (wave #%d)\n",
239 (unsigned long)(addr
- RAM
), raw_voice
->waveform
);
/* remember this entry in the list of previously decoded waves */
242 this->wave_entry_old
[this->oldsize
++] = *wave_entry
;
/* key_on: advance a pending key-on for one voice and start the voice once
   its delay has run out.

   this         - DSP state (keys_down mask, wave_entry[] cache)
   voice        - emulator-side state of the voice
   sd           - source directory; sd[waveform].start is read as the
                  little-endian start address of the sample
   raw_voice    - register view of the voice; only ->waveform is read here
   key_on_delay - already-decremented delay, stored back into the voice
   vbit         - bit mask of this voice within keys_down

   After the key_on_delay == 0 test the visible code marks the voice as
   keyed down, enters state_attack with a full envelope timer (0x7800) and
   sets up sample fetching. Two set-up paths are visible: one points
   voice->addr at the raw BRR data in RAM, the other uses the pre-decoded
   cache and calls decode_brr() when wave_entry->start_addr differs.

   NOTE(review): braces and preprocessor lines appear to be missing from
   this listing; the two paths are presumably selected by SPC_BRRCACHE -
   confirm against an intact copy. One comment near the end of the block
   has also lost its closing line. */
248 static void key_on(struct Spc_Dsp
* const this, struct voice_t
* const voice
,
249 struct src_dir
const* const sd
,
250 struct raw_voice_t
const* const raw_voice
,
251 const int key_on_delay
, const int vbit
) ICODE_ATTR
;
252 static void key_on(struct Spc_Dsp
* const this, struct voice_t
* const voice
,
253 struct src_dir
const* const sd
,
254 struct raw_voice_t
const* const raw_voice
,
255 const int key_on_delay
, const int vbit
) {
258 int const env_rate_init
= 0x7800;
259 voice
->key_on_delay
= key_on_delay
;
260 if ( key_on_delay
== 0 )
262 this->keys_down
|= vbit
;
264 voice
->env_mode
= state_attack
;
265 voice
->env_timer
= env_rate_init
; /* TODO: inaccurate? */
266 unsigned start_addr
= letoh16(sd
[raw_voice
->waveform
].start
);
/* path 1: decode directly from RAM while playing */
269 voice
->addr
= RAM
+ start_addr
;
270 /* BRR filter uses previous samples */
271 voice
->samples
[BRR_BLOCK_SIZE
+ 1] = 0;
272 voice
->samples
[BRR_BLOCK_SIZE
+ 2] = 0;
273 /* decode three samples immediately */
274 voice
->position
= (BRR_BLOCK_SIZE
+ 3) * 0x1000 - 1;
275 voice
->block_header
= 0; /* "previous" BRR header */
/* path 2: play from the pre-decoded cache entry of this waveform.
   NOTE(review): second assignment to voice->position; the directive that
   separates it from path 1 is not visible here */
279 voice
->position
= 3 * 0x1000 - 1;
280 struct cache_entry_t
* const wave_entry
=
281 &this->wave_entry
[raw_voice
->waveform
];
283 /* predecode BRR if not already */
284 if ( wave_entry
->start_addr
!= start_addr
)
286 /* the following line can be replaced by the indicated block
288 decode_brr( this, start_addr
, voice
, raw_voice
);
291 voice
->samples
= wave_entry
->samples
;
292 voice
->wave_end
= wave_entry
->end
;
293 voice
->wave_loop
= wave_entry
->loop
;
299 void DSP_run_( struct Spc_Dsp
* this, long count
, int32_t* out_buf
)
302 #if defined(CPU_ARM) && !SPC_BRRCACHE
303 uint8_t* const ram_
= ram
.ram
;
313 /* Here we check for keys on/off. Docs say that successive writes
314 to KON/KOF must be separated by at least 2 Ts periods or risk
315 being neglected. Therefore DSP only looks at these during an
316 update, and not at the time of the write. Only need to do this
317 once however, since the regs haven't changed over the whole
318 period we need to catch up with. */
321 int key_ons
= this->r
.g
.key_ons
;
322 int key_offs
= this->r
.g
.key_offs
;
323 /* keying on a voice resets that bit in ENDX */
324 this->r
.g
.wave_ended
&= ~key_ons
;
325 /* key_off bits prevent key_on from being acknowledged */
326 this->r
.g
.key_ons
= key_ons
& key_offs
;
328 /* process key events outside loop, since they won't re-occur */
329 struct voice_t
* voice
= this->voice_state
+ 8;
334 if ( key_offs
& vbit
)
336 voice
->env_mode
= state_release
;
337 voice
->key_on_delay
= 0;
339 else if ( key_ons
& vbit
)
341 voice
->key_on_delay
= 8;
344 while ( (vbit
>>= 1) != 0 );
347 struct src_dir
const* const sd
=
348 &ram
.sd
[this->r
.g
.wave_page
* 0x100/sizeof(struct src_dir
)];
350 #ifdef ROCKBOX_BIG_ENDIAN
351 /* Convert endiannesses before entering loops - these
353 const uint32_t rates
[VOICE_COUNT
] =
355 GET_LE16A( this->r
.voice
[0].rate
) & 0x3FFF,
356 GET_LE16A( this->r
.voice
[1].rate
) & 0x3FFF,
357 GET_LE16A( this->r
.voice
[2].rate
) & 0x3FFF,
358 GET_LE16A( this->r
.voice
[3].rate
) & 0x3FFF,
359 GET_LE16A( this->r
.voice
[4].rate
) & 0x3FFF,
360 GET_LE16A( this->r
.voice
[5].rate
) & 0x3FFF,
361 GET_LE16A( this->r
.voice
[6].rate
) & 0x3FFF,
362 GET_LE16A( this->r
.voice
[7].rate
) & 0x3FFF,
364 #define VOICE_RATE(x) *(x)
365 #define IF_RBE(...) __VA_ARGS__
367 /* Initialize mask register with the buffer address mask */
368 asm volatile ("move.l %[m], %%mask" : : [m
]"i"(FIR_BUF_MASK
));
369 const int echo_wrap
= (this->r
.g
.echo_delay
& 15) * 0x800;
370 const int echo_start
= this->r
.g
.echo_page
* 0x100;
371 #endif /* CPU_COLDFIRE */
373 #define VOICE_RATE(x) (GET_LE16(raw_voice->rate) & 0x3FFF)
375 #endif /* ROCKBOX_BIG_ENDIAN */
378 int const slow_gaussian
= (this->r
.g
.pitch_mods
>> 1) |
379 this->r
.g
.noise_enables
;
381 /* (g.flags & 0x40) ? 30 : 14 */
382 int const global_muting
= ((this->r
.g
.flags
& 0x40) >> 2) + 14 - 8;
383 int const global_vol_0
= this->r
.g
.volume_0
;
384 int const global_vol_1
= this->r
.g
.volume_1
;
386 /* each rate divides exactly into 0x7800 without remainder */
387 int const env_rate_init
= 0x7800;
388 static unsigned short const env_rates
[0x20] ICONST_ATTR
=
390 0x0000, 0x000F, 0x0014, 0x0018, 0x001E, 0x0028, 0x0030, 0x003C,
391 0x0050, 0x0060, 0x0078, 0x00A0, 0x00C0, 0x00F0, 0x0140, 0x0180,
392 0x01E0, 0x0280, 0x0300, 0x03C0, 0x0500, 0x0600, 0x0780, 0x0A00,
393 0x0C00, 0x0F00, 0x1400, 0x1800, 0x1E00, 0x2800, 0x3C00, 0x7800
396 do /* one pair of output samples per iteration */
399 if ( this->r
.g
.noise_enables
)
401 if ( (this->noise_count
-=
402 env_rates
[this->r
.g
.flags
& 0x1F]) <= 0 )
404 this->noise_count
= env_rate_init
;
405 int feedback
= (this->noise
<< 13) ^ (this->noise
<< 14);
406 this->noise
= (feedback
& 0x8000) ^ (this->noise
>> 1 & ~1);
414 long prev_outx
= 0; /* TODO: correct value for first channel? */
417 /* TODO: put raw_voice pointer in voice_t? */
418 struct raw_voice_t
* raw_voice
= this->r
.voice
;
419 struct voice_t
* voice
= this->voice_state
;
421 IF_RBE( const uint32_t* vr
= rates
; )
422 for ( ; vbit
< 0x100; vbit
<<= 1, ++voice
, ++raw_voice
IF_RBE( , ++vr
) )
424 /* pregen involves checking keyon, etc */
426 ENTER_TIMER(dsp_pregen
);
429 /* Key on events are delayed */
430 int key_on_delay
= voice
->key_on_delay
;
432 if ( UNLIKELY ( --key_on_delay
>= 0 ) ) /* <1% of the time */
434 key_on(this,voice
,sd
,raw_voice
,key_on_delay
,vbit
);
437 if ( !(this->keys_down
& vbit
) ) /* Silent channel */
448 int const ENV_RANGE
= 0x800;
449 int env_mode
= voice
->env_mode
;
450 int adsr0
= raw_voice
->adsr
[0];
452 if ( LIKELY ( env_mode
!= state_release
) ) /* 99% of the time */
454 env_timer
= voice
->env_timer
;
455 if ( LIKELY ( adsr0
& 0x80 ) ) /* 79% of the time */
457 int adsr1
= raw_voice
->adsr
[1];
458 if ( LIKELY ( env_mode
== state_sustain
) ) /* 74% of the time */
460 if ( (env_timer
-= env_rates
[adsr1
& 0x1F]) > 0 )
461 goto write_env_timer
;
463 int envx
= voice
->envx
;
464 envx
--; /* envx *= 255 / 256 */
467 /* TODO: should this be 8? */
468 raw_voice
->envx
= envx
>> 4;
471 else if ( env_mode
< 0 ) /* 25% state_decay */
473 int envx
= voice
->envx
;
475 env_rates
[(adsr0
>> 3 & 0x0E) + 0x10]) <= 0 )
477 envx
--; /* envx *= 255 / 256 */
480 /* TODO: should this be 8? */
481 raw_voice
->envx
= envx
>> 4;
482 env_timer
= env_rate_init
;
485 int sustain_level
= adsr1
>> 5;
486 if ( envx
<= (sustain_level
+ 1) * 0x100 )
487 voice
->env_mode
= state_sustain
;
489 goto write_env_timer
;
491 else /* state_attack */
493 int t
= adsr0
& 0x0F;
494 if ( (env_timer
-= env_rates
[t
* 2 + 1]) > 0 )
495 goto write_env_timer
;
497 int envx
= voice
->envx
;
499 int const step
= ENV_RANGE
/ 64;
502 envx
+= ENV_RANGE
/ 2 - step
;
504 if ( envx
>= ENV_RANGE
)
506 envx
= ENV_RANGE
- 1;
507 voice
->env_mode
= state_decay
;
510 /* TODO: should this be 8? */
511 raw_voice
->envx
= envx
>> 4;
517 int t
= raw_voice
->gain
;
521 voice
->envx
= t
<< 4;
526 if ( (env_timer
-= env_rates
[t
& 0x1F]) > 0 )
527 goto write_env_timer
;
529 int envx
= voice
->envx
;
531 if ( mode
<= 5 ) /* decay */
533 int step
= ENV_RANGE
/ 64;
534 if ( mode
== 5 ) /* exponential */
536 envx
--; /* envx *= 255 / 256 */
539 if ( (envx
-= step
) < 0 )
542 if ( voice
->env_mode
== state_attack
)
543 voice
->env_mode
= state_decay
;
548 int const step
= ENV_RANGE
/ 64;
551 envx
>= ENV_RANGE
* 3 / 4 + step
)
552 envx
+= ENV_RANGE
/ 256 - step
;
554 if ( envx
>= ENV_RANGE
)
555 envx
= ENV_RANGE
- 1;
558 /* TODO: should this be 8? */
559 raw_voice
->envx
= envx
>> 4;
564 else /* state_release */
566 int envx
= voice
->envx
;
567 if ( (envx
-= ENV_RANGE
/ 256) > 0 )
570 raw_voice
->envx
= envx
>> 8;
575 /* bit was set, so this clears it */
576 this->keys_down
^= vbit
;
582 env_timer
= env_rate_init
;
584 voice
->env_timer
= env_timer
;
588 EXIT_TIMER(dsp_pregen
);
590 ENTER_TIMER(dsp_gen
);
593 /* Decode BRR block */
594 if ( voice
->position
>= BRR_BLOCK_SIZE
* 0x1000 )
596 voice
->position
-= BRR_BLOCK_SIZE
* 0x1000;
598 uint8_t const* addr
= voice
->addr
;
599 if ( addr
>= RAM
+ 0x10000 )
602 /* action based on previous block's header */
603 if ( voice
->block_header
& 1 )
605 addr
= RAM
+ letoh16(sd
[raw_voice
->waveform
].loop
);
606 this->r
.g
.wave_ended
|= vbit
;
607 if ( !(voice
->block_header
& 2) ) /* 1% of the time */
609 /* first block was end block;
610 don't play anything (verified) */
611 /* bit was set, so this clears it */
612 this->keys_down
^= vbit
;
614 /* since voice->envx is 0,
615 samples and position don't matter */
623 int const block_header
= *addr
;
626 voice
->block_header
= block_header
;
628 /* previous samples */
629 int smp2
= voice
->samples
[BRR_BLOCK_SIZE
+ 1];
630 int smp1
= voice
->samples
[BRR_BLOCK_SIZE
+ 2];
631 voice
->samples
[0] = voice
->samples
[BRR_BLOCK_SIZE
];
633 /* output position */
634 short* out
= voice
->samples
+ (1 + BRR_BLOCK_SIZE
);
635 int offset
= -BRR_BLOCK_SIZE
<< 2;
637 /* if next block has end flag set,
638 this block ends early (verified) */
639 if ( (block_header
& 3) != 3 && (*addr
& 3) == 1 )
641 /* arrange for last 9 samples to be skipped */
644 voice
->samples
[skip
] = voice
->samples
[BRR_BLOCK_SIZE
];
645 voice
->position
+= skip
* 0x1000;
646 offset
= (-BRR_BLOCK_SIZE
+ (skip
& ~1)) << 2;
648 /* force sample to end on next decode */
649 voice
->block_header
= 1;
652 int const filter
= block_header
& 0x0c;
653 int const scale
= block_header
>> 4;
655 if ( filter
== 0x08 ) /* filter 2 (30-90% of the time) */
657 /* y[n] = x[n] + 61/32 * y[n-1] - 15/16 * y[n-2] */
658 do /* decode and filter 16 samples */
660 /* Get nybble, sign-extend, then scale
661 get byte, select which nybble, sign-extend, then shift
663 int delta
= (int8_t)(addr
[offset
>> 3] << (offset
& 4)) >> 4;
664 delta
= (delta
<< scale
) >> 1;
667 delta
= (delta
>> 17) << 11;
669 out
[offset
>> 2] = smp2
;
674 delta
+= (-smp1
- (smp1
>> 1)) >> 5;
676 delta
= CLAMP16( delta
);
678 smp1
= (int16_t) (delta
* 2); /* sign-extend */
680 while ( (offset
+= 4) != 0 );
682 else if ( filter
== 0x04 ) /* filter 1 */
684 /* y[n] = x[n] + 15/16 * y[n-1] */
685 do /* decode and filter 16 samples */
687 /* Get nybble, sign-extend, then scale
688 get byte, select which nybble, sign-extend, then shift
690 int delta
= (int8_t)(addr
[offset
>> 3] << (offset
& 4)) >> 4;
691 delta
= (delta
<< scale
) >> 1;
694 delta
= (delta
>> 17) << 11;
696 out
[offset
>> 2] = smp2
;
699 delta
+= (-smp1
) >> 5;
701 delta
= CLAMP16( delta
);
703 smp1
= (int16_t) (delta
* 2); /* sign-extend */
705 while ( (offset
+= 4) != 0 );
707 else if ( filter
== 0x0c ) /* filter 3 */
709 /* y[n] = x[n] + 115/64 * y[n-1] - 13/16 * y[n-2] */
710 do /* decode and filter 16 samples */
712 /* Get nybble, sign-extend, then scale
713 get byte, select which nybble, sign-extend, then shift
715 int delta
= (int8_t)(addr
[offset
>> 3] << (offset
& 4)) >> 4;
716 delta
= (delta
<< scale
) >> 1;
719 delta
= (delta
>> 17) << 11;
721 out
[offset
>> 2] = smp2
;
724 delta
+= (smp2
+ (smp2
>> 1)) >> 4;
726 delta
+= (-smp1
* 13) >> 7;
728 delta
= CLAMP16( delta
);
730 smp1
= (int16_t) (delta
* 2); /* sign-extend */
732 while ( (offset
+= 4) != 0 );
737 do /* decode and filter 16 samples */
739 /* Get nybble, sign-extend, then scale
740 get byte, select which nybble, sign-extend, then shift
742 int delta
= (int8_t)(addr
[offset
>> 3] << (offset
& 4)) >> 4;
743 delta
= (delta
<< scale
) >> 1;
746 delta
= (delta
>> 17) << 11;
748 out
[offset
>> 2] = smp2
;
753 while ( (offset
+= 4) != 0 );
761 #endif /* !SPC_BRRCACHE */
762 /* Get rate (with possible modulation) */
763 int rate
= VOICE_RATE(vr
);
764 if ( this->r
.g
.pitch_mods
& vbit
)
765 rate
= (rate
* (prev_outx
+ 32768)) >> 15;
768 /* Interleved gauss table (to improve cache coherency). */
769 /* gauss [i * 2 + j] = normal_gauss [(1 - j) * 256 + i] */
770 static short const gauss
[512] =
772 370,1305, 366,1305, 362,1304, 358,1304, 354,1304, 351,1304, 347,1304, 343,1303,
773 339,1303, 336,1303, 332,1302, 328,1302, 325,1301, 321,1300, 318,1300, 314,1299,
774 311,1298, 307,1297, 304,1297, 300,1296, 297,1295, 293,1294, 290,1293, 286,1292,
775 283,1291, 280,1290, 276,1288, 273,1287, 270,1286, 267,1284, 263,1283, 260,1282,
776 257,1280, 254,1279, 251,1277, 248,1275, 245,1274, 242,1272, 239,1270, 236,1269,
777 233,1267, 230,1265, 227,1263, 224,1261, 221,1259, 218,1257, 215,1255, 212,1253,
778 210,1251, 207,1248, 204,1246, 201,1244, 199,1241, 196,1239, 193,1237, 191,1234,
779 188,1232, 186,1229, 183,1227, 180,1224, 178,1221, 175,1219, 173,1216, 171,1213,
780 168,1210, 166,1207, 163,1205, 161,1202, 159,1199, 156,1196, 154,1193, 152,1190,
781 150,1186, 147,1183, 145,1180, 143,1177, 141,1174, 139,1170, 137,1167, 134,1164,
782 132,1160, 130,1157, 128,1153, 126,1150, 124,1146, 122,1143, 120,1139, 118,1136,
783 117,1132, 115,1128, 113,1125, 111,1121, 109,1117, 107,1113, 106,1109, 104,1106,
784 102,1102, 100,1098, 99,1094, 97,1090, 95,1086, 94,1082, 92,1078, 90,1074,
785 89,1070, 87,1066, 86,1061, 84,1057, 83,1053, 81,1049, 80,1045, 78,1040,
786 77,1036, 76,1032, 74,1027, 73,1023, 71,1019, 70,1014, 69,1010, 67,1005,
787 66,1001, 65, 997, 64, 992, 62, 988, 61, 983, 60, 978, 59, 974, 58, 969,
788 56, 965, 55, 960, 54, 955, 53, 951, 52, 946, 51, 941, 50, 937, 49, 932,
789 48, 927, 47, 923, 46, 918, 45, 913, 44, 908, 43, 904, 42, 899, 41, 894,
790 40, 889, 39, 884, 38, 880, 37, 875, 36, 870, 36, 865, 35, 860, 34, 855,
791 33, 851, 32, 846, 32, 841, 31, 836, 30, 831, 29, 826, 29, 821, 28, 816,
792 27, 811, 27, 806, 26, 802, 25, 797, 24, 792, 24, 787, 23, 782, 23, 777,
793 22, 772, 21, 767, 21, 762, 20, 757, 20, 752, 19, 747, 19, 742, 18, 737,
794 17, 732, 17, 728, 16, 723, 16, 718, 15, 713, 15, 708, 15, 703, 14, 698,
795 14, 693, 13, 688, 13, 683, 12, 678, 12, 674, 11, 669, 11, 664, 11, 659,
796 10, 654, 10, 649, 10, 644, 9, 640, 9, 635, 9, 630, 8, 625, 8, 620,
797 8, 615, 7, 611, 7, 606, 7, 601, 6, 596, 6, 592, 6, 587, 6, 582,
798 5, 577, 5, 573, 5, 568, 5, 563, 4, 559, 4, 554, 4, 550, 4, 545,
799 4, 540, 3, 536, 3, 531, 3, 527, 3, 522, 3, 517, 2, 513, 2, 508,
800 2, 504, 2, 499, 2, 495, 2, 491, 2, 486, 1, 482, 1, 477, 1, 473,
801 1, 469, 1, 464, 1, 460, 1, 456, 1, 451, 1, 447, 1, 443, 1, 439,
802 0, 434, 0, 430, 0, 426, 0, 422, 0, 418, 0, 414, 0, 410, 0, 405,
803 0, 401, 0, 397, 0, 393, 0, 389, 0, 385, 0, 381, 0, 378, 0, 374,
805 /* Gaussian interpolation using most recent 4 samples */
806 long position
= voice
->position
;
807 voice
->position
+= rate
;
808 short const* interp
= voice
->samples
+ (position
>> 12);
809 int offset
= position
>> 4 & 0xFF;
811 /* Only left half of gaussian kernel is in table, so we must mirror
813 short const* fwd
= gauss
+ offset
* 2;
814 short const* rev
= gauss
+ 510 - offset
* 2;
816 /* Use faster gaussian interpolation when exact result isn't needed
817 by pitch modulator of next channel */
818 int amp_0
, amp_1
; /* Also serve as temps _0, and _1 */
819 if ( LIKELY ( !(slow_gaussian
& vbit
) ) ) /* 99% of the time */
821 /* Main optimization is lack of clamping. Not a problem since
822 output never goes more than +/- 16 outside 16-bit range and
823 things are clamped later anyway. Other optimization is to
824 preserve fractional accuracy, eliminating several masks. */
825 #if defined (CPU_ARM)
827 int _2
, _3
; /* All-purpose temps */
828 /* Multiple ASM blocks keep regs free and reduce result
833 "ldr %[_0], [%[interp]] \r\n" /* _0=i0i1 */
834 "ldr %[_2], [%[fwd]] \r\n" /* _2=f0f1 */
835 "ldr %[_1], [%[interp], #4] \r\n" /* _1=i2i3 */
836 "ldr %[_3], [%[rev]] \r\n" /* _3=r0r1 */
837 "smuad %[out], %[_0], %[_2] \r\n" /* out=f0*i0 + f1*i1 */
838 "smladx %[out], %[_1], %[_3], %[out] \r\n" /* out+=r1*i2 + r0*i3 */
840 [_0
]"=&r"(amp_0
), [_1
]"=&r"(amp_1
),
841 [_2
]"=&r"(_2
), [_3
]"=r"(_3
)
842 : [fwd
]"r"(fwd
), [rev
]"r"(rev
),
843 [interp
]"r"(interp
));
844 /* Apply voice envelope */
846 "mov %[_2], %[out], asr #(11-5) \r\n" /* To do >> 16 later */
847 "mul %[out], %[_2], %[envx] \r\n" /* and avoid exp. shift */
848 : [out
]"+r"(output
), [_2
]"=&r"(_2
)
849 : [envx
]"r"((int)voice
->envx
));
850 /* Apply left and right volume */
852 "smulwb %[amp_0], %[out], %[vvol_0] \r\n" /* (32x16->48)[47:16]->[31:0] */
853 "smulwb %[amp_1], %[out], %[vvol_1] \r\n"
855 [amp_0
]"=&r"(amp_0
), [amp_1
]"=r"(amp_1
)
856 : [vvol_0
]"r"(voice
->volume
[0]),
857 [vvol_1
]"r"(voice
->volume
[1]));
859 raw_voice
->outx
= output
>> (8+5); /* 'output' still 5 bits too big */
860 #else /* ARM_ARCH < 6 */
861 /* Perform gaussian interpolation on four samples */
863 "ldrsh %[_0], [%[interp]] \r\n"
864 "ldrsh %[_2], [%[fwd]] \r\n"
865 "ldrsh %[_1], [%[interp], #2] \r\n"
866 "ldrsh %[_3], [%[fwd], #2] \r\n"
867 "mul %[out], %[_0], %[_2] \r\n" /* out= fwd[0]*interp[0] */
868 "ldrsh %[_0], [%[interp], #4] \r\n"
869 "ldrsh %[_2], [%[rev], #2] \r\n"
870 "mla %[out], %[_1], %[_3], %[out] \r\n" /* out+=fwd[1]*interp[1] */
871 "ldrsh %[_1], [%[interp], #6] \r\n"
872 "ldrsh %[_3], [%[rev]] \r\n"
873 "mla %[out], %[_0], %[_2], %[out] \r\n" /* out+=rev[1]*interp[2] */
874 "mla %[out], %[_1], %[_3], %[out] \r\n" /* out+=rev[0]*interp[3] */
875 : [out
]"=&r"(output
),
876 [_0
]"=&r"(amp_0
), [_1
]"=&r"(amp_1
),
877 [_2
]"=&r"(_2
), [_3
]"=&r"(_3
)
878 : [fwd
]"r"(fwd
), [rev
]"r"(rev
),
879 [interp
]"r"(interp
));
880 /* Apply voice envelope */
882 "mov %[_2], %[out], asr #11 \r\n"
883 "mul %[out], %[_2], %[envx] \r\n"
884 : [out
]"+r"(output
), [_2
]"=&r"(_2
)
885 : [envx
]"r"((int)voice
->envx
));
886 /* Reduce and apply left and right volume */
888 "mov %[out], %[out], asr #11 \r\n"
889 "mul %[amp_0], %[out], %[vvol_0] \r\n"
890 "mul %[amp_1], %[out], %[vvol_1] \r\n"
892 [amp_0
]"=&r"(amp_0
), [amp_1
]"=r"(amp_1
)
893 : [vvol_0
]"r"((int)voice
->volume
[0]),
894 [vvol_1
]"r"((int)voice
->volume
[1]));
896 raw_voice
->outx
= output
>> 8;
897 #endif /* ARM_ARCH */
898 #else /* Unoptimized CPU */
899 int output
= (((fwd
[0] * interp
[0] +
900 fwd
[1] * interp
[1] +
901 rev
[1] * interp
[2] +
902 rev
[0] * interp
[3] ) >> 11) * voice
->envx
) >> 11;
904 /* duplicated here to give compiler more to run in parallel */
905 amp_0
= voice
->volume
[0] * output
;
906 amp_1
= voice
->volume
[1] * output
;
908 raw_voice
->outx
= output
>> 8;
911 else /* slow gaussian */
915 int output
= *(int16_t*) &this->noise
;
917 if ( !(this->r
.g
.noise_enables
& vbit
) )
922 /* NOTE: often-unaligned accesses */
923 "ldr %[_0], [%[interp]] \r\n" /* _0=i0i1 */
924 "ldr %[_2], [%[fwd]] \r\n" /* _2=f0f1 */
925 "ldr %[_1], [%[interp], #4] \r\n" /* _1=i2i3 */
926 "ldr %[_3], [%[rev]] \r\n" /* _3=f2f3 */
927 "smulbb %[out], %[_0], %[_2] \r\n" /* out=f0*i0 */
928 "smultt %[_0], %[_0], %[_2] \r\n" /* _0=f1*i1 */
929 "smulbt %[_2], %[_1], %[_3] \r\n" /* _2=r1*i2 */
930 "smultb %[_3], %[_1], %[_3] \r\n" /* _3=r0*i3 */
932 [_0
]"=&r"(amp_0
), [_1
]"=&r"(amp_1
),
933 [_2
]"=&r"(_2
), [_3
]"=r"(_3
)
934 : [fwd
]"r"(fwd
), [rev
]"r"(rev
),
935 [interp
]"r"(interp
));
937 "mov %[out], %[out], asr#12 \r\n"
938 "add %[_0], %[out], %[_0], asr #12 \r\n"
939 "add %[_2], %[_0], %[_2], asr #12 \r\n"
940 "pkhbt %[_0], %[_2], %[_3], asl #4 \r\n" /* _3[31:16], _2[15:0] */
941 "sadd16 %[_0], %[_0], %[_0] \r\n" /* _3[31:16]*2, _2[15:0]*2 */
942 "qsubaddx %[out], %[_0], %[_0] \r\n" /* out[15:0]=
943 * sat16(_3[31:16]+_2[15:0]) */
945 [_0
]"+r"(amp_0
), [_2
]"+r"(_2
), [_3
]"+r"(_3
));
947 /* Apply voice envelope */
949 "smulbb %[out], %[out], %[envx] \r\n"
951 : [envx
]"r"(voice
->envx
));
952 /* Reduce and apply left and right volume */
954 "mov %[out], %[out], asr #11 \r\n"
955 "bic %[out], %[out], #0x1 \r\n"
956 "mul %[amp_0], %[out], %[vvol_0] \r\n"
957 "mul %[amp_1], %[out], %[vvol_1] \r\n"
959 [amp_0
]"=&r"(amp_0
), [amp_1
]"=r"(amp_1
)
960 : [vvol_0
]"r"((int)voice
->volume
[0]),
961 [vvol_1
]"r"((int)voice
->volume
[1]));
964 raw_voice
->outx
= output
>> 8;
965 #else /* ARM_ARCH < 6 */
966 int output
= *(int16_t*) &this->noise
;
968 if ( !(this->r
.g
.noise_enables
& vbit
) )
973 "ldrsh %[_0], [%[interp]] \r\n"
974 "ldrsh %[_2], [%[fwd]] \r\n"
975 "ldrsh %[_1], [%[interp], #2] \r\n"
976 "ldrsh %[_3], [%[fwd], #2] \r\n"
977 "mul %[out], %[_2], %[_0] \r\n" /* fwd[0]*interp[0] */
978 "ldrsh %[_2], [%[rev], #2] \r\n"
979 "mul %[_0], %[_3], %[_1] \r\n" /* fwd[1]*interp[1] */
980 "ldrsh %[_1], [%[interp], #4] \r\n"
981 "mov %[out], %[out], asr #12 \r\n"
982 "ldrsh %[_3], [%[rev]] \r\n"
983 "mul %[_2], %[_1], %[_2] \r\n" /* rev[1]*interp[2] */
984 "ldrsh %[_1], [%[interp], #6] \r\n"
985 "add %[_0], %[out], %[_0], asr #12 \r\n"
986 "mul %[_3], %[_1], %[_3] \r\n" /* rev[0]*interp[3] */
987 "add %[_2], %[_0], %[_2], asr #12 \r\n"
988 "mov %[_2], %[_2], lsl #17 \r\n"
989 "mov %[_3], %[_3], asr #12 \r\n"
990 "mov %[_3], %[_3], asl #1 \r\n"
991 "add %[out], %[_3], %[_2], asr #16 \r\n"
992 : [out
]"=&r"(output
),
993 [_0
]"=&r"(amp_0
), [_1
]"=&r"(amp_1
),
994 [_2
]"=&r"(_2
), [_3
]"=&r"(_3
)
995 : [fwd
]"r"(fwd
), [rev
]"r"(rev
),
996 [interp
]"r"(interp
));
998 output
= CLAMP16(output
);
1000 /* Apply voice envelope */
1002 "mul %[_0], %[out], %[envx] \r\n"
1004 : [out
]"r"(output
), [envx
]"r"((int)voice
->envx
));
1005 /* Reduce and apply left and right volume */
1007 "mov %[out], %[amp_0], asr #11 \r\n" /* amp_0 = _0 */
1008 "bic %[out], %[out], #0x1 \r\n"
1009 "mul %[amp_0], %[out], %[vvol_0] \r\n"
1010 "mul %[amp_1], %[out], %[vvol_1] \r\n"
1011 : [out
]"+r"(output
),
1012 [amp_0
]"+r"(amp_0
), [amp_1
]"=r"(amp_1
)
1013 : [vvol_0
]"r"((int)voice
->volume
[0]),
1014 [vvol_1
]"r"((int)voice
->volume
[1]));
1017 raw_voice
->outx
= output
>> 8;
1018 #endif /* ARM_ARCH >= 6 */
1019 #else /* Unoptimized CPU */
1020 int output
= *(int16_t*) &this->noise
;
1022 if ( !(this->r
.g
.noise_enables
& vbit
) )
1024 output
= (fwd
[0] * interp
[0]) & ~0xFFF;
1025 output
= (output
+ fwd
[1] * interp
[1]) & ~0xFFF;
1026 output
= (output
+ rev
[1] * interp
[2]) >> 12;
1027 output
= (int16_t) (output
* 2);
1028 output
+= ((rev
[0] * interp
[3]) >> 12) * 2;
1029 output
= CLAMP16( output
);
1031 output
= (output
* voice
->envx
) >> 11 & ~1;
1033 /* duplicated here to give compiler more to run in parallel */
1034 amp_0
= voice
->volume
[0] * output
;
1035 amp_1
= voice
->volume
[1] * output
;
1038 raw_voice
->outx
= output
>> 8;
1041 #else /* SPCNOINTERP */
1042 /* two-point linear interpolation */
1044 int amp_0
= (int16_t)this->noise
;
1047 if ( (this->r
.g
.noise_enables
& vbit
) == 0 )
1049 uint32_t f
= voice
->position
;
1053 * Formula (fastest found so far of MANY):
1054 * output = y0 + f*y1 - f*y0
1057 /* separate fractional and whole parts */
1058 "move.l %[f], %[y1] \r\n"
1059 "and.l #0xfff, %[f] \r\n"
1060 "lsr.l %[sh], %[y1] \r\n"
1061 /* load samples y0 (upper) & y1 (lower) */
1062 "move.l 2(%[s], %[y1].l*2), %[y1] \r\n"
1064 "mac.w %[f]l, %[y1]l, %%acc0 \r\n"
1066 "msac.w %[f]l, %[y1]u, %%acc0 \r\n"
1067 /* separate out y0 and sign extend */
1069 "movea.w %[y1], %[y0] \r\n"
1070 /* fetch result, scale down and add y0 */
1071 "movclr.l %%acc0, %[y1] \r\n"
1072 /* output = y0 + (result >> 12) */
1073 "asr.l %[sh], %[y1] \r\n"
1074 "add.l %[y0], %[y1] \r\n"
1075 : [f
]"+d"(f
), [y0
]"=&a"(y0
), [y1
]"=&d"(amp_0
)
1076 : [s
]"a"(voice
->samples
), [sh
]"d"(12));
1079 /* apply voice envelope to output */
1081 "mac.w %[out]l, %[envx]l, %%acc0 \r\n"
1083 : [out
]"r"(amp_0
), [envx
]"r"(voice
->envx
));
1085 /* advance voice position */
1086 voice
->position
+= rate
;
1088 /* fetch output, scale and apply left and right
1091 "movclr.l %%acc0, %[out] \r\n"
1092 "asr.l %[sh], %[out] \r\n"
1093 "mac.l %[vvol_0], %[out], %%acc0 \r\n"
1094 "mac.l %[vvol_1], %[out], %%acc1 \r\n"
1096 : [vvol_0
]"r"((int)voice
->volume
[0]),
1097 [vvol_1
]"r"((int)voice
->volume
[1]),
1100 /* save this output into previous, scale and save in
1103 raw_voice
->outx
= amp_0
>> 8;
1105 /* fetch final voice output */
1107 "movclr.l %%acc0, %[amp_0] \r\n"
1108 "movclr.l %%acc1, %[amp_1] \r\n"
1109 : [amp_0
]"=r"(amp_0
), [amp_1
]"=r"(amp_1
));
1110 #elif defined (CPU_ARM)
1113 if ( (this->r
.g
.noise_enables
& vbit
) != 0 )
1115 amp_0
= *(int16_t *)&this->noise
;
1119 uint32_t f
= voice
->position
;
1120 amp_0
= (uint32_t)voice
->samples
;
1123 "mov %[y1], %[f], lsr #12 \r\n"
1124 "eor %[f], %[f], %[y1], lsl #12 \r\n"
1125 "add %[y1], %[y0], %[y1], lsl #1 \r\n"
1126 "ldrsh %[y0], [%[y1], #2] \r\n"
1127 "ldrsh %[y1], [%[y1], #4] \r\n"
1128 "sub %[y1], %[y1], %[y0] \r\n"
1129 "mul %[f], %[y1], %[f] \r\n"
1130 "add %[y0], %[y0], %[f], asr #12 \r\n"
1131 : [f
]"+r"(f
), [y0
]"+r"(amp_0
), [y1
]"=&r"(amp_1
));
1134 voice
->position
+= rate
;
1137 "mul %[amp_1], %[amp_0], %[envx] \r\n"
1138 "mov %[amp_0], %[amp_1], asr #11 \r\n"
1139 "mov %[amp_1], %[amp_0], asr #8 \r\n"
1140 : [amp_0
]"+r"(amp_0
), [amp_1
]"=r"(amp_1
)
1141 : [envx
]"r"(voice
->envx
));
1144 raw_voice
->outx
= (int8_t)amp_1
;
1147 "mul %[amp_1], %[amp_0], %[vol_1] \r\n"
1148 "mul %[amp_0], %[vol_0], %[amp_0] \r\n"
1149 : [amp_0
]"+r"(amp_0
), [amp_1
]"=&r"(amp_1
)
1150 : [vol_0
]"r"((int)voice
->volume
[0]),
1151 [vol_1
]"r"((int)voice
->volume
[1]));
1152 #else /* Unoptimized CPU */
1155 if ( (this->r
.g
.noise_enables
& vbit
) == 0 )
1157 int const fraction
= voice
->position
& 0xfff;
1158 short const* const pos
= (voice
->samples
+ (voice
->position
>> 12)) + 1;
1159 output
= pos
[0] + ((fraction
* (pos
[1] - pos
[0])) >> 12);
1161 output
= *(int16_t *)&this->noise
;
1164 voice
->position
+= rate
;
1166 output
= (output
* voice
->envx
) >> 11;
1168 /* duplicated here to give compiler more to run in parallel */
1169 int amp_0
= voice
->volume
[0] * output
;
1170 int amp_1
= voice
->volume
[1] * output
;
1173 raw_voice
->outx
= (int8_t) (output
>> 8);
1175 #endif /* SPCNOINTERP */
1178 if ( voice
->position
>= voice
->wave_end
)
1180 long loop_len
= voice
->wave_loop
<< 12;
1181 voice
->position
-= loop_len
;
1182 this->r
.g
.wave_ended
|= vbit
;
1185 this->keys_down
^= vbit
;
1186 raw_voice
->envx
= 0;
1192 EXIT_TIMER(dsp_gen
);
1194 ENTER_TIMER(dsp_mix
);
1199 if ( this->r
.g
.echo_ons
& vbit
)
1206 EXIT_TIMER(dsp_mix
);
1209 /* end of voice loop */
1213 /* Read feedback from echo buffer */
1214 int echo_pos
= this->echo_pos
;
1215 uint8_t* const echo_ptr
= RAM
+ ((echo_start
+ echo_pos
) & 0xFFFF);
1217 if ( echo_pos
>= echo_wrap
)
1219 this->echo_pos
= echo_pos
;
1220 int fb
= swap_odd_even32(*(int32_t *)echo_ptr
);
1223 /* Keep last 8 samples */
1224 *this->last_fir_ptr
= fb
;
1225 this->last_fir_ptr
= this->fir_ptr
;
1227 /* Apply echo FIR filter to output samples read from echo buffer -
1228 circular buffer is hardware incremented and masked; FIR
1229 coefficients and buffer history are loaded in parallel with
1230 multiply accumulate operations. Shift left by one here and once
1231 again when calculating feedback to have sample values justified
1232 to bit 31 in the output to ease endian swap, interleaving and
1233 clamping before placing result in the program's echo buffer. */
1236 "move.l (%[fir_c]) , %[_2] \r\n"
1237 "mac.w %[fb]u, %[_2]u, <<, (%[fir_p])+&, %[_0], %%acc0 \r\n"
1238 "mac.w %[fb]l, %[_2]u, <<, (%[fir_p])& , %[_1], %%acc1 \r\n"
1239 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1240 "mac.w %[_0]l, %[_2]l, <<, 4(%[fir_c]) , %[_2], %%acc1 \r\n"
1241 "mac.w %[_1]u, %[_2]u, <<, 4(%[fir_p])& , %[_0], %%acc0 \r\n"
1242 "mac.w %[_1]l, %[_2]u, <<, 8(%[fir_p])& , %[_1], %%acc1 \r\n"
1243 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1244 "mac.w %[_0]l, %[_2]l, <<, 8(%[fir_c]) , %[_2], %%acc1 \r\n"
1245 "mac.w %[_1]u, %[_2]u, <<, 12(%[fir_p])& , %[_0], %%acc0 \r\n"
1246 "mac.w %[_1]l, %[_2]u, <<, 16(%[fir_p])& , %[_1], %%acc1 \r\n"
1247 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1248 "mac.w %[_0]l, %[_2]l, <<, 12(%[fir_c]) , %[_2], %%acc1 \r\n"
1249 "mac.w %[_1]u, %[_2]u, <<, 20(%[fir_p])& , %[_0], %%acc0 \r\n"
1250 "mac.w %[_1]l, %[_2]u, << , %%acc1 \r\n"
1251 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1252 "mac.w %[_0]l, %[_2]l, << , %%acc1 \r\n"
1253 : [_0
]"=&r"(_0
), [_1
]"=&r"(_1
), [_2
]"=&r"(_2
),
1254 [fir_p
]"+a"(this->fir_ptr
)
1255 : [fir_c
]"a"(this->fir_coeff
), [fb
]"r"(fb
)
1258 /* Generate output */
1260 /* fetch filter results _after_ gcc loads asm
1261 block parameters to eliminate emac stalls */
1262 "movclr.l %%acc0, %[out_0] \r\n"
1263 "movclr.l %%acc1, %[out_1] \r\n"
1264 /* apply global volume */
1265 "mac.l %[chans_0], %[gv_0] , %%acc2 \r\n"
1266 "mac.l %[chans_1], %[gv_1] , %%acc3 \r\n"
1267 /* apply echo volume and add to final output */
1268 "mac.l %[ev_0], %[out_0], >>, %%acc2 \r\n"
1269 "mac.l %[ev_1], %[out_1], >>, %%acc3 \r\n"
1270 : [out_0
]"=&r"(out_0
), [out_1
]"=&r"(out_1
)
1271 : [chans_0
]"r"(chans_0
), [gv_0
]"r"(global_vol_0
),
1272 [ev_0
]"r"((int)this->r
.g
.echo_volume_0
),
1273 [chans_1
]"r"(chans_1
), [gv_1
]"r"(global_vol_1
),
1274 [ev_1
]"r"((int)this->r
.g
.echo_volume_1
)
1277 /* Feedback into echo buffer */
1278 if ( !(this->r
.g
.flags
& 0x20) )
1281 /* scale echo voices; saturate if overflow */
1282 "mac.l %[sh], %[e1] , %%acc1 \r\n"
1283 "mac.l %[sh], %[e0] , %%acc0 \r\n"
1284 /* add scaled output from FIR filter */
1285 "mac.l %[out_1], %[ef], <<, %%acc1 \r\n"
1286 "mac.l %[out_0], %[ef], <<, %%acc0 \r\n"
1287 /* swap and fetch feedback results - simply
1288 swap_odd_even32 mixed in between macs and
1289 movclrs to mitigate stall issues */
1290 "move.l #0x00ff00ff, %[sh] \r\n"
1291 "movclr.l %%acc1, %[e1] \r\n"
1293 "movclr.l %%acc0, %[e0] \r\n"
1294 "move.w %[e1], %[e0] \r\n"
1295 "and.l %[e0], %[sh] \r\n"
1296 "eor.l %[sh], %[e0] \r\n"
1297 "lsl.l #8, %[sh] \r\n"
1298 "lsr.l #8, %[e0] \r\n"
1299 "or.l %[sh], %[e0] \r\n"
1300 /* save final feedback into echo buffer */
1301 "move.l %[e0], (%[echo_ptr]) \r\n"
1302 : [e0
]"+d"(echo_0
), [e1
]"+d"(echo_1
)
1303 : [out_0
]"r"(out_0
), [out_1
]"r"(out_1
),
1304 [ef
]"r"((int)this->r
.g
.echo_feedback
),
1305 [echo_ptr
]"a"((int32_t *)echo_ptr
),
1310 /* Output final samples */
1312 /* fetch output saved in %acc2 and %acc3 */
1313 "movclr.l %%acc2, %[out_0] \r\n"
1314 "movclr.l %%acc3, %[out_1] \r\n"
1315 /* scale right by global_muting shift */
1316 "asr.l %[gm], %[out_0] \r\n"
1317 "asr.l %[gm], %[out_1] \r\n"
1318 : [out_0
]"=&d"(out_0
), [out_1
]"=&d"(out_1
)
1319 : [gm
]"d"(global_muting
)
1322 out_buf
[ 0] = out_0
;
1323 out_buf
[WAV_CHUNK_SIZE
] = out_1
;
1325 #elif defined (CPU_ARM)
1326 /* Read feedback from echo buffer */
1327 int echo_pos
= this->echo_pos
;
1328 uint8_t* const echo_ptr
= RAM
+
1329 ((this->r
.g
.echo_page
* 0x100 + echo_pos
) & 0xFFFF);
1331 if ( echo_pos
>= (this->r
.g
.echo_delay
& 15) * 0x800 )
1333 this->echo_pos
= echo_pos
;
1336 int32_t *fir_ptr
, *fir_coeff
;
1340 fb_0
= *(uint32_t *)echo_ptr
;
1342 /* Keep last 8 samples */
1344 "add %[fir_p], %[t_fir_p], #4 \r\n"
1345 "bic %[t_fir_p], %[fir_p], %[mask] \r\n"
1346 "str %[fb_0], [%[fir_p], #-4] \r\n"
1347 /* duplicate at +8 eliminates wrap checking below */
1348 "str %[fb_0], [%[fir_p], #28] \r\n"
1349 : [fir_p
]"=&r"(fir_ptr
), [t_fir_p
]"+r"(this->fir_ptr
)
1350 : [fb_0
]"r"(fb_0
), [mask
]"i"(~FIR_BUF_MASK
));
1352 fir_coeff
= (int32_t *)this->fir_coeff
;
1354 /* Fugly, but the best version found. */
1356 asm volatile ( /* L0R0 = acc0 */
1357 "ldmia %[fir_p]!, { r2-r5 } \r\n" /* L1R1-L4R4 = r2-r5 */
1358 "ldmia %[fir_c]!, { r0-r1 } \r\n" /* C0C1-C2C3 = r0-r1 */
1359 "pkhbt %[_0], %[acc0], r2, asl #16 \r\n" /* L0R0,L1R1->L0L1,R0R1 */
1360 "pkhtb r2, r2, %[acc0], asr #16 \r\n"
1361 "smuad %[acc0], %[_0], r0 \r\n" /* acc0=L0*C0+L1*C1 */
1362 "smuad %[acc1], r2, r0 \r\n" /* acc1=R0*C0+R1*C1 */
1363 "pkhbt %[_0], r3, r4, asl #16 \r\n" /* L2R2,L3R3->L2L3,R2R3 */
1364 "pkhtb r4, r4, r3, asr #16 \r\n"
1365 "smlad %[acc0], %[_0], r1, %[acc0] \r\n" /* acc0+=L2*C2+L3*C3 */
1366 "smlad %[acc1], r4, r1, %[acc1] \r\n" /* acc1+=R2*C2+R3*C3 */
1367 "ldmia %[fir_p], { r2-r4 } \r\n" /* L5R5-L7R7 = r2-r4 */
1368 "ldmia %[fir_c], { r0-r1 } \r\n" /* C4C5-C6C7 = r0-r1 */
1369 "pkhbt %[_0], r5, r2, asl #16 \r\n" /* L4R4,L5R5->L4L5,R4R5 */
1370 "pkhtb r2, r2, r5, asr #16 \r\n"
1371 "smlad %[acc0], %[_0], r0, %[acc0] \r\n" /* acc0+=L4*C4+L5*C5 */
1372 "smlad %[acc1], r2, r0, %[acc1] \r\n" /* acc1+=R4*C4+R5*C5 */
1373 "pkhbt %[_0], r3, r4, asl #16 \r\n" /* L6R6,L7R7->L6L7,R6R7 */
1374 "pkhtb r4, r4, r3, asr #16 \r\n"
1375 "smlad %[acc0], %[_0], r1, %[acc0] \r\n" /* acc0+=L6*C6+L7*C7 */
1376 "smlad %[acc1], r4, r1, %[acc1] \r\n" /* acc1+=R6*C6+R7*C7 */
1377 : [acc0
]"+r"(fb_0
), [acc1
]"=&r"(fb_1
), [_0
]"=&r"(_0
),
1378 [fir_p
]"+r"(fir_ptr
), [fir_c
]"+r"(fir_coeff
)
1380 : "r0", "r1", "r2", "r3", "r4", "r5");
1382 /* Generate output */
1386 "mul %[amp_0], %[gvol_0], %[chans_0] \r\n"
1387 "mul %[amp_1], %[gvol_1], %[chans_1] \r\n"
1388 : [amp_0
]"=&r"(amp_0
), [amp_1
]"=r"(amp_1
)
1389 : [gvol_0
]"r"(global_vol_0
), [gvol_1
]"r"(global_vol_1
),
1390 [chans_0
]"r"(chans_0
), [chans_1
]"r"(chans_1
));
1392 "mla %[amp_0], %[fb_0], %[ev_0], %[amp_0] \r\n"
1393 "mla %[amp_1], %[fb_1], %[ev_1], %[amp_1] \r\n"
1394 : [amp_0
]"+r"(amp_0
), [amp_1
]"+r"(amp_1
)
1395 : [fb_0
]"r"(fb_0
), [fb_1
]"r"(fb_1
),
1396 [ev_0
]"r"((int)this->r
.g
.echo_volume_0
),
1397 [ev_1
]"r"((int)this->r
.g
.echo_volume_1
));
1399 out_buf
[ 0] = amp_0
>> global_muting
;
1400 out_buf
[WAV_CHUNK_SIZE
] = amp_1
>> global_muting
;
1403 if ( !(this->r
.g
.flags
& 0x20) )
1405 /* Feedback into echo buffer */
1409 "mov %[e0], %[echo_0], asl #7 \r\n"
1410 "mov %[e1], %[echo_1], asl #7 \r\n"
1411 "mla %[e0], %[fb_0], %[efb], %[e0] \r\n"
1412 "mla %[e1], %[fb_1], %[efb], %[e1] \r\n"
1413 : [e0
]"=&r"(e0
), [e1
]"=&r"(e1
)
1414 : [echo_0
]"r"(echo_0
), [echo_1
]"r"(echo_1
),
1415 [fb_0
]"r"(fb_0
), [fb_1
]"r"(fb_1
),
1416 [efb
]"r"((int)this->r
.g
.echo_feedback
));
1418 "ssat %[e0], #16, %[e0], asr #14 \r\n"
1419 "ssat %[e1], #16, %[e1], asr #14 \r\n"
1420 "pkhbt %[e0], %[e0], %[e1], lsl #16 \r\n"
1421 "str %[e0], [%[echo_p]] \r\n"
1422 : [e0
]"+r"(e0
), [e1
]"+r"(e1
)
1423 : [echo_p
]"r"(echo_ptr
));
1425 #else /* ARM_ARCH < 6 */
1426 int fb_0
= GET_LE16SA( echo_ptr
);
1427 int fb_1
= GET_LE16SA( echo_ptr
+ 2 );
1428 int32_t *fir_ptr
, *fir_coeff
;
1430 /* Keep last 8 samples */
1434 "add %[fir_p], %[t_fir_p], #8 \r\n"
1435 "bic %[t_fir_p], %[fir_p], %[mask] \r\n"
1436 "str %[fb_0], [%[fir_p], #-8] \r\n"
1437 "str %[fb_1], [%[fir_p], #-4] \r\n"
1438 /* duplicate at +8 eliminates wrap checking below */
1439 "str %[fb_0], [%[fir_p], #56] \r\n"
1440 "str %[fb_1], [%[fir_p], #60] \r\n"
1441 : [fir_p
]"=&r"(fir_ptr
), [t_fir_p
]"+r"(this->fir_ptr
)
1442 : [fb_0
]"r"(fb_0
), [fb_1
]"r"(fb_1
), [mask
]"i"(~FIR_BUF_MASK
));
1444 fir_coeff
= this->fir_coeff
;
1447 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1448 "ldmia %[fir_p]!, { r4-r5 } \r\n"
1449 "mul %[fb_0], r0, %[fb_0] \r\n"
1450 "mul %[fb_1], r0, %[fb_1] \r\n"
1451 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1452 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1453 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1454 "ldmia %[fir_p]!, { r2-r5 } \r\n"
1455 "mla %[fb_0], r2, r0, %[fb_0] \r\n"
1456 "mla %[fb_1], r3, r0, %[fb_1] \r\n"
1457 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1458 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1459 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1460 "ldmia %[fir_p]!, { r2-r5 } \r\n"
1461 "mla %[fb_0], r2, r0, %[fb_0] \r\n"
1462 "mla %[fb_1], r3, r0, %[fb_1] \r\n"
1463 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1464 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1465 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1466 "ldmia %[fir_p]!, { r2-r5 } \r\n"
1467 "mla %[fb_0], r2, r0, %[fb_0] \r\n"
1468 "mla %[fb_1], r3, r0, %[fb_1] \r\n"
1469 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1470 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1471 : [fb_0
]"+r"(fb_0
), [fb_1
]"+r"(fb_1
),
1472 [fir_p
]"+r"(fir_ptr
), [fir_c
]"+r"(fir_coeff
)
1474 : "r0", "r1", "r2", "r3", "r4", "r5");
1476 /* Generate output */
1477 int amp_0
= (chans_0
* global_vol_0
+ fb_0
* this->r
.g
.echo_volume_0
)
1479 int amp_1
= (chans_1
* global_vol_1
+ fb_1
* this->r
.g
.echo_volume_1
)
1482 out_buf
[ 0] = amp_0
;
1483 out_buf
[WAV_CHUNK_SIZE
] = amp_1
;
1486 if ( !(this->r
.g
.flags
& 0x20) )
1488 /* Feedback into echo buffer */
1489 int e0
= (echo_0
>> 7) + ((fb_0
* this->r
.g
.echo_feedback
) >> 14);
1490 int e1
= (echo_1
>> 7) + ((fb_1
* this->r
.g
.echo_feedback
) >> 14);
1492 SET_LE16A( echo_ptr
, e0
);
1494 SET_LE16A( echo_ptr
+ 2, e1
);
1496 #endif /* ARM_ARCH */
1497 #else /* Unoptimized CPU */
1498 /* Read feedback from echo buffer */
1499 int echo_pos
= this->echo_pos
;
1500 uint8_t* const echo_ptr
= RAM
+
1501 ((this->r
.g
.echo_page
* 0x100 + echo_pos
) & 0xFFFF);
1503 if ( echo_pos
>= (this->r
.g
.echo_delay
& 15) * 0x800 )
1505 this->echo_pos
= echo_pos
;
1506 int fb_0
= GET_LE16SA( echo_ptr
);
1507 int fb_1
= GET_LE16SA( echo_ptr
+ 2 );
1509 /* Keep last 8 samples */
1510 int (* const fir_ptr
) [2] = this->fir_buf
+ this->fir_pos
;
1511 this->fir_pos
= (this->fir_pos
+ 1) & (FIR_BUF_HALF
- 1);
1512 fir_ptr
[ 0] [0] = fb_0
;
1513 fir_ptr
[ 0] [1] = fb_1
;
1514 /* duplicate at +8 eliminates wrap checking below */
1515 fir_ptr
[FIR_BUF_HALF
] [0] = fb_0
;
1516 fir_ptr
[FIR_BUF_HALF
] [1] = fb_1
;
1519 fb_0
*= this->fir_coeff
[0];
1520 fb_1
*= this->fir_coeff
[0];
1523 fb_0 += fir_ptr [i] [0] * this->fir_coeff [i];\
1524 fb_1 += fir_ptr [i] [1] * this->fir_coeff [i];
1534 /* Generate output */
1535 int amp_0
= (chans_0
* global_vol_0
+ fb_0
* this->r
.g
.echo_volume_0
)
1537 int amp_1
= (chans_1
* global_vol_1
+ fb_1
* this->r
.g
.echo_volume_1
)
1539 out_buf
[ 0] = amp_0
;
1540 out_buf
[WAV_CHUNK_SIZE
] = amp_1
;
1543 if ( !(this->r
.g
.flags
& 0x20) )
1545 /* Feedback into echo buffer */
1546 int e0
= (echo_0
>> 7) + ((fb_0
* this->r
.g
.echo_feedback
) >> 14);
1547 int e1
= (echo_1
>> 7) + ((fb_1
* this->r
.g
.echo_feedback
) >> 14);
1549 SET_LE16A( echo_ptr
, e0
);
1551 SET_LE16A( echo_ptr
+ 2, e1
);
1554 #else /* SPCNOECHO == 1*/
1555 /* Generate output */
1556 int amp_0
= (chans_0
* global_vol_0
) >> global_muting
;
1557 int amp_1
= (chans_1
* global_vol_1
) >> global_muting
;
1558 out_buf
[ 0] = amp_0
;
1559 out_buf
[WAV_CHUNK_SIZE
] = amp_1
;
1561 #endif /* SPCNOECHO */
/* DSP_reset: put the DSP emulation state in `this` back to its power-on
 * defaults.
 *
 * Visible steps: clear the key-down mask and the noise counter, set the
 * flags register to 0xE0, clear key_ons, zero every voice's state and set
 * each voice's envelope mode to state_release, set every wave_entry
 * start_addr to -1, and zero the echo FIR history buffer.
 *
 * this: DSP instance to reset; modified in place. Nothing is returned.
 *
 * NOTE(review): this listing omits some original lines (braces, the
 * declaration of `i`, some preprocessor lines), so the comments below
 * describe only the statements that are visible here. */
1570 void DSP_reset( struct Spc_Dsp
* this )
1572 this->keys_down
= 0;
1574 this->noise_count
= 0;
1577 this->r
.g
.flags
= 0xE0; /* reset, mute, echo off */
1578 this->r
.g
.key_ons
= 0;
/* Zero the whole per-voice state array in one call. */
1580 ci
->memset( this->voice_state
, 0, sizeof this->voice_state
);
/* Walk the voices from index VOICE_COUNT-1 down to 0 and set each one's
   envelope mode to state_release. */
1583 for ( i
= VOICE_COUNT
; --i
>= 0; )
1585 struct voice_t
* v
= this->voice_state
+ i
;
1586 v
->env_mode
= state_release
;
/* Set start_addr to -1 in all 256 wave_entry slots.
   NOTE(review): presumably -1 marks a slot as not yet cached, and this loop
   is presumably guarded by a preprocessor conditional not visible here -
   confirm both. */
1592 for ( i
= 0; i
< 256; i
++ )
1593 this->wave_entry
[i
].start_addr
= -1;
/* Reset the echo FIR history. The ColdFire and ARM builds use the
   file-scope fir_buf array: fir_ptr is pointed back at its start and the
   array is zeroed. ColdFire additionally points last_fir_ptr at
   fir_buf[7]. */
1596 #if defined(CPU_COLDFIRE)
1597 this->fir_ptr
= fir_buf
;
1598 this->last_fir_ptr
= &fir_buf
[7];
1599 ci
->memset( fir_buf
, 0, sizeof fir_buf
);
1600 #elif defined (CPU_ARM)
1601 this->fir_ptr
= fir_buf
;
1602 ci
->memset( fir_buf
, 0, sizeof fir_buf
);
/* This memset zeroes the fir_buf member of the DSP struct rather than the
   file-scope array.
   NOTE(review): presumably this is the fallback branch for other CPUs; the
   directive that selects it is not visible here - confirm. */
1605 ci
->memset( this->fir_buf
, 0, sizeof this->fir_buf
);
/* Layout sanity checks: the offset of unused9[2] within struct globals_t
   and the size of the per-voice register view must both equal
   REGISTER_COUNT. */
1608 assert( offsetof (struct globals_t
,unused9
[2]) == REGISTER_COUNT
);
1609 assert( sizeof (this->r
.voice
) == REGISTER_COUNT
);