1 /***************************************************************************
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 * Copyright (C) 2007-2008 Michael Sevakis (jhMikeS)
11 * Copyright (C) 2006-2007 Adam Gashlin (hcs)
12 * Copyright (C) 2004-2007 Shay Green (blargg)
13 * Copyright (C) 2002 Brad Martin
15 * All files in this archive are subject to the GNU General Public License.
16 * See the file COPYING in the source tree root for full license agreement.
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
21 ****************************************************************************/
23 /* The DSP portion (awe!) */
26 #include "spc_codec.h"
27 #include "spc_profiler.h"
/* Statically allocated working buffers for the DSP emulation.
   NOTE(review): this listing shows no #endif for the conditional below and
   has a stray listing number in front of every statement - lines appear to
   have been lost in extraction; confirm against the pristine file. */
29 #if defined(CPU_COLDFIRE) || defined (CPU_ARM)
/* Echo FIR history used by the ColdFire and ARM code paths: DSP_reset()
   points this->fir_ptr at it and zero-fills it. */
30 int32_t fir_buf
[FIR_BUF_CNT
]
31 __attribute__ ((aligned (FIR_BUF_ALIGN
*1))) IBSS_ATTR
;
34 /* a little extra for samples that go past end */
/* Destination for pre-decoded BRR samples: decode_brr() writes a waveform's
   samples starting at BRRcache + start_addr * 2. */
35 int16_t BRRcache
[BRR_CACHE_SIZE
] CACHEALIGN_ATTR
;
38 void DSP_write( struct Spc_Dsp
* this, int i
, int data
)
40 assert( (unsigned) i
< REGISTER_COUNT
);
42 this->r
.reg
[i
] = data
;
45 if ( low
< 2 ) /* voice volumes */
47 int left
= *(int8_t const*) &this->r
.reg
[i
& ~1];
48 int right
= *(int8_t const*) &this->r
.reg
[i
| 1];
49 struct voice_t
* v
= this->voice_state
+ high
;
51 v
->volume
[1] = right
;
53 else if ( low
== 0x0F ) /* fir coefficients */
55 this->fir_coeff
[7 - high
] = (int8_t) data
; /* sign-extend */
/* Saturate n to the signed 16-bit range and yield the clamped value:
     if ( n < -32768 ) out = -32768;
     if ( n >  32767 ) out =  32767;
   n must be a modifiable int lvalue: it is updated in place and evaluated
   more than once, so never pass an expression with side effects.
   When n does not fit in 16 bits, (n >> 31) is 0 for positive and -1 for
   negative values (arithmetic right shift assumed), which turns 0x7FFF into
   0x7FFF or -0x8000 respectively.
   Written as a GCC statement expression so that the macro can be used as a
   value, e.g. delta = CLAMP16( delta ); */
#define CLAMP16( n ) \
({ \
    if ( (int16_t) (n) != (n) ) \
        (n) = 0x7FFF ^ ((n) >> 31); \
    (n); \
})
/* decode_brr: pre-decode the BRR waveform that starts at RAM + start_addr and
   record the result in the cache entry selected by raw_voice->waveform.

   this       - DSP state; supplies wave_page, wave_entry [],
                wave_entry_old [] and oldsize
   start_addr - offset of the first BRR block within RAM; also used (times 2)
                as the offset of the decoded samples within BRRcache
   voice      - voice being set up (not referenced by the lines visible in
                this listing - TODO confirm how it is used)
   raw_voice  - voice registers; only ->waveform is read in the visible lines

   The visible code first searches wave_entry_old [] for an entry with the
   same start_addr; otherwise it decodes into BRRcache, fills in
   wave_entry->samples, ->end and ->loop, and appends the entry to
   wave_entry_old [].

   The first declaration below is a prototype whose only purpose is to attach
   ICODE_ATTR to the function.

   NOTE(review): braces, several statements and at least one comment
   terminator are missing from this listing, and every statement has a stray
   listing number in front of it; confirm against the pristine file before
   changing any code here. */
69 static void decode_brr( struct Spc_Dsp
* this, unsigned start_addr
,
70 struct voice_t
* voice
,
71 struct raw_voice_t
const* const raw_voice
) ICODE_ATTR
;
72 static void decode_brr( struct Spc_Dsp
* this, unsigned start_addr
,
73 struct voice_t
* voice
,
74 struct raw_voice_t
const* const raw_voice
)
76 /* setup same variables as where decode_brr() is called from */
79 struct src_dir
const* const sd
=
80 (struct src_dir
*) &RAM
[this->r
.g
.wave_page
* 0x100];
81 struct cache_entry_t
* const wave_entry
=
82 &this->wave_entry
[raw_voice
->waveform
];
84 /* the following block can be put in place of the call to
88 DEBUGF( "decode at %08x (wave #%d)\n",
89 start_addr
, raw_voice
->waveform
);
93 for ( i
= 0; i
< this->oldsize
; i
++ )
95 struct cache_entry_t
* e
= &this->wave_entry_old
[i
];
96 if ( e
->start_addr
== start_addr
)
98 DEBUGF( "found in wave_entry_old (oldsize=%d)\n",
105 wave_entry
->start_addr
= start_addr
;
107 uint8_t const* const loop_ptr
=
108 RAM
+ GET_LE16A( sd
[raw_voice
->waveform
].loop
);
109 short* loop_start
= 0;
111 short* out
= BRRcache
+ start_addr
* 2;
112 wave_entry
->samples
= out
;
117 uint8_t const* addr
= RAM
+ start_addr
;
121 if ( addr
== loop_ptr
)
124 DEBUGF( "loop at %08lx (wave #%d)\n",
125 (unsigned long)(addr
- RAM
), raw_voice
->waveform
);
129 block_header
= *addr
;
132 int const filter
= (block_header
& 0x0C) - 0x08;
135 (invalid scaling gives -4096 for neg nybble, 0 for pos) */
136 static unsigned char const right_shifts
[16] = {
137 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 29, 29, 29,
139 static unsigned char const left_shifts
[16] = {
140 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11
142 int const scale
= block_header
>> 4;
143 int const right_shift
= right_shifts
[scale
];
144 int const left_shift
= left_shifts
[scale
];
146 /* output position */
147 out
+= BRR_BLOCK_SIZE
;
148 int offset
= -BRR_BLOCK_SIZE
<< 2;
150 do /* decode and filter 16 samples */
152 /* Get nybble, sign-extend, then scale
153 get byte, select which nybble, sign-extend, then shift based
154 on scaling. also handles invalid scaling values. */
155 int delta
= (int) (int8_t) (addr
[offset
>> 3] << (offset
& 4))
156 >> right_shift
<< left_shift
;
158 out
[offset
>> 2] = smp2
;
160 if ( filter
== 0 ) /* mode 0x08 (30-90% of the time) */
166 delta
+= (-smp1
- (smp1
>> 1)) >> 5;
170 if ( filter
== -4 ) /* mode 0x04 */
173 delta
+= (-smp1
) >> 5;
175 else if ( filter
> -4 ) /* mode 0x0C */
178 delta
+= (smp2
+ (smp2
>> 1)) >> 4;
180 delta
+= (-smp1
* 13) >> 7;
185 delta
= CLAMP16( delta
);
186 smp1
= (int16_t) (delta
* 2); /* sign-extend */
188 while ( (offset
+= 4) != 0 );
190 /* if next block has end flag set, this block ends early */
192 if ( (block_header
& 3) != 3 && (*addr
& 3) == 1 )
194 /* skip last 9 samples */
199 while ( !(block_header
& 1) && addr
< RAM
+ 0x10000 );
/* the end position is the sample count shifted left by 12; DSP_run_()
   compares voice->position against it and uses position >> 12 as the
   sample index */
205 wave_entry
->end
= (out
- 1 - wave_entry
->samples
) << 12;
207 wave_entry
->loop
= 0;
208 if ( (block_header
& 2) )
212 int loop
= out
- loop_start
;
213 wave_entry
->loop
= loop
;
214 wave_entry
->end
+= 0x3000;
215 out
[2] = loop_start
[2];
216 out
[3] = loop_start
[3];
217 out
[4] = loop_start
[4];
221 DEBUGF( "loop point outside initial wave\n" );
225 DEBUGF( "end at %08lx (wave #%d)\n",
226 (unsigned long)(addr
- RAM
), raw_voice
->waveform
);
/* remember the entry so that the wave_entry_old search at the top can find
   it again. NOTE(review): no bound check on oldsize is visible in this
   listing - confirm the capacity of wave_entry_old []. */
229 this->wave_entry_old
[this->oldsize
++] = *wave_entry
;
/* key_on: process a voice's pending key-on.

   this         - DSP state (keys_down, wave_entry [])
   voice        - per-voice emulation state to initialise
   sd           - source directory; sd [raw_voice->waveform].start gives the
                  waveform's start address
   raw_voice    - the voice's registers; only ->waveform is read here
   key_on_delay - remaining delay, stored into voice->key_on_delay
   vbit         - this voice's bit, OR-ed into this->keys_down

   The set-up follows the key_on_delay == 0 test (the extent of that
   conditional is not visible in this listing): the voice is marked down, its
   envelope enters state_attack with env_timer = 0x7800, and its sample
   source is prepared.

   NOTE(review): two alternative set-ups are visible below - decoding
   directly from RAM (voice->addr, voice->samples [] as an array) and using
   the pre-decoded cache (decode_brr(), voice->samples as a pointer).
   The lines that select between them, the braces and the end of one comment
   are missing from this listing; presumably a compile-time switch - confirm
   against the pristine file.

   The first declaration below is a prototype whose only purpose is to attach
   ICODE_ATTR to the function. */
235 static void key_on(struct Spc_Dsp
* const this, struct voice_t
* const voice
,
236 struct src_dir
const* const sd
,
237 struct raw_voice_t
const* const raw_voice
,
238 const int key_on_delay
, const int vbit
) ICODE_ATTR
;
239 static void key_on(struct Spc_Dsp
* const this, struct voice_t
* const voice
,
240 struct src_dir
const* const sd
,
241 struct raw_voice_t
const* const raw_voice
,
242 const int key_on_delay
, const int vbit
) {
245 int const env_rate_init
= 0x7800;
246 voice
->key_on_delay
= key_on_delay
;
247 if ( key_on_delay
== 0 )
249 this->keys_down
|= vbit
;
251 voice
->env_mode
= state_attack
;
252 voice
->env_timer
= env_rate_init
; /* TODO: inaccurate? */
253 unsigned start_addr
= GET_LE16A(sd
[raw_voice
->waveform
].start
);
256 voice
->addr
= RAM
+ start_addr
;
257 /* BRR filter uses previous samples */
258 voice
->samples
[BRR_BLOCK_SIZE
+ 1] = 0;
259 voice
->samples
[BRR_BLOCK_SIZE
+ 2] = 0;
260 /* decode three samples immediately */
261 voice
->position
= (BRR_BLOCK_SIZE
+ 3) * 0x1000 - 1;
262 voice
->block_header
= 0; /* "previous" BRR header */
266 voice
->position
= 3 * 0x1000 - 1;
267 struct cache_entry_t
* const wave_entry
=
268 &this->wave_entry
[raw_voice
->waveform
];
270 /* predecode BRR if not already */
271 if ( wave_entry
->start_addr
!= start_addr
)
273 /* the following line can be replaced by the indicated block
275 decode_brr( this, start_addr
, voice
, raw_voice
);
278 voice
->samples
= wave_entry
->samples
;
279 voice
->wave_end
= wave_entry
->end
;
280 voice
->wave_loop
= wave_entry
->loop
;
/* DSP_run_: run the DSP emulation and write mixed output samples.

   this    - DSP state (registers in this->r, per-voice state in
             this->voice_state, echo/FIR state, noise generator)
   count   - NOTE(review): presumably the number of output sample pairs to
             generate; the loop control that consumes it is not visible in
             this listing - confirm
   out_buf - receives the mixed output; one channel is stored at out_buf [0]
             and the other at out_buf [WAV_CHUNK_SIZE]

   Visible structure: the key-on / key-off registers are examined once up
   front; then, for each pair of output samples, the noise generator is
   stepped and every voice goes through delayed key-on handling, envelope
   update, BRR block decode, interpolation (gaussian, or two-point linear in
   the SPCNOINTERP variants) and per-voice volume scaling. The echo buffer is
   then read, FIR-filtered, mixed with the voices using the global volumes
   and optionally fed back, with separate ColdFire, ARM and generic variants.

   NOTE(review): braces, many statements, several preprocessor lines and some
   comment delimiters are missing from this listing, and every statement has
   a stray listing number in front of it; the code below is kept exactly as
   found. Confirm against the pristine file before changing any code here. */
286 void DSP_run_( struct Spc_Dsp
* this, long count
, int32_t* out_buf
)
290 uint8_t* const ram_
= ram
.ram
;
300 /* Here we check for keys on/off. Docs say that successive writes
301 to KON/KOF must be separated by at least 2 Ts periods or risk
302 being neglected. Therefore DSP only looks at these during an
303 update, and not at the time of the write. Only need to do this
304 once however, since the regs haven't changed over the whole
305 period we need to catch up with. */
308 int key_ons
= this->r
.g
.key_ons
;
309 int key_offs
= this->r
.g
.key_offs
;
310 /* keying on a voice resets that bit in ENDX */
311 this->r
.g
.wave_ended
&= ~key_ons
;
312 /* key_off bits prevent key_on from being acknowledged */
313 this->r
.g
.key_ons
= key_ons
& key_offs
;
315 /* process key events outside loop, since they won't re-occur */
316 struct voice_t
* voice
= this->voice_state
+ 8;
321 if ( key_offs
& vbit
)
323 voice
->env_mode
= state_release
;
324 voice
->key_on_delay
= 0;
326 else if ( key_ons
& vbit
)
328 voice
->key_on_delay
= 8;
331 while ( (vbit
>>= 1) != 0 );
334 struct src_dir
const* const sd
=
335 (struct src_dir
*) &RAM
[this->r
.g
.wave_page
* 0x100];
337 #ifdef ROCKBOX_BIG_ENDIAN
338 /* Convert endiannesses before entering loops - these
340 const uint32_t rates
[VOICE_COUNT
] =
342 GET_LE16A( this->r
.voice
[0].rate
) & 0x3FFF,
343 GET_LE16A( this->r
.voice
[1].rate
) & 0x3FFF,
344 GET_LE16A( this->r
.voice
[2].rate
) & 0x3FFF,
345 GET_LE16A( this->r
.voice
[3].rate
) & 0x3FFF,
346 GET_LE16A( this->r
.voice
[4].rate
) & 0x3FFF,
347 GET_LE16A( this->r
.voice
[5].rate
) & 0x3FFF,
348 GET_LE16A( this->r
.voice
[6].rate
) & 0x3FFF,
349 GET_LE16A( this->r
.voice
[7].rate
) & 0x3FFF,
351 #define VOICE_RATE(x) *(x)
352 #define IF_RBE(...) __VA_ARGS__
354 /* Initialize mask register with the buffer address mask */
355 asm volatile ("move.l %[m], %%mask" : : [m
]"i"(FIR_BUF_MASK
));
356 const int echo_wrap
= (this->r
.g
.echo_delay
& 15) * 0x800;
357 const int echo_start
= this->r
.g
.echo_page
* 0x100;
358 #endif /* CPU_COLDFIRE */
360 #define VOICE_RATE(x) (INT16A(raw_voice->rate) & 0x3FFF)
362 #endif /* ROCKBOX_BIG_ENDIAN */
/* bit set = this voice must use the exact, clamped interpolation path:
   either it produces noise, or (pitch_mods >> 1) the next voice uses its
   output for pitch modulation */
365 int const slow_gaussian
= (this->r
.g
.pitch_mods
>> 1) |
366 this->r
.g
.noise_enables
;
368 /* (g.flags & 0x40) ? 30 : 14 */
369 int const global_muting
= ((this->r
.g
.flags
& 0x40) >> 2) + 14 - 8;
370 int const global_vol_0
= this->r
.g
.volume_0
;
371 int const global_vol_1
= this->r
.g
.volume_1
;
373 /* each rate divides exactly into 0x7800 without remainder */
374 int const env_rate_init
= 0x7800;
375 static unsigned short const env_rates
[0x20] ICONST_ATTR
=
377 0x0000, 0x000F, 0x0014, 0x0018, 0x001E, 0x0028, 0x0030, 0x003C,
378 0x0050, 0x0060, 0x0078, 0x00A0, 0x00C0, 0x00F0, 0x0140, 0x0180,
379 0x01E0, 0x0280, 0x0300, 0x03C0, 0x0500, 0x0600, 0x0780, 0x0A00,
380 0x0C00, 0x0F00, 0x1400, 0x1800, 0x1E00, 0x2800, 0x3C00, 0x7800
383 do /* one pair of output samples per iteration */
386 if ( this->r
.g
.noise_enables
)
388 if ( (this->noise_count
-=
389 env_rates
[this->r
.g
.flags
& 0x1F]) <= 0 )
391 this->noise_count
= env_rate_init
;
392 int feedback
= (this->noise
<< 13) ^ (this->noise
<< 14);
393 this->noise
= (feedback
& 0x8000) ^ (this->noise
>> 1 & ~1);
401 long prev_outx
= 0; /* TODO: correct value for first channel? */
404 /* TODO: put raw_voice pointer in voice_t? */
405 struct raw_voice_t
* raw_voice
= this->r
.voice
;
406 struct voice_t
* voice
= this->voice_state
;
408 IF_RBE( const uint32_t* vr
= rates
; )
409 for ( ; vbit
< 0x100; vbit
<<= 1, ++voice
, ++raw_voice
IF_RBE( , ++vr
) )
411 /* pregen involves checking keyon, etc */
413 ENTER_TIMER(dsp_pregen
);
416 /* Key on events are delayed */
417 int key_on_delay
= voice
->key_on_delay
;
419 if ( --key_on_delay
>= 0 ) /* <1% of the time */
421 key_on(this,voice
,sd
,raw_voice
,key_on_delay
,vbit
);
424 if ( !(this->keys_down
& vbit
) ) /* Silent channel */
435 int const ENV_RANGE
= 0x800;
436 int env_mode
= voice
->env_mode
;
437 int adsr0
= raw_voice
->adsr
[0];
439 if ( env_mode
!= state_release
) /* 99% of the time */
441 env_timer
= voice
->env_timer
;
442 if ( adsr0
& 0x80 ) /* 79% of the time */
444 int adsr1
= raw_voice
->adsr
[1];
445 if ( env_mode
== state_sustain
) /* 74% of the time */
447 if ( (env_timer
-= env_rates
[adsr1
& 0x1F]) > 0 )
448 goto write_env_timer
;
450 int envx
= voice
->envx
;
451 envx
--; /* envx *= 255 / 256 */
454 /* TODO: should this be 8? */
455 raw_voice
->envx
= envx
>> 4;
458 else if ( env_mode
< 0 ) /* 25% state_decay */
460 int envx
= voice
->envx
;
462 env_rates
[(adsr0
>> 3 & 0x0E) + 0x10]) <= 0 )
464 envx
--; /* envx *= 255 / 256 */
467 /* TODO: should this be 8? */
468 raw_voice
->envx
= envx
>> 4;
469 env_timer
= env_rate_init
;
472 int sustain_level
= adsr1
>> 5;
473 if ( envx
<= (sustain_level
+ 1) * 0x100 )
474 voice
->env_mode
= state_sustain
;
476 goto write_env_timer
;
478 else /* state_attack */
480 int t
= adsr0
& 0x0F;
481 if ( (env_timer
-= env_rates
[t
* 2 + 1]) > 0 )
482 goto write_env_timer
;
484 int envx
= voice
->envx
;
486 int const step
= ENV_RANGE
/ 64;
489 envx
+= ENV_RANGE
/ 2 - step
;
491 if ( envx
>= ENV_RANGE
)
493 envx
= ENV_RANGE
- 1;
494 voice
->env_mode
= state_decay
;
497 /* TODO: should this be 8? */
498 raw_voice
->envx
= envx
>> 4;
504 int t
= raw_voice
->gain
;
508 voice
->envx
= t
<< 4;
513 if ( (env_timer
-= env_rates
[t
& 0x1F]) > 0 )
514 goto write_env_timer
;
516 int envx
= voice
->envx
;
518 if ( mode
<= 5 ) /* decay */
520 int step
= ENV_RANGE
/ 64;
521 if ( mode
== 5 ) /* exponential */
523 envx
--; /* envx *= 255 / 256 */
526 if ( (envx
-= step
) < 0 )
529 if ( voice
->env_mode
== state_attack
)
530 voice
->env_mode
= state_decay
;
535 int const step
= ENV_RANGE
/ 64;
538 envx
>= ENV_RANGE
* 3 / 4 + step
)
539 envx
+= ENV_RANGE
/ 256 - step
;
541 if ( envx
>= ENV_RANGE
)
542 envx
= ENV_RANGE
- 1;
545 /* TODO: should this be 8? */
546 raw_voice
->envx
= envx
>> 4;
551 else /* state_release */
553 int envx
= voice
->envx
;
554 if ( (envx
-= ENV_RANGE
/ 256) > 0 )
557 raw_voice
->envx
= envx
>> 8;
562 /* bit was set, so this clears it */
563 this->keys_down
^= vbit
;
569 env_timer
= env_rate_init
;
571 voice
->env_timer
= env_timer
;
575 EXIT_TIMER(dsp_pregen
);
577 ENTER_TIMER(dsp_gen
);
580 /* Decode BRR block */
581 if ( voice
->position
>= BRR_BLOCK_SIZE
* 0x1000 )
583 voice
->position
-= BRR_BLOCK_SIZE
* 0x1000;
585 uint8_t const* addr
= voice
->addr
;
586 if ( addr
>= RAM
+ 0x10000 )
589 /* action based on previous block's header */
590 if ( voice
->block_header
& 1 )
592 addr
= RAM
+ GET_LE16A( sd
[raw_voice
->waveform
].loop
);
593 this->r
.g
.wave_ended
|= vbit
;
594 if ( !(voice
->block_header
& 2) ) /* 1% of the time */
596 /* first block was end block;
597 don't play anything (verified) */
598 /* bit was set, so this clears it */
599 this->keys_down
^= vbit
;
601 /* since voice->envx is 0,
602 samples and position don't matter */
610 int const block_header
= *addr
;
613 voice
->block_header
= block_header
;
614 int const filter
= (block_header
& 0x0C) - 0x08;
616 /* scaling (invalid scaling gives -4096 for neg nybble,
618 static unsigned char const right_shifts
[16] = {
619 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 29, 29, 29,
621 static unsigned char const left_shifts
[16] = {
622 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11
624 int const scale
= block_header
>> 4;
625 int const right_shift
= right_shifts
[scale
];
626 int const left_shift
= left_shifts
[scale
];
628 /* previous samples */
629 int smp2
= voice
->samples
[BRR_BLOCK_SIZE
+ 1];
630 int smp1
= voice
->samples
[BRR_BLOCK_SIZE
+ 2];
631 voice
->samples
[0] = voice
->samples
[BRR_BLOCK_SIZE
];
633 /* output position */
634 short* out
= voice
->samples
+ (1 + BRR_BLOCK_SIZE
);
635 int offset
= -BRR_BLOCK_SIZE
<< 2;
637 /* if next block has end flag set,
638 this block ends early (verified) */
639 if ( (block_header
& 3) != 3 && (*addr
& 3) == 1 )
641 /* arrange for last 9 samples to be skipped */
644 voice
->samples
[skip
] = voice
->samples
[BRR_BLOCK_SIZE
];
645 voice
->position
+= skip
* 0x1000;
646 offset
= (-BRR_BLOCK_SIZE
+ (skip
& ~1)) << 2;
648 /* force sample to end on next decode */
649 voice
->block_header
= 1;
652 do /* decode and filter 16 samples */
654 /* Get nybble, sign-extend, then scale
655 get byte, select which nybble, sign-extend, then shift
656 based on scaling. also handles invalid scaling values.*/
657 int delta
= (int) (int8_t) (addr
[offset
>> 3] <<
658 (offset
& 4)) >> right_shift
<< left_shift
;
660 out
[offset
>> 2] = smp2
;
662 if ( filter
== 0 ) /* mode 0x08 (30-90% of the time) */
668 delta
+= (-smp1
- (smp1
>> 1)) >> 5;
672 if ( filter
== -4 ) /* mode 0x04 */
675 delta
+= (-smp1
) >> 5;
677 else if ( filter
> -4 ) /* mode 0x0C */
680 delta
+= (smp2
+ (smp2
>> 1)) >> 4;
682 delta
+= (-smp1
* 13) >> 7;
687 delta
= CLAMP16( delta
);
688 smp1
= (int16_t) (delta
* 2); /* sign-extend */
690 while ( (offset
+= 4) != 0 );
699 /* Get rate (with possible modulation) */
700 int rate
= VOICE_RATE(vr
);
701 if ( this->r
.g
.pitch_mods
& vbit
)
702 rate
= (rate
* (prev_outx
+ 32768)) >> 15;
705 /* Interleved gauss table (to improve cache coherency). */
706 /* gauss [i * 2 + j] = normal_gauss [(1 - j) * 256 + i] */
707 static short const gauss
[512] =
709 370,1305, 366,1305, 362,1304, 358,1304, 354,1304, 351,1304, 347,1304, 343,1303,
710 339,1303, 336,1303, 332,1302, 328,1302, 325,1301, 321,1300, 318,1300, 314,1299,
711 311,1298, 307,1297, 304,1297, 300,1296, 297,1295, 293,1294, 290,1293, 286,1292,
712 283,1291, 280,1290, 276,1288, 273,1287, 270,1286, 267,1284, 263,1283, 260,1282,
713 257,1280, 254,1279, 251,1277, 248,1275, 245,1274, 242,1272, 239,1270, 236,1269,
714 233,1267, 230,1265, 227,1263, 224,1261, 221,1259, 218,1257, 215,1255, 212,1253,
715 210,1251, 207,1248, 204,1246, 201,1244, 199,1241, 196,1239, 193,1237, 191,1234,
716 188,1232, 186,1229, 183,1227, 180,1224, 178,1221, 175,1219, 173,1216, 171,1213,
717 168,1210, 166,1207, 163,1205, 161,1202, 159,1199, 156,1196, 154,1193, 152,1190,
718 150,1186, 147,1183, 145,1180, 143,1177, 141,1174, 139,1170, 137,1167, 134,1164,
719 132,1160, 130,1157, 128,1153, 126,1150, 124,1146, 122,1143, 120,1139, 118,1136,
720 117,1132, 115,1128, 113,1125, 111,1121, 109,1117, 107,1113, 106,1109, 104,1106,
721 102,1102, 100,1098, 99,1094, 97,1090, 95,1086, 94,1082, 92,1078, 90,1074,
722 89,1070, 87,1066, 86,1061, 84,1057, 83,1053, 81,1049, 80,1045, 78,1040,
723 77,1036, 76,1032, 74,1027, 73,1023, 71,1019, 70,1014, 69,1010, 67,1005,
724 66,1001, 65, 997, 64, 992, 62, 988, 61, 983, 60, 978, 59, 974, 58, 969,
725 56, 965, 55, 960, 54, 955, 53, 951, 52, 946, 51, 941, 50, 937, 49, 932,
726 48, 927, 47, 923, 46, 918, 45, 913, 44, 908, 43, 904, 42, 899, 41, 894,
727 40, 889, 39, 884, 38, 880, 37, 875, 36, 870, 36, 865, 35, 860, 34, 855,
728 33, 851, 32, 846, 32, 841, 31, 836, 30, 831, 29, 826, 29, 821, 28, 816,
729 27, 811, 27, 806, 26, 802, 25, 797, 24, 792, 24, 787, 23, 782, 23, 777,
730 22, 772, 21, 767, 21, 762, 20, 757, 20, 752, 19, 747, 19, 742, 18, 737,
731 17, 732, 17, 728, 16, 723, 16, 718, 15, 713, 15, 708, 15, 703, 14, 698,
732 14, 693, 13, 688, 13, 683, 12, 678, 12, 674, 11, 669, 11, 664, 11, 659,
733 10, 654, 10, 649, 10, 644, 9, 640, 9, 635, 9, 630, 8, 625, 8, 620,
734 8, 615, 7, 611, 7, 606, 7, 601, 6, 596, 6, 592, 6, 587, 6, 582,
735 5, 577, 5, 573, 5, 568, 5, 563, 4, 559, 4, 554, 4, 550, 4, 545,
736 4, 540, 3, 536, 3, 531, 3, 527, 3, 522, 3, 517, 2, 513, 2, 508,
737 2, 504, 2, 499, 2, 495, 2, 491, 2, 486, 1, 482, 1, 477, 1, 473,
738 1, 469, 1, 464, 1, 460, 1, 456, 1, 451, 1, 447, 1, 443, 1, 439,
739 0, 434, 0, 430, 0, 426, 0, 422, 0, 418, 0, 414, 0, 410, 0, 405,
740 0, 401, 0, 397, 0, 393, 0, 389, 0, 385, 0, 381, 0, 378, 0, 374,
742 /* Gaussian interpolation using most recent 4 samples */
743 long position
= voice
->position
;
744 voice
->position
+= rate
;
745 short const* interp
= voice
->samples
+ (position
>> 12);
746 int offset
= position
>> 4 & 0xFF;
748 /* Only left half of gaussian kernel is in table, so we must mirror
750 short const* fwd
= gauss
+ offset
* 2;
751 short const* rev
= gauss
+ 510 - offset
* 2;
753 /* Use faster gaussian interpolation when exact result isn't needed
754 by pitch modulator of next channel */
756 if ( !(slow_gaussian
& vbit
) ) /* 99% of the time */
758 /* Main optimization is lack of clamping. Not a problem since
759 output never goes more than +/- 16 outside 16-bit range and
760 things are clamped later anyway. Other optimization is to
761 preserve fractional accuracy, eliminating several masks. */
762 int output
= (((fwd
[0] * interp
[0] +
763 fwd
[1] * interp
[1] +
764 rev
[1] * interp
[2] +
765 rev
[0] * interp
[3] ) >> 11) * voice
->envx
) >> 11;
767 /* duplicated here to give compiler more to run in parallel */
768 amp_0
= voice
->volume
[0] * output
;
769 amp_1
= voice
->volume
[1] * output
;
770 raw_voice
->outx
= output
>> 8;
774 int output
= *(int16_t*) &this->noise
;
775 if ( !(this->r
.g
.noise_enables
& vbit
) )
777 output
= (fwd
[0] * interp
[0]) & ~0xFFF;
778 output
= (output
+ fwd
[1] * interp
[1]) & ~0xFFF;
779 output
= (output
+ rev
[1] * interp
[2]) >> 12;
780 output
= (int16_t) (output
* 2);
781 output
+= ((rev
[0] * interp
[3]) >> 12) * 2;
782 output
= CLAMP16( output
);
784 output
= (output
* voice
->envx
) >> 11 & ~1;
786 /* duplicated here to give compiler more to run in parallel */
787 amp_0
= voice
->volume
[0] * output
;
788 amp_1
= voice
->volume
[1] * output
;
790 raw_voice
->outx
= (int8_t) (output
>> 8);
792 #else /* SPCNOINTERP */
793 /* two-point linear interpolation */
795 int amp_0
= (int16_t)this->noise
;
798 if ( (this->r
.g
.noise_enables
& vbit
) == 0 )
800 uint32_t f
= voice
->position
;
804 * Formula (fastest found so far of MANY):
805 * output = y0 + f*y1 - f*y0
808 /* separate fractional and whole parts */
809 "move.l %[f], %[y1] \r\n"
810 "and.l #0xfff, %[f] \r\n"
811 "lsr.l %[sh], %[y1] \r\n"
812 /* load samples y0 (upper) & y1 (lower) */
813 "move.l 2(%[s], %[y1].l*2), %[y1] \r\n"
815 "mac.w %[f]l, %[y1]l, %%acc0 \r\n"
817 "msac.w %[f]l, %[y1]u, %%acc0 \r\n"
818 /* separate out y0 and sign extend */
820 "movea.w %[y1], %[y0] \r\n"
821 /* fetch result, scale down and add y0 */
822 "movclr.l %%acc0, %[y1] \r\n"
823 /* output = y0 + (result >> 12) */
824 "asr.l %[sh], %[y1] \r\n"
825 "add.l %[y0], %[y1] \r\n"
826 : [f
]"+d"(f
), [y0
]"=&a"(y0
), [y1
]"=&d"(amp_0
)
827 : [s
]"a"(voice
->samples
), [sh
]"d"(12)
831 /* apply voice envelope to output */
833 "mac.w %[output]l, %[envx]l, %%acc0 \r\n"
835 : [output
]"r"(amp_0
), [envx
]"r"(voice
->envx
)
838 /* advance voice position */
839 voice
->position
+= rate
;
841 /* fetch output, scale and apply left and right
844 "movclr.l %%acc0, %[output] \r\n"
845 "asr.l %[sh], %[output] \r\n"
846 "mac.l %[vvol_0], %[output], %%acc0 \r\n"
847 "mac.l %[vvol_1], %[output], %%acc1 \r\n"
848 : [output
]"=&d"(amp_0
)
849 : [vvol_0
]"r"((int)voice
->volume
[0]),
850 [vvol_1
]"r"((int)voice
->volume
[1]),
854 /* save this output into previous, scale and save in
857 raw_voice
->outx
= amp_0
>> 8;
859 /* fetch final voice output */
861 "movclr.l %%acc0, %[amp_0] \r\n"
862 "movclr.l %%acc1, %[amp_1] \r\n"
863 : [amp_0
]"=r"(amp_0
), [amp_1
]"=r"(amp_1
)
865 #elif defined (CPU_ARM)
868 if ( (this->r
.g
.noise_enables
& vbit
) != 0 ) {
869 amp_0
= *(int16_t *)&this->noise
;
871 uint32_t f
= voice
->position
;
872 amp_0
= (uint32_t)voice
->samples
;
875 "mov %[y1], %[f], lsr #12 \r\n"
876 "eor %[f], %[f], %[y1], lsl #12 \r\n"
877 "add %[y1], %[y0], %[y1], lsl #1 \r\n"
878 "ldrsh %[y0], [%[y1], #2] \r\n"
879 "ldrsh %[y1], [%[y1], #4] \r\n"
880 "sub %[y1], %[y1], %[y0] \r\n"
881 "mul %[f], %[y1], %[f] \r\n"
882 "add %[y0], %[y0], %[f], asr #12 \r\n"
883 : [f
]"+r"(f
), [y0
]"+r"(amp_0
), [y1
]"=&r"(amp_1
)
887 voice
->position
+= rate
;
890 "mul %[amp_1], %[amp_0], %[envx] \r\n"
891 "mov %[amp_0], %[amp_1], asr #11 \r\n"
892 "mov %[amp_1], %[amp_0], asr #8 \r\n"
893 : [amp_0
]"+r"(amp_0
), [amp_1
]"=&r"(amp_1
)
894 : [envx
]"r"(voice
->envx
)
898 raw_voice
->outx
= (int8_t)amp_1
;
901 "mul %[amp_1], %[amp_0], %[vol_1] \r\n"
902 "mul %[amp_0], %[vol_0], %[amp_0] \r\n"
903 : [amp_0
]"+r"(amp_0
), [amp_1
]"+r"(amp_1
)
904 : [vol_0
]"r"((int)voice
->volume
[0]),
905 [vol_1
]"r"((int)voice
->volume
[1])
907 #else /* Unoptimized CPU */
910 if ( (this->r
.g
.noise_enables
& vbit
) == 0 )
912 int const fraction
= voice
->position
& 0xfff;
913 short const* const pos
= (voice
->samples
+ (voice
->position
>> 12)) + 1;
914 output
= pos
[0] + ((fraction
* (pos
[1] - pos
[0])) >> 12);
916 output
= *(int16_t *)&this->noise
;
919 voice
->position
+= rate
;
921 output
= (output
* voice
->envx
) >> 11;
923 /* duplicated here to give compiler more to run in parallel */
924 int amp_0
= voice
->volume
[0] * output
;
925 int amp_1
= voice
->volume
[1] * output
;
928 raw_voice
->outx
= (int8_t) (output
>> 8);
930 #endif /* SPCNOINTERP */
933 if ( voice
->position
>= voice
->wave_end
)
935 long loop_len
= voice
->wave_loop
<< 12;
936 voice
->position
-= loop_len
;
937 this->r
.g
.wave_ended
|= vbit
;
940 this->keys_down
^= vbit
;
949 ENTER_TIMER(dsp_mix
);
954 if ( this->r
.g
.echo_ons
& vbit
)
964 /* end of voice loop */
968 /* Read feedback from echo buffer */
969 int echo_pos
= this->echo_pos
;
970 uint8_t* const echo_ptr
= RAM
+ ((echo_start
+ echo_pos
) & 0xFFFF);
972 if ( echo_pos
>= echo_wrap
)
974 this->echo_pos
= echo_pos
;
975 int fb
= swap_odd_even32(*(int32_t *)echo_ptr
);
978 /* Keep last 8 samples */
979 *this->last_fir_ptr
= fb
;
980 this->last_fir_ptr
= this->fir_ptr
;
982 /* Apply echo FIR filter to output samples read from echo buffer -
983 circular buffer is hardware incremented and masked; FIR
984 coefficients and buffer history are loaded in parallel with
985 multiply accumulate operations. Shift left by one here and once
986 again when calculating feedback to have sample values justified
987 to bit 31 in the output to ease endian swap, interleaving and
988 clamping before placing result in the program's echo buffer. */
991 "move.l (%[fir_c]) , %[_2] \r\n"
992 "mac.w %[fb]u, %[_2]u, <<, (%[fir_p])+&, %[_0], %%acc0 \r\n"
993 "mac.w %[fb]l, %[_2]u, <<, (%[fir_p])& , %[_1], %%acc1 \r\n"
994 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
995 "mac.w %[_0]l, %[_2]l, <<, 4(%[fir_c]) , %[_2], %%acc1 \r\n"
996 "mac.w %[_1]u, %[_2]u, <<, 4(%[fir_p])& , %[_0], %%acc0 \r\n"
997 "mac.w %[_1]l, %[_2]u, <<, 8(%[fir_p])& , %[_1], %%acc1 \r\n"
998 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
999 "mac.w %[_0]l, %[_2]l, <<, 8(%[fir_c]) , %[_2], %%acc1 \r\n"
1000 "mac.w %[_1]u, %[_2]u, <<, 12(%[fir_p])& , %[_0], %%acc0 \r\n"
1001 "mac.w %[_1]l, %[_2]u, <<, 16(%[fir_p])& , %[_1], %%acc1 \r\n"
1002 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1003 "mac.w %[_0]l, %[_2]l, <<, 12(%[fir_c]) , %[_2], %%acc1 \r\n"
1004 "mac.w %[_1]u, %[_2]u, <<, 20(%[fir_p])& , %[_0], %%acc0 \r\n"
1005 "mac.w %[_1]l, %[_2]u, << , %%acc1 \r\n"
1006 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1007 "mac.w %[_0]l, %[_2]l, << , %%acc1 \r\n"
1008 : [_0
]"=&r"(_0
), [_1
]"=&r"(_1
), [_2
]"=&r"(_2
),
1009 [fir_p
]"+a"(this->fir_ptr
)
1010 : [fir_c
]"a"(this->fir_coeff
), [fb
]"r"(fb
)
1013 /* Generate output */
1015 /* fetch filter results _after_ gcc loads asm
1016 block parameters to eliminate emac stalls */
1017 "movclr.l %%acc0, %[out_0] \r\n"
1018 "movclr.l %%acc1, %[out_1] \r\n"
1019 /* apply global volume */
1020 "mac.l %[chans_0], %[gv_0] , %%acc2 \r\n"
1021 "mac.l %[chans_1], %[gv_1] , %%acc3 \r\n"
1022 /* apply echo volume and add to final output */
1023 "mac.l %[ev_0], %[out_0], >>, %%acc2 \r\n"
1024 "mac.l %[ev_1], %[out_1], >>, %%acc3 \r\n"
1025 : [out_0
]"=&r"(out_0
), [out_1
]"=&r"(out_1
)
1026 : [chans_0
]"r"(chans_0
), [gv_0
]"r"(global_vol_0
),
1027 [ev_0
]"r"((int)this->r
.g
.echo_volume_0
),
1028 [chans_1
]"r"(chans_1
), [gv_1
]"r"(global_vol_1
),
1029 [ev_1
]"r"((int)this->r
.g
.echo_volume_1
)
1032 /* Feedback into echo buffer */
1033 if ( !(this->r
.g
.flags
& 0x20) )
1036 /* scale echo voices; saturate if overflow */
1037 "mac.l %[sh], %[e1] , %%acc1 \r\n"
1038 "mac.l %[sh], %[e0] , %%acc0 \r\n"
1039 /* add scaled output from FIR filter */
1040 "mac.l %[out_1], %[ef], <<, %%acc1 \r\n"
1041 "mac.l %[out_0], %[ef], <<, %%acc0 \r\n"
1042 /* swap and fetch feedback results - simply
1043 swap_odd_even32 mixed in between macs and
1044 movclrs to mitigate stall issues */
1045 "move.l #0x00ff00ff, %[sh] \r\n"
1046 "movclr.l %%acc1, %[e1] \r\n"
1048 "movclr.l %%acc0, %[e0] \r\n"
1049 "move.w %[e1], %[e0] \r\n"
1050 "and.l %[e0], %[sh] \r\n"
1051 "eor.l %[sh], %[e0] \r\n"
1052 "lsl.l #8, %[sh] \r\n"
1053 "lsr.l #8, %[e0] \r\n"
1054 "or.l %[sh], %[e0] \r\n"
1055 /* save final feedback into echo buffer */
1056 "move.l %[e0], (%[echo_ptr]) \r\n"
1057 : [e0
]"+d"(echo_0
), [e1
]"+d"(echo_1
)
1058 : [out_0
]"r"(out_0
), [out_1
]"r"(out_1
),
1059 [ef
]"r"((int)this->r
.g
.echo_feedback
),
1060 [echo_ptr
]"a"((int32_t *)echo_ptr
),
1065 /* Output final samples */
1067 /* fetch output saved in %acc2 and %acc3 */
1068 "movclr.l %%acc2, %[out_0] \r\n"
1069 "movclr.l %%acc3, %[out_1] \r\n"
1070 /* scale right by global_muting shift */
1071 "asr.l %[gm], %[out_0] \r\n"
1072 "asr.l %[gm], %[out_1] \r\n"
1073 : [out_0
]"=&d"(out_0
), [out_1
]"=&d"(out_1
)
1074 : [gm
]"d"(global_muting
)
1077 out_buf
[ 0] = out_0
;
1078 out_buf
[WAV_CHUNK_SIZE
] = out_1
;
1080 #elif defined (CPU_ARM)
1081 /* Read feedback from echo buffer */
1082 int echo_pos
= this->echo_pos
;
1083 uint8_t* const echo_ptr
= RAM
+
1084 ((this->r
.g
.echo_page
* 0x100 + echo_pos
) & 0xFFFF);
1086 if ( echo_pos
>= (this->r
.g
.echo_delay
& 15) * 0x800 )
1088 this->echo_pos
= echo_pos
;
1090 int fb_0
= GET_LE16SA( echo_ptr
);
1091 int fb_1
= GET_LE16SA( echo_ptr
+ 2 );
1093 /* Keep last 8 samples */
1094 int32_t *fir_ptr
= this->fir_ptr
;
1098 "str %[fb_0], [%[fir_p]], #4 \r\n"
1099 "str %[fb_1], [%[fir_p]], #4 \r\n"
1100 /* duplicate at +8 eliminates wrap checking below */
1101 "str %[fb_0], [%[fir_p], #56] \r\n"
1102 "str %[fb_1], [%[fir_p], #60] \r\n"
1103 : [fir_p
]"+r"(fir_ptr
)
1104 : [fb_0
]"r"(fb_0
), [fb_1
]"r"(fb_1
)
1107 this->fir_ptr
= (int32_t *)((intptr_t)fir_ptr
& FIR_BUF_MASK
);
1108 int32_t *fir_coeff
= this->fir_coeff
;
1111 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1112 "ldmia %[fir_p]!, { r4-r5 } \r\n"
1113 "mul %[fb_0], r0, %[fb_0] \r\n"
1114 "mul %[fb_1], r0, %[fb_1] \r\n"
1115 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1116 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1117 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1118 "ldmia %[fir_p]!, { r2-r5 } \r\n"
1119 "mla %[fb_0], r2, r0, %[fb_0] \r\n"
1120 "mla %[fb_1], r3, r0, %[fb_1] \r\n"
1121 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1122 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1123 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1124 "ldmia %[fir_p]!, { r2-r5 } \r\n"
1125 "mla %[fb_0], r2, r0, %[fb_0] \r\n"
1126 "mla %[fb_1], r3, r0, %[fb_1] \r\n"
1127 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1128 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1129 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1130 "ldmia %[fir_p]!, { r2-r5 } \r\n"
1131 "mla %[fb_0], r2, r0, %[fb_0] \r\n"
1132 "mla %[fb_1], r3, r0, %[fb_1] \r\n"
1133 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1134 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1135 : [fb_0
]"+r"(fb_0
), [fb_1
]"+r"(fb_1
),
1136 [fir_p
]"+r"(fir_ptr
), [fir_c
]"+r"(fir_coeff
)
1138 : "r0", "r1", "r2", "r3", "r4", "r5"
1141 /* Generate output */
1142 int amp_0
= (chans_0
* global_vol_0
+ fb_0
* this->r
.g
.echo_volume_0
)
1144 int amp_1
= (chans_1
* global_vol_1
+ fb_1
* this->r
.g
.echo_volume_1
)
1147 out_buf
[ 0] = amp_0
;
1148 out_buf
[WAV_CHUNK_SIZE
] = amp_1
;
1151 if ( !(this->r
.g
.flags
& 0x20) )
1153 /* Feedback into echo buffer */
1154 int e0
= (echo_0
>> 7) + ((fb_0
* this->r
.g
.echo_feedback
) >> 14);
1155 int e1
= (echo_1
>> 7) + ((fb_1
* this->r
.g
.echo_feedback
) >> 14);
1157 SET_LE16A( echo_ptr
, e0
);
1159 SET_LE16A( echo_ptr
+ 2, e1
);
1161 #else /* Unoptimized CPU */
1162 /* Read feedback from echo buffer */
1163 int echo_pos
= this->echo_pos
;
1164 uint8_t* const echo_ptr
= RAM
+
1165 ((this->r
.g
.echo_page
* 0x100 + echo_pos
) & 0xFFFF);
1167 if ( echo_pos
>= (this->r
.g
.echo_delay
& 15) * 0x800 )
1169 this->echo_pos
= echo_pos
;
1170 int fb_0
= GET_LE16SA( echo_ptr
);
1171 int fb_1
= GET_LE16SA( echo_ptr
+ 2 );
1173 /* Keep last 8 samples */
1174 int (* const fir_ptr
) [2] = this->fir_buf
+ this->fir_pos
;
1175 this->fir_pos
= (this->fir_pos
+ 1) & (FIR_BUF_HALF
- 1);
1176 fir_ptr
[ 0] [0] = fb_0
;
1177 fir_ptr
[ 0] [1] = fb_1
;
1178 /* duplicate at +8 eliminates wrap checking below */
1179 fir_ptr
[FIR_BUF_HALF
] [0] = fb_0
;
1180 fir_ptr
[FIR_BUF_HALF
] [1] = fb_1
;
1183 fb_0
*= this->fir_coeff
[0];
1184 fb_1
*= this->fir_coeff
[0];
1187 fb_0 += fir_ptr [i] [0] * this->fir_coeff [i];\
1188 fb_1 += fir_ptr [i] [1] * this->fir_coeff [i];
1198 /* Generate output */
1199 int amp_0
= (chans_0
* global_vol_0
+ fb_0
* this->r
.g
.echo_volume_0
)
1201 int amp_1
= (chans_1
* global_vol_1
+ fb_1
* this->r
.g
.echo_volume_1
)
1203 out_buf
[ 0] = amp_0
;
1204 out_buf
[WAV_CHUNK_SIZE
] = amp_1
;
1207 if ( !(this->r
.g
.flags
& 0x20) )
1209 /* Feedback into echo buffer */
1210 int e0
= (echo_0
>> 7) + ((fb_0
* this->r
.g
.echo_feedback
) >> 14);
1211 int e1
= (echo_1
>> 7) + ((fb_1
* this->r
.g
.echo_feedback
) >> 14);
1213 SET_LE16A( echo_ptr
, e0
);
1215 SET_LE16A( echo_ptr
+ 2, e1
);
1218 #else /* SPCNOECHO == 1*/
1219 /* Generate output */
1220 int amp_0
= (chans_0
* global_vol_0
) >> global_muting
;
1221 int amp_1
= (chans_1
* global_vol_1
) >> global_muting
;
1222 out_buf
[ 0] = amp_0
;
1223 out_buf
[WAV_CHUNK_SIZE
] = amp_1
;
1225 #endif /* SPCNOECHO */
/* DSP_reset: put the DSP emulation state into its initial condition.

   this - DSP state to reset

   Visible effects: keys_down and noise_count are cleared, the flags register
   is set to 0xE0 and key_ons to 0, voice_state [] is zero-filled and every
   voice is put into state_release, all 256 wave_entry [] slots are
   invalidated (start_addr = -1), and the echo FIR history for the active
   CPU variant is zero-filled (the file-scope fir_buf on ColdFire/ARM,
   this->fir_buf otherwise). The two trailing asserts check the layout of the
   register structures against REGISTER_COUNT.

   NOTE(review): braces, the declaration of i, the #else / #endif of the CPU
   conditional and possibly further initialisations are missing from this
   listing, and every statement has a stray listing number in front of it;
   confirm against the pristine file before changing any code here. */
1234 void DSP_reset( struct Spc_Dsp
* this )
1236 this->keys_down
= 0;
1238 this->noise_count
= 0;
1241 this->r
.g
.flags
= 0xE0; /* reset, mute, echo off */
1242 this->r
.g
.key_ons
= 0;
1244 ci
->memset( this->voice_state
, 0, sizeof this->voice_state
);
1247 for ( i
= VOICE_COUNT
; --i
>= 0; )
1249 struct voice_t
* v
= this->voice_state
+ i
;
1250 v
->env_mode
= state_release
;
1256 for ( i
= 0; i
< 256; i
++ )
1257 this->wave_entry
[i
].start_addr
= -1;
1260 #if defined(CPU_COLDFIRE)
1261 this->fir_ptr
= fir_buf
;
1262 this->last_fir_ptr
= &fir_buf
[7];
1263 ci
->memset( fir_buf
, 0, sizeof fir_buf
);
1264 #elif defined (CPU_ARM)
1265 this->fir_ptr
= fir_buf
;
1266 ci
->memset( fir_buf
, 0, sizeof fir_buf
);
1269 ci
->memset( this->fir_buf
, 0, sizeof this->fir_buf
);
1272 assert( offsetof (struct globals_t
,unused9
[2]) == REGISTER_COUNT
);
1273 assert( sizeof (this->r
.voice
) == REGISTER_COUNT
);