1 /***************************************************************************
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 * Copyright (C) 2007-2008 Michael Sevakis (jhMikeS)
11 * Copyright (C) 2006-2007 Adam Gashlin (hcs)
12 * Copyright (C) 2004-2007 Shay Green (blargg)
13 * Copyright (C) 2002 Brad Martin
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
21 * KIND, either express or implied.
23 ****************************************************************************/
25 /* The DSP portion (awe!) */
27 #include "spc_codec.h"
28 #include "spc_profiler.h"
30 #if defined(CPU_COLDFIRE) || defined (CPU_ARM)
31 int32_t fir_buf
[FIR_BUF_CNT
]
32 __attribute__ ((aligned (FIR_BUF_ALIGN
*1))) IBSS_ATTR
;
35 /* a little extra for samples that go past end */
36 int16_t BRRcache
[BRR_CACHE_SIZE
] CACHEALIGN_ATTR
;
39 void DSP_write( struct Spc_Dsp
* this, int i
, int data
)
41 assert( (unsigned) i
< REGISTER_COUNT
);
43 this->r
.reg
[i
] = data
;
46 if ( low
< 2 ) /* voice volumes */
48 int left
= *(int8_t const*) &this->r
.reg
[i
& ~1];
49 int right
= *(int8_t const*) &this->r
.reg
[i
| 1];
50 struct voice_t
* v
= this->voice_state
+ high
;
52 v
->volume
[1] = right
;
54 else if ( low
== 0x0F ) /* fir coefficients */
56 this->fir_coeff
[7 - high
] = (int8_t) data
; /* sign-extend */
/* Saturate n to the signed 16-bit range in place and yield the result:
     if ( n < -32768 ) n = -32768;
     if ( n >  32767 ) n =  32767;
   n must be a modifiable int lvalue and is evaluated more than once, so
   pass a plain variable. Written as a GNU statement expression so that it
   can be used as `x = CLAMP16( x );`. */
#define CLAMP16( n ) \
({ \
    if ( (int16_t) (n) != (n) ) \
        (n) = 0x7FFF ^ ((n) >> 31); \
    (n); \
})
/* decode_brr: pre-decode the BRR waveform that starts at start_addr.
 *
 * Output samples are written into BRRcache starting at index
 * start_addr * 2, and the cache entry this->wave_entry[raw_voice->waveform]
 * is filled in with the start address, the sample pointer, the end position
 * and the loop length. The finished entry is also appended to
 * this->wave_entry_old. Decoding stops at a block whose header has the end
 * bit (bit 0) set, or at the end of the 64 KiB RAM.
 *
 * this       - DSP state (cache tables, register file)
 * start_addr - offset of the first BRR block inside RAM
 * voice      - NOTE(review): not referenced in the visible part of the body
 * raw_voice  - register block of the voice; only ->waveform is read here
 *
 * NOTE(review): this excerpt is missing lines (braces, the smp1/smp2 and
 * block_header declarations, parts of the loops); the comments describe only
 * what is visible.
 */
70 static void decode_brr( struct Spc_Dsp
* this, unsigned start_addr
,
71 struct voice_t
* voice
,
72 struct raw_voice_t
const* const raw_voice
) ICODE_ATTR
;
73 static void decode_brr( struct Spc_Dsp
* this, unsigned start_addr
,
74 struct voice_t
* voice
,
75 struct raw_voice_t
const* const raw_voice
)
77 /* setup same variables as where decode_brr() is called from */
80 struct src_dir
const* const sd
=
81 (struct src_dir
*) &RAM
[this->r
.g
.wave_page
* 0x100];
82 struct cache_entry_t
* const wave_entry
=
83 &this->wave_entry
[raw_voice
->waveform
];
85 /* the following block can be put in place of the call to
89 DEBUGF( "decode at %08x (wave #%d)\n",
90 start_addr
, raw_voice
->waveform
);
94 for ( i
= 0; i
< this->oldsize
; i
++ )
96 struct cache_entry_t
* e
= &this->wave_entry_old
[i
];
97 if ( e
->start_addr
== start_addr
)
99 DEBUGF( "found in wave_entry_old (oldsize=%d)\n",
106 wave_entry
->start_addr
= start_addr
;
108 uint8_t const* const loop_ptr
=
109 RAM
+ GET_LE16A( sd
[raw_voice
->waveform
].loop
);
110 short* loop_start
= 0;
112 short* out
= BRRcache
+ start_addr
* 2;
113 wave_entry
->samples
= out
;
118 uint8_t const* addr
= RAM
+ start_addr
;
122 if ( addr
== loop_ptr
)
125 DEBUGF( "loop at %08lx (wave #%d)\n",
126 (unsigned long)(addr
- RAM
), raw_voice
->waveform
);
130 block_header
= *addr
;
133 int const filter
= (block_header
& 0x0C) - 0x08;
136 (invalid scaling gives -4096 for neg nybble, 0 for pos) */
137 static unsigned char const right_shifts
[16] = {
138 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 29, 29, 29,
140 static unsigned char const left_shifts
[16] = {
141 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11
143 int const scale
= block_header
>> 4;
144 int const right_shift
= right_shifts
[scale
];
145 int const left_shift
= left_shifts
[scale
];
147 /* output position */
148 out
+= BRR_BLOCK_SIZE
;
149 int offset
= -BRR_BLOCK_SIZE
<< 2;
151 do /* decode and filter 16 samples */
153 /* Get nybble, sign-extend, then scale
154 get byte, select which nybble, sign-extend, then shift based
155 on scaling. also handles invalid scaling values. */
156 int delta
= (int) (int8_t) (addr
[offset
>> 3] << (offset
& 4))
157 >> right_shift
<< left_shift
;
159 out
[offset
>> 2] = smp2
;
161 if ( filter
== 0 ) /* mode 0x08 (30-90% of the time) */
167 delta
+= (-smp1
- (smp1
>> 1)) >> 5;
171 if ( filter
== -4 ) /* mode 0x04 */
174 delta
+= (-smp1
) >> 5;
176 else if ( filter
> -4 ) /* mode 0x0C */
179 delta
+= (smp2
+ (smp2
>> 1)) >> 4;
181 delta
+= (-smp1
* 13) >> 7;
186 delta
= CLAMP16( delta
);
187 smp1
= (int16_t) (delta
* 2); /* sign-extend */
189 while ( (offset
+= 4) != 0 );
191 /* if next block has end flag set, this block ends early */
193 if ( (block_header
& 3) != 3 && (*addr
& 3) == 1 )
195 /* skip last 9 samples */
200 while ( !(block_header
& 1) && addr
< RAM
+ 0x10000 );
/* end is kept with a 12-bit fraction, the same fixed-point format as
   voice->position, which DSP_run_ compares against wave_end */
206 wave_entry
->end
= (out
- 1 - wave_entry
->samples
) << 12;
208 wave_entry
->loop
= 0;
/* bit 1 of the final block header marks a looping waveform */
209 if ( (block_header
& 2) )
213 int loop
= out
- loop_start
;
214 wave_entry
->loop
= loop
;
215 wave_entry
->end
+= 0x3000;
216 out
[2] = loop_start
[2];
217 out
[3] = loop_start
[3];
218 out
[4] = loop_start
[4];
222 DEBUGF( "loop point outside initial wave\n" );
226 DEBUGF( "end at %08lx (wave #%d)\n",
227 (unsigned long)(addr
- RAM
), raw_voice
->waveform
);
230 this->wave_entry_old
[this->oldsize
++] = *wave_entry
;
/* key_on: advance a pending key-on for one voice.
 *
 * Stores the (already decremented) key_on_delay in the voice. Once the
 * delay has reached zero the voice is started: its bit is set in
 * this->keys_down, the envelope enters state_attack with env_timer reloaded
 * to 0x7800, and the waveform start address is read from the source
 * directory entry sd[raw_voice->waveform].
 *
 * this         - DSP state
 * voice        - per-voice playback state to initialise
 * sd           - source directory (start/loop addresses per waveform)
 * raw_voice    - register block of the voice; only ->waveform is read here
 * key_on_delay - remaining delay; 0 means "start now"
 * vbit         - bit mask of this voice in keys_down
 *
 * NOTE(review): voice->position is assigned twice below, once together with
 * voice->addr/samples[]/block_header and once together with the wave_entry
 * lookup. These presumably belong to two build configurations (direct BRR
 * decoding vs. the BRR cache) whose selecting preprocessor lines are not
 * visible in this excerpt - confirm against the full file.
 */
236 static void key_on(struct Spc_Dsp
* const this, struct voice_t
* const voice
,
237 struct src_dir
const* const sd
,
238 struct raw_voice_t
const* const raw_voice
,
239 const int key_on_delay
, const int vbit
) ICODE_ATTR
;
240 static void key_on(struct Spc_Dsp
* const this, struct voice_t
* const voice
,
241 struct src_dir
const* const sd
,
242 struct raw_voice_t
const* const raw_voice
,
243 const int key_on_delay
, const int vbit
) {
246 int const env_rate_init
= 0x7800;
247 voice
->key_on_delay
= key_on_delay
;
/* nothing else happens until the countdown has expired */
248 if ( key_on_delay
== 0 )
250 this->keys_down
|= vbit
;
252 voice
->env_mode
= state_attack
;
253 voice
->env_timer
= env_rate_init
; /* TODO: inaccurate? */
254 unsigned start_addr
= GET_LE16A(sd
[raw_voice
->waveform
].start
);
257 voice
->addr
= RAM
+ start_addr
;
258 /* BRR filter uses previous samples */
259 voice
->samples
[BRR_BLOCK_SIZE
+ 1] = 0;
260 voice
->samples
[BRR_BLOCK_SIZE
+ 2] = 0;
261 /* decode three samples immediately */
262 voice
->position
= (BRR_BLOCK_SIZE
+ 3) * 0x1000 - 1;
263 voice
->block_header
= 0; /* "previous" BRR header */
267 voice
->position
= 3 * 0x1000 - 1;
268 struct cache_entry_t
* const wave_entry
=
269 &this->wave_entry
[raw_voice
->waveform
];
271 /* predecode BRR if not already */
272 if ( wave_entry
->start_addr
!= start_addr
)
274 /* the following line can be replaced by the indicated block
276 decode_brr( this, start_addr
, voice
, raw_voice
);
279 voice
->samples
= wave_entry
->samples
;
280 voice
->wave_end
= wave_entry
->end
;
281 voice
->wave_loop
= wave_entry
->loop
;
/* DSP_run_: generate DSP output into out_buf.
 *
 * Key-on/key-off register bits are latched once up front. Then, for each
 * output sample pair, the visible code walks all voices (vbit runs up to
 * 0x100), counts down delayed key-ons, updates the ADSR/GAIN envelope,
 * decodes BRR data when the voice position has run past a block,
 * interpolates one sample, applies envelope and per-voice volume, and
 * finally applies the global volume (plus echo, when built in) before
 * storing the left sample in out_buf[0] and the right sample in
 * out_buf[WAV_CHUNK_SIZE].
 *
 * this    - DSP state, updated in place
 * count   - NOTE(review): presumably the number of sample pairs to produce;
 *           its use is not visible in this excerpt - confirm
 * out_buf - destination for the generated samples
 *
 * NOTE(review): ColdFire, ARM and generic C variants of the interpolation
 * and echo code appear back to back below; the preprocessor lines that
 * select between them are only partly visible in this excerpt, as are many
 * braces and declarations.
 */
287 void DSP_run_( struct Spc_Dsp
* this, long count
, int32_t* out_buf
)
291 uint8_t* const ram_
= ram
.ram
;
301 /* Here we check for keys on/off. Docs say that successive writes
302 to KON/KOF must be separated by at least 2 Ts periods or risk
303 being neglected. Therefore DSP only looks at these during an
304 update, and not at the time of the write. Only need to do this
305 once however, since the regs haven't changed over the whole
306 period we need to catch up with. */
309 int key_ons
= this->r
.g
.key_ons
;
310 int key_offs
= this->r
.g
.key_offs
;
311 /* keying on a voice resets that bit in ENDX */
312 this->r
.g
.wave_ended
&= ~key_ons
;
313 /* key_off bits prevent key_on from being acknowledged */
314 this->r
.g
.key_ons
= key_ons
& key_offs
;
316 /* process key events outside loop, since they won't re-occur */
317 struct voice_t
* voice
= this->voice_state
+ 8;
322 if ( key_offs
& vbit
)
324 voice
->env_mode
= state_release
;
325 voice
->key_on_delay
= 0;
327 else if ( key_ons
& vbit
)
329 voice
->key_on_delay
= 8;
332 while ( (vbit
>>= 1) != 0 );
335 struct src_dir
const* const sd
=
336 (struct src_dir
*) &RAM
[this->r
.g
.wave_page
* 0x100];
338 #ifdef ROCKBOX_BIG_ENDIAN
339 /* Convert endiannesses before entering loops - these
341 const uint32_t rates
[VOICE_COUNT
] =
343 GET_LE16A( this->r
.voice
[0].rate
) & 0x3FFF,
344 GET_LE16A( this->r
.voice
[1].rate
) & 0x3FFF,
345 GET_LE16A( this->r
.voice
[2].rate
) & 0x3FFF,
346 GET_LE16A( this->r
.voice
[3].rate
) & 0x3FFF,
347 GET_LE16A( this->r
.voice
[4].rate
) & 0x3FFF,
348 GET_LE16A( this->r
.voice
[5].rate
) & 0x3FFF,
349 GET_LE16A( this->r
.voice
[6].rate
) & 0x3FFF,
350 GET_LE16A( this->r
.voice
[7].rate
) & 0x3FFF,
352 #define VOICE_RATE(x) *(x)
353 #define IF_RBE(...) __VA_ARGS__
355 /* Initialize mask register with the buffer address mask */
356 asm volatile ("move.l %[m], %%mask" : : [m
]"i"(FIR_BUF_MASK
));
357 const int echo_wrap
= (this->r
.g
.echo_delay
& 15) * 0x800;
358 const int echo_start
= this->r
.g
.echo_page
* 0x100;
359 #endif /* CPU_COLDFIRE */
361 #define VOICE_RATE(x) (INT16A(raw_voice->rate) & 0x3FFF)
363 #endif /* ROCKBOX_BIG_ENDIAN */
/* Voices flagged in slow_gaussian (pitch-modulation source for the next
   voice, or noise enabled) take the exact interpolation branch further
   down; all other voices use the faster unclamped one. */
366 int const slow_gaussian
= (this->r
.g
.pitch_mods
>> 1) |
367 this->r
.g
.noise_enables
;
369 /* (g.flags & 0x40) ? 30 : 14 */
370 int const global_muting
= ((this->r
.g
.flags
& 0x40) >> 2) + 14 - 8;
371 int const global_vol_0
= this->r
.g
.volume_0
;
372 int const global_vol_1
= this->r
.g
.volume_1
;
374 /* each rate divides exactly into 0x7800 without remainder */
375 int const env_rate_init
= 0x7800;
376 static unsigned short const env_rates
[0x20] ICONST_ATTR
=
378 0x0000, 0x000F, 0x0014, 0x0018, 0x001E, 0x0028, 0x0030, 0x003C,
379 0x0050, 0x0060, 0x0078, 0x00A0, 0x00C0, 0x00F0, 0x0140, 0x0180,
380 0x01E0, 0x0280, 0x0300, 0x03C0, 0x0500, 0x0600, 0x0780, 0x0A00,
381 0x0C00, 0x0F00, 0x1400, 0x1800, 0x1E00, 0x2800, 0x3C00, 0x7800
384 do /* one pair of output samples per iteration */
/* Step the noise generator only while some voice has noise enabled; its
   rate comes from the low 5 bits of the flags register. */
387 if ( this->r
.g
.noise_enables
)
389 if ( (this->noise_count
-=
390 env_rates
[this->r
.g
.flags
& 0x1F]) <= 0 )
392 this->noise_count
= env_rate_init
;
393 int feedback
= (this->noise
<< 13) ^ (this->noise
<< 14);
394 this->noise
= (feedback
& 0x8000) ^ (this->noise
>> 1 & ~1);
402 long prev_outx
= 0; /* TODO: correct value for first channel? */
405 /* TODO: put raw_voice pointer in voice_t? */
406 struct raw_voice_t
* raw_voice
= this->r
.voice
;
407 struct voice_t
* voice
= this->voice_state
;
409 IF_RBE( const uint32_t* vr
= rates
; )
410 for ( ; vbit
< 0x100; vbit
<<= 1, ++voice
, ++raw_voice
IF_RBE( , ++vr
) )
412 /* pregen involves checking keyon, etc */
414 ENTER_TIMER(dsp_pregen
);
417 /* Key on events are delayed */
418 int key_on_delay
= voice
->key_on_delay
;
420 if ( --key_on_delay
>= 0 ) /* <1% of the time */
422 key_on(this,voice
,sd
,raw_voice
,key_on_delay
,vbit
);
425 if ( !(this->keys_down
& vbit
) ) /* Silent channel */
436 int const ENV_RANGE
= 0x800;
437 int env_mode
= voice
->env_mode
;
438 int adsr0
= raw_voice
->adsr
[0];
440 if ( env_mode
!= state_release
) /* 99% of the time */
442 env_timer
= voice
->env_timer
;
443 if ( adsr0
& 0x80 ) /* 79% of the time */
445 int adsr1
= raw_voice
->adsr
[1];
446 if ( env_mode
== state_sustain
) /* 74% of the time */
448 if ( (env_timer
-= env_rates
[adsr1
& 0x1F]) > 0 )
449 goto write_env_timer
;
451 int envx
= voice
->envx
;
452 envx
--; /* envx *= 255 / 256 */
455 /* TODO: should this be 8? */
456 raw_voice
->envx
= envx
>> 4;
459 else if ( env_mode
< 0 ) /* 25% state_decay */
461 int envx
= voice
->envx
;
463 env_rates
[(adsr0
>> 3 & 0x0E) + 0x10]) <= 0 )
465 envx
--; /* envx *= 255 / 256 */
468 /* TODO: should this be 8? */
469 raw_voice
->envx
= envx
>> 4;
470 env_timer
= env_rate_init
;
473 int sustain_level
= adsr1
>> 5;
474 if ( envx
<= (sustain_level
+ 1) * 0x100 )
475 voice
->env_mode
= state_sustain
;
477 goto write_env_timer
;
479 else /* state_attack */
481 int t
= adsr0
& 0x0F;
482 if ( (env_timer
-= env_rates
[t
* 2 + 1]) > 0 )
483 goto write_env_timer
;
485 int envx
= voice
->envx
;
487 int const step
= ENV_RANGE
/ 64;
490 envx
+= ENV_RANGE
/ 2 - step
;
492 if ( envx
>= ENV_RANGE
)
494 envx
= ENV_RANGE
- 1;
495 voice
->env_mode
= state_decay
;
498 /* TODO: should this be 8? */
499 raw_voice
->envx
= envx
>> 4;
505 int t
= raw_voice
->gain
;
509 voice
->envx
= t
<< 4;
514 if ( (env_timer
-= env_rates
[t
& 0x1F]) > 0 )
515 goto write_env_timer
;
517 int envx
= voice
->envx
;
519 if ( mode
<= 5 ) /* decay */
521 int step
= ENV_RANGE
/ 64;
522 if ( mode
== 5 ) /* exponential */
524 envx
--; /* envx *= 255 / 256 */
527 if ( (envx
-= step
) < 0 )
530 if ( voice
->env_mode
== state_attack
)
531 voice
->env_mode
= state_decay
;
536 int const step
= ENV_RANGE
/ 64;
539 envx
>= ENV_RANGE
* 3 / 4 + step
)
540 envx
+= ENV_RANGE
/ 256 - step
;
542 if ( envx
>= ENV_RANGE
)
543 envx
= ENV_RANGE
- 1;
546 /* TODO: should this be 8? */
547 raw_voice
->envx
= envx
>> 4;
552 else /* state_release */
554 int envx
= voice
->envx
;
555 if ( (envx
-= ENV_RANGE
/ 256) > 0 )
558 raw_voice
->envx
= envx
>> 8;
563 /* bit was set, so this clears it */
564 this->keys_down
^= vbit
;
570 env_timer
= env_rate_init
;
572 voice
->env_timer
= env_timer
;
576 EXIT_TIMER(dsp_pregen
);
578 ENTER_TIMER(dsp_gen
);
581 /* Decode BRR block */
582 if ( voice
->position
>= BRR_BLOCK_SIZE
* 0x1000 )
584 voice
->position
-= BRR_BLOCK_SIZE
* 0x1000;
586 uint8_t const* addr
= voice
->addr
;
587 if ( addr
>= RAM
+ 0x10000 )
590 /* action based on previous block's header */
591 if ( voice
->block_header
& 1 )
593 addr
= RAM
+ GET_LE16A( sd
[raw_voice
->waveform
].loop
);
594 this->r
.g
.wave_ended
|= vbit
;
595 if ( !(voice
->block_header
& 2) ) /* 1% of the time */
597 /* first block was end block;
598 don't play anything (verified) */
599 /* bit was set, so this clears it */
600 this->keys_down
^= vbit
;
602 /* since voice->envx is 0,
603 samples and position don't matter */
611 int const block_header
= *addr
;
614 voice
->block_header
= block_header
;
615 int const filter
= (block_header
& 0x0C) - 0x08;
617 /* scaling (invalid scaling gives -4096 for neg nybble,
619 static unsigned char const right_shifts
[16] = {
620 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 29, 29, 29,
622 static unsigned char const left_shifts
[16] = {
623 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11
625 int const scale
= block_header
>> 4;
626 int const right_shift
= right_shifts
[scale
];
627 int const left_shift
= left_shifts
[scale
];
629 /* previous samples */
630 int smp2
= voice
->samples
[BRR_BLOCK_SIZE
+ 1];
631 int smp1
= voice
->samples
[BRR_BLOCK_SIZE
+ 2];
632 voice
->samples
[0] = voice
->samples
[BRR_BLOCK_SIZE
];
634 /* output position */
635 short* out
= voice
->samples
+ (1 + BRR_BLOCK_SIZE
);
636 int offset
= -BRR_BLOCK_SIZE
<< 2;
638 /* if next block has end flag set,
639 this block ends early (verified) */
640 if ( (block_header
& 3) != 3 && (*addr
& 3) == 1 )
642 /* arrange for last 9 samples to be skipped */
645 voice
->samples
[skip
] = voice
->samples
[BRR_BLOCK_SIZE
];
646 voice
->position
+= skip
* 0x1000;
647 offset
= (-BRR_BLOCK_SIZE
+ (skip
& ~1)) << 2;
649 /* force sample to end on next decode */
650 voice
->block_header
= 1;
653 do /* decode and filter 16 samples */
655 /* Get nybble, sign-extend, then scale
656 get byte, select which nybble, sign-extend, then shift
657 based on scaling. also handles invalid scaling values.*/
658 int delta
= (int) (int8_t) (addr
[offset
>> 3] <<
659 (offset
& 4)) >> right_shift
<< left_shift
;
661 out
[offset
>> 2] = smp2
;
663 if ( filter
== 0 ) /* mode 0x08 (30-90% of the time) */
669 delta
+= (-smp1
- (smp1
>> 1)) >> 5;
673 if ( filter
== -4 ) /* mode 0x04 */
676 delta
+= (-smp1
) >> 5;
678 else if ( filter
> -4 ) /* mode 0x0C */
681 delta
+= (smp2
+ (smp2
>> 1)) >> 4;
683 delta
+= (-smp1
* 13) >> 7;
688 delta
= CLAMP16( delta
);
689 smp1
= (int16_t) (delta
* 2); /* sign-extend */
691 while ( (offset
+= 4) != 0 );
700 /* Get rate (with possible modulation) */
701 int rate
= VOICE_RATE(vr
);
702 if ( this->r
.g
.pitch_mods
& vbit
)
703 rate
= (rate
* (prev_outx
+ 32768)) >> 15;
706 /* Interleaved gauss table (to improve cache coherency). */
707 /* gauss [i * 2 + j] = normal_gauss [(1 - j) * 256 + i] */
708 static short const gauss
[512] =
710 370,1305, 366,1305, 362,1304, 358,1304, 354,1304, 351,1304, 347,1304, 343,1303,
711 339,1303, 336,1303, 332,1302, 328,1302, 325,1301, 321,1300, 318,1300, 314,1299,
712 311,1298, 307,1297, 304,1297, 300,1296, 297,1295, 293,1294, 290,1293, 286,1292,
713 283,1291, 280,1290, 276,1288, 273,1287, 270,1286, 267,1284, 263,1283, 260,1282,
714 257,1280, 254,1279, 251,1277, 248,1275, 245,1274, 242,1272, 239,1270, 236,1269,
715 233,1267, 230,1265, 227,1263, 224,1261, 221,1259, 218,1257, 215,1255, 212,1253,
716 210,1251, 207,1248, 204,1246, 201,1244, 199,1241, 196,1239, 193,1237, 191,1234,
717 188,1232, 186,1229, 183,1227, 180,1224, 178,1221, 175,1219, 173,1216, 171,1213,
718 168,1210, 166,1207, 163,1205, 161,1202, 159,1199, 156,1196, 154,1193, 152,1190,
719 150,1186, 147,1183, 145,1180, 143,1177, 141,1174, 139,1170, 137,1167, 134,1164,
720 132,1160, 130,1157, 128,1153, 126,1150, 124,1146, 122,1143, 120,1139, 118,1136,
721 117,1132, 115,1128, 113,1125, 111,1121, 109,1117, 107,1113, 106,1109, 104,1106,
722 102,1102, 100,1098, 99,1094, 97,1090, 95,1086, 94,1082, 92,1078, 90,1074,
723 89,1070, 87,1066, 86,1061, 84,1057, 83,1053, 81,1049, 80,1045, 78,1040,
724 77,1036, 76,1032, 74,1027, 73,1023, 71,1019, 70,1014, 69,1010, 67,1005,
725 66,1001, 65, 997, 64, 992, 62, 988, 61, 983, 60, 978, 59, 974, 58, 969,
726 56, 965, 55, 960, 54, 955, 53, 951, 52, 946, 51, 941, 50, 937, 49, 932,
727 48, 927, 47, 923, 46, 918, 45, 913, 44, 908, 43, 904, 42, 899, 41, 894,
728 40, 889, 39, 884, 38, 880, 37, 875, 36, 870, 36, 865, 35, 860, 34, 855,
729 33, 851, 32, 846, 32, 841, 31, 836, 30, 831, 29, 826, 29, 821, 28, 816,
730 27, 811, 27, 806, 26, 802, 25, 797, 24, 792, 24, 787, 23, 782, 23, 777,
731 22, 772, 21, 767, 21, 762, 20, 757, 20, 752, 19, 747, 19, 742, 18, 737,
732 17, 732, 17, 728, 16, 723, 16, 718, 15, 713, 15, 708, 15, 703, 14, 698,
733 14, 693, 13, 688, 13, 683, 12, 678, 12, 674, 11, 669, 11, 664, 11, 659,
734 10, 654, 10, 649, 10, 644, 9, 640, 9, 635, 9, 630, 8, 625, 8, 620,
735 8, 615, 7, 611, 7, 606, 7, 601, 6, 596, 6, 592, 6, 587, 6, 582,
736 5, 577, 5, 573, 5, 568, 5, 563, 4, 559, 4, 554, 4, 550, 4, 545,
737 4, 540, 3, 536, 3, 531, 3, 527, 3, 522, 3, 517, 2, 513, 2, 508,
738 2, 504, 2, 499, 2, 495, 2, 491, 2, 486, 1, 482, 1, 477, 1, 473,
739 1, 469, 1, 464, 1, 460, 1, 456, 1, 451, 1, 447, 1, 443, 1, 439,
740 0, 434, 0, 430, 0, 426, 0, 422, 0, 418, 0, 414, 0, 410, 0, 405,
741 0, 401, 0, 397, 0, 393, 0, 389, 0, 385, 0, 381, 0, 378, 0, 374,
743 /* Gaussian interpolation using most recent 4 samples */
744 long position
= voice
->position
;
745 voice
->position
+= rate
;
746 short const* interp
= voice
->samples
+ (position
>> 12);
747 int offset
= position
>> 4 & 0xFF;
749 /* Only left half of gaussian kernel is in table, so we must mirror
751 short const* fwd
= gauss
+ offset
* 2;
752 short const* rev
= gauss
+ 510 - offset
* 2;
754 /* Use faster gaussian interpolation when exact result isn't needed
755 by pitch modulator of next channel */
757 if ( !(slow_gaussian
& vbit
) ) /* 99% of the time */
759 /* Main optimization is lack of clamping. Not a problem since
760 output never goes more than +/- 16 outside 16-bit range and
761 things are clamped later anyway. Other optimization is to
762 preserve fractional accuracy, eliminating several masks. */
763 int output
= (((fwd
[0] * interp
[0] +
764 fwd
[1] * interp
[1] +
765 rev
[1] * interp
[2] +
766 rev
[0] * interp
[3] ) >> 11) * voice
->envx
) >> 11;
768 /* duplicated here to give compiler more to run in parallel */
769 amp_0
= voice
->volume
[0] * output
;
770 amp_1
= voice
->volume
[1] * output
;
771 raw_voice
->outx
= output
>> 8;
775 int output
= *(int16_t*) &this->noise
;
776 if ( !(this->r
.g
.noise_enables
& vbit
) )
778 output
= (fwd
[0] * interp
[0]) & ~0xFFF;
779 output
= (output
+ fwd
[1] * interp
[1]) & ~0xFFF;
780 output
= (output
+ rev
[1] * interp
[2]) >> 12;
781 output
= (int16_t) (output
* 2);
782 output
+= ((rev
[0] * interp
[3]) >> 12) * 2;
783 output
= CLAMP16( output
);
785 output
= (output
* voice
->envx
) >> 11 & ~1;
787 /* duplicated here to give compiler more to run in parallel */
788 amp_0
= voice
->volume
[0] * output
;
789 amp_1
= voice
->volume
[1] * output
;
791 raw_voice
->outx
= (int8_t) (output
>> 8);
793 #else /* SPCNOINTERP */
794 /* two-point linear interpolation */
796 int amp_0
= (int16_t)this->noise
;
799 if ( (this->r
.g
.noise_enables
& vbit
) == 0 )
801 uint32_t f
= voice
->position
;
805 * Formula (fastest found so far of MANY):
806 * output = y0 + f*y1 - f*y0
809 /* separate fractional and whole parts */
810 "move.l %[f], %[y1] \r\n"
811 "and.l #0xfff, %[f] \r\n"
812 "lsr.l %[sh], %[y1] \r\n"
813 /* load samples y0 (upper) & y1 (lower) */
814 "move.l 2(%[s], %[y1].l*2), %[y1] \r\n"
816 "mac.w %[f]l, %[y1]l, %%acc0 \r\n"
818 "msac.w %[f]l, %[y1]u, %%acc0 \r\n"
819 /* separate out y0 and sign extend */
821 "movea.w %[y1], %[y0] \r\n"
822 /* fetch result, scale down and add y0 */
823 "movclr.l %%acc0, %[y1] \r\n"
824 /* output = y0 + (result >> 12) */
825 "asr.l %[sh], %[y1] \r\n"
826 "add.l %[y0], %[y1] \r\n"
827 : [f
]"+d"(f
), [y0
]"=&a"(y0
), [y1
]"=&d"(amp_0
)
828 : [s
]"a"(voice
->samples
), [sh
]"d"(12)
832 /* apply voice envelope to output */
834 "mac.w %[output]l, %[envx]l, %%acc0 \r\n"
836 : [output
]"r"(amp_0
), [envx
]"r"(voice
->envx
)
839 /* advance voice position */
840 voice
->position
+= rate
;
842 /* fetch output, scale and apply left and right
845 "movclr.l %%acc0, %[output] \r\n"
846 "asr.l %[sh], %[output] \r\n"
847 "mac.l %[vvol_0], %[output], %%acc0 \r\n"
848 "mac.l %[vvol_1], %[output], %%acc1 \r\n"
849 : [output
]"=&d"(amp_0
)
850 : [vvol_0
]"r"((int)voice
->volume
[0]),
851 [vvol_1
]"r"((int)voice
->volume
[1]),
855 /* save this output into previous, scale and save in
858 raw_voice
->outx
= amp_0
>> 8;
860 /* fetch final voice output */
862 "movclr.l %%acc0, %[amp_0] \r\n"
863 "movclr.l %%acc1, %[amp_1] \r\n"
864 : [amp_0
]"=r"(amp_0
), [amp_1
]"=r"(amp_1
)
866 #elif defined (CPU_ARM)
869 if ( (this->r
.g
.noise_enables
& vbit
) != 0 ) {
870 amp_0
= *(int16_t *)&this->noise
;
872 uint32_t f
= voice
->position
;
873 amp_0
= (uint32_t)voice
->samples
;
876 "mov %[y1], %[f], lsr #12 \r\n"
877 "eor %[f], %[f], %[y1], lsl #12 \r\n"
878 "add %[y1], %[y0], %[y1], lsl #1 \r\n"
879 "ldrsh %[y0], [%[y1], #2] \r\n"
880 "ldrsh %[y1], [%[y1], #4] \r\n"
881 "sub %[y1], %[y1], %[y0] \r\n"
882 "mul %[f], %[y1], %[f] \r\n"
883 "add %[y0], %[y0], %[f], asr #12 \r\n"
884 : [f
]"+r"(f
), [y0
]"+r"(amp_0
), [y1
]"=&r"(amp_1
)
888 voice
->position
+= rate
;
891 "mul %[amp_1], %[amp_0], %[envx] \r\n"
892 "mov %[amp_0], %[amp_1], asr #11 \r\n"
893 "mov %[amp_1], %[amp_0], asr #8 \r\n"
894 : [amp_0
]"+r"(amp_0
), [amp_1
]"=&r"(amp_1
)
895 : [envx
]"r"(voice
->envx
)
899 raw_voice
->outx
= (int8_t)amp_1
;
902 "mul %[amp_1], %[amp_0], %[vol_1] \r\n"
903 "mul %[amp_0], %[vol_0], %[amp_0] \r\n"
904 : [amp_0
]"+r"(amp_0
), [amp_1
]"+r"(amp_1
)
905 : [vol_0
]"r"((int)voice
->volume
[0]),
906 [vol_1
]"r"((int)voice
->volume
[1])
908 #else /* Unoptimized CPU */
911 if ( (this->r
.g
.noise_enables
& vbit
) == 0 )
913 int const fraction
= voice
->position
& 0xfff;
914 short const* const pos
= (voice
->samples
+ (voice
->position
>> 12)) + 1;
915 output
= pos
[0] + ((fraction
* (pos
[1] - pos
[0])) >> 12);
917 output
= *(int16_t *)&this->noise
;
920 voice
->position
+= rate
;
922 output
= (output
* voice
->envx
) >> 11;
924 /* duplicated here to give compiler more to run in parallel */
925 int amp_0
= voice
->volume
[0] * output
;
926 int amp_1
= voice
->volume
[1] * output
;
929 raw_voice
->outx
= (int8_t) (output
>> 8);
931 #endif /* SPCNOINTERP */
934 if ( voice
->position
>= voice
->wave_end
)
936 long loop_len
= voice
->wave_loop
<< 12;
937 voice
->position
-= loop_len
;
938 this->r
.g
.wave_ended
|= vbit
;
941 this->keys_down
^= vbit
;
950 ENTER_TIMER(dsp_mix
);
955 if ( this->r
.g
.echo_ons
& vbit
)
965 /* end of voice loop */
969 /* Read feedback from echo buffer */
970 int echo_pos
= this->echo_pos
;
971 uint8_t* const echo_ptr
= RAM
+ ((echo_start
+ echo_pos
) & 0xFFFF);
973 if ( echo_pos
>= echo_wrap
)
975 this->echo_pos
= echo_pos
;
976 int fb
= swap_odd_even32(*(int32_t *)echo_ptr
);
979 /* Keep last 8 samples */
980 *this->last_fir_ptr
= fb
;
981 this->last_fir_ptr
= this->fir_ptr
;
983 /* Apply echo FIR filter to output samples read from echo buffer -
984 circular buffer is hardware incremented and masked; FIR
985 coefficients and buffer history are loaded in parallel with
986 multiply accumulate operations. Shift left by one here and once
987 again when calculating feedback to have sample values justified
988 to bit 31 in the output to ease endian swap, interleaving and
989 clamping before placing result in the program's echo buffer. */
992 "move.l (%[fir_c]) , %[_2] \r\n"
993 "mac.w %[fb]u, %[_2]u, <<, (%[fir_p])+&, %[_0], %%acc0 \r\n"
994 "mac.w %[fb]l, %[_2]u, <<, (%[fir_p])& , %[_1], %%acc1 \r\n"
995 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
996 "mac.w %[_0]l, %[_2]l, <<, 4(%[fir_c]) , %[_2], %%acc1 \r\n"
997 "mac.w %[_1]u, %[_2]u, <<, 4(%[fir_p])& , %[_0], %%acc0 \r\n"
998 "mac.w %[_1]l, %[_2]u, <<, 8(%[fir_p])& , %[_1], %%acc1 \r\n"
999 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1000 "mac.w %[_0]l, %[_2]l, <<, 8(%[fir_c]) , %[_2], %%acc1 \r\n"
1001 "mac.w %[_1]u, %[_2]u, <<, 12(%[fir_p])& , %[_0], %%acc0 \r\n"
1002 "mac.w %[_1]l, %[_2]u, <<, 16(%[fir_p])& , %[_1], %%acc1 \r\n"
1003 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1004 "mac.w %[_0]l, %[_2]l, <<, 12(%[fir_c]) , %[_2], %%acc1 \r\n"
1005 "mac.w %[_1]u, %[_2]u, <<, 20(%[fir_p])& , %[_0], %%acc0 \r\n"
1006 "mac.w %[_1]l, %[_2]u, << , %%acc1 \r\n"
1007 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1008 "mac.w %[_0]l, %[_2]l, << , %%acc1 \r\n"
1009 : [_0
]"=&r"(_0
), [_1
]"=&r"(_1
), [_2
]"=&r"(_2
),
1010 [fir_p
]"+a"(this->fir_ptr
)
1011 : [fir_c
]"a"(this->fir_coeff
), [fb
]"r"(fb
)
1014 /* Generate output */
1016 /* fetch filter results _after_ gcc loads asm
1017 block parameters to eliminate emac stalls */
1018 "movclr.l %%acc0, %[out_0] \r\n"
1019 "movclr.l %%acc1, %[out_1] \r\n"
1020 /* apply global volume */
1021 "mac.l %[chans_0], %[gv_0] , %%acc2 \r\n"
1022 "mac.l %[chans_1], %[gv_1] , %%acc3 \r\n"
1023 /* apply echo volume and add to final output */
1024 "mac.l %[ev_0], %[out_0], >>, %%acc2 \r\n"
1025 "mac.l %[ev_1], %[out_1], >>, %%acc3 \r\n"
1026 : [out_0
]"=&r"(out_0
), [out_1
]"=&r"(out_1
)
1027 : [chans_0
]"r"(chans_0
), [gv_0
]"r"(global_vol_0
),
1028 [ev_0
]"r"((int)this->r
.g
.echo_volume_0
),
1029 [chans_1
]"r"(chans_1
), [gv_1
]"r"(global_vol_1
),
1030 [ev_1
]"r"((int)this->r
.g
.echo_volume_1
)
1033 /* Feedback into echo buffer */
1034 if ( !(this->r
.g
.flags
& 0x20) )
1037 /* scale echo voices; saturate if overflow */
1038 "mac.l %[sh], %[e1] , %%acc1 \r\n"
1039 "mac.l %[sh], %[e0] , %%acc0 \r\n"
1040 /* add scaled output from FIR filter */
1041 "mac.l %[out_1], %[ef], <<, %%acc1 \r\n"
1042 "mac.l %[out_0], %[ef], <<, %%acc0 \r\n"
1043 /* swap and fetch feedback results - simply
1044 swap_odd_even32 mixed in between macs and
1045 movclrs to mitigate stall issues */
1046 "move.l #0x00ff00ff, %[sh] \r\n"
1047 "movclr.l %%acc1, %[e1] \r\n"
1049 "movclr.l %%acc0, %[e0] \r\n"
1050 "move.w %[e1], %[e0] \r\n"
1051 "and.l %[e0], %[sh] \r\n"
1052 "eor.l %[sh], %[e0] \r\n"
1053 "lsl.l #8, %[sh] \r\n"
1054 "lsr.l #8, %[e0] \r\n"
1055 "or.l %[sh], %[e0] \r\n"
1056 /* save final feedback into echo buffer */
1057 "move.l %[e0], (%[echo_ptr]) \r\n"
1058 : [e0
]"+d"(echo_0
), [e1
]"+d"(echo_1
)
1059 : [out_0
]"r"(out_0
), [out_1
]"r"(out_1
),
1060 [ef
]"r"((int)this->r
.g
.echo_feedback
),
1061 [echo_ptr
]"a"((int32_t *)echo_ptr
),
1066 /* Output final samples */
1068 /* fetch output saved in %acc2 and %acc3 */
1069 "movclr.l %%acc2, %[out_0] \r\n"
1070 "movclr.l %%acc3, %[out_1] \r\n"
1071 /* scale right by global_muting shift */
1072 "asr.l %[gm], %[out_0] \r\n"
1073 "asr.l %[gm], %[out_1] \r\n"
1074 : [out_0
]"=&d"(out_0
), [out_1
]"=&d"(out_1
)
1075 : [gm
]"d"(global_muting
)
1078 out_buf
[ 0] = out_0
;
1079 out_buf
[WAV_CHUNK_SIZE
] = out_1
;
1081 #elif defined (CPU_ARM)
1082 /* Read feedback from echo buffer */
1083 int echo_pos
= this->echo_pos
;
1084 uint8_t* const echo_ptr
= RAM
+
1085 ((this->r
.g
.echo_page
* 0x100 + echo_pos
) & 0xFFFF);
1087 if ( echo_pos
>= (this->r
.g
.echo_delay
& 15) * 0x800 )
1089 this->echo_pos
= echo_pos
;
1091 int fb_0
= GET_LE16SA( echo_ptr
);
1092 int fb_1
= GET_LE16SA( echo_ptr
+ 2 );
1094 /* Keep last 8 samples */
1095 int32_t *fir_ptr
= this->fir_ptr
;
1099 "str %[fb_0], [%[fir_p]], #4 \r\n"
1100 "str %[fb_1], [%[fir_p]], #4 \r\n"
1101 /* duplicate at +8 eliminates wrap checking below */
1102 "str %[fb_0], [%[fir_p], #56] \r\n"
1103 "str %[fb_1], [%[fir_p], #60] \r\n"
1104 : [fir_p
]"+r"(fir_ptr
)
1105 : [fb_0
]"r"(fb_0
), [fb_1
]"r"(fb_1
)
1108 this->fir_ptr
= (int32_t *)((intptr_t)fir_ptr
& FIR_BUF_MASK
);
1109 int32_t *fir_coeff
= this->fir_coeff
;
1112 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1113 "ldmia %[fir_p]!, { r4-r5 } \r\n"
1114 "mul %[fb_0], r0, %[fb_0] \r\n"
1115 "mul %[fb_1], r0, %[fb_1] \r\n"
1116 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1117 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1118 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1119 "ldmia %[fir_p]!, { r2-r5 } \r\n"
1120 "mla %[fb_0], r2, r0, %[fb_0] \r\n"
1121 "mla %[fb_1], r3, r0, %[fb_1] \r\n"
1122 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1123 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1124 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1125 "ldmia %[fir_p]!, { r2-r5 } \r\n"
1126 "mla %[fb_0], r2, r0, %[fb_0] \r\n"
1127 "mla %[fb_1], r3, r0, %[fb_1] \r\n"
1128 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1129 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1130 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1131 "ldmia %[fir_p]!, { r2-r5 } \r\n"
1132 "mla %[fb_0], r2, r0, %[fb_0] \r\n"
1133 "mla %[fb_1], r3, r0, %[fb_1] \r\n"
1134 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1135 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1136 : [fb_0
]"+r"(fb_0
), [fb_1
]"+r"(fb_1
),
1137 [fir_p
]"+r"(fir_ptr
), [fir_c
]"+r"(fir_coeff
)
1139 : "r0", "r1", "r2", "r3", "r4", "r5"
1142 /* Generate output */
1143 int amp_0
= (chans_0
* global_vol_0
+ fb_0
* this->r
.g
.echo_volume_0
)
1145 int amp_1
= (chans_1
* global_vol_1
+ fb_1
* this->r
.g
.echo_volume_1
)
1148 out_buf
[ 0] = amp_0
;
1149 out_buf
[WAV_CHUNK_SIZE
] = amp_1
;
1152 if ( !(this->r
.g
.flags
& 0x20) )
1154 /* Feedback into echo buffer */
1155 int e0
= (echo_0
>> 7) + ((fb_0
* this->r
.g
.echo_feedback
) >> 14);
1156 int e1
= (echo_1
>> 7) + ((fb_1
* this->r
.g
.echo_feedback
) >> 14);
1158 SET_LE16A( echo_ptr
, e0
);
1160 SET_LE16A( echo_ptr
+ 2, e1
);
1162 #else /* Unoptimized CPU */
1163 /* Read feedback from echo buffer */
1164 int echo_pos
= this->echo_pos
;
1165 uint8_t* const echo_ptr
= RAM
+
1166 ((this->r
.g
.echo_page
* 0x100 + echo_pos
) & 0xFFFF);
1168 if ( echo_pos
>= (this->r
.g
.echo_delay
& 15) * 0x800 )
1170 this->echo_pos
= echo_pos
;
1171 int fb_0
= GET_LE16SA( echo_ptr
);
1172 int fb_1
= GET_LE16SA( echo_ptr
+ 2 );
1174 /* Keep last 8 samples */
1175 int (* const fir_ptr
) [2] = this->fir_buf
+ this->fir_pos
;
1176 this->fir_pos
= (this->fir_pos
+ 1) & (FIR_BUF_HALF
- 1);
1177 fir_ptr
[ 0] [0] = fb_0
;
1178 fir_ptr
[ 0] [1] = fb_1
;
1179 /* duplicate at +8 eliminates wrap checking below */
1180 fir_ptr
[FIR_BUF_HALF
] [0] = fb_0
;
1181 fir_ptr
[FIR_BUF_HALF
] [1] = fb_1
;
1184 fb_0
*= this->fir_coeff
[0];
1185 fb_1
*= this->fir_coeff
[0];
1188 fb_0 += fir_ptr [i] [0] * this->fir_coeff [i];\
1189 fb_1 += fir_ptr [i] [1] * this->fir_coeff [i];
1199 /* Generate output */
1200 int amp_0
= (chans_0
* global_vol_0
+ fb_0
* this->r
.g
.echo_volume_0
)
1202 int amp_1
= (chans_1
* global_vol_1
+ fb_1
* this->r
.g
.echo_volume_1
)
1204 out_buf
[ 0] = amp_0
;
1205 out_buf
[WAV_CHUNK_SIZE
] = amp_1
;
1208 if ( !(this->r
.g
.flags
& 0x20) )
1210 /* Feedback into echo buffer */
1211 int e0
= (echo_0
>> 7) + ((fb_0
* this->r
.g
.echo_feedback
) >> 14);
1212 int e1
= (echo_1
>> 7) + ((fb_1
* this->r
.g
.echo_feedback
) >> 14);
1214 SET_LE16A( echo_ptr
, e0
);
1216 SET_LE16A( echo_ptr
+ 2, e1
);
1219 #else /* SPCNOECHO == 1*/
1220 /* Generate output */
1221 int amp_0
= (chans_0
* global_vol_0
) >> global_muting
;
1222 int amp_1
= (chans_1
* global_vol_1
) >> global_muting
;
1223 out_buf
[ 0] = amp_0
;
1224 out_buf
[WAV_CHUNK_SIZE
] = amp_1
;
1226 #endif /* SPCNOECHO */
1235 void DSP_reset( struct Spc_Dsp
* this )
1237 this->keys_down
= 0;
1239 this->noise_count
= 0;
1242 this->r
.g
.flags
= 0xE0; /* reset, mute, echo off */
1243 this->r
.g
.key_ons
= 0;
1245 ci
->memset( this->voice_state
, 0, sizeof this->voice_state
);
1248 for ( i
= VOICE_COUNT
; --i
>= 0; )
1250 struct voice_t
* v
= this->voice_state
+ i
;
1251 v
->env_mode
= state_release
;
1257 for ( i
= 0; i
< 256; i
++ )
1258 this->wave_entry
[i
].start_addr
= -1;
1261 #if defined(CPU_COLDFIRE)
1262 this->fir_ptr
= fir_buf
;
1263 this->last_fir_ptr
= &fir_buf
[7];
1264 ci
->memset( fir_buf
, 0, sizeof fir_buf
);
1265 #elif defined (CPU_ARM)
1266 this->fir_ptr
= fir_buf
;
1267 ci
->memset( fir_buf
, 0, sizeof fir_buf
);
1270 ci
->memset( this->fir_buf
, 0, sizeof this->fir_buf
);
1273 assert( offsetof (struct globals_t
,unused9
[2]) == REGISTER_COUNT
);
1274 assert( sizeof (this->r
.voice
) == REGISTER_COUNT
);