1 /***************************************************************************
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 * Copyright (C) 2007-2008 Michael Sevakis (jhMikeS)
11 * Copyright (C) 2006-2007 Adam Gashlin (hcs)
12 * Copyright (C) 2004-2007 Shay Green (blargg)
13 * Copyright (C) 2002 Brad Martin
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
21 * KIND, either express or implied.
23 ****************************************************************************/
25 /* The DSP portion (awe!) */
27 #include "spc_codec.h"
28 #include "spc_profiler.h"
/* File-scope work buffers used by the DSP routines below. */

/* fir_buf: FIR_BUF_CNT 32-bit words, aligned to FIR_BUF_ALIGN and placed
   with IBSS_ATTR. Only defined when building for ColdFire or ARM.
   DSP_reset() zero-fills it and points this->fir_ptr at its start; the echo
   code then stores the samples read back from the echo buffer through that
   pointer.
   NOTE(review): no #endif matching this #if is visible in this view -
   confirm where the conditional is meant to end. */
30 #if defined(CPU_COLDFIRE) || defined (CPU_ARM)
31 int32_t fir_buf
[FIR_BUF_CNT
]
32 __attribute__ ((aligned (FIR_BUF_ALIGN
*1))) IBSS_ATTR
;
/* BRRcache: BRR_CACHE_SIZE 16-bit samples holding decoded BRR waves.
   decode_brr() writes a wave's samples starting at
   BRRcache + start_addr * 2. */
35 /* a little extra for samples that go past end */
36 int16_t BRRcache
[BRR_CACHE_SIZE
] CACHEALIGN_ATTR
;
39 void DSP_write( struct Spc_Dsp
* this, int i
, int data
)
41 assert( (unsigned) i
< REGISTER_COUNT
);
43 this->r
.reg
[i
] = data
;
46 if ( low
< 2 ) /* voice volumes */
48 int left
= *(int8_t const*) &this->r
.reg
[i
& ~1];
49 int right
= *(int8_t const*) &this->r
.reg
[i
| 1];
50 struct voice_t
* v
= this->voice_state
+ high
;
52 v
->volume
[1] = right
;
54 else if ( low
== 0x0F ) /* fir coefficients */
56 this->fir_coeff
[7 - high
] = (int8_t) data
; /* sign-extend */
/* Saturate n to the signed 16-bit range, in place, and yield the result:
     if ( n < -32768 ) n = -32768;
     if ( n >  32767 ) n =  32767;

   n must be a modifiable int lvalue without side effects, because it is
   evaluated more than once. A value is out of range exactly when it does
   not survive a round trip through int16_t; n >> 31 is then all ones for a
   negative n and zero otherwise (arithmetic shift of a 32-bit int is
   assumed), which turns 0x7FFF into -0x8000 or leaves it unchanged.

   Written as a GCC statement expression so that it works both as a
   statement and as the right-hand side of `x = CLAMP16( x )`. */
#define CLAMP16( n ) \
({ \
    if ( (int16_t) (n) != (n) ) \
        (n) = 0x7FFF ^ ((n) >> 31); \
    (n); \
})
70 static void decode_brr( struct Spc_Dsp
* this, unsigned start_addr
,
71 struct voice_t
* voice
,
72 struct raw_voice_t
const* const raw_voice
) ICODE_ATTR
;
73 static void decode_brr( struct Spc_Dsp
* this, unsigned start_addr
,
74 struct voice_t
* voice
,
75 struct raw_voice_t
const* const raw_voice
)
77 /* setup same variables as where decode_brr() is called from */
81 struct src_dir
const* const sd
=
82 &ram
.sd
[this->r
.g
.wave_page
* 0x100/sizeof(struct src_dir
)];
83 struct cache_entry_t
* const wave_entry
=
84 &this->wave_entry
[raw_voice
->waveform
];
86 /* the following block can be put in place of the call to
90 DEBUGF( "decode at %08x (wave #%d)\n",
91 start_addr
, raw_voice
->waveform
);
95 for ( i
= 0; i
< this->oldsize
; i
++ )
97 struct cache_entry_t
* e
= &this->wave_entry_old
[i
];
98 if ( e
->start_addr
== start_addr
)
100 DEBUGF( "found in wave_entry_old (oldsize=%d)\n",
107 wave_entry
->start_addr
= start_addr
;
109 uint8_t const* const loop_ptr
=
110 RAM
+ letoh16(sd
[raw_voice
->waveform
].loop
);
111 short* loop_start
= 0;
113 short* out
= BRRcache
+ start_addr
* 2;
114 wave_entry
->samples
= out
;
119 uint8_t const* addr
= RAM
+ start_addr
;
123 if ( addr
== loop_ptr
)
126 DEBUGF( "loop at %08lx (wave #%d)\n",
127 (unsigned long)(addr
- RAM
), raw_voice
->waveform
);
131 block_header
= *addr
;
134 int const filter
= (block_header
& 0x0C) - 0x08;
137 (invalid scaling gives -4096 for neg nybble, 0 for pos) */
138 static unsigned char const right_shifts
[16] = {
139 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 29, 29, 29,
141 static unsigned char const left_shifts
[16] = {
142 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11
144 int const scale
= block_header
>> 4;
145 int const right_shift
= right_shifts
[scale
];
146 int const left_shift
= left_shifts
[scale
];
148 /* output position */
149 out
+= BRR_BLOCK_SIZE
;
150 int offset
= -BRR_BLOCK_SIZE
<< 2;
152 do /* decode and filter 16 samples */
154 /* Get nybble, sign-extend, then scale
155 get byte, select which nybble, sign-extend, then shift based
156 on scaling. also handles invalid scaling values. */
157 int delta
= (int) (int8_t) (addr
[offset
>> 3] << (offset
& 4))
158 >> right_shift
<< left_shift
;
160 out
[offset
>> 2] = smp2
;
162 if ( filter
== 0 ) /* mode 0x08 (30-90% of the time) */
168 delta
+= (-smp1
- (smp1
>> 1)) >> 5;
172 if ( filter
== -4 ) /* mode 0x04 */
175 delta
+= (-smp1
) >> 5;
177 else if ( filter
> -4 ) /* mode 0x0C */
180 delta
+= (smp2
+ (smp2
>> 1)) >> 4;
182 delta
+= (-smp1
* 13) >> 7;
187 delta
= CLAMP16( delta
);
188 smp1
= (int16_t) (delta
* 2); /* sign-extend */
190 while ( (offset
+= 4) != 0 );
192 /* if next block has end flag set, this block ends early */
194 if ( (block_header
& 3) != 3 && (*addr
& 3) == 1 )
196 /* skip last 9 samples */
201 while ( !(block_header
& 1) && addr
< RAM
+ 0x10000 );
207 wave_entry
->end
= (out
- 1 - wave_entry
->samples
) << 12;
209 wave_entry
->loop
= 0;
210 if ( (block_header
& 2) )
214 int loop
= out
- loop_start
;
215 wave_entry
->loop
= loop
;
216 wave_entry
->end
+= 0x3000;
217 out
[2] = loop_start
[2];
218 out
[3] = loop_start
[3];
219 out
[4] = loop_start
[4];
223 DEBUGF( "loop point outside initial wave\n" );
227 DEBUGF( "end at %08lx (wave #%d)\n",
228 (unsigned long)(addr
- RAM
), raw_voice
->waveform
);
231 this->wave_entry_old
[this->oldsize
++] = *wave_entry
;
237 static void key_on(struct Spc_Dsp
* const this, struct voice_t
* const voice
,
238 struct src_dir
const* const sd
,
239 struct raw_voice_t
const* const raw_voice
,
240 const int key_on_delay
, const int vbit
) ICODE_ATTR
;
241 static void key_on(struct Spc_Dsp
* const this, struct voice_t
* const voice
,
242 struct src_dir
const* const sd
,
243 struct raw_voice_t
const* const raw_voice
,
244 const int key_on_delay
, const int vbit
) {
247 int const env_rate_init
= 0x7800;
248 voice
->key_on_delay
= key_on_delay
;
249 if ( key_on_delay
== 0 )
251 this->keys_down
|= vbit
;
253 voice
->env_mode
= state_attack
;
254 voice
->env_timer
= env_rate_init
; /* TODO: inaccurate? */
255 unsigned start_addr
= letoh16(sd
[raw_voice
->waveform
].start
);
258 voice
->addr
= RAM
+ start_addr
;
259 /* BRR filter uses previous samples */
260 voice
->samples
[BRR_BLOCK_SIZE
+ 1] = 0;
261 voice
->samples
[BRR_BLOCK_SIZE
+ 2] = 0;
262 /* decode three samples immediately */
263 voice
->position
= (BRR_BLOCK_SIZE
+ 3) * 0x1000 - 1;
264 voice
->block_header
= 0; /* "previous" BRR header */
268 voice
->position
= 3 * 0x1000 - 1;
269 struct cache_entry_t
* const wave_entry
=
270 &this->wave_entry
[raw_voice
->waveform
];
272 /* predecode BRR if not already */
273 if ( wave_entry
->start_addr
!= start_addr
)
275 /* the following line can be replaced by the indicated block
277 decode_brr( this, start_addr
, voice
, raw_voice
);
280 voice
->samples
= wave_entry
->samples
;
281 voice
->wave_end
= wave_entry
->end
;
282 voice
->wave_loop
= wave_entry
->loop
;
288 void DSP_run_( struct Spc_Dsp
* this, long count
, int32_t* out_buf
)
291 #if defined(CPU_ARM) && !SPC_BRRCACHE
292 uint8_t* const ram_
= ram
.ram
;
302 /* Here we check for keys on/off. Docs say that successive writes
303 to KON/KOF must be separated by at least 2 Ts periods or risk
304 being neglected. Therefore DSP only looks at these during an
305 update, and not at the time of the write. Only need to do this
306 once however, since the regs haven't changed over the whole
307 period we need to catch up with. */
310 int key_ons
= this->r
.g
.key_ons
;
311 int key_offs
= this->r
.g
.key_offs
;
312 /* keying on a voice resets that bit in ENDX */
313 this->r
.g
.wave_ended
&= ~key_ons
;
314 /* key_off bits prevent key_on from being acknowledged */
315 this->r
.g
.key_ons
= key_ons
& key_offs
;
317 /* process key events outside loop, since they won't re-occur */
318 struct voice_t
* voice
= this->voice_state
+ 8;
323 if ( key_offs
& vbit
)
325 voice
->env_mode
= state_release
;
326 voice
->key_on_delay
= 0;
328 else if ( key_ons
& vbit
)
330 voice
->key_on_delay
= 8;
333 while ( (vbit
>>= 1) != 0 );
336 struct src_dir
const* const sd
=
337 &ram
.sd
[this->r
.g
.wave_page
* 0x100/sizeof(struct src_dir
)];
339 #ifdef ROCKBOX_BIG_ENDIAN
340 /* Convert endiannesses before entering loops - these
342 const uint32_t rates
[VOICE_COUNT
] =
344 GET_LE16A( this->r
.voice
[0].rate
) & 0x3FFF,
345 GET_LE16A( this->r
.voice
[1].rate
) & 0x3FFF,
346 GET_LE16A( this->r
.voice
[2].rate
) & 0x3FFF,
347 GET_LE16A( this->r
.voice
[3].rate
) & 0x3FFF,
348 GET_LE16A( this->r
.voice
[4].rate
) & 0x3FFF,
349 GET_LE16A( this->r
.voice
[5].rate
) & 0x3FFF,
350 GET_LE16A( this->r
.voice
[6].rate
) & 0x3FFF,
351 GET_LE16A( this->r
.voice
[7].rate
) & 0x3FFF,
353 #define VOICE_RATE(x) *(x)
354 #define IF_RBE(...) __VA_ARGS__
356 /* Initialize mask register with the buffer address mask */
357 asm volatile ("move.l %[m], %%mask" : : [m
]"i"(FIR_BUF_MASK
));
358 const int echo_wrap
= (this->r
.g
.echo_delay
& 15) * 0x800;
359 const int echo_start
= this->r
.g
.echo_page
* 0x100;
360 #endif /* CPU_COLDFIRE */
362 #define VOICE_RATE(x) (GET_LE16(raw_voice->rate) & 0x3FFF)
364 #endif /* ROCKBOX_BIG_ENDIAN */
367 int const slow_gaussian
= (this->r
.g
.pitch_mods
>> 1) |
368 this->r
.g
.noise_enables
;
370 /* (g.flags & 0x40) ? 30 : 14 */
371 int const global_muting
= ((this->r
.g
.flags
& 0x40) >> 2) + 14 - 8;
372 int const global_vol_0
= this->r
.g
.volume_0
;
373 int const global_vol_1
= this->r
.g
.volume_1
;
375 /* each rate divides exactly into 0x7800 without remainder */
376 int const env_rate_init
= 0x7800;
377 static unsigned short const env_rates
[0x20] ICONST_ATTR
=
379 0x0000, 0x000F, 0x0014, 0x0018, 0x001E, 0x0028, 0x0030, 0x003C,
380 0x0050, 0x0060, 0x0078, 0x00A0, 0x00C0, 0x00F0, 0x0140, 0x0180,
381 0x01E0, 0x0280, 0x0300, 0x03C0, 0x0500, 0x0600, 0x0780, 0x0A00,
382 0x0C00, 0x0F00, 0x1400, 0x1800, 0x1E00, 0x2800, 0x3C00, 0x7800
385 do /* one pair of output samples per iteration */
388 if ( this->r
.g
.noise_enables
)
390 if ( (this->noise_count
-=
391 env_rates
[this->r
.g
.flags
& 0x1F]) <= 0 )
393 this->noise_count
= env_rate_init
;
394 int feedback
= (this->noise
<< 13) ^ (this->noise
<< 14);
395 this->noise
= (feedback
& 0x8000) ^ (this->noise
>> 1 & ~1);
403 long prev_outx
= 0; /* TODO: correct value for first channel? */
406 /* TODO: put raw_voice pointer in voice_t? */
407 struct raw_voice_t
* raw_voice
= this->r
.voice
;
408 struct voice_t
* voice
= this->voice_state
;
410 IF_RBE( const uint32_t* vr
= rates
; )
411 for ( ; vbit
< 0x100; vbit
<<= 1, ++voice
, ++raw_voice
IF_RBE( , ++vr
) )
413 /* pregen involves checking keyon, etc */
415 ENTER_TIMER(dsp_pregen
);
418 /* Key on events are delayed */
419 int key_on_delay
= voice
->key_on_delay
;
421 if ( --key_on_delay
>= 0 ) /* <1% of the time */
423 key_on(this,voice
,sd
,raw_voice
,key_on_delay
,vbit
);
426 if ( !(this->keys_down
& vbit
) ) /* Silent channel */
437 int const ENV_RANGE
= 0x800;
438 int env_mode
= voice
->env_mode
;
439 int adsr0
= raw_voice
->adsr
[0];
441 if ( env_mode
!= state_release
) /* 99% of the time */
443 env_timer
= voice
->env_timer
;
444 if ( adsr0
& 0x80 ) /* 79% of the time */
446 int adsr1
= raw_voice
->adsr
[1];
447 if ( env_mode
== state_sustain
) /* 74% of the time */
449 if ( (env_timer
-= env_rates
[adsr1
& 0x1F]) > 0 )
450 goto write_env_timer
;
452 int envx
= voice
->envx
;
453 envx
--; /* envx *= 255 / 256 */
456 /* TODO: should this be 8? */
457 raw_voice
->envx
= envx
>> 4;
460 else if ( env_mode
< 0 ) /* 25% state_decay */
462 int envx
= voice
->envx
;
464 env_rates
[(adsr0
>> 3 & 0x0E) + 0x10]) <= 0 )
466 envx
--; /* envx *= 255 / 256 */
469 /* TODO: should this be 8? */
470 raw_voice
->envx
= envx
>> 4;
471 env_timer
= env_rate_init
;
474 int sustain_level
= adsr1
>> 5;
475 if ( envx
<= (sustain_level
+ 1) * 0x100 )
476 voice
->env_mode
= state_sustain
;
478 goto write_env_timer
;
480 else /* state_attack */
482 int t
= adsr0
& 0x0F;
483 if ( (env_timer
-= env_rates
[t
* 2 + 1]) > 0 )
484 goto write_env_timer
;
486 int envx
= voice
->envx
;
488 int const step
= ENV_RANGE
/ 64;
491 envx
+= ENV_RANGE
/ 2 - step
;
493 if ( envx
>= ENV_RANGE
)
495 envx
= ENV_RANGE
- 1;
496 voice
->env_mode
= state_decay
;
499 /* TODO: should this be 8? */
500 raw_voice
->envx
= envx
>> 4;
506 int t
= raw_voice
->gain
;
510 voice
->envx
= t
<< 4;
515 if ( (env_timer
-= env_rates
[t
& 0x1F]) > 0 )
516 goto write_env_timer
;
518 int envx
= voice
->envx
;
520 if ( mode
<= 5 ) /* decay */
522 int step
= ENV_RANGE
/ 64;
523 if ( mode
== 5 ) /* exponential */
525 envx
--; /* envx *= 255 / 256 */
528 if ( (envx
-= step
) < 0 )
531 if ( voice
->env_mode
== state_attack
)
532 voice
->env_mode
= state_decay
;
537 int const step
= ENV_RANGE
/ 64;
540 envx
>= ENV_RANGE
* 3 / 4 + step
)
541 envx
+= ENV_RANGE
/ 256 - step
;
543 if ( envx
>= ENV_RANGE
)
544 envx
= ENV_RANGE
- 1;
547 /* TODO: should this be 8? */
548 raw_voice
->envx
= envx
>> 4;
553 else /* state_release */
555 int envx
= voice
->envx
;
556 if ( (envx
-= ENV_RANGE
/ 256) > 0 )
559 raw_voice
->envx
= envx
>> 8;
564 /* bit was set, so this clears it */
565 this->keys_down
^= vbit
;
571 env_timer
= env_rate_init
;
573 voice
->env_timer
= env_timer
;
577 EXIT_TIMER(dsp_pregen
);
579 ENTER_TIMER(dsp_gen
);
582 /* Decode BRR block */
583 if ( voice
->position
>= BRR_BLOCK_SIZE
* 0x1000 )
585 voice
->position
-= BRR_BLOCK_SIZE
* 0x1000;
587 uint8_t const* addr
= voice
->addr
;
588 if ( addr
>= RAM
+ 0x10000 )
591 /* action based on previous block's header */
592 if ( voice
->block_header
& 1 )
594 addr
= RAM
+ letoh16(sd
[raw_voice
->waveform
].loop
);
595 this->r
.g
.wave_ended
|= vbit
;
596 if ( !(voice
->block_header
& 2) ) /* 1% of the time */
598 /* first block was end block;
599 don't play anything (verified) */
600 /* bit was set, so this clears it */
601 this->keys_down
^= vbit
;
603 /* since voice->envx is 0,
604 samples and position don't matter */
612 int const block_header
= *addr
;
615 voice
->block_header
= block_header
;
616 int const filter
= (block_header
& 0x0C) - 0x08;
618 /* scaling (invalid scaling gives -4096 for neg nybble,
620 static unsigned char const right_shifts
[16] = {
621 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 29, 29, 29,
623 static unsigned char const left_shifts
[16] = {
624 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11
626 int const scale
= block_header
>> 4;
627 int const right_shift
= right_shifts
[scale
];
628 int const left_shift
= left_shifts
[scale
];
630 /* previous samples */
631 int smp2
= voice
->samples
[BRR_BLOCK_SIZE
+ 1];
632 int smp1
= voice
->samples
[BRR_BLOCK_SIZE
+ 2];
633 voice
->samples
[0] = voice
->samples
[BRR_BLOCK_SIZE
];
635 /* output position */
636 short* out
= voice
->samples
+ (1 + BRR_BLOCK_SIZE
);
637 int offset
= -BRR_BLOCK_SIZE
<< 2;
639 /* if next block has end flag set,
640 this block ends early (verified) */
641 if ( (block_header
& 3) != 3 && (*addr
& 3) == 1 )
643 /* arrange for last 9 samples to be skipped */
646 voice
->samples
[skip
] = voice
->samples
[BRR_BLOCK_SIZE
];
647 voice
->position
+= skip
* 0x1000;
648 offset
= (-BRR_BLOCK_SIZE
+ (skip
& ~1)) << 2;
650 /* force sample to end on next decode */
651 voice
->block_header
= 1;
654 do /* decode and filter 16 samples */
656 /* Get nybble, sign-extend, then scale
657 get byte, select which nybble, sign-extend, then shift
658 based on scaling. also handles invalid scaling values.*/
659 int delta
= (int) (int8_t) (addr
[offset
>> 3] <<
660 (offset
& 4)) >> right_shift
<< left_shift
;
662 out
[offset
>> 2] = smp2
;
664 if ( filter
== 0 ) /* mode 0x08 (30-90% of the time) */
670 delta
+= (-smp1
- (smp1
>> 1)) >> 5;
674 if ( filter
== -4 ) /* mode 0x04 */
677 delta
+= (-smp1
) >> 5;
679 else if ( filter
> -4 ) /* mode 0x0C */
682 delta
+= (smp2
+ (smp2
>> 1)) >> 4;
684 delta
+= (-smp1
* 13) >> 7;
689 delta
= CLAMP16( delta
);
690 smp1
= (int16_t) (delta
* 2); /* sign-extend */
692 while ( (offset
+= 4) != 0 );
701 /* Get rate (with possible modulation) */
702 int rate
= VOICE_RATE(vr
);
703 if ( this->r
.g
.pitch_mods
& vbit
)
704 rate
= (rate
* (prev_outx
+ 32768)) >> 15;
707 /* Interleaved gauss table (to improve cache locality). */
708 /* gauss [i * 2 + j] = normal_gauss [(1 - j) * 256 + i] */
709 static short const gauss
[512] =
711 370,1305, 366,1305, 362,1304, 358,1304, 354,1304, 351,1304, 347,1304, 343,1303,
712 339,1303, 336,1303, 332,1302, 328,1302, 325,1301, 321,1300, 318,1300, 314,1299,
713 311,1298, 307,1297, 304,1297, 300,1296, 297,1295, 293,1294, 290,1293, 286,1292,
714 283,1291, 280,1290, 276,1288, 273,1287, 270,1286, 267,1284, 263,1283, 260,1282,
715 257,1280, 254,1279, 251,1277, 248,1275, 245,1274, 242,1272, 239,1270, 236,1269,
716 233,1267, 230,1265, 227,1263, 224,1261, 221,1259, 218,1257, 215,1255, 212,1253,
717 210,1251, 207,1248, 204,1246, 201,1244, 199,1241, 196,1239, 193,1237, 191,1234,
718 188,1232, 186,1229, 183,1227, 180,1224, 178,1221, 175,1219, 173,1216, 171,1213,
719 168,1210, 166,1207, 163,1205, 161,1202, 159,1199, 156,1196, 154,1193, 152,1190,
720 150,1186, 147,1183, 145,1180, 143,1177, 141,1174, 139,1170, 137,1167, 134,1164,
721 132,1160, 130,1157, 128,1153, 126,1150, 124,1146, 122,1143, 120,1139, 118,1136,
722 117,1132, 115,1128, 113,1125, 111,1121, 109,1117, 107,1113, 106,1109, 104,1106,
723 102,1102, 100,1098, 99,1094, 97,1090, 95,1086, 94,1082, 92,1078, 90,1074,
724 89,1070, 87,1066, 86,1061, 84,1057, 83,1053, 81,1049, 80,1045, 78,1040,
725 77,1036, 76,1032, 74,1027, 73,1023, 71,1019, 70,1014, 69,1010, 67,1005,
726 66,1001, 65, 997, 64, 992, 62, 988, 61, 983, 60, 978, 59, 974, 58, 969,
727 56, 965, 55, 960, 54, 955, 53, 951, 52, 946, 51, 941, 50, 937, 49, 932,
728 48, 927, 47, 923, 46, 918, 45, 913, 44, 908, 43, 904, 42, 899, 41, 894,
729 40, 889, 39, 884, 38, 880, 37, 875, 36, 870, 36, 865, 35, 860, 34, 855,
730 33, 851, 32, 846, 32, 841, 31, 836, 30, 831, 29, 826, 29, 821, 28, 816,
731 27, 811, 27, 806, 26, 802, 25, 797, 24, 792, 24, 787, 23, 782, 23, 777,
732 22, 772, 21, 767, 21, 762, 20, 757, 20, 752, 19, 747, 19, 742, 18, 737,
733 17, 732, 17, 728, 16, 723, 16, 718, 15, 713, 15, 708, 15, 703, 14, 698,
734 14, 693, 13, 688, 13, 683, 12, 678, 12, 674, 11, 669, 11, 664, 11, 659,
735 10, 654, 10, 649, 10, 644, 9, 640, 9, 635, 9, 630, 8, 625, 8, 620,
736 8, 615, 7, 611, 7, 606, 7, 601, 6, 596, 6, 592, 6, 587, 6, 582,
737 5, 577, 5, 573, 5, 568, 5, 563, 4, 559, 4, 554, 4, 550, 4, 545,
738 4, 540, 3, 536, 3, 531, 3, 527, 3, 522, 3, 517, 2, 513, 2, 508,
739 2, 504, 2, 499, 2, 495, 2, 491, 2, 486, 1, 482, 1, 477, 1, 473,
740 1, 469, 1, 464, 1, 460, 1, 456, 1, 451, 1, 447, 1, 443, 1, 439,
741 0, 434, 0, 430, 0, 426, 0, 422, 0, 418, 0, 414, 0, 410, 0, 405,
742 0, 401, 0, 397, 0, 393, 0, 389, 0, 385, 0, 381, 0, 378, 0, 374,
744 /* Gaussian interpolation using most recent 4 samples */
745 long position
= voice
->position
;
746 voice
->position
+= rate
;
747 short const* interp
= voice
->samples
+ (position
>> 12);
748 int offset
= position
>> 4 & 0xFF;
750 /* Only left half of gaussian kernel is in table, so we must mirror
752 short const* fwd
= gauss
+ offset
* 2;
753 short const* rev
= gauss
+ 510 - offset
* 2;
755 /* Use faster gaussian interpolation when exact result isn't needed
756 by pitch modulator of next channel */
758 if ( !(slow_gaussian
& vbit
) ) /* 99% of the time */
760 /* Main optimization is lack of clamping. Not a problem since
761 output never goes more than +/- 16 outside 16-bit range and
762 things are clamped later anyway. Other optimization is to
763 preserve fractional accuracy, eliminating several masks. */
764 int output
= (((fwd
[0] * interp
[0] +
765 fwd
[1] * interp
[1] +
766 rev
[1] * interp
[2] +
767 rev
[0] * interp
[3] ) >> 11) * voice
->envx
) >> 11;
769 /* duplicated here to give compiler more to run in parallel */
770 amp_0
= voice
->volume
[0] * output
;
771 amp_1
= voice
->volume
[1] * output
;
772 raw_voice
->outx
= output
>> 8;
776 int output
= *(int16_t*) &this->noise
;
777 if ( !(this->r
.g
.noise_enables
& vbit
) )
779 output
= (fwd
[0] * interp
[0]) & ~0xFFF;
780 output
= (output
+ fwd
[1] * interp
[1]) & ~0xFFF;
781 output
= (output
+ rev
[1] * interp
[2]) >> 12;
782 output
= (int16_t) (output
* 2);
783 output
+= ((rev
[0] * interp
[3]) >> 12) * 2;
784 output
= CLAMP16( output
);
786 output
= (output
* voice
->envx
) >> 11 & ~1;
788 /* duplicated here to give compiler more to run in parallel */
789 amp_0
= voice
->volume
[0] * output
;
790 amp_1
= voice
->volume
[1] * output
;
792 raw_voice
->outx
= (int8_t) (output
>> 8);
794 #else /* SPCNOINTERP */
795 /* two-point linear interpolation */
797 int amp_0
= (int16_t)this->noise
;
800 if ( (this->r
.g
.noise_enables
& vbit
) == 0 )
802 uint32_t f
= voice
->position
;
806 * Formula (fastest found so far of MANY):
807 * output = y0 + f*y1 - f*y0
810 /* separate fractional and whole parts */
811 "move.l %[f], %[y1] \r\n"
812 "and.l #0xfff, %[f] \r\n"
813 "lsr.l %[sh], %[y1] \r\n"
814 /* load samples y0 (upper) & y1 (lower) */
815 "move.l 2(%[s], %[y1].l*2), %[y1] \r\n"
817 "mac.w %[f]l, %[y1]l, %%acc0 \r\n"
819 "msac.w %[f]l, %[y1]u, %%acc0 \r\n"
820 /* separate out y0 and sign extend */
822 "movea.w %[y1], %[y0] \r\n"
823 /* fetch result, scale down and add y0 */
824 "movclr.l %%acc0, %[y1] \r\n"
825 /* output = y0 + (result >> 12) */
826 "asr.l %[sh], %[y1] \r\n"
827 "add.l %[y0], %[y1] \r\n"
828 : [f
]"+d"(f
), [y0
]"=&a"(y0
), [y1
]"=&d"(amp_0
)
829 : [s
]"a"(voice
->samples
), [sh
]"d"(12)
833 /* apply voice envelope to output */
835 "mac.w %[output]l, %[envx]l, %%acc0 \r\n"
837 : [output
]"r"(amp_0
), [envx
]"r"(voice
->envx
)
840 /* advance voice position */
841 voice
->position
+= rate
;
843 /* fetch output, scale and apply left and right
846 "movclr.l %%acc0, %[output] \r\n"
847 "asr.l %[sh], %[output] \r\n"
848 "mac.l %[vvol_0], %[output], %%acc0 \r\n"
849 "mac.l %[vvol_1], %[output], %%acc1 \r\n"
850 : [output
]"=&d"(amp_0
)
851 : [vvol_0
]"r"((int)voice
->volume
[0]),
852 [vvol_1
]"r"((int)voice
->volume
[1]),
856 /* save this output into previous, scale and save in
859 raw_voice
->outx
= amp_0
>> 8;
861 /* fetch final voice output */
863 "movclr.l %%acc0, %[amp_0] \r\n"
864 "movclr.l %%acc1, %[amp_1] \r\n"
865 : [amp_0
]"=r"(amp_0
), [amp_1
]"=r"(amp_1
)
867 #elif defined (CPU_ARM)
870 if ( (this->r
.g
.noise_enables
& vbit
) != 0 ) {
871 amp_0
= *(int16_t *)&this->noise
;
873 uint32_t f
= voice
->position
;
874 amp_0
= (uint32_t)voice
->samples
;
877 "mov %[y1], %[f], lsr #12 \r\n"
878 "eor %[f], %[f], %[y1], lsl #12 \r\n"
879 "add %[y1], %[y0], %[y1], lsl #1 \r\n"
880 "ldrsh %[y0], [%[y1], #2] \r\n"
881 "ldrsh %[y1], [%[y1], #4] \r\n"
882 "sub %[y1], %[y1], %[y0] \r\n"
883 "mul %[f], %[y1], %[f] \r\n"
884 "add %[y0], %[y0], %[f], asr #12 \r\n"
885 : [f
]"+r"(f
), [y0
]"+r"(amp_0
), [y1
]"=&r"(amp_1
)
889 voice
->position
+= rate
;
892 "mul %[amp_1], %[amp_0], %[envx] \r\n"
893 "mov %[amp_0], %[amp_1], asr #11 \r\n"
894 "mov %[amp_1], %[amp_0], asr #8 \r\n"
895 : [amp_0
]"+r"(amp_0
), [amp_1
]"=&r"(amp_1
)
896 : [envx
]"r"(voice
->envx
)
900 raw_voice
->outx
= (int8_t)amp_1
;
903 "mul %[amp_1], %[amp_0], %[vol_1] \r\n"
904 "mul %[amp_0], %[vol_0], %[amp_0] \r\n"
905 : [amp_0
]"+r"(amp_0
), [amp_1
]"+r"(amp_1
)
906 : [vol_0
]"r"((int)voice
->volume
[0]),
907 [vol_1
]"r"((int)voice
->volume
[1])
909 #else /* Unoptimized CPU */
912 if ( (this->r
.g
.noise_enables
& vbit
) == 0 )
914 int const fraction
= voice
->position
& 0xfff;
915 short const* const pos
= (voice
->samples
+ (voice
->position
>> 12)) + 1;
916 output
= pos
[0] + ((fraction
* (pos
[1] - pos
[0])) >> 12);
918 output
= *(int16_t *)&this->noise
;
921 voice
->position
+= rate
;
923 output
= (output
* voice
->envx
) >> 11;
925 /* duplicated here to give compiler more to run in parallel */
926 int amp_0
= voice
->volume
[0] * output
;
927 int amp_1
= voice
->volume
[1] * output
;
930 raw_voice
->outx
= (int8_t) (output
>> 8);
932 #endif /* SPCNOINTERP */
935 if ( voice
->position
>= voice
->wave_end
)
937 long loop_len
= voice
->wave_loop
<< 12;
938 voice
->position
-= loop_len
;
939 this->r
.g
.wave_ended
|= vbit
;
942 this->keys_down
^= vbit
;
951 ENTER_TIMER(dsp_mix
);
956 if ( this->r
.g
.echo_ons
& vbit
)
966 /* end of voice loop */
970 /* Read feedback from echo buffer */
971 int echo_pos
= this->echo_pos
;
972 uint8_t* const echo_ptr
= RAM
+ ((echo_start
+ echo_pos
) & 0xFFFF);
974 if ( echo_pos
>= echo_wrap
)
976 this->echo_pos
= echo_pos
;
977 int fb
= swap_odd_even32(*(int32_t *)echo_ptr
);
980 /* Keep last 8 samples */
981 *this->last_fir_ptr
= fb
;
982 this->last_fir_ptr
= this->fir_ptr
;
984 /* Apply echo FIR filter to output samples read from echo buffer -
985 circular buffer is hardware incremented and masked; FIR
986 coefficients and buffer history are loaded in parallel with
987 multiply accumulate operations. Shift left by one here and once
988 again when calculating feedback to have sample values justified
989 to bit 31 in the output to ease endian swap, interleaving and
990 clamping before placing result in the program's echo buffer. */
993 "move.l (%[fir_c]) , %[_2] \r\n"
994 "mac.w %[fb]u, %[_2]u, <<, (%[fir_p])+&, %[_0], %%acc0 \r\n"
995 "mac.w %[fb]l, %[_2]u, <<, (%[fir_p])& , %[_1], %%acc1 \r\n"
996 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
997 "mac.w %[_0]l, %[_2]l, <<, 4(%[fir_c]) , %[_2], %%acc1 \r\n"
998 "mac.w %[_1]u, %[_2]u, <<, 4(%[fir_p])& , %[_0], %%acc0 \r\n"
999 "mac.w %[_1]l, %[_2]u, <<, 8(%[fir_p])& , %[_1], %%acc1 \r\n"
1000 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1001 "mac.w %[_0]l, %[_2]l, <<, 8(%[fir_c]) , %[_2], %%acc1 \r\n"
1002 "mac.w %[_1]u, %[_2]u, <<, 12(%[fir_p])& , %[_0], %%acc0 \r\n"
1003 "mac.w %[_1]l, %[_2]u, <<, 16(%[fir_p])& , %[_1], %%acc1 \r\n"
1004 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1005 "mac.w %[_0]l, %[_2]l, <<, 12(%[fir_c]) , %[_2], %%acc1 \r\n"
1006 "mac.w %[_1]u, %[_2]u, <<, 20(%[fir_p])& , %[_0], %%acc0 \r\n"
1007 "mac.w %[_1]l, %[_2]u, << , %%acc1 \r\n"
1008 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1009 "mac.w %[_0]l, %[_2]l, << , %%acc1 \r\n"
1010 : [_0
]"=&r"(_0
), [_1
]"=&r"(_1
), [_2
]"=&r"(_2
),
1011 [fir_p
]"+a"(this->fir_ptr
)
1012 : [fir_c
]"a"(this->fir_coeff
), [fb
]"r"(fb
)
1015 /* Generate output */
1017 /* fetch filter results _after_ gcc loads asm
1018 block parameters to eliminate emac stalls */
1019 "movclr.l %%acc0, %[out_0] \r\n"
1020 "movclr.l %%acc1, %[out_1] \r\n"
1021 /* apply global volume */
1022 "mac.l %[chans_0], %[gv_0] , %%acc2 \r\n"
1023 "mac.l %[chans_1], %[gv_1] , %%acc3 \r\n"
1024 /* apply echo volume and add to final output */
1025 "mac.l %[ev_0], %[out_0], >>, %%acc2 \r\n"
1026 "mac.l %[ev_1], %[out_1], >>, %%acc3 \r\n"
1027 : [out_0
]"=&r"(out_0
), [out_1
]"=&r"(out_1
)
1028 : [chans_0
]"r"(chans_0
), [gv_0
]"r"(global_vol_0
),
1029 [ev_0
]"r"((int)this->r
.g
.echo_volume_0
),
1030 [chans_1
]"r"(chans_1
), [gv_1
]"r"(global_vol_1
),
1031 [ev_1
]"r"((int)this->r
.g
.echo_volume_1
)
1034 /* Feedback into echo buffer */
1035 if ( !(this->r
.g
.flags
& 0x20) )
1038 /* scale echo voices; saturate if overflow */
1039 "mac.l %[sh], %[e1] , %%acc1 \r\n"
1040 "mac.l %[sh], %[e0] , %%acc0 \r\n"
1041 /* add scaled output from FIR filter */
1042 "mac.l %[out_1], %[ef], <<, %%acc1 \r\n"
1043 "mac.l %[out_0], %[ef], <<, %%acc0 \r\n"
1044 /* swap and fetch feedback results - simply
1045 swap_odd_even32 mixed in between macs and
1046 movclrs to mitigate stall issues */
1047 "move.l #0x00ff00ff, %[sh] \r\n"
1048 "movclr.l %%acc1, %[e1] \r\n"
1050 "movclr.l %%acc0, %[e0] \r\n"
1051 "move.w %[e1], %[e0] \r\n"
1052 "and.l %[e0], %[sh] \r\n"
1053 "eor.l %[sh], %[e0] \r\n"
1054 "lsl.l #8, %[sh] \r\n"
1055 "lsr.l #8, %[e0] \r\n"
1056 "or.l %[sh], %[e0] \r\n"
1057 /* save final feedback into echo buffer */
1058 "move.l %[e0], (%[echo_ptr]) \r\n"
1059 : [e0
]"+d"(echo_0
), [e1
]"+d"(echo_1
)
1060 : [out_0
]"r"(out_0
), [out_1
]"r"(out_1
),
1061 [ef
]"r"((int)this->r
.g
.echo_feedback
),
1062 [echo_ptr
]"a"((int32_t *)echo_ptr
),
1067 /* Output final samples */
1069 /* fetch output saved in %acc2 and %acc3 */
1070 "movclr.l %%acc2, %[out_0] \r\n"
1071 "movclr.l %%acc3, %[out_1] \r\n"
1072 /* scale right by global_muting shift */
1073 "asr.l %[gm], %[out_0] \r\n"
1074 "asr.l %[gm], %[out_1] \r\n"
1075 : [out_0
]"=&d"(out_0
), [out_1
]"=&d"(out_1
)
1076 : [gm
]"d"(global_muting
)
1079 out_buf
[ 0] = out_0
;
1080 out_buf
[WAV_CHUNK_SIZE
] = out_1
;
1082 #elif defined (CPU_ARM)
1083 /* Read feedback from echo buffer */
1084 int echo_pos
= this->echo_pos
;
1085 uint8_t* const echo_ptr
= RAM
+
1086 ((this->r
.g
.echo_page
* 0x100 + echo_pos
) & 0xFFFF);
1088 if ( echo_pos
>= (this->r
.g
.echo_delay
& 15) * 0x800 )
1090 this->echo_pos
= echo_pos
;
1092 int fb_0
= GET_LE16SA( echo_ptr
);
1093 int fb_1
= GET_LE16SA( echo_ptr
+ 2 );
1095 /* Keep last 8 samples */
1096 int32_t *fir_ptr
= this->fir_ptr
;
1100 "str %[fb_0], [%[fir_p]], #4 \r\n"
1101 "str %[fb_1], [%[fir_p]], #4 \r\n"
1102 /* duplicate at +8 eliminates wrap checking below */
1103 "str %[fb_0], [%[fir_p], #56] \r\n"
1104 "str %[fb_1], [%[fir_p], #60] \r\n"
1105 : [fir_p
]"+r"(fir_ptr
)
1106 : [fb_0
]"r"(fb_0
), [fb_1
]"r"(fb_1
)
1109 this->fir_ptr
= (int32_t *)((intptr_t)fir_ptr
& FIR_BUF_MASK
);
1110 int32_t *fir_coeff
= this->fir_coeff
;
1113 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1114 "ldmia %[fir_p]!, { r4-r5 } \r\n"
1115 "mul %[fb_0], r0, %[fb_0] \r\n"
1116 "mul %[fb_1], r0, %[fb_1] \r\n"
1117 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1118 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1119 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1120 "ldmia %[fir_p]!, { r2-r5 } \r\n"
1121 "mla %[fb_0], r2, r0, %[fb_0] \r\n"
1122 "mla %[fb_1], r3, r0, %[fb_1] \r\n"
1123 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1124 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1125 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1126 "ldmia %[fir_p]!, { r2-r5 } \r\n"
1127 "mla %[fb_0], r2, r0, %[fb_0] \r\n"
1128 "mla %[fb_1], r3, r0, %[fb_1] \r\n"
1129 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1130 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1131 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1132 "ldmia %[fir_p]!, { r2-r5 } \r\n"
1133 "mla %[fb_0], r2, r0, %[fb_0] \r\n"
1134 "mla %[fb_1], r3, r0, %[fb_1] \r\n"
1135 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1136 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1137 : [fb_0
]"+r"(fb_0
), [fb_1
]"+r"(fb_1
),
1138 [fir_p
]"+r"(fir_ptr
), [fir_c
]"+r"(fir_coeff
)
1140 : "r0", "r1", "r2", "r3", "r4", "r5"
1143 /* Generate output */
1144 int amp_0
= (chans_0
* global_vol_0
+ fb_0
* this->r
.g
.echo_volume_0
)
1146 int amp_1
= (chans_1
* global_vol_1
+ fb_1
* this->r
.g
.echo_volume_1
)
1149 out_buf
[ 0] = amp_0
;
1150 out_buf
[WAV_CHUNK_SIZE
] = amp_1
;
1153 if ( !(this->r
.g
.flags
& 0x20) )
1155 /* Feedback into echo buffer */
1156 int e0
= (echo_0
>> 7) + ((fb_0
* this->r
.g
.echo_feedback
) >> 14);
1157 int e1
= (echo_1
>> 7) + ((fb_1
* this->r
.g
.echo_feedback
) >> 14);
1159 SET_LE16A( echo_ptr
, e0
);
1161 SET_LE16A( echo_ptr
+ 2, e1
);
1163 #else /* Unoptimized CPU */
1164 /* Read feedback from echo buffer */
1165 int echo_pos
= this->echo_pos
;
1166 uint8_t* const echo_ptr
= RAM
+
1167 ((this->r
.g
.echo_page
* 0x100 + echo_pos
) & 0xFFFF);
1169 if ( echo_pos
>= (this->r
.g
.echo_delay
& 15) * 0x800 )
1171 this->echo_pos
= echo_pos
;
1172 int fb_0
= GET_LE16SA( echo_ptr
);
1173 int fb_1
= GET_LE16SA( echo_ptr
+ 2 );
1175 /* Keep last 8 samples */
1176 int (* const fir_ptr
) [2] = this->fir_buf
+ this->fir_pos
;
1177 this->fir_pos
= (this->fir_pos
+ 1) & (FIR_BUF_HALF
- 1);
1178 fir_ptr
[ 0] [0] = fb_0
;
1179 fir_ptr
[ 0] [1] = fb_1
;
1180 /* duplicate at +8 eliminates wrap checking below */
1181 fir_ptr
[FIR_BUF_HALF
] [0] = fb_0
;
1182 fir_ptr
[FIR_BUF_HALF
] [1] = fb_1
;
1185 fb_0
*= this->fir_coeff
[0];
1186 fb_1
*= this->fir_coeff
[0];
1189 fb_0 += fir_ptr [i] [0] * this->fir_coeff [i];\
1190 fb_1 += fir_ptr [i] [1] * this->fir_coeff [i];
1200 /* Generate output */
1201 int amp_0
= (chans_0
* global_vol_0
+ fb_0
* this->r
.g
.echo_volume_0
)
1203 int amp_1
= (chans_1
* global_vol_1
+ fb_1
* this->r
.g
.echo_volume_1
)
1205 out_buf
[ 0] = amp_0
;
1206 out_buf
[WAV_CHUNK_SIZE
] = amp_1
;
1209 if ( !(this->r
.g
.flags
& 0x20) )
1211 /* Feedback into echo buffer */
1212 int e0
= (echo_0
>> 7) + ((fb_0
* this->r
.g
.echo_feedback
) >> 14);
1213 int e1
= (echo_1
>> 7) + ((fb_1
* this->r
.g
.echo_feedback
) >> 14);
1215 SET_LE16A( echo_ptr
, e0
);
1217 SET_LE16A( echo_ptr
+ 2, e1
);
1220 #else /* SPCNOECHO == 1*/
1221 /* Generate output */
1222 int amp_0
= (chans_0
* global_vol_0
) >> global_muting
;
1223 int amp_1
= (chans_1
* global_vol_1
) >> global_muting
;
1224 out_buf
[ 0] = amp_0
;
1225 out_buf
[WAV_CHUNK_SIZE
] = amp_1
;
1227 #endif /* SPCNOECHO */
/* Put the DSP state in *this into its reset condition.

   Effects visible in this block:
     - keys_down and noise_count are cleared
     - r.g.flags is set to 0xE0 and r.g.key_ons is cleared
     - voice_state is zero-filled, then every voice's env_mode is set to
       state_release
     - all 256 wave_entry slots get start_addr = -1; key_on() calls
       decode_brr() whenever a slot's start_addr differs from the requested
       one, so this presumably forces a fresh decode - confirm against the
       field's type
     - the FIR buffer pointers are re-initialised and the FIR buffer is
       zero-filled, in a CPU-specific way
     - two assertions check the register layout against REGISTER_COUNT

   NOTE(review): `i` is used as the loop counter but no declaration of it is
   visible in this view. */
1236 void DSP_reset( struct Spc_Dsp
* this )
1238 this->keys_down
= 0;
1240 this->noise_count
= 0;
1243 this->r
.g
.flags
= 0xE0; /* reset, mute, echo off */
1244 this->r
.g
.key_ons
= 0;
/* clear all per-voice state in one go ... */
1246 ci
->memset( this->voice_state
, 0, sizeof this->voice_state
);
/* ... then give each voice the one non-zero default it needs; counts
   down from VOICE_COUNT - 1 to 0 */
1249 for ( i
= VOICE_COUNT
; --i
>= 0; )
1251 struct voice_t
* v
= this->voice_state
+ i
;
1252 v
->env_mode
= state_release
;
/* mark every wave_entry slot with a start address of -1 */
1258 for ( i
= 0; i
< 256; i
++ )
1259 this->wave_entry
[i
].start_addr
= -1;
/* ColdFire: uses the file-scope fir_buf and additionally keeps
   last_fir_ptr, initialised to element 7 of the buffer */
1262 #if defined(CPU_COLDFIRE)
1263 this->fir_ptr
= fir_buf
;
1264 this->last_fir_ptr
= &fir_buf
[7];
1265 ci
->memset( fir_buf
, 0, sizeof fir_buf
);
/* ARM: uses the file-scope fir_buf with a single pointer */
1266 #elif defined (CPU_ARM)
1267 this->fir_ptr
= fir_buf
;
1268 ci
->memset( fir_buf
, 0, sizeof fir_buf
);
/* NOTE(review): no #else / #endif is visible between the ARM branch and
   the statements below - confirm which of them are meant to be conditional.
   The memset here targets the fir_buf member of *this, not the file-scope
   array used above. */
1271 ci
->memset( this->fir_buf
, 0, sizeof this->fir_buf
);
/* layout checks: the globals up to unused9 [2], and the per-voice register
   array, must each span exactly REGISTER_COUNT bytes */
1274 assert( offsetof (struct globals_t
,unused9
[2]) == REGISTER_COUNT
);
1275 assert( sizeof (this->r
.voice
) == REGISTER_COUNT
);