apps/codecs/spc/spc_dsp.c

   1 /***************************************************************************
   2  *             __________               __   ___.
   3  *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
   4  *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
   5  *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
   6  *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
   7  *                     \/            \/     \/    \/            \/
   8  * $Id$
   9  *
  10  * Copyright (C) 2007-2008 Michael Sevakis (jhMikeS)
  11  * Copyright (C) 2006-2007 Adam Gashlin (hcs)
  12  * Copyright (C) 2004-2007 Shay Green (blargg)
  13  * Copyright (C) 2002 Brad Martin
  14  *
  15  * All files in this archive are subject to the GNU General Public License.
  16  * See the file COPYING in the source tree root for full license agreement.
  17  *
  18  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  19  * KIND, either express or implied.
  20  *
  21  ****************************************************************************/
  22
  23 /* The DSP portion (awe!) */
  24 #include "codec.h"
  25 #include "codecs.h"
  26 #include "spc_codec.h"
  27 #include "spc_profiler.h"
  28
  29 #if defined(CPU_COLDFIRE) || defined (CPU_ARM)
     /* FIR working buffer, only defined for Coldfire and ARM builds.
        It holds FIR_BUF_CNT 32-bit entries, is aligned to FIR_BUF_ALIGN and is
        placed with IBSS_ATTR.
        NOTE(review): presumably the alignment exists so that the buffer
        address can be wrapped with FIR_BUF_MASK (DSP_run_ loads that mask
        into the Coldfire mask register) - confirm against the FIR code. */
  30 int32_t fir_buf[FIR_BUF_CNT]
  31     __attribute__ ((aligned (FIR_BUF_ALIGN*1))) IBSS_ATTR;
  32 #endif
  33 #if SPC_BRRCACHE
     /* Destination for waves predecoded by decode_brr(): the samples of the
        wave that starts at RAM offset start_addr are written beginning at
        BRRcache + start_addr * 2. */
  34 /* a little extra for samples that go past end */
  35 int16_t BRRcache [BRR_CACHE_SIZE] CACHEALIGN_ATTR;
  36 #endif
  37
     /* Write one DSP register and refresh the values cached from it.

        this - DSP state; r.reg [], voice_state [] and fir_coeff [] are updated
        i    - register index, must be below REGISTER_COUNT (only checked by
               the assert)
        data - value to store in r.reg [i]

        The upper nybble of i selects the voice (or FIR tap), the lower nybble
        selects the register inside that group:
          low 0 or 1 - volume pair: both volume registers are re-read as signed
                       8-bit values and copied to voice_state [high].volume
          low 0x0F   - FIR coefficient: stored sign-extended, in reversed
                       order, at fir_coeff [7 - high]
        Any other register is only stored in r.reg. */
  38 void DSP_write( struct Spc_Dsp* this, int i, int data )
  39 {
  40     assert( (unsigned) i < REGISTER_COUNT );
  41
  42     this->r.reg [i] = data;
  43     int high = i >> 4;
  44     int low  = i & 0x0F;
  45     if ( low < 2 ) /* voice volumes */
  46     {
              /* i & ~1 and i | 1 address the even/odd register of the pair, so
                 both cached volumes are refreshed whichever one was written */
  47         int left  = *(int8_t const*) &this->r.reg [i & ~1];
  48         int right = *(int8_t const*) &this->r.reg [i |  1];
  49         struct voice_t* v = this->voice_state + high;
  50         v->volume [0] = left;
  51         v->volume [1] = right;
  52     }
  53     else if ( low == 0x0F ) /* fir coefficients */
  54     {
  55         this->fir_coeff [7 - high] = (int8_t) data; /* sign-extend */
  56     }
  57 }
  58
  59 /* if ( n < -32768 ) out = -32768; */
  60 /* if ( n >  32767 ) out =  32767; */
     /* Saturate n to the int16_t range.

        Written as a ({ ... }) statement expression (a compiler extension):
        when n does not survive a round trip through int16_t it is overwritten
        in place, and the resulting n is also the value of the expression.
        n therefore has to be a modifiable lvalue. It is expanded several
        times and without parentheses, so pass a plain variable with no side
        effects.

        0x7FFF ^ (n >> 31) yields 0x7FFF for a non-negative n. For a negative n
        it yields -0x8000, assuming n is a 32-bit int and >> is an arithmetic
        shift - NOTE(review): confirm for every target compiler. */
  61 #define CLAMP16( n ) \
  62 ({                              \
  63     if ( (int16_t) n != n )     \
  64         n = 0x7FFF ^ (n >> 31); \
  65     n;                          \
  66 })
  67
  68 #if SPC_BRRCACHE
  69 static void decode_brr( struct Spc_Dsp* this, unsigned start_addr,
  70                         struct voice_t* voice,
  71                         struct raw_voice_t const* const raw_voice ) ICODE_ATTR;
     /* Predecode one BRR-compressed wave into BRRcache (SPC_BRRCACHE builds).

        this       - DSP state; r.g.wave_page is read, wave_entry [],
                     wave_entry_old [] and oldsize are updated
        start_addr - offset of the wave's first block inside RAM
        voice      - only voice->addr is written (address just past the most
                     recently read block), and only when the wave is decoded
        raw_voice  - supplies the waveform number that indexes the source
                     directory and wave_entry []

        On return wave_entry [raw_voice->waveform] describes the wave:
        start_addr, samples (pointer into BRRcache), end and loop.
        wave_entry_old [0 .. oldsize) is searched first; a hit on start_addr
        is copied and nothing is decoded. Otherwise the wave is decoded block
        by block and the new entry is appended to wave_entry_old. */
  72 static void decode_brr( struct Spc_Dsp* this, unsigned start_addr,
  73                         struct voice_t* voice,
  74                         struct raw_voice_t const* const raw_voice )
  75 {
  76     /* setup same variables as where decode_brr() is called from */
  77     #undef RAM
  78     #define RAM ram.ram
  79     struct src_dir const* const sd =
  80         (struct src_dir*) &RAM [this->r.g.wave_page * 0x100];
  81     struct cache_entry_t* const wave_entry =
  82         &this->wave_entry [raw_voice->waveform];
  83
  84     /* the following block can be put in place of the call to
  85        decode_brr() below
  86     */
  87     {
  88         DEBUGF( "decode at %08x (wave #%d)\n",
  89                 start_addr, raw_voice->waveform );
  90
  91         /* see if in cache */
  92         int i;
  93         for ( i = 0; i < this->oldsize; i++ )
  94         {
  95             struct cache_entry_t* e = &this->wave_entry_old [i];
  96             if ( e->start_addr == start_addr )
  97             {
  98                 DEBUGF( "found in wave_entry_old (oldsize=%d)\n",
  99                     this->oldsize );
 100                 *wave_entry = *e;
 101                 goto wave_in_cache;
 102             }
 103         }
 104
 105         wave_entry->start_addr = start_addr;
 106
              /* address of the block named by the directory's loop field;
                 loop_start stays 0 unless a decoded block begins exactly there */
 107         uint8_t const* const loop_ptr =
 108             RAM + GET_LE16A( sd [raw_voice->waveform].loop );
 109         short* loop_start = 0;
 110
              /* output area for this wave; its first slot is set to 0 */
 111         short* out = BRRcache + start_addr * 2;
 112         wave_entry->samples = out;
 113         *out++ = 0;
              /* smp1 = newest decoded sample, smp2 = the one before it */
 114         int smp1 = 0;
 115         int smp2 = 0;
 116
 117         uint8_t const* addr = RAM + start_addr;
 118         int block_header;
 119         do
 120         {
 121             if ( addr == loop_ptr )
 122             {
 123                 loop_start = out;
 124                 DEBUGF( "loop at %08lx (wave #%d)\n",
 125                         (unsigned long)(addr - RAM), raw_voice->waveform );
 126             }
 127
 128             /* header */
                  /* a block is 9 bytes: the header followed by 8 data bytes;
                     addr is left pointing at the next block */
 129             block_header = *addr;
 130             addr += 9;
 131             voice->addr = addr;
                  /* header bits 0x0C select the filter; after subtracting 8 the
                     four modes 0x00/0x04/0x08/0x0C become -8/-4/0/4 */
 132             int const filter = (block_header & 0x0C) - 0x08;
 133
 134             /* scaling
 135                (invalid scaling gives -4096 for neg nybble, 0 for pos) */
 136             static unsigned char const right_shifts [16] = {
 137                 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  4,  4, 29, 29, 29,
 138             };
 139             static unsigned char const left_shifts  [16] = {
 140                 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11
 141             };
                  /* upper nybble of the header indexes both shift tables */
 142             int const scale = block_header >> 4;
 143             int const right_shift = right_shifts [scale];
 144             int const left_shift  = left_shifts  [scale];
 145
 146             /* output position */
                  /* out is moved past the block first and offset then runs from
                     -BRR_BLOCK_SIZE * 4 up to 0 in steps of 4, so that a single
                     counter provides:
                       offset >> 3 - data byte index, negative, relative to addr
                                     (which already points past this block)
                       offset & 4  - 0 selects the high nybble, 4 the low one
                       offset >> 2 - sample index, negative, relative to out
                     NOTE(review): -BRR_BLOCK_SIZE << 2 left-shifts a negative
                     value (assuming BRR_BLOCK_SIZE is a positive constant),
                     which ISO C leaves undefined, and the >> on negative values
                     is implementation-defined; the code relies on the usual
                     two's complement / arithmetic shift behaviour. */
 147             out += BRR_BLOCK_SIZE;
 148             int offset = -BRR_BLOCK_SIZE << 2;
 149
 150             do /* decode and filter 16 samples */
 151             {
 152                 /* Get nybble, sign-extend, then scale
 153                    get byte, select which nybble, sign-extend, then shift based
 154                    on scaling. also handles invalid scaling values. */
 155                 int delta = (int) (int8_t) (addr [offset >> 3] << (offset & 4))
 156                         >> right_shift << left_shift;
 157
                      /* the stored sample is the older of the two kept in
                         smp1/smp2, so output lags decoding by two samples; the
                         last two are written after the block loop */
 158                 out [offset >> 2] = smp2;
 159
 160                 if ( filter == 0 ) /* mode 0x08 (30-90% of the time) */
 161                 {
 162                     delta -= smp2 >> 1;
 163                     delta += smp2 >> 5;
 164                     smp2 = smp1;
 165                     delta += smp1;
 166                     delta += (-smp1 - (smp1 >> 1)) >> 5;
 167                 }
 168                 else
 169                 {
                          /* filter == -8 (mode 0x00) matches neither branch
                             below: delta is used without any prediction */
 170                     if ( filter == -4 ) /* mode 0x04 */
 171                     {
 172                         delta += smp1 >> 1;
 173                         delta += (-smp1) >> 5;
 174                     }
 175                     else if ( filter > -4 ) /* mode 0x0C */
 176                     {
 177                         delta -= smp2 >> 1;
 178                         delta += (smp2 + (smp2 >> 1)) >> 4;
 179                         delta += smp1;
 180                         delta += (-smp1 * 13) >> 7;
 181                     }
 182                     smp2 = smp1;
 183                 }
 184
                      /* saturate to 16 bits, then double and wrap through
                         int16_t to get the new newest sample */
 185                 delta = CLAMP16( delta );
 186                 smp1 = (int16_t) (delta * 2); /* sign-extend */
 187             }
 188             while ( (offset += 4) != 0 );
 189
 190             /* if next block has end flag set, this block ends early */
 191             /* (verified) */
                  /* the early-end path also skips the out [0] / out [1] flush
                     that follows the block loop */
 192             if ( (block_header & 3) != 3 && (*addr & 3) == 1 )
 193             {
 194                 /* skip last 9 samples */
 195                 out -= 9;
 196                 goto early_end;
 197             }
 198         }
              /* stop after a block whose header has bit 0 set, or once addr
                 reaches RAM + 0x10000 */
 199         while ( !(block_header & 1) && addr < RAM + 0x10000 );
 200
              /* flush the two samples still held in smp2/smp1 */
 201         out [0] = smp2;
 202         out [1] = smp1;
 203
 204     early_end:
              /* end is the index of the sample before out, relative to samples,
                 shifted left by 12 (voice positions are converted back to
                 sample indices with >> 12 in DSP_run_) */
 205         wave_entry->end = (out - 1 - wave_entry->samples) << 12;
 206
 207         wave_entry->loop = 0;
 208         if ( (block_header & 2) )
 209         {
 210             if ( loop_start )
 211             {
                      /* loop = distance in samples from the loop start to out;
                         end grows by 3 << 12 and three samples are copied from
                         the loop start to just past out */
 212                 int loop = out - loop_start;
 213                 wave_entry->loop = loop;
 214                 wave_entry->end += 0x3000;
 215                 out [2] = loop_start [2];
 216                 out [3] = loop_start [3];
 217                 out [4] = loop_start [4];
 218             }
 219             else
 220             {
 221                 DEBUGF( "loop point outside initial wave\n" );
 222             }
 223         }
 224
 225         DEBUGF( "end at %08lx (wave #%d)\n",
 226                 (unsigned long)(addr - RAM), raw_voice->waveform );
 227
 228         /* add to cache */
              /* NOTE(review): no capacity check on wave_entry_old is visible
                 here - confirm the array can hold one entry per distinct
                 start_addr */
 229         this->wave_entry_old [this->oldsize++] = *wave_entry;
 230 wave_in_cache:;
 231     }
 232 }
 233 #endif
 234
 235 static void key_on(struct Spc_Dsp* const this, struct voice_t* const voice,
 236                    struct src_dir const* const sd,
 237                    struct raw_voice_t const* const raw_voice,
 238                    const int key_on_delay, const int vbit) ICODE_ATTR;
     /* Advance a pending key-on for one voice and start the voice when the
        delay runs out.

        this         - DSP state; keys_down (and, with SPC_BRRCACHE,
                       wave_entry []) are updated
        voice        - per-voice state to (re)initialise
        sd           - source directory; entry raw_voice->waveform gives the
                       wave's start address
        raw_voice    - register view of the voice (waveform number)
        key_on_delay - remaining delay; DSP_run_ passes the already
                       decremented value, which is stored back into the voice
        vbit         - this voice's bit in keys_down

        Nothing else happens while key_on_delay is non-zero. At zero the voice
        is marked as down, its envelope restarts in the attack state and the
        sample source is set up for the configured decoding mode. */
 239 static void key_on(struct Spc_Dsp* const this, struct voice_t* const voice,
 240                    struct src_dir const* const sd,
 241                    struct raw_voice_t const* const raw_voice,
 242                    const int key_on_delay, const int vbit) {
 243     #undef RAM
 244     #define RAM ram.ram
          /* same value DSP_run_ reloads the envelope timer with */
 245     int const env_rate_init = 0x7800;
 246     voice->key_on_delay = key_on_delay;
 247     if ( key_on_delay == 0 )
 248     {
 249         this->keys_down |= vbit;
 250         voice->envx         = 0;
 251         voice->env_mode     = state_attack;
 252         voice->env_timer    = env_rate_init; /* TODO: inaccurate? */
 253         unsigned start_addr = GET_LE16A(sd [raw_voice->waveform].start);
 254         #if !SPC_BRRCACHE
 255         {
                  /* on-the-fly decoding: start at the wave's first block */
 256             voice->addr = RAM + start_addr;
 257             /* BRR filter uses previous samples */
 258             voice->samples [BRR_BLOCK_SIZE + 1] = 0;
 259             voice->samples [BRR_BLOCK_SIZE + 2] = 0;
 260             /* decode three samples immediately */
                  /* the position is at least BRR_BLOCK_SIZE * 0x1000, which is
                     the condition DSP_run_ tests before decoding a block */
 261             voice->position     = (BRR_BLOCK_SIZE + 3) * 0x1000 - 1;
 262             voice->block_header = 0; /* "previous" BRR header */
 263         }
 264         #else
 265         {
                  /* cached decoding: play from the predecoded wave */
 266             voice->position = 3 * 0x1000 - 1;
 267             struct cache_entry_t* const wave_entry =
 268                 &this->wave_entry [raw_voice->waveform];
 269
 270             /* predecode BRR if not already */
                  /* the entry counts as valid only while its recorded
                     start_addr matches the directory's current start address */
 271             if ( wave_entry->start_addr != start_addr )
 272             {
 273                 /* the following line can be replaced by the indicated block
 274                    in decode_brr() */
 275                 decode_brr( this, start_addr, voice, raw_voice );
 276             }
 277
 278             voice->samples   = wave_entry->samples;
 279             voice->wave_end  = wave_entry->end;
 280                     voice->wave_loop = wave_entry->loop;
 281         }
 282         #endif
 283     }
 284 }
 285
 286 void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
 287 {
 288     #undef RAM
 289 #ifdef CPU_ARM
 290     uint8_t* const ram_ = ram.ram;
 291     #define RAM ram_
 292 #else
 293     #define RAM ram.ram
 294 #endif
 295 #if 0
 296     EXIT_TIMER(cpu);
 297     ENTER_TIMER(dsp);
 298 #endif
 299
 300     /* Here we check for keys on/off.  Docs say that successive writes
 301        to KON/KOF must be separated by at least 2 Ts periods or risk
 302        being neglected.  Therefore DSP only looks at these during an
 303        update, and not at the time of the write.  Only need to do this
 304        once however, since the regs haven't changed over the whole
 305        period we need to catch up with. */
 306
 307     {
 308         int key_ons  = this->r.g.key_ons;
 309         int key_offs = this->r.g.key_offs;
 310         /* keying on a voice resets that bit in ENDX */
 311         this->r.g.wave_ended &= ~key_ons;
 312         /* key_off bits prevent key_on from being acknowledged */
 313         this->r.g.key_ons = key_ons & key_offs;
 314
 315         /* process key events outside loop, since they won't re-occur */
 316         struct voice_t* voice = this->voice_state + 8;
 317         int vbit = 0x80;
 318         do
 319         {
 320             --voice;
 321             if ( key_offs & vbit )
 322             {
 323                 voice->env_mode     = state_release;
 324                 voice->key_on_delay = 0;
 325             }
 326             else if ( key_ons & vbit )
 327             {
 328                 voice->key_on_delay = 8;
 329             }
 330         }
 331         while ( (vbit >>= 1) != 0 );
 332     }
 333
 334     struct src_dir const* const sd =
 335         (struct src_dir*) &RAM [this->r.g.wave_page * 0x100];
 336
 337     #ifdef ROCKBOX_BIG_ENDIAN
 338         /* Convert endiannesses before entering loops - these
 339            get used a lot */
 340         const uint32_t rates[VOICE_COUNT] =
 341         {
 342             GET_LE16A( this->r.voice[0].rate ) & 0x3FFF,
 343             GET_LE16A( this->r.voice[1].rate ) & 0x3FFF,
 344             GET_LE16A( this->r.voice[2].rate ) & 0x3FFF,
 345             GET_LE16A( this->r.voice[3].rate ) & 0x3FFF,
 346             GET_LE16A( this->r.voice[4].rate ) & 0x3FFF,
 347             GET_LE16A( this->r.voice[5].rate ) & 0x3FFF,
 348             GET_LE16A( this->r.voice[6].rate ) & 0x3FFF,
 349             GET_LE16A( this->r.voice[7].rate ) & 0x3FFF,
 350         };
 351         #define VOICE_RATE(x) *(x)
 352         #define IF_RBE(...) __VA_ARGS__
 353     #ifdef CPU_COLDFIRE
 354         /* Initialize mask register with the buffer address mask */
 355         asm volatile ("move.l %[m], %%mask" : : [m]"i"(FIR_BUF_MASK));
 356         const int echo_wrap  = (this->r.g.echo_delay & 15) * 0x800;
 357         const int echo_start = this->r.g.echo_page * 0x100;
 358     #endif /* CPU_COLDFIRE */
 359     #else
 360         #define VOICE_RATE(x) (INT16A(raw_voice->rate) & 0x3FFF)
 361         #define IF_RBE(...)
 362     #endif /* ROCKBOX_BIG_ENDIAN */
 363
 364 #if !SPC_NOINTERP
 365     int const slow_gaussian = (this->r.g.pitch_mods >> 1) |
 366         this->r.g.noise_enables;
 367 #endif
 368     /* (g.flags & 0x40) ? 30 : 14 */
 369     int const global_muting = ((this->r.g.flags & 0x40) >> 2) + 14 - 8;
 370     int const global_vol_0  = this->r.g.volume_0;
 371     int const global_vol_1  = this->r.g.volume_1;
 372
 373     /* each rate divides exactly into 0x7800 without remainder */
 374     int const env_rate_init = 0x7800;
 375     static unsigned short const env_rates [0x20] ICONST_ATTR =
 376     {
 377         0x0000, 0x000F, 0x0014, 0x0018, 0x001E, 0x0028, 0x0030, 0x003C,
 378         0x0050, 0x0060, 0x0078, 0x00A0, 0x00C0, 0x00F0, 0x0140, 0x0180,
 379         0x01E0, 0x0280, 0x0300, 0x03C0, 0x0500, 0x0600, 0x0780, 0x0A00,
 380         0x0C00, 0x0F00, 0x1400, 0x1800, 0x1E00, 0x2800, 0x3C00, 0x7800
 381     };
 382
 383     do /* one pair of output samples per iteration */
 384     {
 385         /* Noise */
 386         if ( this->r.g.noise_enables )
 387         {
 388             if ( (this->noise_count -=
 389                  env_rates [this->r.g.flags & 0x1F]) <= 0 )
 390             {
 391                 this->noise_count = env_rate_init;
 392                 int feedback = (this->noise << 13) ^ (this->noise << 14);
 393                 this->noise = (feedback & 0x8000) ^ (this->noise >> 1 & ~1);
 394             }
 395         }
 396
 397 #if !SPC_NOECHO
 398         int echo_0 = 0;
 399         int echo_1 = 0;
 400 #endif
 401         long prev_outx = 0; /* TODO: correct value for first channel? */
 402         int chans_0 = 0;
 403         int chans_1 = 0;
 404         /* TODO: put raw_voice pointer in voice_t? */
 405         struct raw_voice_t * raw_voice = this->r.voice;
 406         struct voice_t* voice = this->voice_state;
 407         int vbit = 1;
 408         IF_RBE( const uint32_t* vr = rates; )
 409         for ( ; vbit < 0x100; vbit <<= 1, ++voice, ++raw_voice IF_RBE( , ++vr ) )
 410         {
 411             /* pregen involves checking keyon, etc */
 412 #if 0
 413             ENTER_TIMER(dsp_pregen);
 414 #endif
 415
 416             /* Key on events are delayed */
 417             int key_on_delay = voice->key_on_delay;
 418
 419             if ( --key_on_delay >= 0 ) /* <1% of the time */
 420             {
 421                 key_on(this,voice,sd,raw_voice,key_on_delay,vbit);
 422             }
 423
 424             if ( !(this->keys_down & vbit) ) /* Silent channel */
 425             {
 426         silent_chan:
 427                 raw_voice->envx = 0;
 428                 raw_voice->outx = 0;
 429                 prev_outx = 0;
 430                 continue;
 431             }
 432
 433             /* Envelope */
 434             {
 435                 int const ENV_RANGE = 0x800;
 436                 int env_mode = voice->env_mode;
 437                 int adsr0 = raw_voice->adsr [0];
 438                 int env_timer;
 439                 if ( env_mode != state_release ) /* 99% of the time */
 440                 {
 441                     env_timer = voice->env_timer;
 442                     if ( adsr0 & 0x80 ) /* 79% of the time */
 443                     {
 444                         int adsr1 = raw_voice->adsr [1];
 445                         if ( env_mode == state_sustain ) /* 74% of the time */
 446                         {
 447                             if ( (env_timer -= env_rates [adsr1 & 0x1F]) > 0 )
 448                                 goto write_env_timer;
 449
 450                             int envx = voice->envx;
 451                             envx--; /* envx *= 255 / 256 */
 452                             envx -= envx >> 8;
 453                             voice->envx = envx;
 454                             /* TODO: should this be 8? */
 455                             raw_voice->envx = envx >> 4;
 456                             goto init_env_timer;
 457                         }
 458                         else if ( env_mode < 0 ) /* 25% state_decay */
 459                         {
 460                             int envx = voice->envx;
 461                             if ( (env_timer -=
 462                                 env_rates [(adsr0 >> 3 & 0x0E) + 0x10]) <= 0 )
 463                             {
 464                                 envx--; /* envx *= 255 / 256 */
 465                                 envx -= envx >> 8;
 466                                 voice->envx = envx;
 467                                 /* TODO: should this be 8? */
 468                                 raw_voice->envx = envx >> 4;
 469                                 env_timer = env_rate_init;
 470                             }
 471
 472                             int sustain_level = adsr1 >> 5;
 473                             if ( envx <= (sustain_level + 1) * 0x100 )
 474                                 voice->env_mode = state_sustain;
 475
 476                             goto write_env_timer;
 477                         }
 478                         else /* state_attack */
 479                         {
 480                             int t = adsr0 & 0x0F;
 481                             if ( (env_timer -= env_rates [t * 2 + 1]) > 0 )
 482                                 goto write_env_timer;
 483
 484                             int envx = voice->envx;
 485
 486                             int const step = ENV_RANGE / 64;
 487                             envx += step;
 488                             if ( t == 15 )
 489                                 envx += ENV_RANGE / 2 - step;
 490
 491                             if ( envx >= ENV_RANGE )
 492                             {
 493                                 envx = ENV_RANGE - 1;
 494                                 voice->env_mode = state_decay;
 495                             }
 496                             voice->envx = envx;
 497                             /* TODO: should this be 8? */
 498                             raw_voice->envx = envx >> 4;
 499                             goto init_env_timer;
 500                         }
 501                     }
 502                     else /* gain mode */
 503                     {
 504                         int t = raw_voice->gain;
 505                         if ( t < 0x80 )
 506                         {
 507                             raw_voice->envx = t;
 508                             voice->envx = t << 4;
 509                             goto env_end;
 510                         }
 511                         else
 512                         {
 513                             if ( (env_timer -= env_rates [t & 0x1F]) > 0 )
 514                                 goto write_env_timer;
 515
 516                             int envx = voice->envx;
 517                             int mode = t >> 5;
 518                             if ( mode <= 5 ) /* decay */
 519                             {
 520                                 int step = ENV_RANGE / 64;
 521                                 if ( mode == 5 ) /* exponential */
 522                                 {
 523                                     envx--; /* envx *= 255 / 256 */
 524                                     step = envx >> 8;
 525                                 }
 526                                 if ( (envx -= step) < 0 )
 527                                 {
 528                                     envx = 0;
 529                                     if ( voice->env_mode == state_attack )
 530                                         voice->env_mode = state_decay;
 531                                 }
 532                             }
 533                             else /* attack */
 534                             {
 535                                 int const step = ENV_RANGE / 64;
 536                                 envx += step;
 537                                 if ( mode == 7 &&
 538                                      envx >= ENV_RANGE * 3 / 4 + step )
 539                                     envx += ENV_RANGE / 256 - step;
 540
 541                                 if ( envx >= ENV_RANGE )
 542                                     envx = ENV_RANGE - 1;
 543                             }
 544                             voice->envx = envx;
 545                             /* TODO: should this be 8? */
 546                             raw_voice->envx = envx >> 4;
 547                             goto init_env_timer;
 548                         }
 549                     }
 550                 }
 551                 else /* state_release */
 552                 {
 553                     int envx = voice->envx;
 554                     if ( (envx -= ENV_RANGE / 256) > 0 )
 555                     {
 556                         voice->envx = envx;
 557                         raw_voice->envx = envx >> 8;
 558                         goto env_end;
 559                     }
 560                     else
 561                     {
 562                         /* bit was set, so this clears it */
 563                         this->keys_down ^= vbit;
 564                         voice->envx = 0;
 565                         goto silent_chan;
 566                     }
 567                 }
 568             init_env_timer:
 569                 env_timer = env_rate_init;
 570             write_env_timer:
 571                 voice->env_timer = env_timer;
 572             env_end:;
 573             }
 574 #if 0
 575             EXIT_TIMER(dsp_pregen);
 576
 577             ENTER_TIMER(dsp_gen);
 578 #endif
 579             #if !SPC_BRRCACHE
 580             /* Decode BRR block */
 581             if ( voice->position >= BRR_BLOCK_SIZE * 0x1000 )
 582             {
 583                 voice->position -= BRR_BLOCK_SIZE * 0x1000;
 584
 585                 uint8_t const* addr = voice->addr;
 586                 if ( addr >= RAM + 0x10000 )
 587                     addr -= 0x10000;
 588
 589                 /* action based on previous block's header */
 590                 if ( voice->block_header & 1 )
 591                 {
 592                     addr = RAM + GET_LE16A( sd [raw_voice->waveform].loop );
 593                     this->r.g.wave_ended |= vbit;
 594                     if ( !(voice->block_header & 2) ) /* 1% of the time */
 595                     {
 596                         /* first block was end block;
 597                            don't play anything (verified) */
 598                         /* bit was set, so this clears it */
 599                         this->keys_down ^= vbit;
 600
 601                         /* since voice->envx is 0,
 602                            samples and position don't matter */
 603                         raw_voice->envx = 0;
 604                         voice->envx = 0;
 605                         goto skip_decode;
 606                     }
 607                 }
 608
 609                 /* header */
 610                 int const block_header = *addr;
 611                 addr += 9;
 612                 voice->addr = addr;
 613                 voice->block_header = block_header;
 614                 int const filter = (block_header & 0x0C) - 0x08;
 615
 616                 /* scaling (invalid scaling gives -4096 for neg nybble,
 617                    0 for pos) */
 618                 static unsigned char const right_shifts [16] = {
 619                     5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  4,  4, 29, 29, 29,
 620                 };
 621                 static unsigned char const left_shifts  [16] = {
 622                     0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11
 623                 };
 624                 int const scale = block_header >> 4;
 625                 int const right_shift = right_shifts [scale];
 626                 int const left_shift  = left_shifts  [scale];
 627
 628                 /* previous samples */
 629                 int smp2 = voice->samples [BRR_BLOCK_SIZE + 1];
 630                 int smp1 = voice->samples [BRR_BLOCK_SIZE + 2];
 631                 voice->samples [0] = voice->samples [BRR_BLOCK_SIZE];
 632
 633                 /* output position */
 634                 short* out = voice->samples + (1 + BRR_BLOCK_SIZE);
 635                 int offset = -BRR_BLOCK_SIZE << 2;
 636
 637                 /* if next block has end flag set,
 638                    this block ends early (verified) */
 639                 if ( (block_header & 3) != 3 && (*addr & 3) == 1 )
 640                 {
 641                     /* arrange for last 9 samples to be skipped */
 642                     int const skip = 9;
 643                     out += (skip & 1);
 644                     voice->samples [skip] = voice->samples [BRR_BLOCK_SIZE];
 645                     voice->position += skip * 0x1000;
 646                     offset = (-BRR_BLOCK_SIZE + (skip & ~1)) << 2;
 647                     addr -= skip / 2;
 648                     /* force sample to end on next decode */
 649                     voice->block_header = 1;
 650                 }
 651
 652                 do /* decode and filter 16 samples */
 653                 {
 654                     /* Get nybble, sign-extend, then scale
 655                        get byte, select which nybble, sign-extend, then shift
 656                        based on scaling. also handles invalid scaling values.*/
 657                     int delta = (int) (int8_t) (addr [offset >> 3] <<
 658                             (offset & 4)) >> right_shift << left_shift;
 659
 660                     out [offset >> 2] = smp2;
 661
 662                     if ( filter == 0 ) /* mode 0x08 (30-90% of the time) */
 663                     {
 664                         delta -= smp2 >> 1;
 665                         delta += smp2 >> 5;
 666                         smp2 = smp1;
 667                         delta += smp1;
 668                         delta += (-smp1 - (smp1 >> 1)) >> 5;
 669                     }
 670                     else
 671                     {
 672                         if ( filter == -4 ) /* mode 0x04 */
 673                         {
 674                             delta += smp1 >> 1;
 675                             delta += (-smp1) >> 5;
 676                         }
 677                         else if ( filter > -4 ) /* mode 0x0C */
 678                         {
 679                             delta -= smp2 >> 1;
 680                             delta += (smp2 + (smp2 >> 1)) >> 4;
 681                             delta += smp1;
 682                             delta += (-smp1 * 13) >> 7;
 683                         }
 684                         smp2 = smp1;
 685                     }
 686
 687                     delta = CLAMP16( delta );
 688                     smp1 = (int16_t) (delta * 2); /* sign-extend */
 689                 }
 690                 while ( (offset += 4) != 0 );
 691
 692                 out [0] = smp2;
 693                 out [1] = smp1;
 694
 695             skip_decode:;
 696             }
 697             #endif
 698
 699             /* Get rate (with possible modulation) */
 700             int rate = VOICE_RATE(vr);
 701             if ( this->r.g.pitch_mods & vbit )
 702                 rate = (rate * (prev_outx + 32768)) >> 15;
 703
 704         #if !SPC_NOINTERP
 705             /* Interleaved gauss table (to improve cache coherency). */
 706             /* gauss [i * 2 + j] = normal_gauss [(1 - j) * 256 + i] */
 707             static short const gauss [512] =
 708             {
 709 370,1305, 366,1305, 362,1304, 358,1304, 354,1304, 351,1304, 347,1304, 343,1303,
 710 339,1303, 336,1303, 332,1302, 328,1302, 325,1301, 321,1300, 318,1300, 314,1299,
 711 311,1298, 307,1297, 304,1297, 300,1296, 297,1295, 293,1294, 290,1293, 286,1292,
 712 283,1291, 280,1290, 276,1288, 273,1287, 270,1286, 267,1284, 263,1283, 260,1282,
 713 257,1280, 254,1279, 251,1277, 248,1275, 245,1274, 242,1272, 239,1270, 236,1269,
 714 233,1267, 230,1265, 227,1263, 224,1261, 221,1259, 218,1257, 215,1255, 212,1253,
 715 210,1251, 207,1248, 204,1246, 201,1244, 199,1241, 196,1239, 193,1237, 191,1234,
 716 188,1232, 186,1229, 183,1227, 180,1224, 178,1221, 175,1219, 173,1216, 171,1213,
 717 168,1210, 166,1207, 163,1205, 161,1202, 159,1199, 156,1196, 154,1193, 152,1190,
 718 150,1186, 147,1183, 145,1180, 143,1177, 141,1174, 139,1170, 137,1167, 134,1164,
 719 132,1160, 130,1157, 128,1153, 126,1150, 124,1146, 122,1143, 120,1139, 118,1136,
 720 117,1132, 115,1128, 113,1125, 111,1121, 109,1117, 107,1113, 106,1109, 104,1106,
 721 102,1102, 100,1098,  99,1094,  97,1090,  95,1086,  94,1082,  92,1078,  90,1074,
 722  89,1070,  87,1066,  86,1061,  84,1057,  83,1053,  81,1049,  80,1045,  78,1040,
 723  77,1036,  76,1032,  74,1027,  73,1023,  71,1019,  70,1014,  69,1010,  67,1005,
 724  66,1001,  65, 997,  64, 992,  62, 988,  61, 983,  60, 978,  59, 974,  58, 969,
 725  56, 965,  55, 960,  54, 955,  53, 951,  52, 946,  51, 941,  50, 937,  49, 932,
 726  48, 927,  47, 923,  46, 918,  45, 913,  44, 908,  43, 904,  42, 899,  41, 894,
 727  40, 889,  39, 884,  38, 880,  37, 875,  36, 870,  36, 865,  35, 860,  34, 855,
 728  33, 851,  32, 846,  32, 841,  31, 836,  30, 831,  29, 826,  29, 821,  28, 816,
 729  27, 811,  27, 806,  26, 802,  25, 797,  24, 792,  24, 787,  23, 782,  23, 777,
 730  22, 772,  21, 767,  21, 762,  20, 757,  20, 752,  19, 747,  19, 742,  18, 737,
 731  17, 732,  17, 728,  16, 723,  16, 718,  15, 713,  15, 708,  15, 703,  14, 698,
 732  14, 693,  13, 688,  13, 683,  12, 678,  12, 674,  11, 669,  11, 664,  11, 659,
 733  10, 654,  10, 649,  10, 644,   9, 640,   9, 635,   9, 630,   8, 625,   8, 620,
 734   8, 615,   7, 611,   7, 606,   7, 601,   6, 596,   6, 592,   6, 587,   6, 582,
 735   5, 577,   5, 573,   5, 568,   5, 563,   4, 559,   4, 554,   4, 550,   4, 545,
 736   4, 540,   3, 536,   3, 531,   3, 527,   3, 522,   3, 517,   2, 513,   2, 508,
 737   2, 504,   2, 499,   2, 495,   2, 491,   2, 486,   1, 482,   1, 477,   1, 473,
 738   1, 469,   1, 464,   1, 460,   1, 456,   1, 451,   1, 447,   1, 443,   1, 439,
 739   0, 434,   0, 430,   0, 426,   0, 422,   0, 418,   0, 414,   0, 410,   0, 405,
 740   0, 401,   0, 397,   0, 393,   0, 389,   0, 385,   0, 381,   0, 378,   0, 374,
 741             };
 742             /* Gaussian interpolation using most recent 4 samples */
 743             long position = voice->position;
 744             voice->position += rate;
 745             short const* interp = voice->samples + (position >> 12);
 746             int offset = position >> 4 & 0xFF;
 747
 748             /* Only left half of gaussian kernel is in table, so we must mirror
 749                for right half */
 750             short const* fwd = gauss       + offset * 2;
 751             short const* rev = gauss + 510 - offset * 2;
 752
 753             /* Use faster gaussian interpolation when exact result isn't needed
 754                by pitch modulator of next channel */
 755             int amp_0, amp_1;
 756             if ( !(slow_gaussian & vbit) ) /* 99% of the time */
 757             {
 758                 /* Main optimization is lack of clamping. Not a problem since
 759                    output never goes more than +/- 16 outside 16-bit range and
 760                    things are clamped later anyway. Other optimization is to
 761                    preserve fractional accuracy, eliminating several masks. */
 762                 int output = (((fwd [0] * interp [0] +
 763                          fwd [1] * interp [1] +
 764                          rev [1] * interp [2] +
 765                          rev [0] * interp [3]    ) >> 11) * voice->envx) >> 11;
 766
 767                 /* duplicated here to give compiler more to run in parallel */
 768                 amp_0 = voice->volume [0] * output;
 769                 amp_1 = voice->volume [1] * output;
 770                 raw_voice->outx = output >> 8;
 771             }
 772             else
 773             {
 774                 int output = *(int16_t*) &this->noise;
 775                 if ( !(this->r.g.noise_enables & vbit) )
 776                 {
 777                     output = (fwd [0] * interp [0]) & ~0xFFF;
 778                     output = (output + fwd [1] * interp [1]) & ~0xFFF;
 779                     output = (output + rev [1] * interp [2]) >> 12;
 780                     output = (int16_t) (output * 2);
 781                     output += ((rev [0] * interp [3]) >> 12) * 2;
 782                     output = CLAMP16( output );
 783                 }
 784                 output = (output * voice->envx) >> 11 & ~1;
 785
 786                 /* duplicated here to give compiler more to run in parallel */
 787                 amp_0 = voice->volume [0] * output;
 788                 amp_1 = voice->volume [1] * output;
 789                 prev_outx = output;
 790                 raw_voice->outx = (int8_t) (output >> 8);
 791             }
 792         #else /* SPCNOINTERP */
 793         /* two-point linear interpolation */
 794         #ifdef CPU_COLDFIRE
 795             int amp_0 = (int16_t)this->noise;
 796             int amp_1;
 797
 798             if ( (this->r.g.noise_enables & vbit) == 0 )
 799             {
 800                 uint32_t f = voice->position;
 801                 int32_t y0;
 802
 803                 /**
 804                  * Formula (fastest found so far of MANY):
 805                  * output = y0 + f*y1 - f*y0
 806                  */
 807                 asm volatile (
 808                 /* separate fractional and whole parts   */
 809                 "move.l     %[f], %[y1]               \r\n"
 810                 "and.l      #0xfff, %[f]              \r\n"
 811                 "lsr.l      %[sh], %[y1]              \r\n"
 812                 /* load samples y0 (upper) & y1 (lower)  */
 813                 "move.l     2(%[s], %[y1].l*2), %[y1] \r\n"
 814                 /* %acc0 = f*y1                          */
 815                 "mac.w      %[f]l, %[y1]l, %%acc0     \r\n"
 816                 /* %acc0 -= f*y0                         */
 817                 "msac.w     %[f]l, %[y1]u, %%acc0     \r\n"
 818                 /* separate out y0 and sign extend       */
 819                 "swap       %[y1]                     \r\n"
 820                 "movea.w    %[y1], %[y0]              \r\n"
 821                 /* fetch result, scale down and add y0   */
 822                 "movclr.l   %%acc0, %[y1]             \r\n"
 823                 /* output = y0 + (result >> 12)          */
 824                 "asr.l      %[sh], %[y1]              \r\n"
 825                 "add.l      %[y0], %[y1]              \r\n"
 826                 : [f]"+d"(f), [y0]"=&a"(y0), [y1]"=&d"(amp_0)
 827                 : [s]"a"(voice->samples), [sh]"d"(12)
 828                     );
 829             }
 830
 831             /* apply voice envelope to output */
 832             asm volatile (
 833             "mac.w %[output]l, %[envx]l, %%acc0 \r\n"
 834             :
 835             : [output]"r"(amp_0), [envx]"r"(voice->envx)
 836             );
 837
 838             /* advance voice position */
 839             voice->position += rate;
 840
 841             /* fetch output, scale and apply left and right
 842                voice volume */
 843             asm volatile (
 844             "movclr.l %%acc0,    %[output]         \r\n"
 845             "asr.l    %[sh],     %[output]         \r\n"
 846             "mac.l    %[vvol_0], %[output], %%acc0 \r\n"
 847             "mac.l    %[vvol_1], %[output], %%acc1 \r\n"
 848             : [output]"=&d"(amp_0)
 849             : [vvol_0]"r"((int)voice->volume[0]),
 850               [vvol_1]"r"((int)voice->volume[1]),
 851               [sh]"d"(11)
 852             );
 853
 854             /* save this output into previous, scale and save in
 855                output register */
 856             prev_outx = amp_0;
 857             raw_voice->outx = amp_0 >> 8;
 858
 859             /* fetch final voice output */
 860             asm volatile (
 861             "movclr.l %%acc0, %[amp_0] \r\n"
 862             "movclr.l %%acc1, %[amp_1] \r\n"
 863             : [amp_0]"=r"(amp_0), [amp_1]"=r"(amp_1)
 864             );
 865         #elif defined (CPU_ARM)
 866             int amp_0, amp_1;
 867
 868             if ( (this->r.g.noise_enables & vbit) != 0 ) {
 869                 amp_0 = *(int16_t *)&this->noise;
 870             } else {
 871                 uint32_t f = voice->position;
 872                 amp_0 = (uint32_t)voice->samples;
 873
 874                 asm volatile(
 875                 "mov    %[y1], %[f], lsr #12        \r\n"
 876                 "eor    %[f], %[f], %[y1], lsl #12  \r\n"
 877                 "add    %[y1], %[y0], %[y1], lsl #1 \r\n"
 878                 "ldrsh  %[y0], [%[y1], #2]          \r\n"
 879                 "ldrsh  %[y1], [%[y1], #4]          \r\n"
 880                 "sub    %[y1], %[y1], %[y0]         \r\n"
 881                 "mul    %[f], %[y1], %[f]           \r\n"
 882                 "add    %[y0], %[y0], %[f], asr #12 \r\n"
 883                 : [f]"+r"(f), [y0]"+r"(amp_0), [y1]"=&r"(amp_1)
 884                 );
 885             }
 886
 887             voice->position += rate;
 888
 889             asm volatile(
 890             "mul    %[amp_1], %[amp_0], %[envx] \r\n"
 891             "mov    %[amp_0], %[amp_1], asr #11 \r\n"
 892             "mov    %[amp_1], %[amp_0], asr #8  \r\n"
 893             : [amp_0]"+r"(amp_0), [amp_1]"=&r"(amp_1)
 894             : [envx]"r"(voice->envx)
 895             );
 896
 897             prev_outx = amp_0;
 898             raw_voice->outx = (int8_t)amp_1;
 899
 900             asm volatile(
 901             "mul    %[amp_1], %[amp_0], %[vol_1] \r\n"
 902             "mul    %[amp_0], %[vol_0], %[amp_0] \r\n"
 903             : [amp_0]"+r"(amp_0), [amp_1]"+r"(amp_1)
 904             : [vol_0]"r"((int)voice->volume[0]),
 905               [vol_1]"r"((int)voice->volume[1])
 906             );
 907         #else /* Unoptimized CPU */
 908             int output;
 909
 910             if ( (this->r.g.noise_enables & vbit) == 0 )
 911             {
 912                 int const fraction = voice->position & 0xfff;
 913                 short const* const pos = (voice->samples + (voice->position >> 12)) + 1;
 914                 output = pos[0] + ((fraction * (pos[1] - pos[0])) >> 12);
 915             } else {
 916                 output = *(int16_t *)&this->noise;
 917             }
 918
 919             voice->position += rate;
 920
 921             output = (output * voice->envx) >> 11;
 922
 923             /* duplicated here to give compiler more to run in parallel */
 924             int amp_0 = voice->volume [0] * output;
 925             int amp_1 = voice->volume [1] * output;
 926
 927             prev_outx = output;
 928             raw_voice->outx = (int8_t) (output >> 8);
 929         #endif /* CPU_* */
 930         #endif /* SPCNOINTERP */
 931
 932         #if SPC_BRRCACHE
 933             if ( voice->position >= voice->wave_end )
 934             {
 935                 long loop_len = voice->wave_loop << 12;
 936                 voice->position -= loop_len;
 937                 this->r.g.wave_ended |= vbit;
 938                 if ( !loop_len )
 939                 {
 940                     this->keys_down ^= vbit;
 941                     raw_voice->envx = 0;
 942                     voice->envx = 0;
 943                 }
 944             }
 945         #endif
 946 #if 0
 947             EXIT_TIMER(dsp_gen);
 948
 949             ENTER_TIMER(dsp_mix);
 950 #endif
 951             chans_0 += amp_0;
 952             chans_1 += amp_1;
 953             #if !SPC_NOECHO
 954                 if ( this->r.g.echo_ons & vbit )
 955                 {
 956                     echo_0 += amp_0;
 957                     echo_1 += amp_1;
 958                 }
 959             #endif
 960 #if 0
 961             EXIT_TIMER(dsp_mix);
 962 #endif
 963         }
 964         /* end of voice loop */
 965
 966     #if !SPC_NOECHO
 967     #ifdef CPU_COLDFIRE
 968         /* Read feedback from echo buffer */
 969         int echo_pos = this->echo_pos;
 970         uint8_t* const echo_ptr = RAM + ((echo_start + echo_pos) & 0xFFFF);
 971         echo_pos += 4;
 972         if ( echo_pos >= echo_wrap )
 973             echo_pos = 0;
 974         this->echo_pos = echo_pos;
 975         int fb = swap_odd_even32(*(int32_t *)echo_ptr);
 976         int out_0, out_1;
 977
 978         /* Keep last 8 samples */
 979         *this->last_fir_ptr = fb;
 980         this->last_fir_ptr  = this->fir_ptr;
 981
 982         /* Apply echo FIR filter to output samples read from echo buffer -
 983            circular buffer is hardware incremented and masked; FIR
 984            coefficients and buffer history are loaded in parallel with
 985            multiply accumulate operations. Shift left by one here and once
 986            again when calculating feedback to have sample values justified
 987            to bit 31 in the output to ease endian swap, interleaving and
 988            clamping before placing result in the program's echo buffer. */
 989         int _0, _1, _2;
 990         asm volatile (
 991         "move.l                           (%[fir_c])  , %[_2]         \r\n"
 992         "mac.w      %[fb]u, %[_2]u, <<,   (%[fir_p])+&, %[_0], %%acc0 \r\n"
 993         "mac.w      %[fb]l, %[_2]u, <<,   (%[fir_p])& , %[_1], %%acc1 \r\n"
 994         "mac.w      %[_0]u, %[_2]l, <<                       , %%acc0 \r\n"
 995         "mac.w      %[_0]l, %[_2]l, <<,  4(%[fir_c])  , %[_2], %%acc1 \r\n"
 996         "mac.w      %[_1]u, %[_2]u, <<,  4(%[fir_p])& , %[_0], %%acc0 \r\n"
 997         "mac.w      %[_1]l, %[_2]u, <<,  8(%[fir_p])& , %[_1], %%acc1 \r\n"
 998         "mac.w      %[_0]u, %[_2]l, <<                       , %%acc0 \r\n"
 999         "mac.w      %[_0]l, %[_2]l, <<,  8(%[fir_c])  , %[_2], %%acc1 \r\n"
1000         "mac.w      %[_1]u, %[_2]u, <<, 12(%[fir_p])& , %[_0], %%acc0 \r\n"
1001         "mac.w      %[_1]l, %[_2]u, <<, 16(%[fir_p])& , %[_1], %%acc1 \r\n"
1002         "mac.w      %[_0]u, %[_2]l, <<                       , %%acc0 \r\n"
1003         "mac.w      %[_0]l, %[_2]l, <<, 12(%[fir_c])  , %[_2], %%acc1 \r\n"
1004         "mac.w      %[_1]u, %[_2]u, <<, 20(%[fir_p])& , %[_0], %%acc0 \r\n"
1005         "mac.w      %[_1]l, %[_2]u, <<                       , %%acc1 \r\n"
1006         "mac.w      %[_0]u, %[_2]l, <<                       , %%acc0 \r\n"
1007         "mac.w      %[_0]l, %[_2]l, <<                       , %%acc1 \r\n"
1008         : [_0]"=&r"(_0), [_1]"=&r"(_1), [_2]"=&r"(_2),
1009           [fir_p]"+a"(this->fir_ptr)
1010         : [fir_c]"a"(this->fir_coeff), [fb]"r"(fb)
1011         );
1012
1013         /* Generate output */
1014         asm volatile (
1015         /* fetch filter results _after_ gcc loads asm
1016            block parameters to eliminate emac stalls   */
1017         "movclr.l   %%acc0, %[out_0]                \r\n"
1018         "movclr.l   %%acc1, %[out_1]                \r\n"
1019         /* apply global volume                         */
1020         "mac.l      %[chans_0], %[gv_0]    , %%acc2 \r\n"
1021         "mac.l      %[chans_1], %[gv_1]    , %%acc3 \r\n"
1022         /* apply echo volume and add to final output   */
1023         "mac.l      %[ev_0],   %[out_0], >>, %%acc2 \r\n"
1024         "mac.l      %[ev_1],   %[out_1], >>, %%acc3 \r\n"
1025         : [out_0]"=&r"(out_0), [out_1]"=&r"(out_1)
1026         : [chans_0]"r"(chans_0), [gv_0]"r"(global_vol_0),
1027           [ev_0]"r"((int)this->r.g.echo_volume_0),
1028           [chans_1]"r"(chans_1), [gv_1]"r"(global_vol_1),
1029           [ev_1]"r"((int)this->r.g.echo_volume_1)
1030         );
1031
1032         /* Feedback into echo buffer */
1033         if ( !(this->r.g.flags & 0x20) )
1034         {
1035             asm volatile (
1036             /* scale echo voices; saturate if overflow */
1037             "mac.l      %[sh], %[e1]       , %%acc1 \r\n"
1038             "mac.l      %[sh], %[e0]       , %%acc0 \r\n"
1039             /* add scaled output from FIR filter       */
1040             "mac.l      %[out_1], %[ef], <<, %%acc1 \r\n"
1041             "mac.l      %[out_0], %[ef], <<, %%acc0 \r\n"
1042             /* swap and fetch feedback results - simply
1043                swap_odd_even32 mixed in between macs and
1044                movclrs to mitigate stall issues        */
1045             "move.l     #0x00ff00ff, %[sh]          \r\n"
1046             "movclr.l   %%acc1, %[e1]               \r\n"
1047             "swap       %[e1]                       \r\n"
1048             "movclr.l   %%acc0, %[e0]               \r\n"
1049             "move.w     %[e1], %[e0]                \r\n"
1050             "and.l      %[e0], %[sh]                \r\n"
1051             "eor.l      %[sh], %[e0]                \r\n"
1052             "lsl.l      #8, %[sh]                   \r\n"
1053             "lsr.l      #8, %[e0]                   \r\n"
1054             "or.l       %[sh], %[e0]                \r\n"
1055             /* save final feedback into echo buffer    */
1056             "move.l     %[e0], (%[echo_ptr])        \r\n"
1057             : [e0]"+d"(echo_0), [e1]"+d"(echo_1)
1058             : [out_0]"r"(out_0), [out_1]"r"(out_1),
1059               [ef]"r"((int)this->r.g.echo_feedback),
1060               [echo_ptr]"a"((int32_t *)echo_ptr),
1061               [sh]"d"(1 << 9)
1062             );
1063         }
1064
1065         /* Output final samples */
1066         asm volatile (
1067         /* fetch output saved in %acc2 and %acc3 */
1068         "movclr.l   %%acc2, %[out_0] \r\n"
1069         "movclr.l   %%acc3, %[out_1] \r\n"
1070         /* scale right by global_muting shift    */
1071         "asr.l      %[gm],  %[out_0] \r\n"
1072         "asr.l      %[gm],  %[out_1] \r\n"
1073         : [out_0]"=&d"(out_0), [out_1]"=&d"(out_1)
1074         : [gm]"d"(global_muting)
1075         );
1076
1077         out_buf [             0] = out_0;
1078         out_buf [WAV_CHUNK_SIZE] = out_1;
1079         out_buf ++;
1080     #elif defined (CPU_ARM)
1081         /* Read feedback from echo buffer */
1082         int echo_pos = this->echo_pos;
1083         uint8_t* const echo_ptr = RAM +
1084                 ((this->r.g.echo_page * 0x100 + echo_pos) & 0xFFFF);
1085         echo_pos += 4;
1086         if ( echo_pos >= (this->r.g.echo_delay & 15) * 0x800 )
1087             echo_pos = 0;
1088         this->echo_pos = echo_pos;
1089
1090         int fb_0 = GET_LE16SA( echo_ptr     );
1091         int fb_1 = GET_LE16SA( echo_ptr + 2 );
1092
1093         /* Keep last 8 samples */
1094         int32_t *fir_ptr = this->fir_ptr;
1095
1096         /* Apply FIR */
1097         asm volatile (
1098         "str    %[fb_0], [%[fir_p]], #4  \r\n"
1099         "str    %[fb_1], [%[fir_p]], #4  \r\n"
1100         /* duplicate at +8 eliminates wrap checking below */
1101         "str    %[fb_0], [%[fir_p], #56] \r\n"
1102         "str    %[fb_1], [%[fir_p], #60] \r\n"
1103         : [fir_p]"+r"(fir_ptr)
1104         : [fb_0]"r"(fb_0), [fb_1]"r"(fb_1)
1105         );
1106
1107         this->fir_ptr = (int32_t *)((intptr_t)fir_ptr & FIR_BUF_MASK);
1108         int32_t *fir_coeff = this->fir_coeff;
1109
1110         asm volatile (
1111         "ldmia  %[fir_c]!, { r0-r1 }     \r\n"
1112         "ldmia  %[fir_p]!, { r4-r5 }     \r\n"
1113         "mul    %[fb_0],     r0, %[fb_0] \r\n"
1114         "mul    %[fb_1],     r0, %[fb_1] \r\n"
1115         "mla    %[fb_0], r4, r1, %[fb_0] \r\n"
1116         "mla    %[fb_1], r5, r1, %[fb_1] \r\n"
1117         "ldmia  %[fir_c]!, { r0-r1 }     \r\n"
1118         "ldmia  %[fir_p]!, { r2-r5 }     \r\n"
1119         "mla    %[fb_0], r2, r0, %[fb_0] \r\n"
1120         "mla    %[fb_1], r3, r0, %[fb_1] \r\n"
1121         "mla    %[fb_0], r4, r1, %[fb_0] \r\n"
1122         "mla    %[fb_1], r5, r1, %[fb_1] \r\n"
1123         "ldmia  %[fir_c]!, { r0-r1 }     \r\n"
1124         "ldmia  %[fir_p]!, { r2-r5 }     \r\n"
1125         "mla    %[fb_0], r2, r0, %[fb_0] \r\n"
1126         "mla    %[fb_1], r3, r0, %[fb_1] \r\n"
1127         "mla    %[fb_0], r4, r1, %[fb_0] \r\n"
1128         "mla    %[fb_1], r5, r1, %[fb_1] \r\n"
1129         "ldmia  %[fir_c]!, { r0-r1 }     \r\n"
1130         "ldmia  %[fir_p]!, { r2-r5 }     \r\n"
1131         "mla    %[fb_0], r2, r0, %[fb_0] \r\n"
1132         "mla    %[fb_1], r3, r0, %[fb_1] \r\n"
1133         "mla    %[fb_0], r4, r1, %[fb_0] \r\n"
1134         "mla    %[fb_1], r5, r1, %[fb_1] \r\n"
1135         : [fb_0]"+r"(fb_0), [fb_1]"+r"(fb_1),
1136           [fir_p]"+r"(fir_ptr), [fir_c]"+r"(fir_coeff)
1137         :
1138         : "r0", "r1", "r2", "r3", "r4", "r5"
1139         );
1140
1141         /* Generate output */
1142         int amp_0 = (chans_0 * global_vol_0 + fb_0 * this->r.g.echo_volume_0)
1143                     >> global_muting;
1144         int amp_1 = (chans_1 * global_vol_1 + fb_1 * this->r.g.echo_volume_1)
1145                     >> global_muting;
1146
1147         out_buf [             0] = amp_0;
1148         out_buf [WAV_CHUNK_SIZE] = amp_1;
1149         out_buf ++;
1150
1151         if ( !(this->r.g.flags & 0x20) )
1152         {
1153             /* Feedback into echo buffer */
1154             int e0 = (echo_0 >> 7) + ((fb_0 * this->r.g.echo_feedback) >> 14);
1155             int e1 = (echo_1 >> 7) + ((fb_1 * this->r.g.echo_feedback) >> 14);
1156             e0 = CLAMP16( e0 );
1157             SET_LE16A( echo_ptr    , e0 );
1158             e1 = CLAMP16( e1 );
1159             SET_LE16A( echo_ptr + 2, e1 );
1160         }
1161     #else /* Unoptimized CPU */
1162         /* Read feedback from echo buffer */
1163         int echo_pos = this->echo_pos;
1164         uint8_t* const echo_ptr = RAM +
1165                 ((this->r.g.echo_page * 0x100 + echo_pos) & 0xFFFF);
1166         echo_pos += 4;
1167         if ( echo_pos >= (this->r.g.echo_delay & 15) * 0x800 )
1168             echo_pos = 0;
1169         this->echo_pos = echo_pos;
1170         int fb_0 = GET_LE16SA( echo_ptr     );
1171         int fb_1 = GET_LE16SA( echo_ptr + 2 );
1172
1173         /* Keep last 8 samples */
1174         int (* const fir_ptr) [2] = this->fir_buf + this->fir_pos;
1175         this->fir_pos = (this->fir_pos + 1) & (FIR_BUF_HALF - 1);
1176         fir_ptr [           0] [0] = fb_0;
1177         fir_ptr [           0] [1] = fb_1;
1178         /* duplicate at +8 eliminates wrap checking below */
1179         fir_ptr [FIR_BUF_HALF] [0] = fb_0;
1180         fir_ptr [FIR_BUF_HALF] [1] = fb_1;
1181
1182         /* Apply FIR */
1183         fb_0 *= this->fir_coeff [0];
1184         fb_1 *= this->fir_coeff [0];
1185
1186         #define DO_PT( i )\
1187             fb_0 += fir_ptr [i] [0] * this->fir_coeff [i];\
1188             fb_1 += fir_ptr [i] [1] * this->fir_coeff [i];
1189
1190         DO_PT( 1 )
1191         DO_PT( 2 )
1192         DO_PT( 3 )
1193         DO_PT( 4 )
1194         DO_PT( 5 )
1195         DO_PT( 6 )
1196         DO_PT( 7 )
1197
1198         /* Generate output */
1199         int amp_0 = (chans_0 * global_vol_0 + fb_0 * this->r.g.echo_volume_0)
1200                     >> global_muting;
1201         int amp_1 = (chans_1 * global_vol_1 + fb_1 * this->r.g.echo_volume_1)
1202                     >> global_muting;
1203         out_buf [             0] = amp_0;
1204         out_buf [WAV_CHUNK_SIZE] = amp_1;
1205         out_buf ++;
1206
1207         if ( !(this->r.g.flags & 0x20) )
1208         {
1209             /* Feedback into echo buffer */
1210             int e0 = (echo_0 >> 7) + ((fb_0 * this->r.g.echo_feedback) >> 14);
1211             int e1 = (echo_1 >> 7) + ((fb_1 * this->r.g.echo_feedback) >> 14);
1212             e0 = CLAMP16( e0 );
1213             SET_LE16A( echo_ptr    , e0 );
1214             e1 = CLAMP16( e1 );
1215             SET_LE16A( echo_ptr + 2, e1 );
1216         }
1217     #endif /* CPU_* */
1218     #else /* SPCNOECHO == 1*/
1219         /* Generate output  */
1220         int amp_0 = (chans_0 * global_vol_0) >> global_muting;
1221         int amp_1 = (chans_1 * global_vol_1) >> global_muting;
1222         out_buf [             0] = amp_0;
1223         out_buf [WAV_CHUNK_SIZE] = amp_1;
1224         out_buf ++;
1225     #endif /* SPCNOECHO */
1226     }
1227     while ( --count );
1228 #if 0
1229     EXIT_TIMER(dsp);
1230     ENTER_TIMER(cpu);
1231 #endif
1232 }
1233
1234 void DSP_reset( struct Spc_Dsp* this )
1235 {
1236     this->keys_down   = 0;
1237     this->echo_pos    = 0;
1238     this->noise_count = 0;
1239     this->noise       = 2;
1240
1241     this->r.g.flags   = 0xE0; /* reset, mute, echo off */
1242     this->r.g.key_ons = 0;
1243
1244     ci->memset( this->voice_state, 0, sizeof this->voice_state );
1245
1246     int i;
1247     for ( i = VOICE_COUNT; --i >= 0; )
1248     {
1249         struct voice_t* v = this->voice_state + i;
1250         v->env_mode = state_release;
1251         v->addr     = ram.ram;
1252     }
1253
1254     #if SPC_BRRCACHE
1255         this->oldsize = 0;
1256         for ( i = 0; i < 256; i++ )
1257             this->wave_entry [i].start_addr = -1;
1258     #endif
1259
1260 #if defined(CPU_COLDFIRE)
1261     this->fir_ptr = fir_buf;
1262     this->last_fir_ptr = &fir_buf [7];
1263     ci->memset( fir_buf, 0, sizeof fir_buf );
1264 #elif defined (CPU_ARM)
1265     this->fir_ptr = fir_buf;
1266     ci->memset( fir_buf, 0, sizeof fir_buf );
1267 #else
1268     this->fir_pos = 0;
1269     ci->memset( this->fir_buf, 0, sizeof this->fir_buf );
1270 #endif
1271
1272     assert( offsetof (struct globals_t,unused9 [2]) == REGISTER_COUNT );
1273     assert( sizeof (this->r.voice) == REGISTER_COUNT );
1274 }