FS#12113: Optimize IRAM configuration for SPC. Performance increases by 5-6% on PP502...
[kugel-rb.git] / apps / codecs / libspc / spc_codec.h
blob391540cb19db0fc3427b57d5f9fc681fac37563c
1 /***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
10 * Copyright (C) 2007-2008 Michael Sevakis (jhMikeS)
11 * Copyright (C) 2006-2007 Adam Gashlin (hcs)
12 * Copyright (C) 2004-2007 Shay Green (blargg)
13 * Copyright (C) 2002 Brad Martin
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
21 * KIND, either express or implied.
23 ****************************************************************************/
25 /* lovingly ripped off from Game_Music_Emu 0.5.2. http://www.slack.net/~ant/ */
26 /* DSP Based on Brad Martin's OpenSPC DSP emulator */
27 /* tag reading from sexyspc by John Brawn (John_Brawn@yahoo.com) and others */
29 #ifndef _SPC_CODEC_H_
30 #define _SPC_CODEC_H_
/* Rather than commenting out individual asserts, force NDEBUG on before
 * <assert.h> is included so every assert() in the codec compiles away. */
#ifndef NDEBUG
#define NDEBUG
#endif
#include <assert.h>
/** Basic configuration options **/

/* Default ARM_ARCH to 0 when the build has not defined it, so that the
 * `#if ARM_ARCH >= 6` tests further down are always well-formed. */
#ifndef ARM_ARCH
#define ARM_ARCH 0
#endif

/* Ask for the dual-core code path by default... */
#define SPC_DUAL_CORE 1

/* ...and force it off when NUM_CORES reports a single core. */
#if !defined(SPC_DUAL_CORE) || NUM_CORES == 1
#undef  SPC_DUAL_CORE
#define SPC_DUAL_CORE 0
#endif
/* Feature selection per target. Exactly one branch is taken and each branch
 * defines all three of:
 *   SPC_BRRCACHE - 1: cache BRR waves, 0: do not cache them
 *   SPC_NOINTERP - 1: gaussian interpolation disabled, 0: allowed
 *   SPC_NOECHO   - 1: echo processing disabled, 0: allowed
 */

/* Only some targets are fast enough for gaussian and realtime BRR decode */
#if CONFIG_CPU == S3C2440 || CONFIG_CPU == IMX31L || \
    CONFIG_CPU == AS3525 || CONFIG_CPU == AS3525v2 || \
    (CONFIG_PLATFORM & PLATFORM_HOSTED) || MEMORYSIZE <= 2
    /* Don't cache BRR waves */
    #define SPC_BRRCACHE 0

    /* Allow gaussian interpolation */
    #define SPC_NOINTERP 0

    /* Allow echo processing */
    #define SPC_NOECHO 0
#elif defined(CPU_COLDFIRE)
    /* Cache BRR waves */
    #define SPC_BRRCACHE 1

    /* Disable gaussian interpolation */
    #define SPC_NOINTERP 1

    /* Allow echo processing */
    #define SPC_NOECHO 0
#elif defined (CPU_PP) && SPC_DUAL_CORE
    /* Cache BRR waves */
    #define SPC_BRRCACHE 1

    /* Disable gaussian interpolation */
    #define SPC_NOINTERP 1

    /* Allow echo processing */
    #define SPC_NOECHO 0
#else
    /* Cache BRR waves */
    #define SPC_BRRCACHE 1

    /* Disable gaussian interpolation */
    #define SPC_NOINTERP 1

    /* Disable echo processing */
    #define SPC_NOECHO 1
#endif
/* IRAM placement attributes for the SPC codec. Every branch defines
 * IBSS_ATTR_SPC, ICODE_ATTR_SPC, ICONST_ATTR_SPC and
 * IBSS_ATTR_SPC_LARGE_IRAM; an empty definition means "do not place this
 * in IRAM". */
#if (CONFIG_CPU == MCF5250)
#define IBSS_ATTR_SPC   IBSS_ATTR
#define ICODE_ATTR_SPC  ICODE_ATTR
#define ICONST_ATTR_SPC ICONST_ATTR
/* Not enough IRAM available to move further data to it. */
#define IBSS_ATTR_SPC_LARGE_IRAM

#elif (CONFIG_CPU == PP5020)
/* spc is slower on PP5020 when moving data to IRAM. */
#define IBSS_ATTR_SPC
#define ICODE_ATTR_SPC
#define ICONST_ATTR_SPC
/* Not enough IRAM available to move further data to it. */
#define IBSS_ATTR_SPC_LARGE_IRAM

#elif (CONFIG_CPU == PP5022) || (CONFIG_CPU == PP5024)
#define IBSS_ATTR_SPC   IBSS_ATTR
#define ICODE_ATTR_SPC  ICODE_ATTR
#define ICONST_ATTR_SPC ICONST_ATTR
/* Not enough IRAM available to move further data to it. */
#define IBSS_ATTR_SPC_LARGE_IRAM

#elif defined(CPU_S5L870X)
#define IBSS_ATTR_SPC   IBSS_ATTR
#define ICODE_ATTR_SPC  ICODE_ATTR
#define ICONST_ATTR_SPC ICONST_ATTR
/* Very large IRAM. Move even more data to it. */
#define IBSS_ATTR_SPC_LARGE_IRAM IBSS_ATTR

#else
#define IBSS_ATTR_SPC   IBSS_ATTR
#define ICODE_ATTR_SPC  ICODE_ATTR
#define ICONST_ATTR_SPC ICONST_ATTR
/* Not enough IRAM available to move further data to it. */
#define IBSS_ATTR_SPC_LARGE_IRAM
#endif
/* In dual-core builds, replace any existing SHAREDBSS_ATTR/SHAREDDATA_ATTR
 * definitions so that objects tagged with them are emitted into the
 * .ibss and .idata sections respectively. */
#if SPC_DUAL_CORE
#undef  SHAREDBSS_ATTR
#define SHAREDBSS_ATTR __attribute__ ((section(".ibss")))
#undef  SHAREDDATA_ATTR
#define SHAREDDATA_ATTR __attribute__((section(".idata")))
#endif
/* Samples per channel per iteration: 2048 on single-core PP targets,
 * 1024 everywhere else. */
#if defined(CPU_PP) && NUM_CORES == 1
#define WAV_CHUNK_SIZE 2048
#else
#define WAV_CHUNK_SIZE 1024
#endif
/**************** Little-endian handling ****************/

/* Read an unsigned 16-bit little-endian value from p.
 * The value is assembled from two single-byte reads (low byte at p[0],
 * high byte at p[1]), so p needs no particular alignment and the result
 * does not depend on host byte order. Returns a value in 0..0xFFFF. */
static inline unsigned get_le16( void const* p )
{
    unsigned char const* bytes = (unsigned char const*) p;

    return bytes [1] * 0x100u + bytes [0];
}
/* Read a signed 16-bit little-endian value from p.
 * The high byte (p[1]) is read as signed so the result is sign-extended;
 * the low byte (p[0]) is read as unsigned. Byte-wise access means p needs
 * no particular alignment. Returns a value in -32768..32767. */
static inline int get_le16s( void const* p )
{
    int const high = ((signed char const*) p) [1];
    int const low  = ((unsigned char const*) p) [0];

    return high * 0x100 + low;
}
/* Store the low 16 bits of n at p in little-endian order
 * (p[0] = low byte, p[1] = high byte). Written one byte at a time, so p
 * needs no particular alignment. Bits of n above bit 15 are discarded. */
static inline void set_le16( void* p, unsigned n )
{
    unsigned char* bytes = (unsigned char*) p;

    bytes [1] = (unsigned char) (n >> 8);
    bytes [0] = (unsigned char) n;
}
/* 16-bit access macros.
 * GET_LE16 / GET_LE16A / SET_LE16 always go through the byte-wise helpers.
 * INT16A / INT16SA dereference addr directly as a host-order (u)int16_t.
 * NOTE(review): the trailing 'A' presumably means the caller guarantees
 * addr is 16-bit aligned - confirm against the call sites. */
#define GET_LE16( addr )        get_le16( addr )
#define GET_LE16A( addr )       get_le16( addr )
#define SET_LE16( addr, data )  set_le16( addr, data )
#define INT16A( addr )          (*(uint16_t*) (addr))
#define INT16SA( addr )         (*(int16_t*) (addr))

/* On little-endian hosts the signed read and the write can be a direct
 * 16-bit access; otherwise fall back to the byte-wise helpers. */
#ifdef ROCKBOX_LITTLE_ENDIAN
    #define GET_LE16SA( addr )      (*( int16_t*) (addr))
    #define SET_LE16A( addr, data ) (void) (*(uint16_t*) (addr) = (data))
#else
    #define GET_LE16SA( addr )      get_le16s( addr )
    #define SET_LE16A( addr, data ) set_le16 ( addr, data )
#endif
/* Forward declaration, plus shorthand for the emulator-instance parameter
 * used in the prototypes below: THIS expands to a const pointer named
 * `this` to the emulator state. */
struct Spc_Emu;
#define THIS struct Spc_Emu* const this
/* The CPU portion (shock!) */

/* Register file of the emulated CPU. */
struct cpu_regs_t
{
    long pc; /* more than 16 bits to allow overflow detection */
    uint8_t a;
    uint8_t x;
    uint8_t y;
    uint8_t status;
    uint8_t sp;
};
/* One entry of the table that struct cpu_ram_t overlays on emulated RAM
 * (its `sd` member): two 16-bit fields, `start` and `loop`.
 * NOTE(review): presumably the start and loop addresses of a sample in the
 * DSP source directory - confirm against the DSP code. */
struct src_dir
{
    uint16_t start;
    uint16_t loop;
};
198 struct cpu_ram_t
200 union {
201 uint8_t padding1 [0x100];
202 uint16_t align;
203 } padding1 [1];
204 union {
205 uint8_t ram [0x10000];
206 struct src_dir sd [0x10000/sizeof(struct src_dir)];
208 uint8_t padding2 [0x100];
211 #undef RAM
212 #define RAM ram.ram
213 extern struct cpu_ram_t ram;
215 long CPU_run( THIS, long start_time ) ICODE_ATTR_SPC;
216 void CPU_Init( THIS );
/* The DSP portion (awe!) */

/* Number of DSP voices and number of byte-wide DSP registers. */
enum { VOICE_COUNT    =   8 };
enum { REGISTER_COUNT = 128 };
/* Per-voice register block as laid out in the DSP register file
 * (16 bytes per voice; struct Spc_Dsp overlays VOICE_COUNT of these on the
 * register array). */
struct raw_voice_t
{
    int8_t  volume [2];
    uint8_t rate [2];
    uint8_t waveform;
    uint8_t adsr [2];   /* envelope rates for attack, decay, and sustain */
    uint8_t gain;       /* envelope gain (if not using ADSR) */
    int8_t  envx;       /* current envelope level */
    int8_t  outx;       /* current sample */
    int8_t  unused [6];
};
/* Global DSP registers, laid out so each named field sits at the register
 * offset given in its comment (hex). The unusedN arrays skip over the
 * bytes in between; the whole struct is 128 bytes. */
struct globals_t
{
    int8_t  unused1 [12];
    int8_t  volume_0;         /* 0C   Main Volume Left (-.7) */
    int8_t  echo_feedback;    /* 0D   Echo Feedback (-.7) */
    int8_t  unused2 [14];
    int8_t  volume_1;         /* 1C   Main Volume Right (-.7) */
    int8_t  unused3 [15];
    int8_t  echo_volume_0;    /* 2C   Echo Volume Left (-.7) */
    uint8_t pitch_mods;       /* 2D   Pitch Modulation on/off for each voice */
    int8_t  unused4 [14];
    int8_t  echo_volume_1;    /* 3C   Echo Volume Right (-.7) */
    uint8_t noise_enables;    /* 3D   Noise output on/off for each voice */
    int8_t  unused5 [14];
    uint8_t key_ons;          /* 4C   Key On for each voice */
    uint8_t echo_ons;         /* 4D   Echo on/off for each voice */
    int8_t  unused6 [14];
    uint8_t key_offs;         /* 5C   key off for each voice
                                      (instantiates release mode) */
    uint8_t wave_page;        /* 5D   source directory (wave table offsets) */
    int8_t  unused7 [14];
    uint8_t flags;            /* 6C   flags and noise freq */
    uint8_t echo_page;        /* 6D */
    int8_t  unused8 [14];
    uint8_t wave_ended;       /* 7C */
    uint8_t echo_delay;       /* 7D   ms >> 4 */
    char    unused9 [2];
};
/* Envelope states. */
enum state_t
{   /* -1, 0, +1 allows more efficient if statements */
    state_decay   = -1,
    state_sustain = 0,
    state_attack  = +1,
    state_release = 2
};
/* Descriptor for one cached wave (struct Spc_Dsp keeps 256 of these in
 * each of its wave_entry tables when SPC_BRRCACHE is enabled). */
struct cache_entry_t
{
    int16_t const* samples;
    unsigned end;        /* past-the-end position */
    unsigned loop;       /* number of samples in loop */
    unsigned start_addr;
};
/* BRR_BLOCK_SIZE sizes the per-voice sample buffer in struct voice_t;
 * BRR_CACHE_SIZE is the element count of the BRRcache array (0x20000 plus
 * 32 extra entries). */
enum { BRR_BLOCK_SIZE = 16 };
enum { BRR_CACHE_SIZE = 0x20000 + 32 };
282 struct voice_t
284 #if SPC_BRRCACHE
285 int16_t const* samples;
286 long wave_end;
287 int wave_loop;
288 #else
289 int16_t samples [3 + BRR_BLOCK_SIZE + 1];
290 int block_header; /* header byte from current block */
291 #endif
292 uint8_t const* addr;
293 short volume [2];
294 long position;/* position in samples buffer, with 12-bit fraction */
295 short envx;
296 short env_mode;
297 short env_timer;
298 short key_on_delay;
/* Storage for cached BRR waves; only declared when the cache is enabled. */
#if SPC_BRRCACHE
/* a little extra for samples that go past end */
extern int16_t BRRcache [BRR_CACHE_SIZE];
#endif
/* FIR_BUF_HALF is the base size from which the per-CPU FIR buffer geometry
 * below is derived. Each optimised CPU family defines:
 *   FIR_BUF_CNT   - number of int32_t slots in the FIR buffer
 *   FIR_BUF_SIZE  - its size in bytes
 *   FIR_BUF_ALIGN - required alignment in bytes
 *   FIR_BUF_MASK  - mask derived from the alignment and slot size
 * Other CPUs get none of these and use the plain arrays in struct Spc_Dsp.
 */
enum { FIR_BUF_HALF = 8 };

#if defined(CPU_COLDFIRE)
/* global because of the large alignment requirement for hardware masking -
 * L-R interleaved 16-bit samples for easy loading and mac.w use.
 */
enum
{
    FIR_BUF_CNT   = FIR_BUF_HALF,
    FIR_BUF_SIZE  = FIR_BUF_CNT * sizeof ( int32_t ),
    FIR_BUF_ALIGN = FIR_BUF_SIZE * 2,
    FIR_BUF_MASK  = ~((FIR_BUF_ALIGN / 2) | (sizeof ( int32_t ) - 1))
};
#elif defined (CPU_ARM)
#if ARM_ARCH >= 6
enum
{
    FIR_BUF_CNT   = FIR_BUF_HALF * 2,
    FIR_BUF_SIZE  = FIR_BUF_CNT * sizeof ( int32_t ),
    FIR_BUF_ALIGN = FIR_BUF_SIZE,
    FIR_BUF_MASK  = ~((FIR_BUF_ALIGN / 2) | (sizeof ( int32_t ) - 1))
};
#else
enum
{
    FIR_BUF_CNT   = FIR_BUF_HALF * 2 * 2,
    FIR_BUF_SIZE  = FIR_BUF_CNT * sizeof ( int32_t ),
    FIR_BUF_ALIGN = FIR_BUF_SIZE,
    FIR_BUF_MASK  = ~((FIR_BUF_ALIGN / 2) | (sizeof ( int32_t ) * 2 - 1))
};
#endif /* ARM_ARCH */
#endif /* CPU_* */
339 struct Spc_Dsp
341 union
343 struct raw_voice_t voice [VOICE_COUNT];
344 uint8_t reg [REGISTER_COUNT];
345 struct globals_t g;
346 int16_t align;
347 } r;
349 unsigned echo_pos;
350 int keys_down;
351 int noise_count;
352 uint16_t noise; /* also read as int16_t */
354 #if defined(CPU_COLDFIRE)
355 /* FIR history is interleaved. Hardware handles wrapping by mask.
356 * |LR|LR|LR|LR|LR|LR|LR|LR| */
357 int32_t *fir_ptr;
358 /* wrapped address just behind current position -
359 allows mac.w to increment and mask fir_ptr */
360 int32_t *last_fir_ptr;
361 /* copy of echo FIR constants as int16_t for use with mac.w */
362 int16_t fir_coeff [VOICE_COUNT];
363 #elif defined (CPU_ARM)
364 /* fir_buf [i + 8] == fir_buf [i], to avoid wrap checking in FIR code */
365 int32_t *fir_ptr;
366 #if ARM_ARCH >= 6
367 /* FIR history is interleaved with guard to eliminate wrap checking
368 * when convolving.
369 * |LR|LR|LR|LR|LR|LR|LR|LR|--|--|--|--|--|--|--|--| */
370 /* copy of echo FIR constants as int16_t, loaded as int32 for
371 * halfword, packed multiples */
372 int16_t fir_coeff [VOICE_COUNT];
373 #else
374 /* FIR history is interleaved with guard to eliminate wrap checking
375 * when convolving.
376 * |LL|RR|LL|RR|LL|RR|LL|RR|LL|RR|LL|RR|LL|RR|LL|RR|...
377 * |--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--| */
378 /* copy of echo FIR constants as int32_t, for faster access */
379 int32_t fir_coeff [VOICE_COUNT];
380 #endif /* ARM_ARCH */
381 #else /* Unoptimized CPU */
382 /* fir_buf [i + 8] == fir_buf [i], to avoid wrap checking in FIR code */
383 int fir_pos; /* (0 to 7) */
384 int fir_buf [FIR_BUF_HALF * 2] [2];
385 /* copy of echo FIR constants as int, for faster access */
386 int fir_coeff [VOICE_COUNT];
387 #endif
389 struct voice_t voice_state [VOICE_COUNT];
391 #if SPC_BRRCACHE
392 uint8_t oldsize;
393 struct cache_entry_t wave_entry [256];
394 struct cache_entry_t wave_entry_old [256];
395 #endif
398 void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) ICODE_ATTR_SPC;
399 void DSP_reset( struct Spc_Dsp* this );
401 static inline void DSP_run( struct Spc_Dsp* this, long count, int32_t* out )
403 /* Should we just fill the buffer with silence? Flags won't be cleared */
404 /* during this run so it seems it should keep resetting every sample. */
405 if ( this->r.g.flags & 0x80 )
406 DSP_reset( this );
408 DSP_run_( this, count, out );
/**************** SPC emulator ****************/

/* 1.024 MHz clock / 32000 samples per second */
enum { CLOCKS_PER_SAMPLE = 32 };

/* Half a sample period, in clocks. */
enum { EXTRA_CLOCKS = CLOCKS_PER_SAMPLE / 2 };

/* using this disables timer (since this will always be in the future) */
enum { TIMER_DISABLED_TIME = 127 };

/* Size and address of the ROM region; 0xFFC0 + 64 ends exactly at the top
 * of the 64 KiB address space. */
enum { ROM_SIZE = 64 };
enum { ROM_ADDR = 0xFFC0 };

enum { TIMER_COUNT = 3 };
/* State of one emulated timer (struct Spc_Emu holds TIMER_COUNT of them).
 * next_tick is the time that Timer_run() compares against to decide
 * whether any work is due. */
struct Timer
{
    long next_tick;
    int period;
    int count;
    int shift;
    int enabled;
    int counter;
};
435 void Timer_run_( struct Timer* t, long time ) ICODE_ATTR_SPC;
437 static inline void Timer_run( struct Timer* t, long time )
439 if ( time >= t->next_tick )
440 Timer_run_( t, time );
443 struct Spc_Emu
445 uint8_t cycle_table [0x100];
446 struct cpu_regs_t r;
448 int32_t* sample_buf;
449 long next_dsp;
450 int rom_enabled;
451 int extra_cycles;
453 struct Timer timer [TIMER_COUNT];
455 /* large objects at end */
456 struct Spc_Dsp dsp;
457 uint8_t extra_ram [ROM_SIZE];
458 uint8_t boot_rom [ROM_SIZE];
/* 0x10180 is the offset at which the `dsp` member of struct spc_file_t
 * ends (0x100 header + 0x10000 RAM + 0x80 DSP registers); it does not
 * include the trailing ipl_rom member.
 * NOTE(review): presumably the minimum acceptable file size - confirm in
 * SPC_load_spc(). */
enum { SPC_FILE_SIZE = 0x10180 };

/* On-disk layout of an SPC file. Every member is a char/uint8_t (array),
 * so the struct has no padding: a 0x100-byte header holding the signature
 * and initial CPU registers, then the RAM image, DSP registers and
 * ipl_rom. */
struct spc_file_t
{
    char    signature [27];
    char    unused [10];
    uint8_t pc [2];         /* two bytes rather than a uint16_t */
    uint8_t a;
    uint8_t x;
    uint8_t y;
    uint8_t status;
    uint8_t sp;
    char    unused2 [212];
    uint8_t ram [0x10000];
    uint8_t dsp [128];
    uint8_t ipl_rom [128];
};
479 void SPC_Init( THIS );
481 int SPC_load_spc( THIS, const void* data, long size );
483 /**************** DSP interaction ****************/
484 void DSP_write( struct Spc_Dsp* this, int i, int data ) ICODE_ATTR_SPC;
486 static inline int DSP_read( struct Spc_Dsp* this, int i )
488 assert( (unsigned) i < REGISTER_COUNT );
489 return this->r.reg [i];
492 void SPC_run_dsp_( THIS, long time ) ICODE_ATTR_SPC;
494 static inline void SPC_run_dsp( THIS, long time )
496 if ( time >= this->next_dsp )
497 SPC_run_dsp_( this, time );
500 int SPC_read( THIS, unsigned addr, long const time ) ICODE_ATTR_SPC;
501 void SPC_write( THIS, unsigned addr, int data, long const time ) ICODE_ATTR_SPC;
503 /**************** Sample generation ****************/
504 int SPC_play( THIS, long count, int32_t* out ) ICODE_ATTR_SPC;
506 #endif /* _SPC_CODEC_H_ */