From 8c0be0dc7bf02554e139d10e080b8c079dfbc682 Mon Sep 17 00:00:00 2001 From: Buschel Date: Fri, 20 May 2011 22:53:22 +0000 Subject: [PATCH] Use more IRAM on S5L870x to speed up wma by ~3%. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@29905 a1c6a512-1295-4272-9138-f99709370657 --- apps/codecs/libwma/wmadata.h | 6 +++--- apps/codecs/libwma/wmadec.h | 21 ++++++++++++++++----- apps/codecs/libwma/wmadeci.c | 28 +++++++++++++++++----------- 3 files changed, 36 insertions(+), 19 deletions(-) diff --git a/apps/codecs/libwma/wmadata.h b/apps/codecs/libwma/wmadata.h index 343b6e9e9..07a55df19 100644 --- a/apps/codecs/libwma/wmadata.h +++ b/apps/codecs/libwma/wmadata.h @@ -1426,7 +1426,7 @@ static const CoefVLCTable coef_vlcs[6] = { }; /*table of the values of 10^(index*.05)*/ -const fixed64 pow_table[] = +const fixed64 pow_table[] ICONST_ATTR_WMA_XL_IRAM = { 0x199a, 0x1cb9, 0x203a, 0x2429, 0x2893, 0x2d86, 0x3314, 0x3950, 0x404e, 0x4827, 0x50f4, 0x5ad5, 0x65ea, 0x725a, 0x804e, 0x8ff6, 0xa186, 0xb53c, 0xcb59, 0xe429, 0x10000LL,0x11f3dLL,0x14249LL,0x1699cLL,0x195bcLL, @@ -1499,7 +1499,7 @@ const fixed32 pow_10_to_yover16[] ICONST_ATTR= 0x27100000, 0x2d1bd1e1, 0x341736de, 0x3c2743e8, 0x4576cb4a }; -const fixed32 pow_a_table[] = +const fixed32 pow_a_table[] ICONST_ATTR_WMA_XL_IRAM = { 0x1004,0x1008,0x100c,0x1010,0x1014,0x1018,0x101c,0x1021,0x1025,0x1029,0x102d, 0x1031,0x1036,0x103a,0x103e,0x1043,0x1047,0x104b,0x1050,0x1054,0x1059,0x105d, @@ -1522,7 +1522,7 @@ const fixed32 pow_a_table[] = */ -const fixed64 lsp_pow_e_table[] = +const fixed64 lsp_pow_e_table[] ICONST_ATTR_WMA_XL_IRAM = { 0xb504f30000000000LL, 0x9837f00000000000LL, 0x8000000000000000LL, 0x6ba27e8000000000LL, 0x5a82798000000000LL, 0x4c1bf80000000000LL, 0x4000000000000000LL, 0x35d13f4000000000LL, 0x2d413cc000000000LL, 0x260dfc0000000000LL, diff --git a/apps/codecs/libwma/wmadec.h b/apps/codecs/libwma/wmadec.h index d7fa79d26..76429dede 100644 --- a/apps/codecs/libwma/wmadec.h +++ b/apps/codecs/libwma/wmadec.h @@ -51,16 +51,27 @@ #define LSP_POW_BITS 7 -/*define IRAM for targets with 48k/80k IRAM split*/ -#ifndef IBSS_ATTR_WMA_LARGE_IRAM -#if (CONFIG_CPU == PP5022) || (CONFIG_CPU == PP5024) || defined(CPU_S5L870X) || (CONFIG_CPU == MCF5250) -/* PP5022/24, MCF5250 have 128KB of IRAM. 80KB are allocated for codecs */ + +#if (CONFIG_CPU == PP5022) || (CONFIG_CPU == PP5024) || (CONFIG_CPU == MCF5250) +/* PP5022/24 and MCF5250 have 128KB of IRAM. 80KB are allocated for codecs */ +#define IBSS_ATTR_WMA_LARGE_IRAM IBSS_ATTR +#define IBSS_ATTR_WMA_XL_IRAM +#define ICONST_ATTR_WMA_XL_IRAM + +#elif defined(CPU_S5L870X) +/* S5L870x has even more IRAM. Use it. */ #define IBSS_ATTR_WMA_LARGE_IRAM IBSS_ATTR +#define IBSS_ATTR_WMA_XL_IRAM IBSS_ATTR +#define ICONST_ATTR_WMA_XL_IRAM ICONST_ATTR + #else /* other PP's and MCF5249 have 96KB of IRAM */ #define IBSS_ATTR_WMA_LARGE_IRAM +#define IBSS_ATTR_WMA_XL_IRAM +#define ICONST_ATTR_WMA_XL_IRAM + #endif -#endif + #define VLCBITS 7 /*7 is the lowest without glitching*/ #define VLCMAX ((22+VLCBITS-1)/VLCBITS) diff --git a/apps/codecs/libwma/wmadeci.c b/apps/codecs/libwma/wmadeci.c index 50a1b4740..d63a7641c 100644 --- a/apps/codecs/libwma/wmadeci.c +++ b/apps/codecs/libwma/wmadeci.c @@ -33,19 +33,25 @@ static void wma_lsp_to_curve_init(WMADecodeContext *s, int frame_len); /*declarations of statically allocated variables used to remove malloc calls*/ -fixed32 coefsarray[MAX_CHANNELS][BLOCK_MAX_SIZE] IBSS_ATTR MEM_ALIGN_ATTR; +static fixed32 coefsarray[MAX_CHANNELS][BLOCK_MAX_SIZE] IBSS_ATTR MEM_ALIGN_ATTR; /*decode and window into IRAM on targets with at least 80KB of codec IRAM*/ -fixed32 frame_out_buf[MAX_CHANNELS][BLOCK_MAX_SIZE * 2] IBSS_ATTR_WMA_LARGE_IRAM MEM_ALIGN_ATTR; +static fixed32 frame_out_buf[MAX_CHANNELS][BLOCK_MAX_SIZE * 2] IBSS_ATTR_WMA_LARGE_IRAM MEM_ALIGN_ATTR; /*MDCT reconstruction windows*/ -fixed32 stat0[2048] MEM_ALIGN_ATTR, stat1[1024] MEM_ALIGN_ATTR, - stat2[512] MEM_ALIGN_ATTR, stat3[256] MEM_ALIGN_ATTR, stat4[128] MEM_ALIGN_ATTR; +static fixed32 stat0[2048] IBSS_ATTR_WMA_XL_IRAM MEM_ALIGN_ATTR; +static fixed32 stat1[1024] IBSS_ATTR_WMA_XL_IRAM MEM_ALIGN_ATTR; +static fixed32 stat2[ 512] IBSS_ATTR_WMA_XL_IRAM MEM_ALIGN_ATTR; +static fixed32 stat3[ 256] IBSS_ATTR_WMA_XL_IRAM MEM_ALIGN_ATTR; +static fixed32 stat4[ 128] IBSS_ATTR_WMA_XL_IRAM MEM_ALIGN_ATTR; /*VLC lookup tables*/ -uint16_t *runtabarray[2], *levtabarray[2]; +static uint16_t *runtabarray[2]; +static uint16_t *levtabarray[2]; -uint16_t runtab_big[1336] MEM_ALIGN_ATTR, runtab_small[1072] MEM_ALIGN_ATTR, - levtab_big[1336] MEM_ALIGN_ATTR, levtab_small[1072] MEM_ALIGN_ATTR; +static uint16_t runtab_big[1336] MEM_ALIGN_ATTR; +static uint16_t runtab_small[1072] MEM_ALIGN_ATTR; +static uint16_t levtab_big[1336] MEM_ALIGN_ATTR; +static uint16_t levtab_small[1072] MEM_ALIGN_ATTR; #define VLCBUF1SIZE 4598 #define VLCBUF2SIZE 3574 @@ -54,11 +60,11 @@ uint16_t runtab_big[1336] MEM_ALIGN_ATTR, runtab_small[1072] MEM_ALIGN_ATTR, /*putting these in IRAM actually makes PP slower*/ -VLC_TYPE vlcbuf1[VLCBUF1SIZE][2] MEM_ALIGN_ATTR; -VLC_TYPE vlcbuf2[VLCBUF2SIZE][2] MEM_ALIGN_ATTR; +static VLC_TYPE vlcbuf1[VLCBUF1SIZE][2] IBSS_ATTR_WMA_XL_IRAM MEM_ALIGN_ATTR; +static VLC_TYPE vlcbuf2[VLCBUF2SIZE][2] MEM_ALIGN_ATTR; /* This buffer gets reused for lsp tables */ -VLC_TYPE vlcbuf3[VLCBUF3SIZE][2] MEM_ALIGN_ATTR; -VLC_TYPE vlcbuf4[VLCBUF4SIZE][2] MEM_ALIGN_ATTR; +static VLC_TYPE vlcbuf3[VLCBUF3SIZE][2] MEM_ALIGN_ATTR; +static VLC_TYPE vlcbuf4[VLCBUF4SIZE][2] MEM_ALIGN_ATTR; -- 2.11.4.GIT