From 8f0bc4f22c5257fcb47816b69d1006acc96369db Mon Sep 17 00:00:00 2001 From: amiconn Date: Wed, 10 Mar 2010 21:39:12 +0000 Subject: [PATCH] Move (small) data into DRAM on PP5020, it's ~4.5% faster that way. Closes about half of the performance gap towards PP5022. The (relatively large) buffers for decoded data stay in IRAM, as does the reciprocal table. Clarify some comments. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@25108 a1c6a512-1295-4272-9138-f99709370657 --- apps/codecs/demac/libdemac/decoder.c | 13 +++++++----- apps/codecs/demac/libdemac/demac_config.h | 34 ++++++++++++++++++++++++------- apps/codecs/demac/libdemac/entropy.c | 18 ++++++++-------- apps/codecs/demac/libdemac/filter.c | 2 +- 4 files changed, 45 insertions(+), 22 deletions(-) diff --git a/apps/codecs/demac/libdemac/decoder.c b/apps/codecs/demac/libdemac/decoder.c index c415db774..6566ba26a 100644 --- a/apps/codecs/demac/libdemac/decoder.c +++ b/apps/codecs/demac/libdemac/decoder.c @@ -35,24 +35,27 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA #ifdef FILTER256_IRAM static filter_int filterbuf32[(32*3 + FILTER_HISTORY_SIZE) * 2] - IBSS_ATTR __attribute__((aligned(16))); /* 2432/4864 bytes */ + IBSS_ATTR_DEMAC __attribute__((aligned(16))); + /* 2432 or 4864 bytes */ static filter_int filterbuf256[(256*3 + FILTER_HISTORY_SIZE) * 2] - IBSS_ATTR __attribute__((aligned(16))); /* 5120/10240 bytes */ + IBSS_ATTR_DEMAC __attribute__((aligned(16))); + /* 5120 or 10240 bytes */ #define FILTERBUF64 filterbuf256 #define FILTERBUF32 filterbuf32 #define FILTERBUF16 filterbuf32 #else static filter_int filterbuf64[(64*3 + FILTER_HISTORY_SIZE) * 2] - IBSS_ATTR __attribute__((aligned(16))); /* 2432/4864 bytes */ + IBSS_ATTR_DEMAC __attribute__((aligned(16))); + /* 2432 or 4864 bytes */ static filter_int filterbuf256[(256*3 + FILTER_HISTORY_SIZE) * 2] - __attribute__((aligned(16))); /* 5120/10240 bytes */ + __attribute__((aligned(16))); /* 5120 or 10240 bytes */ #define FILTERBUF64 
filterbuf64 #define FILTERBUF32 filterbuf64 #define FILTERBUF16 filterbuf64 #endif /* This is only needed for "insane" files, and no current Rockbox targets - can hope to decode them in realtime, although the Gigabeat S comes close. */ + can hope to decode them in realtime, except the Gigabeat S (at 528MHz). */ static filter_int filterbuf1280[(1280*3 + FILTER_HISTORY_SIZE) * 2] IBSS_ATTR_DEMAC_INSANEBUF __attribute__((aligned(16))); /* 17408 or 34816 bytes */ diff --git a/apps/codecs/demac/libdemac/demac_config.h b/apps/codecs/demac/libdemac/demac_config.h index c908c3ea1..f3b293e1d 100644 --- a/apps/codecs/demac/libdemac/demac_config.h +++ b/apps/codecs/demac/libdemac/demac_config.h @@ -49,18 +49,39 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA #endif #if CONFIG_CPU == PP5002 || defined(CPU_S5L870X) -/* Code in IRAM for speed, not enough IRAM for the insane filter buffer. */ +/* Code and data in IRAM for speed (PP5002 has a broken cache), not enough IRAM + * for the insane filter buffer. Reciprocal table for division in IRAM. */ #define ICODE_SECTION_DEMAC_ARM .icode #define ICODE_ATTR_DEMAC ICODE_ATTR +#define ICONST_ATTR_DEMAC ICONST_ATTR +#define IBSS_ATTR_DEMAC IBSS_ATTR #define IBSS_ATTR_DEMAC_INSANEBUF -#elif defined(CPU_PP502x) -/* Insane filter buffer not in IRAM due to division table. */ + +#elif CONFIG_CPU == PP5020 +/* Code and small data in DRAM for speed (PP5020 IRAM isn't completely single + * cycle). Insane filter buffer not in IRAM in favour of reciprocal table for + * division. Decoded data buffers should be in IRAM (defined by the caller). */ +#define ICODE_SECTION_DEMAC_ARM .text +#define ICODE_ATTR_DEMAC +#define ICONST_ATTR_DEMAC +#define IBSS_ATTR_DEMAC +#define IBSS_ATTR_DEMAC_INSANEBUF + +#elif CONFIG_CPU == PP5022 +/* Code in DRAM, data in IRAM.
Insane filter buffer not in IRAM in favour of + * reciprocal table for division */ #define ICODE_SECTION_DEMAC_ARM .text #define ICODE_ATTR_DEMAC +#define ICONST_ATTR_DEMAC ICONST_ATTR +#define IBSS_ATTR_DEMAC IBSS_ATTR #define IBSS_ATTR_DEMAC_INSANEBUF + #else +/* Code in DRAM, data in IRAM, including insane filter buffer. */ #define ICODE_SECTION_DEMAC_ARM .text #define ICODE_ATTR_DEMAC +#define ICONST_ATTR_DEMAC ICONST_ATTR +#define IBSS_ATTR_DEMAC IBSS_ATTR #define IBSS_ATTR_DEMAC_INSANEBUF IBSS_ATTR #endif @@ -68,11 +89,10 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA #define APE_OUTPUT_DEPTH (ape_ctx->bps) -#define IBSS_ATTR -#define IBSS_ATTR_DEMAC_INSANEBUF -#define ICONST_ATTR -#define ICODE_ATTR #define ICODE_ATTR_DEMAC +#define ICONST_ATTR_DEMAC +#define IBSS_ATTR_DEMAC +#define IBSS_ATTR_DEMAC_INSANEBUF /* Use to give gcc hints on which branch is most likely taken */ #if defined(__GNUC__) && __GNUC__ >= 3 diff --git a/apps/codecs/demac/libdemac/entropy.c b/apps/codecs/demac/libdemac/entropy.c index 24f5932de..a09ba8f54 100644 --- a/apps/codecs/demac/libdemac/entropy.c +++ b/apps/codecs/demac/libdemac/entropy.c @@ -36,7 +36,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA hard-coded in the Monkey's Audio decoder.
*/ -static const int counts_3970[65] ICONST_ATTR = +static const int counts_3970[65] ICONST_ATTR_DEMAC = { 0,14824,28224,39348,47855,53994,58171,60926, 62682,63786,64463,64878,65126,65276,65365,65419, @@ -50,7 +50,7 @@ static const int counts_3970[65] ICONST_ATTR = }; /* counts_diff_3970[i] = counts_3970[i+1] - counts_3970[i] */ -static const int counts_diff_3970[64] ICONST_ATTR = +static const int counts_diff_3970[64] ICONST_ATTR_DEMAC = { 14824,13400,11124,8507,6139,4177,2755,1756, 1104,677,415,248,150,89,54,31, @@ -62,7 +62,7 @@ static const int counts_diff_3970[64] ICONST_ATTR = 1,1,1,1,1,1,1,1 }; -static const int counts_3980[65] ICONST_ATTR = +static const int counts_3980[65] ICONST_ATTR_DEMAC = { 0,19578,36160,48417,56323,60899,63265,64435, 64971,65232,65351,65416,65447,65466,65476,65482, @@ -77,7 +77,7 @@ static const int counts_3980[65] ICONST_ATTR = /* counts_diff_3980[i] = counts_3980[i+1] - counts_3980[i] */ -static const int counts_diff_3980[64] ICONST_ATTR = +static const int counts_diff_3980[64] ICONST_ATTR_DEMAC = { 19578,16582,12257,7906,4576,2366,1170,536, 261,119,65,31,19,10,6,3, @@ -122,8 +122,8 @@ each function (and the RNGC macro)). for aligned reads. 
*/ -static unsigned char* bytebuffer IBSS_ATTR; -static int bytebufferoffset IBSS_ATTR; +static unsigned char* bytebuffer IBSS_ATTR_DEMAC; +static int bytebufferoffset IBSS_ATTR_DEMAC; static inline void skip_byte(void) { @@ -159,7 +159,7 @@ struct rangecoder_t unsigned int buffer; /* buffer for input/output */ }; -static struct rangecoder_t rc IBSS_ATTR; +static struct rangecoder_t rc IBSS_ATTR_DEMAC; /* Start the decoder */ static inline void range_start_decoding(void) @@ -276,8 +276,8 @@ struct rice_t uint32_t ksum; }; -static struct rice_t riceX IBSS_ATTR; -static struct rice_t riceY IBSS_ATTR; +static struct rice_t riceX IBSS_ATTR_DEMAC; +static struct rice_t riceY IBSS_ATTR_DEMAC; static inline void update_rice(struct rice_t* rice, int x) { diff --git a/apps/codecs/demac/libdemac/filter.c b/apps/codecs/demac/libdemac/filter.c index 47a0aeb28..805509830 100644 --- a/apps/codecs/demac/libdemac/filter.c +++ b/apps/codecs/demac/libdemac/filter.c @@ -260,7 +260,7 @@ static void ICODE_ATTR_DEMAC do_apply_filter_3970(struct filter_t* f, } } -static struct filter_t filter[2] IBSS_ATTR; +static struct filter_t filter[2] IBSS_ATTR_DEMAC; static void do_init_filter(struct filter_t* f, filter_int* buf) { -- 2.11.4.GIT