From 92d177ceffeace26a96435de1dd2bbe5dc4d92f7 Mon Sep 17 00:00:00 2001 From: Buschel Date: Thu, 10 Jun 2010 19:02:27 +0000 Subject: [PATCH] Submit FS#11365. Speed up mp3 decoding on ARM processors. It is faster to use the C-implementation of dct32 compiled with -O1 than the asm implementation of dct32 compiled with -O2. Configuration for Coldfire processors is untouched. In the new configuration the stack of the COP decoding thread needs to be increased on dualcore targets. Speed increases by up to 0.9 MHz (-O2 against -O1 using eabi). git-svn-id: svn://svn.rockbox.org/rockbox/trunk@26746 a1c6a512-1295-4272-9138-f99709370657 --- apps/codecs/libmad/SOURCES | 1 - apps/codecs/libmad/libmad.make | 13 ++++++++++++- apps/codecs/libmad/synth.c | 3 ++- apps/codecs/mpa.c | 2 +- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/apps/codecs/libmad/SOURCES b/apps/codecs/libmad/SOURCES index b7e195732..a4ffcb5c6 100644 --- a/apps/codecs/libmad/SOURCES +++ b/apps/codecs/libmad/SOURCES @@ -14,6 +14,5 @@ imdct_mcf5249.S #endif #if defined(CPU_ARM) imdct_l_arm.S -dct32_arm.S synth_full_arm.S #endif diff --git a/apps/codecs/libmad/libmad.make b/apps/codecs/libmad/libmad.make index 331ee8916..58d2dc84e 100644 --- a/apps/codecs/libmad/libmad.make +++ b/apps/codecs/libmad/libmad.make @@ -11,7 +11,18 @@ # (one for codec, one for mpegplayer) # so a little trickery is necessary -MADFLAGS = $(CODECFLAGS) -UDEBUG -DNDEBUG -O2 -I$(APPSDIR)/codecs/libmad -DHAVE_LIMITS_H +# Extract optimization level ('-O') from compile flags. Will be set later. 
+MADFLAGS = $(filter-out -O%,$(CODECFLAGS)) -I$(APPSDIR)/codecs/libmad +MADFLAGS += -UDEBUG -DNDEBUG -DHAVE_LIMITS_H + +# libmad is faster on ARM-targets with -O1 than -O2 +ifeq ($(CPU),arm) + MADFLAGS += -O1 +else + MADFLAGS += -O2 +endif + +# MPEGplayer MPEGMADFLAGS = $(MADFLAGS) -DMPEGPLAYER # libmad diff --git a/apps/codecs/libmad/synth.c b/apps/codecs/libmad/synth.c index 7f1c2e6fc..5ae9811ea 100644 --- a/apps/codecs/libmad/synth.c +++ b/apps/codecs/libmad/synth.c @@ -67,7 +67,8 @@ void mad_synth_mute(struct mad_synth *synth) } } -#ifdef FPM_ARM +#if 0 /* dct32 asm implementation is slower on current arm systems */ +/* #ifdef FPM_ARM */ void dct32(mad_fixed_t const in[32], unsigned int slot, mad_fixed_t lo[16][8], mad_fixed_t hi[16][8]); diff --git a/apps/codecs/mpa.c b/apps/codecs/mpa.c index 2fa7d02bb..6fea80807 100644 --- a/apps/codecs/mpa.c +++ b/apps/codecs/mpa.c @@ -202,7 +202,7 @@ static void set_elapsed(struct mp3entry* id3) * Run the synthesis filter on the COProcessor */ -static int mad_synth_thread_stack[DEFAULT_STACK_SIZE/sizeof(int)/2] IBSS_ATTR; +static int mad_synth_thread_stack[DEFAULT_STACK_SIZE/sizeof(int)] IBSS_ATTR; static const unsigned char * const mad_synth_thread_name = "mp3dec"; static unsigned int mad_synth_thread_id = 0; -- 2.11.4.GIT