From 8e832bb511925f43b6d70cfb88e93cc460668457 Mon Sep 17 00:00:00 2001 From: Buschel Date: Sat, 5 Feb 2011 14:29:47 +0000 Subject: [PATCH] Speed up AAC-HE SBR by 2% on S5L8701. Use MEM_ALIGN on critical arrays and avoid stalls in asm code. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@29209 a1c6a512-1295-4272-9138-f99709370657 --- apps/codecs/libfaad/sbr_dct.c | 6 ++--- apps/codecs/libfaad/sbr_dec.c | 4 ++-- apps/codecs/libfaad/sbr_qmf.c | 56 +++++++++++++++++++++---------------------- 3 files changed, 33 insertions(+), 33 deletions(-) diff --git a/apps/codecs/libfaad/sbr_dct.c b/apps/codecs/libfaad/sbr_dct.c index f22a24a18..da0e5e1f5 100644 --- a/apps/codecs/libfaad/sbr_dct.c +++ b/apps/codecs/libfaad/sbr_dct.c @@ -1453,7 +1453,7 @@ void DCT2_32_unscaled(real_t *y, real_t *x) #else /* #ifdef SBR_LOW_POWER */ /* table for pre-processing within dct4_kernel() */ -static const real_t dct4_pre_tab[] ICONST_ATTR = { +static const real_t dct4_pre_tab[] ICONST_ATTR MEM_ALIGN_ATTR = { COEF_CONST(0.999924719333649), COEF_CONST(-1.01219630241394), COEF_CONST(-0.987653195858002), COEF_CONST(0.998118102550507), COEF_CONST(-1.05943882465363), COEF_CONST(-0.936797380447388), COEF_CONST(0.993906974792480), COEF_CONST(-1.10412919521332), COEF_CONST(-0.883684754371643), @@ -1489,7 +1489,7 @@ static const real_t dct4_pre_tab[] ICONST_ATTR = { }; /* table for post-processing within dct4_kernel() */ -static const real_t dct4_post_tab[] ICONST_ATTR = { +static const real_t dct4_post_tab[] ICONST_ATTR MEM_ALIGN_ATTR = { COEF_CONST(1 ), COEF_CONST(-1 ), COEF_CONST(-1 ), COEF_CONST(0.998795449733734), COEF_CONST(-1.04786312580109), COEF_CONST(-0.949727773666382), COEF_CONST(0.995184719562531), COEF_CONST(-1.09320187568665), COEF_CONST(-0.897167563438416), @@ -1525,7 +1525,7 @@ static const real_t dct4_post_tab[] ICONST_ATTR = { }; // Table adapted from codeclib to fit into IRAM -const uint32_t dct4_revtab[32] ICONST_ATTR = { +const uint32_t dct4_revtab[32] ICONST_ATTR MEM_ALIGN_ATTR = { 0, 24, 12, 22, 6, 30, 11, 19, 3, 27, 15, 21, 5, 29, 9, 17, 1, 25, 13, 23, 7, 31, 10, 18, 2, 26, 14, 20, 4, 28, 8, 16}; diff --git a/apps/codecs/libfaad/sbr_dec.c b/apps/codecs/libfaad/sbr_dec.c index 60bb2a6bd..78c9c79d5 100644 --- a/apps/codecs/libfaad/sbr_dec.c +++ b/apps/codecs/libfaad/sbr_dec.c @@ -521,8 +521,8 @@ uint8_t sbrDecodeSingleFrame(sbr_info *sbr, real_t *channel, return 0; } -ALIGN qmf_t X_left[MAX_NTSRHFG][64];// = {{0}}; -ALIGN qmf_t X_right[MAX_NTSRHFG][64];// = {{0}}; /* must set this to 0 */ +qmf_t X_left [MAX_NTSRHFG][64] MEM_ALIGN_ATTR;// = {{0}}; +qmf_t X_right[MAX_NTSRHFG][64] MEM_ALIGN_ATTR;// = {{0}}; /* must set this to 0 */ #if (defined(PS_DEC) || defined(DRM_PS)) uint8_t sbrDecodeSingleFramePS(sbr_info *sbr, real_t *left_channel, real_t *right_channel, diff --git a/apps/codecs/libfaad/sbr_qmf.c b/apps/codecs/libfaad/sbr_qmf.c index f94c3072a..bb6e176a3 100644 --- a/apps/codecs/libfaad/sbr_qmf.c +++ b/apps/codecs/libfaad/sbr_qmf.c @@ -78,12 +78,12 @@ void qmfa_end(qmfa_info *qmfa) void sbr_qmf_analysis_32(sbr_info *sbr, qmfa_info *qmfa, const real_t *input, qmf_t X[MAX_NTSRHFG][64], uint8_t offset, uint8_t kx) { - ALIGN real_t u[64]; + real_t u[64] MEM_ALIGN_ATTR; #ifndef SBR_LOW_POWER - ALIGN real_t real[32]; - ALIGN real_t imag[32]; + real_t real[32] MEM_ALIGN_ATTR; + real_t imag[32] MEM_ALIGN_ATTR; #else - ALIGN real_t y[32]; + real_t y[32] MEM_ALIGN_ATTR; #endif qmf_t *pX; uint32_t in = 0; @@ -227,8 +227,8 @@ void qmfs_end(qmfs_info *qmfs) void sbr_qmf_synthesis_32(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64], real_t *output) { - ALIGN real_t x[16]; - ALIGN real_t y[16]; + real_t x[16] MEM_ALIGN_ATTR; + real_t y[16] MEM_ALIGN_ATTR; int16_t n, k, out = 0; uint8_t l; @@ -291,8 +291,8 @@ void sbr_qmf_synthesis_32(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][6 void sbr_qmf_synthesis_64(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64], real_t *output) { - ALIGN real_t x[64]; - ALIGN real_t y[64]; + real_t x[64] MEM_ALIGN_ATTR; + real_t y[64] MEM_ALIGN_ATTR; int16_t n, k, out = 0; uint8_t l; @@ -401,8 +401,8 @@ static const complex_t qmf32_pre_twiddle[] = void sbr_qmf_synthesis_32(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64], real_t *output) { - ALIGN real_t x1[32]; - ALIGN real_t x2[32]; + real_t x1[32] MEM_ALIGN_ATTR; + real_t x2[32] MEM_ALIGN_ATTR; int32_t n, k, idx0, idx1, out = 0; uint32_t l; @@ -464,10 +464,10 @@ void sbr_qmf_synthesis_32(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][6 void sbr_qmf_synthesis_64(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64], real_t *output) { - ALIGN real_t real1[32]; - ALIGN real_t imag1[32]; - ALIGN real_t real2[32]; - ALIGN real_t imag2[32]; + real_t real1[32] MEM_ALIGN_ATTR; + real_t imag1[32] MEM_ALIGN_ATTR; + real_t real2[32] MEM_ALIGN_ATTR; + real_t imag2[32] MEM_ALIGN_ATTR; qmf_t *pX; real_t *p_buf_1, *p_buf_3; int32_t n, k, idx0, idx1, out = 0; @@ -517,36 +517,36 @@ void sbr_qmf_synthesis_64(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][6 asm volatile ( "ldmia %[qtab]!, { r0-r3 } \n\t" "ldr r4, [%[pbuf]] \n\t" + "ldr r7, [%[pbuf], #192*4] \n\t" "smull r5, r6, r4, r0 \n\t" - "ldr r4, [%[pbuf], #192*4] \n\t" - "smlal r5, r6, r4, r1 \n\t" "ldr r4, [%[pbuf], #256*4] \n\t" + "smlal r5, r6, r7, r1 \n\t" + "ldr r7, [%[pbuf], #448*4] \n\t" "smlal r5, r6, r4, r2 \n\t" - "ldr r4, [%[pbuf], #448*4] \n\t" - "smlal r5, r6, r4, r3 \n\t" + "ldr r4, [%[pbuf], #512*4] \n\t" + "smlal r5, r6, r7, r3 \n\t" "ldmia %[qtab]!, { r0-r3 } \n\t" - "ldr r4, [%[pbuf], #512*4] \n\t" + "ldr r7, [%[pbuf], #704*4] \n\t" "smlal r5, r6, r4, r0 \n\t" - "ldr r4, [%[pbuf], #704*4] \n\t" - "smlal r5, r6, r4, r1 \n\t" "ldr r4, [%[pbuf], #768*4] \n\t" + "smlal r5, r6, r7, r1 \n\t" + "ldr r7, [%[pbuf], #960*4] \n\t" "smlal r5, r6, r4, r2 \n\t" - "ldr r4, [%[pbuf], #960*4] \n\t" - "smlal r5, r6, r4, r3 \n\t" + "mov r2, #1024*4 \n\t" "ldmia %[qtab]!, { r0-r1 } \n\t" - "mov r2, #1024*4 \n\t" "ldr r4, [%[pbuf], r2] \n\t" - "smlal r5, r6, r4, r0 \n\t" + "smlal r5, r6, r7, r3 \n\t" "mov r2, #1216*4 \n\t" - "ldr r4, [%[pbuf], r2] \n\t" - "smlal r5, r6, r4, r1 \n\t" + "ldr r7, [%[pbuf], r2] \n\t" + "smlal r5, r6, r4, r0 \n\t" + "smlal r5, r6, r7, r1 \n\t" "str r6, [%[pout]] \n" : [qtab] "+r" (qtab) : [pbuf] "r" (pbuf), [pout] "r" (pout) - : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "memory"); + : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "memory"); } #elif defined CPU_COLDFIRE const real_t *qtab = qmf_c; -- 2.11.4.GIT