From 293b967ffa7129ea89683e8a387de956f35df341 Mon Sep 17 00:00:00 2001 From: Buschel Date: Sun, 11 Jul 2010 14:40:05 +0000 Subject: [PATCH] Further changes to aac-he decoding. Refactor and rearrange dct tabs to be able to use consecutive addresses (this allows additional optimization for ARM through ldm-usage). Define a macro for identical butterfly operations and exchange 2 butterflies with less complex operations. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27384 a1c6a512-1295-4272-9138-f99709370657 --- apps/codecs/libfaad/sbr_dct.c | 222 ++++++++++++++++++------------------------ 1 file changed, 94 insertions(+), 128 deletions(-) diff --git a/apps/codecs/libfaad/sbr_dct.c b/apps/codecs/libfaad/sbr_dct.c index 123514f22..f47174581 100644 --- a/apps/codecs/libfaad/sbr_dct.c +++ b/apps/codecs/libfaad/sbr_dct.c @@ -1452,103 +1452,76 @@ void DCT2_32_unscaled(real_t *y, real_t *x) #else /* #ifdef SBR_LOW_POWER */ -static const real_t dct4_64_tab[] ICONST_ATTR = { - COEF_CONST(0.999924719333649), COEF_CONST(0.998118102550507), - COEF_CONST(0.993906974792480), COEF_CONST(0.987301409244537), - COEF_CONST(0.978317379951477), COEF_CONST(0.966976463794708), - COEF_CONST(0.953306019306183), COEF_CONST(0.937339007854462), - COEF_CONST(0.919113874435425), COEF_CONST(0.898674488067627), - COEF_CONST(0.876070082187653), COEF_CONST(0.851355195045471), - COEF_CONST(0.824589252471924), COEF_CONST(0.795836925506592), - COEF_CONST(0.765167236328125), COEF_CONST(0.732654273509979), - COEF_CONST(0.698376238346100), COEF_CONST(0.662415742874146), - COEF_CONST(0.624859452247620), COEF_CONST(0.585797846317291), - COEF_CONST(0.545324981212616), COEF_CONST(0.503538429737091), - COEF_CONST(0.460538715124130), COEF_CONST(0.416429549455643), - COEF_CONST(0.371317148208618), COEF_CONST(0.325310230255127), - COEF_CONST(0.278519600629807), COEF_CONST(0.231058135628700), - COEF_CONST(0.183039888739586), COEF_CONST(0.134580686688423), - COEF_CONST(0.085797272622585),
COEF_CONST(0.036807164549828), - COEF_CONST(-1.012196302413940), COEF_CONST(-1.059438824653626), - COEF_CONST(-1.104129195213318), COEF_CONST(-1.146159529685974), - COEF_CONST(-1.185428738594055), COEF_CONST(-1.221842169761658), - COEF_CONST(-1.255311965942383), COEF_CONST(-1.285757660865784), - COEF_CONST(-1.313105940818787), COEF_CONST(-1.337290763854981), - COEF_CONST(-1.358253836631775), COEF_CONST(-1.375944852828980), - COEF_CONST(-1.390321016311646), COEF_CONST(-1.401347875595093), - COEF_CONST(-1.408998727798462), COEF_CONST(-1.413255214691162), - COEF_CONST(-1.414107084274292), COEF_CONST(-1.411552190780640), - COEF_CONST(-1.405596733093262), COEF_CONST(-1.396255016326904), - COEF_CONST(-1.383549690246582), COEF_CONST(-1.367511272430420), - COEF_CONST(-1.348178386688232), COEF_CONST(-1.325597524642944), - COEF_CONST(-1.299823284149170), COEF_CONST(-1.270917654037476), - COEF_CONST(-1.238950133323669), COEF_CONST(-1.203998088836670), - COEF_CONST(-1.166145324707031), COEF_CONST(-1.125483393669128), - COEF_CONST(-1.082109928131104), COEF_CONST(-1.036129593849182), - COEF_CONST(-0.987653195858002), COEF_CONST(-0.936797380447388), - COEF_CONST(-0.883684754371643), COEF_CONST(-0.828443288803101), - COEF_CONST(-0.771206021308899), COEF_CONST(-0.712110757827759), - COEF_CONST(-0.651300072669983), COEF_CONST(-0.588920354843140), - COEF_CONST(-0.525121808052063), COEF_CONST(-0.460058242082596), - COEF_CONST(-0.393886327743530), COEF_CONST(-0.326765477657318), - COEF_CONST(-0.258857429027557), COEF_CONST(-0.190325915813446), - COEF_CONST(-0.121335685253143), COEF_CONST(-0.052053272724152), - COEF_CONST(0.017354607582092), COEF_CONST(0.086720645427704), - COEF_CONST(0.155877828598022), COEF_CONST(0.224659323692322), - COEF_CONST(0.292899727821350), COEF_CONST(0.360434412956238), - COEF_CONST(0.427100926637650), COEF_CONST(0.492738455533981), - COEF_CONST(0.557188928127289), COEF_CONST(0.620297133922577), - COEF_CONST(0.681910991668701), COEF_CONST(0.741881847381592), 
- COEF_CONST(0.800065577030182), COEF_CONST(0.856321990489960), - COEF_CONST(0.910515367984772), COEF_CONST(0.962515234947205), - COEF_CONST(1.000000000000000), COEF_CONST(0.998795449733734), - COEF_CONST(0.995184719562531), COEF_CONST(0.989176511764526), - COEF_CONST(0.980785250663757), COEF_CONST(0.970031261444092), - COEF_CONST(0.956940352916718), COEF_CONST(0.941544055938721), - COEF_CONST(0.923879504203796), COEF_CONST(0.903989315032959), - COEF_CONST(0.881921231746674), COEF_CONST(0.857728600502014), - COEF_CONST(0.831469595432281), COEF_CONST(0.803207516670227), - COEF_CONST(0.773010432720184), COEF_CONST(0.740951120853424), - COEF_CONST(0.707106769084930), COEF_CONST(0.671558916568756), - COEF_CONST(0.634393274784088), COEF_CONST(0.595699310302734), - COEF_CONST(0.555570185184479), COEF_CONST(0.514102697372437), - COEF_CONST(0.471396654844284), COEF_CONST(0.427555114030838), - COEF_CONST(0.382683426141739), COEF_CONST(0.336889833211899), - COEF_CONST(0.290284633636475), COEF_CONST(0.242980122566223), - COEF_CONST(0.195090234279633), COEF_CONST(0.146730497479439), - COEF_CONST(0.098017133772373), COEF_CONST(0.049067649990320), - COEF_CONST(-1.000000000000000), COEF_CONST(-1.047863125801086), - COEF_CONST(-1.093201875686646), COEF_CONST(-1.135906934738159), - COEF_CONST(-1.175875544548035), COEF_CONST(-1.213011503219605), - COEF_CONST(-1.247225046157837), COEF_CONST(-1.278433918952942), - COEF_CONST(-1.306562900543213), COEF_CONST(-1.331544399261475), - COEF_CONST(-1.353317975997925), COEF_CONST(-1.371831417083740), - COEF_CONST(-1.387039899826050), COEF_CONST(-1.398906826972961), - COEF_CONST(-1.407403707504273), COEF_CONST(-1.412510156631470), - COEF_CONST(0), COEF_CONST(-1.412510156631470), - COEF_CONST(-1.407403707504273), COEF_CONST(-1.398906826972961), - COEF_CONST(-1.387039899826050), COEF_CONST(-1.371831417083740), - COEF_CONST(-1.353317975997925), COEF_CONST(-1.331544399261475), - COEF_CONST(-1.306562900543213), COEF_CONST(-1.278433918952942), - 
COEF_CONST(-1.247225046157837), COEF_CONST(-1.213011384010315), - COEF_CONST(-1.175875544548035), COEF_CONST(-1.135907053947449), - COEF_CONST(-1.093201875686646), COEF_CONST(-1.047863125801086), - COEF_CONST(-1.000000000000000), COEF_CONST(-0.949727773666382), - COEF_CONST(-0.897167563438416), COEF_CONST(-0.842446029186249), - COEF_CONST(-0.785694956779480), COEF_CONST(-0.727051079273224), - COEF_CONST(-0.666655659675598), COEF_CONST(-0.604654192924500), - COEF_CONST(-0.541196048259735), COEF_CONST(-0.476434230804443), - COEF_CONST(-0.410524487495422), COEF_CONST(-0.343625843524933), - COEF_CONST(-0.275899350643158), COEF_CONST(-0.207508206367493), - COEF_CONST(-0.138617098331451), COEF_CONST(-0.069392144680023), - COEF_CONST(0), COEF_CONST(0.069392263889313), - COEF_CONST(0.138617157936096), COEF_CONST(0.207508206367493), - COEF_CONST(0.275899469852448), COEF_CONST(0.343625962734222), - COEF_CONST(0.410524636507034), COEF_CONST(0.476434201002121), - COEF_CONST(0.541196107864380), COEF_CONST(0.604654192924500), - COEF_CONST(0.666655719280243), COEF_CONST(0.727051138877869), - COEF_CONST(0.785695075988770), COEF_CONST(0.842446029186249), - COEF_CONST(0.897167563438416), COEF_CONST(0.949727773666382) +/* table for pre-processing within dct4_kernel() */ +static const real_t dct4_pre_tab[] ICONST_ATTR = { + COEF_CONST(0.999924719333649), COEF_CONST(-1.01219630241394), COEF_CONST(-0.987653195858002), + COEF_CONST(0.998118102550507), COEF_CONST(-1.05943882465363), COEF_CONST(-0.936797380447388), + COEF_CONST(0.993906974792480), COEF_CONST(-1.10412919521332), COEF_CONST(-0.883684754371643), + COEF_CONST(0.987301409244537), COEF_CONST(-1.14615952968597), COEF_CONST(-0.828443288803101), + COEF_CONST(0.978317379951477), COEF_CONST(-1.18542873859405), COEF_CONST(-0.771206021308899), + COEF_CONST(0.966976463794708), COEF_CONST(-1.22184216976166), COEF_CONST(-0.712110757827759), + COEF_CONST(0.953306019306183), COEF_CONST(-1.25531196594238), COEF_CONST(-0.651300072669983), + 
COEF_CONST(0.937339007854462), COEF_CONST(-1.28575766086578), COEF_CONST(-0.588920354843140), + COEF_CONST(0.919113874435425), COEF_CONST(-1.31310594081879), COEF_CONST(-0.525121808052063), + COEF_CONST(0.898674488067627), COEF_CONST(-1.33729076385498), COEF_CONST(-0.460058242082596), + COEF_CONST(0.876070082187653), COEF_CONST(-1.35825383663177), COEF_CONST(-0.393886327743530), + COEF_CONST(0.851355195045471), COEF_CONST(-1.37594485282898), COEF_CONST(-0.326765477657318), + COEF_CONST(0.824589252471924), COEF_CONST(-1.39032101631165), COEF_CONST(-0.258857429027557), + COEF_CONST(0.795836925506592), COEF_CONST(-1.40134787559509), COEF_CONST(-0.190325915813446), + COEF_CONST(0.765167236328125), COEF_CONST(-1.40899872779846), COEF_CONST(-0.121335685253143), + COEF_CONST(0.732654273509979), COEF_CONST(-1.41325521469116), COEF_CONST(-0.052053272724152), + COEF_CONST(0.698376238346100), COEF_CONST(-1.41410708427429), COEF_CONST( 0.017354607582092), + COEF_CONST(0.662415742874146), COEF_CONST(-1.41155219078064), COEF_CONST( 0.086720645427704), + COEF_CONST(0.624859452247620), COEF_CONST(-1.40559673309326), COEF_CONST( 0.155877828598022), + COEF_CONST(0.585797846317291), COEF_CONST(-1.39625501632690), COEF_CONST( 0.224659323692322), + COEF_CONST(0.545324981212616), COEF_CONST(-1.38354969024658), COEF_CONST( 0.292899727821350), + COEF_CONST(0.503538429737091), COEF_CONST(-1.36751127243042), COEF_CONST( 0.360434412956238), + COEF_CONST(0.460538715124130), COEF_CONST(-1.34817838668823), COEF_CONST( 0.427100926637650), + COEF_CONST(0.416429549455643), COEF_CONST(-1.32559752464294), COEF_CONST( 0.492738455533981), + COEF_CONST(0.371317148208618), COEF_CONST(-1.29982328414917), COEF_CONST( 0.557188928127289), + COEF_CONST(0.325310230255127), COEF_CONST(-1.27091765403748), COEF_CONST( 0.620297133922577), + COEF_CONST(0.278519600629807), COEF_CONST(-1.23895013332367), COEF_CONST( 0.681910991668701), + COEF_CONST(0.231058135628700), COEF_CONST(-1.20399808883667), COEF_CONST( 
0.741881847381592), + COEF_CONST(0.183039888739586), COEF_CONST(-1.16614532470703), COEF_CONST( 0.800065577030182), + COEF_CONST(0.134580686688423), COEF_CONST(-1.12548339366913), COEF_CONST( 0.856321990489960), + COEF_CONST(0.085797272622585), COEF_CONST(-1.08210992813110), COEF_CONST( 0.910515367984772), + COEF_CONST(0.036807164549828), COEF_CONST(-1.03612959384918), COEF_CONST( 0.962515234947205) +}; + +/* table for post-processing within dct4_kernel() */ +static const real_t dct4_post_tab[] ICONST_ATTR = { + COEF_CONST(1 ), COEF_CONST(-1 ), COEF_CONST(-1 ), + COEF_CONST(0.998795449733734), COEF_CONST(-1.04786312580109), COEF_CONST(-0.949727773666382), + COEF_CONST(0.995184719562531), COEF_CONST(-1.09320187568665), COEF_CONST(-0.897167563438416), + COEF_CONST(0.989176511764526), COEF_CONST(-1.13590693473816), COEF_CONST(-0.842446029186249), + COEF_CONST(0.980785250663757), COEF_CONST(-1.17587554454803), COEF_CONST(-0.785694956779480), + COEF_CONST(0.970031261444092), COEF_CONST(-1.21301150321960), COEF_CONST(-0.727051079273224), + COEF_CONST(0.956940352916718), COEF_CONST(-1.24722504615784), COEF_CONST(-0.666655659675598), + COEF_CONST(0.941544055938721), COEF_CONST(-1.27843391895294), COEF_CONST(-0.604654192924500), + COEF_CONST(0.923879504203796), COEF_CONST(-1.30656290054321), COEF_CONST(-0.541196048259735), + COEF_CONST(0.903989315032959), COEF_CONST(-1.33154439926148), COEF_CONST(-0.476434230804443), + COEF_CONST(0.881921231746674), COEF_CONST(-1.35331797599793), COEF_CONST(-0.410524487495422), + COEF_CONST(0.857728600502014), COEF_CONST(-1.37183141708374), COEF_CONST(-0.343625843524933), + COEF_CONST(0.831469595432281), COEF_CONST(-1.38703989982605), COEF_CONST(-0.275899350643158), + COEF_CONST(0.803207516670227), COEF_CONST(-1.39890682697296), COEF_CONST(-0.207508206367493), + COEF_CONST(0.773010432720184), COEF_CONST(-1.40740370750427), COEF_CONST(-0.138617098331451), + COEF_CONST(0.740951120853424), COEF_CONST(-1.41251015663147), 
COEF_CONST(-0.069392144680023), + COEF_CONST(0.707106769084930), COEF_CONST( 0 ), COEF_CONST( 0 ), + COEF_CONST(0.671558916568756), COEF_CONST(-1.41251015663147), COEF_CONST( 0.069392263889313), + COEF_CONST(0.634393274784088), COEF_CONST(-1.40740370750427), COEF_CONST( 0.138617157936096), + COEF_CONST(0.595699310302734), COEF_CONST(-1.39890682697296), COEF_CONST( 0.207508206367493), + COEF_CONST(0.555570185184479), COEF_CONST(-1.38703989982605), COEF_CONST( 0.275899469852448), + COEF_CONST(0.514102697372437), COEF_CONST(-1.37183141708374), COEF_CONST( 0.343625962734222), + COEF_CONST(0.471396654844284), COEF_CONST(-1.35331797599793), COEF_CONST( 0.410524636507034), + COEF_CONST(0.427555114030838), COEF_CONST(-1.33154439926148), COEF_CONST( 0.476434201002121), + COEF_CONST(0.382683426141739), COEF_CONST(-1.30656290054321), COEF_CONST( 0.541196107864380), + COEF_CONST(0.336889833211899), COEF_CONST(-1.27843391895294), COEF_CONST( 0.604654192924500), + COEF_CONST(0.290284633636475), COEF_CONST(-1.24722504615784), COEF_CONST( 0.666655719280243), + COEF_CONST(0.242980122566223), COEF_CONST(-1.21301138401031), COEF_CONST( 0.727051138877869), + COEF_CONST(0.195090234279633), COEF_CONST(-1.17587554454803), COEF_CONST( 0.785695075988770), + COEF_CONST(0.146730497479439), COEF_CONST(-1.13590705394745), COEF_CONST( 0.842446029186249), + COEF_CONST(0.098017133772373), COEF_CONST(-1.09320187568665), COEF_CONST( 0.897167563438416), + COEF_CONST(0.049067649990320), COEF_CONST(-1.04786312580109), COEF_CONST( 0.949727773666382) }; // Table adapted from codeclib to fit into IRAM @@ -1556,59 +1529,52 @@ const uint32_t dct4_revtab[32] ICONST_ATTR = { 0, 24, 12, 22, 6, 30, 11, 19, 3, 27, 15, 21, 5, 29, 9, 17, 1, 25, 13, 23, 7, 31, 10, 18, 2, 26, 14, 20, 4, 28, 8, 16}; +// Butterfly used in dct4_kernel()'s pre- and post-processing +#define BUTTERFLY_DCT4(out1, out2, real_part, imag_part, tab, tabidx) \ + x_re = real_part; \ + x_im = imag_part; \ + tmp = MUL_C(x_re + x_im,
tab[tabidx++]); \ + out1 = MUL_C(x_re , tab[tabidx++]) + tmp; \ + out2 = MUL_C(x_im , tab[tabidx++]) + tmp; \ + /* size 64 only! */ void dct4_kernel(real_t *real, real_t *imag) { - uint32_t i, idx; + uint32_t i, idx, tabidx; real_t x_re, x_im, tmp; FFTComplex xc[32]; /* used for calling codeclib's fft implementation */ /* Step 2: modulate and pre-rotate for codeclib's fft implementation */ // 3*32=96 multiplications // 3*32=96 additions - for (i = 0; i < 32; i++) + for (i = 0, tabidx = 0; i < 32; i++) { - idx = dct4_revtab[i]; - x_re = real[i]; - x_im = imag[i]; - tmp = MUL_C(x_re + x_im, dct4_64_tab[i ]); - xc[idx].re = MUL_C(x_im , dct4_64_tab[i + 64]) + tmp; - xc[idx].im = MUL_C(x_re , dct4_64_tab[i + 32]) + tmp; + idx = dct4_revtab[i]; + BUTTERFLY_DCT4(xc[idx].im, xc[idx].re, real[i], imag[i], dct4_pre_tab, tabidx); } /* Step 3: FFT (codeclib's implementation) */ ff_fft_calc_c(5, xc); /* Step 4: modulate + reordering */ - // 3*31+2=95 multiplications - // 3*31+2=95 additions - x_re = xc[0].re; - x_im = xc[0].im; - tmp = MUL_C(x_re + x_im, dct4_64_tab[0 + 3*32]); - real[0] = MUL_C(x_im , dct4_64_tab[0 + 5*32]) + tmp; - imag[0] = MUL_C(x_re , dct4_64_tab[0 + 4*32]) + tmp; - for (i = 1; i < 16; i++) + // 3*30+2=92 multiplications + // 3*30+2=92 additions + imag[0] = xc[0].im; + real[0] = xc[0].re; + for (i = 1, tabidx = 3*1; i < 16; i++) { - idx = 32-i; - x_re = xc[idx].re; - x_im = xc[idx].im; - tmp = MUL_C(x_re + x_im, dct4_64_tab[i + 3*32]); - real[i] = MUL_C(x_im , dct4_64_tab[i + 5*32]) + tmp; - imag[i] = MUL_C(x_re , dct4_64_tab[i + 4*32]) + tmp; + idx = 32-i; + BUTTERFLY_DCT4(imag[i], real[i], xc[idx].re, xc[idx].im, dct4_post_tab, tabidx); } // i = 16, idx = 16 = reorder_tab[16]; x_re = xc[16].re; x_im = xc[16].im; - imag[16] = MUL_C(x_im - x_re, dct4_64_tab[16 + 3*32]); - real[16] = MUL_C(x_re + x_im, dct4_64_tab[16 + 3*32]); - for (i = 17; i < 32; i++) + imag[16] = MUL_C(x_im - x_re, COEF_CONST(0.707106769084930)); + real[16] = MUL_C(x_re + x_im, 
COEF_CONST(0.707106769084930)); + for (i = 17, tabidx = 3*17; i < 32; i++) { - idx = 32-i; - x_re = xc[idx].re; - x_im = xc[idx].im; - tmp = MUL_C(x_re + x_im, dct4_64_tab[i + 3*32]); - real[i] = MUL_C(x_im , dct4_64_tab[i + 5*32]) + tmp; - imag[i] = MUL_C(x_re , dct4_64_tab[i + 4*32]) + tmp; + idx = 32-i; + BUTTERFLY_DCT4(imag[i], real[i], xc[idx].re, xc[idx].im, dct4_post_tab, tabidx); } } -- 2.11.4.GIT