apps/codecs/libmusepack/synth_filter.c
/*
  Copyright (c) 2005, The Musepack Development Team
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are
  met:

  * Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.

  * Redistributions in binary form must reproduce the above
    copyright notice, this list of conditions and the following
    disclaimer in the documentation and/or other materials provided
    with the distribution.

  * Neither the name of the The Musepack Development Team nor the
    names of its contributors may be used to endorse or promote
    products derived from this software without specific prior
    written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/// \file synth_filter.c
/// Synthesis functions.
/// \todo document me

#include "musepack.h"
#include "internal.h"
/* C O N S T A N T S */
#undef _

#if defined(MPC_FIXED_POINT)
   #if defined(OPTIMIZE_FOR_SPEED)
      // round to +/- 2^14 as pre-shift before 32=32x32-multiply
      #define _(value)  (MPC_SHR_RND(value, 3))

      // round to +/- 2^17 as pre-shift before 32=32x32-multiply
      #define MPC_V_PRESHIFT(X) MPC_SHR_RND(X, 14)
   #else
      // saturate to +/- 2^31 (= value << (31-17)), D-values are +/- 2^17
      #define _(value)  (value << (14))

      // do not perform pre-shift
      #define MPC_V_PRESHIFT(X) (X)
   #endif
#else
   // IMPORTANT: the internal scaling is different for floating point, therefore the
   // Di_opt coefficients are scaled here so that the output ends up correctly scaled.
   #define _(value)  MAKE_MPC_SAMPLE((double)value*(double)(0x1000))

   // do not perform pre-shift
   #define MPC_V_PRESHIFT(X) (X)
#endif
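
// Note: Di_opt below is the 512-entry synthesis window (the "D" coefficients of the
// ISO 11172-3 style polyphase filterbank), stored as 32 rows of 16 taps. Each raw
// integer is wrapped in _() so that the same table serves both the fixed-point and
// the floating-point build.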
// Di_opt coefficients are +/- 2^17
static const MPC_SAMPLE_FORMAT Di_opt [512] ICONST_ATTR = {
/*        0       1       2        3        4        5        6         7        8        9       10       11      12      13     14    15  */
/*  0 */ _( 0), _( -29), _( 213), _( -459), _( 2037), _(-5153), _( 6574), _(-37489), _(75038), _(37489), _(6574), _( 5153), _(2037), _( 459), _(213), _(29),
/*  1 */ _( -1), _( -31), _( 218), _( -519), _( 2000), _(-5517), _( 5959), _(-39336), _(74992), _(35640), _(7134), _( 4788), _(2063), _( 401), _(208), _(26),
/*  2 */ _( -1), _( -35), _( 222), _( -581), _( 1952), _(-5879), _( 5288), _(-41176), _(74856), _(33791), _(7640), _( 4425), _(2080), _( 347), _(202), _(24),
/*  3 */ _( -1), _( -38), _( 225), _( -645), _( 1893), _(-6237), _( 4561), _(-43006), _(74630), _(31947), _(8092), _( 4063), _(2087), _( 294), _(196), _(21),
/*  4 */ _( -1), _( -41), _( 227), _( -711), _( 1822), _(-6589), _( 3776), _(-44821), _(74313), _(30112), _(8492), _( 3705), _(2085), _( 244), _(190), _(19),
/*  5 */ _( -1), _( -45), _( 228), _( -779), _( 1739), _(-6935), _( 2935), _(-46617), _(73908), _(28289), _(8840), _( 3351), _(2075), _( 197), _(183), _(17),
/*  6 */ _( -1), _( -49), _( 228), _( -848), _( 1644), _(-7271), _( 2037), _(-48390), _(73415), _(26482), _(9139), _( 3004), _(2057), _( 153), _(176), _(16),
/*  7 */ _( -2), _( -53), _( 227), _( -919), _( 1535), _(-7597), _( 1082), _(-50137), _(72835), _(24694), _(9389), _( 2663), _(2032), _( 111), _(169), _(14),
/*  8 */ _( -2), _( -58), _( 224), _( -991), _( 1414), _(-7910), _( 70), _(-51853), _(72169), _(22929), _(9592), _( 2330), _(2001), _( 72), _(161), _(13),
/*  9 */ _( -2), _( -63), _( 221), _(-1064), _( 1280), _(-8209), _( -998), _(-53534), _(71420), _(21189), _(9750), _( 2006), _(1962), _( 36), _(154), _(11),
/* 10 */ _( -2), _( -68), _( 215), _(-1137), _( 1131), _(-8491), _( -2122), _(-55178), _(70590), _(19478), _(9863), _( 1692), _(1919), _( 2), _(147), _(10),
/* 11 */ _( -3), _( -73), _( 208), _(-1210), _( 970), _(-8755), _( -3300), _(-56778), _(69679), _(17799), _(9935), _( 1388), _(1870), _( -29), _(139), _( 9),
/* 12 */ _( -3), _( -79), _( 200), _(-1283), _( 794), _(-8998), _( -4533), _(-58333), _(68692), _(16155), _(9966), _( 1095), _(1817), _( -57), _(132), _( 8),
/* 13 */ _( -4), _( -85), _( 189), _(-1356), _( 605), _(-9219), _( -5818), _(-59838), _(67629), _(14548), _(9959), _( 814), _(1759), _( -83), _(125), _( 7),
/* 14 */ _( -4), _( -91), _( 177), _(-1428), _( 402), _(-9416), _( -7154), _(-61289), _(66494), _(12980), _(9916), _( 545), _(1698), _(-106), _(117), _( 7),
/* 15 */ _( -5), _( -97), _( 163), _(-1498), _( 185), _(-9585), _( -8540), _(-62684), _(65290), _(11455), _(9838), _( 288), _(1634), _(-127), _(111), _( 6),
/* 16 */ _( -5), _(-104), _( 146), _(-1567), _( -45), _(-9727), _( -9975), _(-64019), _(64019), _( 9975), _(9727), _( 45), _(1567), _(-146), _(104), _( 5),
/* 17 */ _( -6), _(-111), _( 127), _(-1634), _( -288), _(-9838), _(-11455), _(-65290), _(62684), _( 8540), _(9585), _( -185), _(1498), _(-163), _( 97), _( 5),
/* 18 */ _( -7), _(-117), _( 106), _(-1698), _( -545), _(-9916), _(-12980), _(-66494), _(61289), _( 7154), _(9416), _( -402), _(1428), _(-177), _( 91), _( 4),
/* 19 */ _( -7), _(-125), _( 83), _(-1759), _( -814), _(-9959), _(-14548), _(-67629), _(59838), _( 5818), _(9219), _( -605), _(1356), _(-189), _( 85), _( 4),
/* 20 */ _( -8), _(-132), _( 57), _(-1817), _(-1095), _(-9966), _(-16155), _(-68692), _(58333), _( 4533), _(8998), _( -794), _(1283), _(-200), _( 79), _( 3),
/* 21 */ _( -9), _(-139), _( 29), _(-1870), _(-1388), _(-9935), _(-17799), _(-69679), _(56778), _( 3300), _(8755), _( -970), _(1210), _(-208), _( 73), _( 3),
/* 22 */ _(-10), _(-147), _( -2), _(-1919), _(-1692), _(-9863), _(-19478), _(-70590), _(55178), _( 2122), _(8491), _(-1131), _(1137), _(-215), _( 68), _( 2),
/* 23 */ _(-11), _(-154), _( -36), _(-1962), _(-2006), _(-9750), _(-21189), _(-71420), _(53534), _( 998), _(8209), _(-1280), _(1064), _(-221), _( 63), _( 2),
/* 24 */ _(-13), _(-161), _( -72), _(-2001), _(-2330), _(-9592), _(-22929), _(-72169), _(51853), _( -70), _(7910), _(-1414), _( 991), _(-224), _( 58), _( 2),
/* 25 */ _(-14), _(-169), _(-111), _(-2032), _(-2663), _(-9389), _(-24694), _(-72835), _(50137), _(-1082), _(7597), _(-1535), _( 919), _(-227), _( 53), _( 2),
/* 26 */ _(-16), _(-176), _(-153), _(-2057), _(-3004), _(-9139), _(-26482), _(-73415), _(48390), _(-2037), _(7271), _(-1644), _( 848), _(-228), _( 49), _( 1),
/* 27 */ _(-17), _(-183), _(-197), _(-2075), _(-3351), _(-8840), _(-28289), _(-73908), _(46617), _(-2935), _(6935), _(-1739), _( 779), _(-228), _( 45), _( 1),
/* 28 */ _(-19), _(-190), _(-244), _(-2085), _(-3705), _(-8492), _(-30112), _(-74313), _(44821), _(-3776), _(6589), _(-1822), _( 711), _(-227), _( 41), _( 1),
/* 29 */ _(-21), _(-196), _(-294), _(-2087), _(-4063), _(-8092), _(-31947), _(-74630), _(43006), _(-4561), _(6237), _(-1893), _( 645), _(-225), _( 38), _( 1),
/* 30 */ _(-24), _(-202), _(-347), _(-2080), _(-4425), _(-7640), _(-33791), _(-74856), _(41176), _(-5288), _(5879), _(-1952), _( 581), _(-222), _( 35), _( 1),
/* 31 */ _(-26), _(-208), _(-401), _(-2063), _(-4788), _(-7134), _(-35640), _(-74992), _(39336), _(-5959), _(5517), _(-2000), _( 519), _(-218), _( 31), _( 1)
};

#undef _
// needed to prevent internal overflow in calculate_V
#define OVERFLOW_FIX 1
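
// Note: the *_PRESCALE/*_POSTSCALE macro pairs below drop OVERFLOW_FIX bit(s) from the
// products feeding the second half of the DCT and restore the scale factor in the final
// butterfly stage, so intermediate sums stay within the 32-bit range at the cost of one
// bit of precision.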
// V-coefficients were expanded (<<) by V_COEFFICIENT_EXPAND
#define V_COEFFICIENT_EXPAND 27

#if defined(MPC_FIXED_POINT)
   #if defined(OPTIMIZE_FOR_SPEED)
      // define 32=32x32 multiplication of DCT coefficients with samples; vcoef is pre-shifted on creation
      // samples are rounded to +/- 2^19 as pre-shift before the 32=32x32-multiply
      #define MPC_MULTIPLY_V(sample, vcoef)            ( MPC_SHR_RND(sample, 12) * vcoef )

      // pre- and postscale are used to avoid internal overflow in the synthesis calculation
      #define MPC_MULTIPLY_V_PRESCALE(sample, vcoef)   ( MPC_SHR_RND(sample, (12+OVERFLOW_FIX)) * vcoef )
      #define MPC_MULTIPLY_V_POSTSCALE(sample, vcoef)  ( MPC_SHR_RND(sample, (12-OVERFLOW_FIX)) * vcoef )
      #define MPC_V_POSTSCALE(sample)                  (sample<<OVERFLOW_FIX)

      // round to +/- 2^16 as pre-shift before the 32=32x32-multiply
      #define MPC_MAKE_INVCOS(value)                   (MPC_SHR_RND(value, 15))
   #else
      // define 64=32x32 multiplication of DCT coefficients with samples. Via MPC_MULTIPLY_FRACT,
      // highly optimized assembler may be used. MPC_MULTIPLY_FRACT does >>32 after the multiplication;
      // as the V coefficients were expanded by V_COEFFICIENT_EXPAND, this is corrected on the result.
      // This loses 5 bits of accuracy in the fractional part, with no effect on the final audio output.
      #define MPC_MULTIPLY_V(sample, vcoef)            ( (MPC_MULTIPLY_FRACT(sample, vcoef)) << (32-V_COEFFICIENT_EXPAND) )

      // pre- and postscale are used to avoid internal overflow in the synthesis calculation
      #define MPC_MULTIPLY_V_PRESCALE(sample, vcoef)   ( (MPC_MULTIPLY_FRACT(sample, vcoef)) << (32-V_COEFFICIENT_EXPAND-OVERFLOW_FIX) )
      #define MPC_MULTIPLY_V_POSTSCALE(sample, vcoef)  ( (MPC_MULTIPLY_FRACT(sample, vcoef)) << (32-V_COEFFICIENT_EXPAND+OVERFLOW_FIX) )
      #define MPC_V_POSTSCALE(sample)                  (sample<<OVERFLOW_FIX)

      // directly use accurate 32bit coefficients
      #define MPC_MAKE_INVCOS(value)                   (value)
   #endif
#else
   // for floating point use the standard multiplication macro
   #define MPC_MULTIPLY_V(sample, vcoef)               ( MPC_MULTIPLY(sample, vcoef) )
   #define MPC_MULTIPLY_V_PRESCALE(sample, vcoef)      ( MPC_MULTIPLY(sample, vcoef) )
   #define MPC_MULTIPLY_V_POSTSCALE(sample, vcoef)     ( MPC_MULTIPLY(sample, vcoef) )
   #define MPC_V_POSTSCALE(sample)                     (sample)

   // downscale the accurate 32bit coefficients and convert to float
   #define MPC_MAKE_INVCOS(value)                      MAKE_MPC_SAMPLE((double)value/(double)(1<<V_COEFFICIENT_EXPAND))
#endif
// define constants for DCT-synthesis
// INVCOSxx = (0.5 / cos(xx*PI/64)) << 27, <<27 to saturate to +/- 2^31
#define INVCOS01 MPC_MAKE_INVCOS(  67189797)
#define INVCOS02 MPC_MAKE_INVCOS(  67433575)
#define INVCOS03 MPC_MAKE_INVCOS(  67843164)
#define INVCOS04 MPC_MAKE_INVCOS(  68423604)
#define INVCOS05 MPC_MAKE_INVCOS(  69182167)
#define INVCOS06 MPC_MAKE_INVCOS(  70128577)
#define INVCOS07 MPC_MAKE_INVCOS(  71275330)
#define INVCOS08 MPC_MAKE_INVCOS(  72638111)
#define INVCOS09 MPC_MAKE_INVCOS(  74236348)
#define INVCOS10 MPC_MAKE_INVCOS(  76093940)
#define INVCOS11 MPC_MAKE_INVCOS(  78240207)
#define INVCOS12 MPC_MAKE_INVCOS(  80711144)
#define INVCOS13 MPC_MAKE_INVCOS(  83551089)
#define INVCOS14 MPC_MAKE_INVCOS(  86814950)
#define INVCOS15 MPC_MAKE_INVCOS(  90571242)
#define INVCOS16 MPC_MAKE_INVCOS(  94906266)
#define INVCOS17 MPC_MAKE_INVCOS(  99929967)
#define INVCOS18 MPC_MAKE_INVCOS( 105784321)
#define INVCOS19 MPC_MAKE_INVCOS( 112655602)
#define INVCOS20 MPC_MAKE_INVCOS( 120792764)
#define INVCOS21 MPC_MAKE_INVCOS( 130535899)
#define INVCOS22 MPC_MAKE_INVCOS( 142361749)
#define INVCOS23 MPC_MAKE_INVCOS( 156959571)
#define INVCOS24 MPC_MAKE_INVCOS( 175363913)
#define INVCOS25 MPC_MAKE_INVCOS( 199201203)
#define INVCOS26 MPC_MAKE_INVCOS( 231182936)
#define INVCOS27 MPC_MAKE_INVCOS( 276190692)
#define INVCOS28 MPC_MAKE_INVCOS( 343988688)
#define INVCOS29 MPC_MAKE_INVCOS( 457361460)
#define INVCOS30 MPC_MAKE_INVCOS( 684664578)
#define INVCOS31 MPC_MAKE_INVCOS(1367679739)
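
// Worked example of the table above: INVCOS16 corresponds to 0.5/cos(16*PI/64) = 0.5/cos(PI/4)
// = 0.70710678..., and 0.70710678 * 2^27 rounds to 94906266, the value used for INVCOS16.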

void
mpc_calculate_new_V ( const MPC_SAMPLE_FORMAT * Sample, MPC_SAMPLE_FORMAT * V )
ICODE_ATTR_MPC_LARGE_IRAM;
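
// Note on structure (added for readability): one call computes the 64 new V values for a
// block of 32 subband samples using Lee's fast DCT. The first half of the function works
// on the sums Sample[k] + Sample[31-k] and produces the even-indexed V entries; the second
// half works on the differences (with overflow pre-/post-scaling in fixed-point builds) and
// produces the odd-indexed entries. The remaining V slots are then filled in from the
// (anti)symmetry of the DCT outputs, with MPC_V_PRESHIFT applied in OPTIMIZE_FOR_SPEED builds.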
void
mpc_calculate_new_V ( const MPC_SAMPLE_FORMAT * Sample, MPC_SAMPLE_FORMAT * V )
{
    // Calculate new V-buffer values for one channel
    // (ISO 11172-3, p. 39), based upon the fast-MDCT algorithm by Byeong Gi Lee
    MPC_SAMPLE_FORMAT A[16];
    MPC_SAMPLE_FORMAT B[16];
    MPC_SAMPLE_FORMAT tmp;
    A[ 0] = Sample[ 0] + Sample[31];
    A[ 1] = Sample[ 1] + Sample[30];
    A[ 2] = Sample[ 2] + Sample[29];
    A[ 3] = Sample[ 3] + Sample[28];
    A[ 4] = Sample[ 4] + Sample[27];
    A[ 5] = Sample[ 5] + Sample[26];
    A[ 6] = Sample[ 6] + Sample[25];
    A[ 7] = Sample[ 7] + Sample[24];
    A[ 8] = Sample[ 8] + Sample[23];
    A[ 9] = Sample[ 9] + Sample[22];
    A[10] = Sample[10] + Sample[21];
    A[11] = Sample[11] + Sample[20];
    A[12] = Sample[12] + Sample[19];
    A[13] = Sample[13] + Sample[18];
    A[14] = Sample[14] + Sample[17];
    A[15] = Sample[15] + Sample[16];
    // 16 adds

    B[ 0] = A[ 0] + A[15];
    B[ 1] = A[ 1] + A[14];
    B[ 2] = A[ 2] + A[13];
    B[ 3] = A[ 3] + A[12];
    B[ 4] = A[ 4] + A[11];
    B[ 5] = A[ 5] + A[10];
    B[ 6] = A[ 6] + A[ 9];
    B[ 7] = A[ 7] + A[ 8];
    B[ 8] = MPC_MULTIPLY_V((A[ 0] - A[15]), INVCOS02);
    B[ 9] = MPC_MULTIPLY_V((A[ 1] - A[14]), INVCOS06);
    B[10] = MPC_MULTIPLY_V((A[ 2] - A[13]), INVCOS10);
    B[11] = MPC_MULTIPLY_V((A[ 3] - A[12]), INVCOS14);
    B[12] = MPC_MULTIPLY_V((A[ 4] - A[11]), INVCOS18);
    B[13] = MPC_MULTIPLY_V((A[ 5] - A[10]), INVCOS22);
    B[14] = MPC_MULTIPLY_V((A[ 6] - A[ 9]), INVCOS26);
    B[15] = MPC_MULTIPLY_V((A[ 7] - A[ 8]), INVCOS30);
    // 8 adds, 8 subs, 8 muls, 8 shifts

    A[ 0] = B[ 0] + B[ 7];
    A[ 1] = B[ 1] + B[ 6];
    A[ 2] = B[ 2] + B[ 5];
    A[ 3] = B[ 3] + B[ 4];
    A[ 4] = MPC_MULTIPLY_V((B[ 0] - B[ 7]), INVCOS04);
    A[ 5] = MPC_MULTIPLY_V((B[ 1] - B[ 6]), INVCOS12);
    A[ 6] = MPC_MULTIPLY_V((B[ 2] - B[ 5]), INVCOS20);
    A[ 7] = MPC_MULTIPLY_V((B[ 3] - B[ 4]), INVCOS28);
    A[ 8] = B[ 8] + B[15];
    A[ 9] = B[ 9] + B[14];
    A[10] = B[10] + B[13];
    A[11] = B[11] + B[12];
    A[12] = MPC_MULTIPLY_V((B[ 8] - B[15]), INVCOS04);
    A[13] = MPC_MULTIPLY_V((B[ 9] - B[14]), INVCOS12);
    A[14] = MPC_MULTIPLY_V((B[10] - B[13]), INVCOS20);
    A[15] = MPC_MULTIPLY_V((B[11] - B[12]), INVCOS28);
    // 8 adds, 8 subs, 8 muls, 8 shifts

    B[ 0] = A[ 0] + A[ 3];
    B[ 1] = A[ 1] + A[ 2];
    B[ 2] = MPC_MULTIPLY_V((A[ 0] - A[ 3]), INVCOS08);
    B[ 3] = MPC_MULTIPLY_V((A[ 1] - A[ 2]), INVCOS24);
    B[ 4] = A[ 4] + A[ 7];
    B[ 5] = A[ 5] + A[ 6];
    B[ 6] = MPC_MULTIPLY_V((A[ 4] - A[ 7]), INVCOS08);
    B[ 7] = MPC_MULTIPLY_V((A[ 5] - A[ 6]), INVCOS24);
    B[ 8] = A[ 8] + A[11];
    B[ 9] = A[ 9] + A[10];
    B[10] = MPC_MULTIPLY_V((A[ 8] - A[11]), INVCOS08);
    B[11] = MPC_MULTIPLY_V((A[ 9] - A[10]), INVCOS24);
    B[12] = A[12] + A[15];
    B[13] = A[13] + A[14];
    B[14] = MPC_MULTIPLY_V((A[12] - A[15]), INVCOS08);
    B[15] = MPC_MULTIPLY_V((A[13] - A[14]), INVCOS24);
    // 8 adds, 8 subs, 8 muls, 8 shifts

    A[ 0] = B[ 0] + B[ 1];
    A[ 1] = MPC_MULTIPLY_V((B[ 0] - B[ 1]), INVCOS16);
    A[ 2] = B[ 2] + B[ 3];
    A[ 3] = MPC_MULTIPLY_V((B[ 2] - B[ 3]), INVCOS16);
    A[ 4] = B[ 4] + B[ 5];
    A[ 5] = MPC_MULTIPLY_V((B[ 4] - B[ 5]), INVCOS16);
    A[ 6] = B[ 6] + B[ 7];
    A[ 7] = MPC_MULTIPLY_V((B[ 6] - B[ 7]), INVCOS16);
    A[ 8] = B[ 8] + B[ 9];
    A[ 9] = MPC_MULTIPLY_V((B[ 8] - B[ 9]), INVCOS16);
    A[10] = B[10] + B[11];
    A[11] = MPC_MULTIPLY_V((B[10] - B[11]), INVCOS16);
    A[12] = B[12] + B[13];
    A[13] = MPC_MULTIPLY_V((B[12] - B[13]), INVCOS16);
    A[14] = B[14] + B[15];
    A[15] = MPC_MULTIPLY_V((B[14] - B[15]), INVCOS16);
    // 8 adds, 8 subs, 8 muls, 8 shifts

    // multiple used expressions: -(A[12] + A[14] + A[15])
    V[48] = -A[ 0];
    V[ 0] =  A[ 1];
    V[40] = -A[ 2] - (V[ 8] = A[ 3]);
    V[36] = -((V[ 4] = A[ 5] + (V[12] = A[ 7])) + A[ 6]);
    V[44] = - A[ 4] - A[ 6] - A[ 7];
    V[ 6] = (V[10] = A[11] + (V[14] = A[15])) + A[13];
    V[38] = (V[34] = -(V[ 2] = A[ 9] + A[13] + A[15]) - A[14]) + A[ 9] - A[10] - A[11];
    V[46] = (tmp = -(A[12] + A[14] + A[15])) - A[ 8];
    V[42] = tmp - A[10] - A[11];
    // 9 adds, 9 subs

    A[ 0] = MPC_MULTIPLY_V_PRESCALE((Sample[ 0] - Sample[31]), INVCOS01);
    A[ 1] = MPC_MULTIPLY_V_PRESCALE((Sample[ 1] - Sample[30]), INVCOS03);
    A[ 2] = MPC_MULTIPLY_V_PRESCALE((Sample[ 2] - Sample[29]), INVCOS05);
    A[ 3] = MPC_MULTIPLY_V_PRESCALE((Sample[ 3] - Sample[28]), INVCOS07);
    A[ 4] = MPC_MULTIPLY_V_PRESCALE((Sample[ 4] - Sample[27]), INVCOS09);
    A[ 5] = MPC_MULTIPLY_V_PRESCALE((Sample[ 5] - Sample[26]), INVCOS11);
    A[ 6] = MPC_MULTIPLY_V_PRESCALE((Sample[ 6] - Sample[25]), INVCOS13);
    A[ 7] = MPC_MULTIPLY_V_PRESCALE((Sample[ 7] - Sample[24]), INVCOS15);
    A[ 8] = MPC_MULTIPLY_V_PRESCALE((Sample[ 8] - Sample[23]), INVCOS17);
    A[ 9] = MPC_MULTIPLY_V_PRESCALE((Sample[ 9] - Sample[22]), INVCOS19);
    A[10] = MPC_MULTIPLY_V_PRESCALE((Sample[10] - Sample[21]), INVCOS21);
    A[11] = MPC_MULTIPLY_V_PRESCALE((Sample[11] - Sample[20]), INVCOS23);
    A[12] = MPC_MULTIPLY_V_PRESCALE((Sample[12] - Sample[19]), INVCOS25);
    A[13] = MPC_MULTIPLY_V_PRESCALE((Sample[13] - Sample[18]), INVCOS27);
    A[14] = MPC_MULTIPLY_V_PRESCALE((Sample[14] - Sample[17]), INVCOS29);
    A[15] = MPC_MULTIPLY_V_PRESCALE((Sample[15] - Sample[16]), INVCOS31);
    // 16 subs, 16 muls, 16 shifts

    B[ 0] = A[ 0] + A[15];
    B[ 1] = A[ 1] + A[14];
    B[ 2] = A[ 2] + A[13];
    B[ 3] = A[ 3] + A[12];
    B[ 4] = A[ 4] + A[11];
    B[ 5] = A[ 5] + A[10];
    B[ 6] = A[ 6] + A[ 9];
    B[ 7] = A[ 7] + A[ 8];
    B[ 8] = MPC_MULTIPLY_V((A[ 0] - A[15]), INVCOS02);
    B[ 9] = MPC_MULTIPLY_V((A[ 1] - A[14]), INVCOS06);
    B[10] = MPC_MULTIPLY_V((A[ 2] - A[13]), INVCOS10);
    B[11] = MPC_MULTIPLY_V((A[ 3] - A[12]), INVCOS14);
    B[12] = MPC_MULTIPLY_V((A[ 4] - A[11]), INVCOS18);
    B[13] = MPC_MULTIPLY_V((A[ 5] - A[10]), INVCOS22);
    B[14] = MPC_MULTIPLY_V((A[ 6] - A[ 9]), INVCOS26);
    B[15] = MPC_MULTIPLY_V((A[ 7] - A[ 8]), INVCOS30);
    // 8 adds, 8 subs, 8 muls, 8 shifts

    A[ 0] = B[ 0] + B[ 7];
    A[ 1] = B[ 1] + B[ 6];
    A[ 2] = B[ 2] + B[ 5];
    A[ 3] = B[ 3] + B[ 4];
    A[ 4] = MPC_MULTIPLY_V((B[ 0] - B[ 7]), INVCOS04);
    A[ 5] = MPC_MULTIPLY_V((B[ 1] - B[ 6]), INVCOS12);
    A[ 6] = MPC_MULTIPLY_V((B[ 2] - B[ 5]), INVCOS20);
    A[ 7] = MPC_MULTIPLY_V((B[ 3] - B[ 4]), INVCOS28);
    A[ 8] = B[ 8] + B[15];
    A[ 9] = B[ 9] + B[14];
    A[10] = B[10] + B[13];
    A[11] = B[11] + B[12];
    A[12] = MPC_MULTIPLY_V((B[ 8] - B[15]), INVCOS04);
    A[13] = MPC_MULTIPLY_V((B[ 9] - B[14]), INVCOS12);
    A[14] = MPC_MULTIPLY_V((B[10] - B[13]), INVCOS20);
    A[15] = MPC_MULTIPLY_V((B[11] - B[12]), INVCOS28);
    // 8 adds, 8 subs, 8 muls, 8 shifts

    B[ 0] = A[ 0] + A[ 3];
    B[ 1] = A[ 1] + A[ 2];
    B[ 2] = MPC_MULTIPLY_V((A[ 0] - A[ 3]), INVCOS08);
    B[ 3] = MPC_MULTIPLY_V((A[ 1] - A[ 2]), INVCOS24);
    B[ 4] = A[ 4] + A[ 7];
    B[ 5] = A[ 5] + A[ 6];
    B[ 6] = MPC_MULTIPLY_V((A[ 4] - A[ 7]), INVCOS08);
    B[ 7] = MPC_MULTIPLY_V((A[ 5] - A[ 6]), INVCOS24);
    B[ 8] = A[ 8] + A[11];
    B[ 9] = A[ 9] + A[10];
    B[10] = MPC_MULTIPLY_V((A[ 8] - A[11]), INVCOS08);
    B[11] = MPC_MULTIPLY_V((A[ 9] - A[10]), INVCOS24);
    B[12] = A[12] + A[15];
    B[13] = A[13] + A[14];
    B[14] = MPC_MULTIPLY_V((A[12] - A[15]), INVCOS08);
    B[15] = MPC_MULTIPLY_V((A[13] - A[14]), INVCOS24);
    // 8 adds, 8 subs, 8 muls, 8 shifts

    A[ 0] = MPC_V_POSTSCALE((B[ 0] + B[ 1]));
    A[ 1] = MPC_MULTIPLY_V_POSTSCALE((B[ 0] - B[ 1]), INVCOS16);
    A[ 2] = MPC_V_POSTSCALE((B[ 2] + B[ 3]));
    A[ 3] = MPC_MULTIPLY_V_POSTSCALE((B[ 2] - B[ 3]), INVCOS16);
    A[ 4] = MPC_V_POSTSCALE((B[ 4] + B[ 5]));
    A[ 5] = MPC_MULTIPLY_V_POSTSCALE((B[ 4] - B[ 5]), INVCOS16);
    A[ 6] = MPC_V_POSTSCALE((B[ 6] + B[ 7]));
    A[ 7] = MPC_MULTIPLY_V_POSTSCALE((B[ 6] - B[ 7]), INVCOS16);
    A[ 8] = MPC_V_POSTSCALE((B[ 8] + B[ 9]));
    A[ 9] = MPC_MULTIPLY_V_POSTSCALE((B[ 8] - B[ 9]), INVCOS16);
    A[10] = MPC_V_POSTSCALE((B[10] + B[11]));
    A[11] = MPC_MULTIPLY_V_POSTSCALE((B[10] - B[11]), INVCOS16);
    A[12] = MPC_V_POSTSCALE((B[12] + B[13]));
    A[13] = MPC_MULTIPLY_V_POSTSCALE((B[12] - B[13]), INVCOS16);
    A[14] = MPC_V_POSTSCALE((B[14] + B[15]));
    A[15] = MPC_MULTIPLY_V_POSTSCALE((B[14] - B[15]), INVCOS16);
    // 8 adds, 8 subs, 8 muls, 8 shifts

    // multiple used expressions: A[ 4]+A[ 6]+A[ 7], A[ 9]+A[13]+A[15]
    V[ 5] = (V[11] = (V[13] = A[ 7] + (V[15] = A[15])) + A[11]) + A[ 5] + A[13];
    V[ 7] = (V[ 9] = A[ 3] + A[11] + A[15]) + A[13];
    V[33] = -(V[ 1] = A[ 1] + A[ 9] + A[13] + A[15]) - A[14];
    V[35] = -(V[ 3] = A[ 5] + A[ 7] + A[ 9] + A[13] + A[15]) - A[ 6] - A[14];
    V[37] = (tmp = -(A[10] + A[11] + A[13] + A[14] + A[15])) - A[ 5] - A[ 6] - A[ 7];
    V[39] = tmp - A[ 2] - A[ 3];
    V[41] = (tmp += A[13] - A[12]) - A[ 2] - A[ 3];
    V[43] = tmp - A[ 4] - A[ 6] - A[ 7];
    V[47] = (tmp = -(A[ 8] + A[12] + A[14] + A[15])) - A[ 0];
    V[45] = tmp - A[ 4] - A[ 6] - A[ 7];
    // 22 adds, 18 subs
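
    // Note (added): the remaining half of the 64 new V entries follows from the symmetry of
    // the DCT outputs: V[32-k] = -V[k] for k = 0..15, and V[96-k] = V[k] for k = 33..48.
    // MPC_V_PRESHIFT only does real work in OPTIMIZE_FOR_SPEED fixed-point builds.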
    V[32] = -(V[ 0] = MPC_V_PRESHIFT(V[ 0]));
    V[31] = -(V[ 1] = MPC_V_PRESHIFT(V[ 1]));
    V[30] = -(V[ 2] = MPC_V_PRESHIFT(V[ 2]));
    V[29] = -(V[ 3] = MPC_V_PRESHIFT(V[ 3]));
    V[28] = -(V[ 4] = MPC_V_PRESHIFT(V[ 4]));
    V[27] = -(V[ 5] = MPC_V_PRESHIFT(V[ 5]));
    V[26] = -(V[ 6] = MPC_V_PRESHIFT(V[ 6]));
    V[25] = -(V[ 7] = MPC_V_PRESHIFT(V[ 7]));
    V[24] = -(V[ 8] = MPC_V_PRESHIFT(V[ 8]));
    V[23] = -(V[ 9] = MPC_V_PRESHIFT(V[ 9]));
    V[22] = -(V[10] = MPC_V_PRESHIFT(V[10]));
    V[21] = -(V[11] = MPC_V_PRESHIFT(V[11]));
    V[20] = -(V[12] = MPC_V_PRESHIFT(V[12]));
    V[19] = -(V[13] = MPC_V_PRESHIFT(V[13]));
    V[18] = -(V[14] = MPC_V_PRESHIFT(V[14]));
    V[17] = -(V[15] = MPC_V_PRESHIFT(V[15]));
    // 16 adds, 16 shifts (OPTIMIZE_FOR_SPEED only)

    V[63] = (V[33] = MPC_V_PRESHIFT(V[33]));
    V[62] = (V[34] = MPC_V_PRESHIFT(V[34]));
    V[61] = (V[35] = MPC_V_PRESHIFT(V[35]));
    V[60] = (V[36] = MPC_V_PRESHIFT(V[36]));
    V[59] = (V[37] = MPC_V_PRESHIFT(V[37]));
    V[58] = (V[38] = MPC_V_PRESHIFT(V[38]));
    V[57] = (V[39] = MPC_V_PRESHIFT(V[39]));
    V[56] = (V[40] = MPC_V_PRESHIFT(V[40]));
    V[55] = (V[41] = MPC_V_PRESHIFT(V[41]));
    V[54] = (V[42] = MPC_V_PRESHIFT(V[42]));
    V[53] = (V[43] = MPC_V_PRESHIFT(V[43]));
    V[52] = (V[44] = MPC_V_PRESHIFT(V[44]));
    V[51] = (V[45] = MPC_V_PRESHIFT(V[45]));
    V[50] = (V[46] = MPC_V_PRESHIFT(V[46]));
    V[49] = (V[47] = MPC_V_PRESHIFT(V[47]));
    V[48] = (V[48] = MPC_V_PRESHIFT(V[48]));
    // 16 adds, 16 shifts (OPTIMIZE_FOR_SPEED only)

    // OPTIMIZE_FOR_SPEED total: 143 adds, 107 subs, 80 muls, 112 shifts
    // total:                    111 adds, 107 subs, 80 muls,  80 shifts
}
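
// The windowing step of the synthesis: each of the 32 output samples is a 16-tap dot product
// of the V ring buffer with one row of the Di_opt window. Only half of each 128-entry block
// of V contributes, which is why the V offsets advance in the 0/96/128/224/256/... pattern.
// On ARM targets an assembler implementation is used instead (declared extern below).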
#if defined(CPU_ARM)
extern void
mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data,
                        const MPC_SAMPLE_FORMAT * V,
                        const MPC_SAMPLE_FORMAT * D);
#else
static void
mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data,
                        const MPC_SAMPLE_FORMAT * V,
                        const MPC_SAMPLE_FORMAT * D)
{
    mpc_int32_t k;

#if defined(OPTIMIZE_FOR_SPEED)
    // 32=32x32-multiply (FIXED_POINT)
    for ( k = 0; k < 32; k++, D += 16, V++ )
    {
        *Data = V[ 0]*D[ 0] + V[ 96]*D[ 1] + V[128]*D[ 2] + V[224]*D[ 3]
              + V[256]*D[ 4] + V[352]*D[ 5] + V[384]*D[ 6] + V[480]*D[ 7]
              + V[512]*D[ 8] + V[608]*D[ 9] + V[640]*D[10] + V[736]*D[11]
              + V[768]*D[12] + V[864]*D[13] + V[896]*D[14] + V[992]*D[15];
        Data += 1;
        // total: 32 * (16 muls, 15 adds)
    }
#else
#if defined(CPU_COLDFIRE)
    // 64=32x32-multiply assembler for Coldfire
    for ( k = 0; k < 32; k++, D += 16, V++ )
    {
        asm volatile (
            "movem.l (%[D]), %%d0-%%d3 \n\t"
            "move.l (%[V]), %%a5 \n\t"
            "mac.l %%d0, %%a5, (96*4, %[V]), %%a5, %%acc0 \n\t"
            "mac.l %%d1, %%a5, (128*4, %[V]), %%a5, %%acc0\n\t"
            "mac.l %%d2, %%a5, (224*4, %[V]), %%a5, %%acc0\n\t"
            "mac.l %%d3, %%a5, (256*4, %[V]), %%a5, %%acc0\n\t"
            "movem.l (4*4, %[D]), %%d0-%%d3 \n\t"
            "mac.l %%d0, %%a5, (352*4, %[V]), %%a5, %%acc0\n\t"
            "mac.l %%d1, %%a5, (384*4, %[V]), %%a5, %%acc0\n\t"
            "mac.l %%d2, %%a5, (480*4, %[V]), %%a5, %%acc0\n\t"
            "mac.l %%d3, %%a5, (512*4, %[V]), %%a5, %%acc0\n\t"
            "movem.l (8*4, %[D]), %%d0-%%d3 \n\t"
            "mac.l %%d0, %%a5, (608*4, %[V]), %%a5, %%acc0\n\t"
            "mac.l %%d1, %%a5, (640*4, %[V]), %%a5, %%acc0\n\t"
            "mac.l %%d2, %%a5, (736*4, %[V]), %%a5, %%acc0\n\t"
            "mac.l %%d3, %%a5, (768*4, %[V]), %%a5, %%acc0\n\t"
            "movem.l (12*4, %[D]), %%d0-%%d3 \n\t"
            "mac.l %%d0, %%a5, (864*4, %[V]), %%a5, %%acc0\n\t"
            "mac.l %%d1, %%a5, (896*4, %[V]), %%a5, %%acc0\n\t"
            "mac.l %%d2, %%a5, (992*4, %[V]), %%a5, %%acc0\n\t"
            "mac.l %%d3, %%a5, %%acc0 \n\t"
            "movclr.l %%acc0, %%d0 \n\t"
            "move.l %%d0, (%[Data])+ \n"
            : [Data] "+a" (Data)
            : [V] "a" (V), [D] "a" (D)
            : "d0", "d1", "d2", "d3", "a5");
    }
#else
    // 64=64x64-multiply (FIXED_POINT) or float=float*float (!FIXED_POINT) in C
    for ( k = 0; k < 32; k++, D += 16, V++ )
    {
        *Data = MPC_MULTIPLY_EX(V[ 0],D[ 0],31) + MPC_MULTIPLY_EX(V[ 96],D[ 1],31)
              + MPC_MULTIPLY_EX(V[128],D[ 2],31) + MPC_MULTIPLY_EX(V[224],D[ 3],31)
              + MPC_MULTIPLY_EX(V[256],D[ 4],31) + MPC_MULTIPLY_EX(V[352],D[ 5],31)
              + MPC_MULTIPLY_EX(V[384],D[ 6],31) + MPC_MULTIPLY_EX(V[480],D[ 7],31)
              + MPC_MULTIPLY_EX(V[512],D[ 8],31) + MPC_MULTIPLY_EX(V[608],D[ 9],31)
              + MPC_MULTIPLY_EX(V[640],D[10],31) + MPC_MULTIPLY_EX(V[736],D[11],31)
              + MPC_MULTIPLY_EX(V[768],D[12],31) + MPC_MULTIPLY_EX(V[864],D[13],31)
              + MPC_MULTIPLY_EX(V[896],D[14],31) + MPC_MULTIPLY_EX(V[992],D[15],31);
        Data += 1;
        // total: 16 muls, 15 adds, 16 shifts
    }
#endif
#endif
}
#endif /* CPU_ARM */
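
// Full synthesis for one channel: MPC frames carry 36 blocks of 32 subband samples
// (36 * 32 = 1152 = MPC_FRAME_LENGTH). For each block the V pointer is stepped back by 64
// entries, so the newest V values always sit in front of the older ones that the 16-tap
// window still reads, then the block is windowed into 32 PCM samples.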
static void
mpc_full_synthesis_filter(MPC_SAMPLE_FORMAT *OutData, MPC_SAMPLE_FORMAT *V, const MPC_SAMPLE_FORMAT *Y)
{
    mpc_uint32_t n;

    if (NULL != OutData)
    {
        for ( n = 0; n < 36; n++, Y += 32, OutData += 32 )
        {
            V -= 64;
            mpc_calculate_new_V ( Y, V );
            mpc_decoder_windowing_D( OutData, V, Di_opt );
        }
    }
}
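
// Per-frame entry point: each channel's V buffer holds one frame's worth of new values plus
// 960 entries of history. The memmove below shifts the 960 most recent V entries of the
// previous frame up to offset MPC_V_MEM, out of the way of the new frame, which
// mpc_full_synthesis_filter then writes downward from that point; the kept entries provide
// the history that the windowing still needs for the first blocks of the new frame.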
void
mpc_decoder_synthese_filter_float(mpc_decoder *d, MPC_SAMPLE_FORMAT *OutData)
{
    /********* left channel ********/
    memmove(d->V_L + MPC_V_MEM, d->V_L, 960 * sizeof(MPC_SAMPLE_FORMAT) );

    mpc_full_synthesis_filter(
        OutData,
        (MPC_SAMPLE_FORMAT *)(d->V_L + MPC_V_MEM),
        (MPC_SAMPLE_FORMAT *)(d->Y_L [0]));

    /******** right channel ********/
    memmove(d->V_R + MPC_V_MEM, d->V_R, 960 * sizeof(MPC_SAMPLE_FORMAT) );

    mpc_full_synthesis_filter(
        (OutData == NULL ? NULL : OutData + MPC_FRAME_LENGTH),
        (MPC_SAMPLE_FORMAT *)(d->V_R + MPC_V_MEM),
        (MPC_SAMPLE_FORMAT *)(d->Y_R [0]));
}

/*******************************************/
/*                                         */
/*            dithered synthesis           */
/*                                         */
/*******************************************/
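
// Parity[i] is the bit parity of i (number of set bits mod 2); it serves as the feedback
// function of the two shift-register random generators below.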
static const unsigned char Parity [256] ICONST_ATTR = {  // parity
    0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
    1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
    1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
    0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
    1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
    0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
    0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
    1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0
};

/*
 * This is a simple random number generator with good quality for audio purposes.
 * It consists of two polycounters with opposite rotation direction and different
 * periods. The periods are coprime, so the total period is the product of both.
 *
 * The first is a 32-bit register shifted right by one each step, with the parity
 * (XOR) of bits 7, 6, 5, 4, 2 and 0 fed back into bit 31.
 *
 * The second is a 32-bit register shifted left by one each step, with the parity
 * (XOR) of bits 31, 30, 26 and 25 fed back into bit 0.
 *
 * The first has a period of 3*5*17*257*65537, the second of 7*47*73*178481,
 * which gives a total period of 18,410,713,077,675,721,215. The result is the
 * XOR of both generators' values.
 */
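
// Note (added): both branches below implement the same update: __r1 shifts right with its
// parity feedback entering at bit 31, __r2 shifts left ((t4 + t4) == t4 << 1) with its
// parity feedback entering at bit 0, and the two registers are XORed to form the result.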
mpc_uint32_t
mpc_random_int(mpc_decoder *d)
{
#if 1
    mpc_uint32_t t1, t2, t3, t4;

    // The parity is computed via table lookup; this also works on CPUs without a
    // parity flag, can be written in plain C, and avoids unpredictable jumps and
    // slow rotate-through-carry operations.
    t3 = t1 = d->__r1;   t4 = t2 = d->__r2;
    t1 &= 0xF5;          t2 >>= 25;
    t1 = Parity [t1];    t2 &= 0x63;
    t1 <<= 31;           t2 = Parity [t2];

    return (d->__r1 = (t3 >> 1) | t1 ) ^ (d->__r2 = (t4 + t4) | t2 );
#else
    return (d->__r1 = (d->__r1 >> 1) | ((mpc_uint32_t)Parity [d->__r1 & 0xF5] << 31) ) ^
           (d->__r2 = (d->__r2 << 1) |  (mpc_uint32_t)Parity [(d->__r2 >> 25) & 0x63] );
#endif
}