apps/codecs/libmusepack/synth_filter.c

   1 /*
   2   Copyright (c) 2005, The Musepack Development Team
   3   All rights reserved.
   4
   5   Redistribution and use in source and binary forms, with or without
   6   modification, are permitted provided that the following conditions are
   7   met:
   8
   9   * Redistributions of source code must retain the above copyright
  10   notice, this list of conditions and the following disclaimer.
  11
  12   * Redistributions in binary form must reproduce the above
  13   copyright notice, this list of conditions and the following
  14   disclaimer in the documentation and/or other materials provided
  15   with the distribution.
  16
  17   * Neither the name of the The Musepack Development Team nor the
  18   names of its contributors may be used to endorse or promote
  19   products derived from this software without specific prior
  20   written permission.
  21
  22   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  23   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  24   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  25   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  26   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  27   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  28   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  29   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  30   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  31   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  32   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  33 */
  34
  35 /// \file synth_filter.c
  36 /// Synthesis functions.
  37 /// \todo document me
  38
  39 #include "musepack.h"
  40 #include "internal.h"
  41
  42 /* C O N S T A N T S */
  43 #undef _
  44
  45 #if defined(MPC_FIXED_POINT)
  46    #if defined(OPTIMIZE_FOR_SPEED)
  47       // round at compile time to +/- 2^14 as a pre-shift before 32=32x32-multiply
  48       #define D(value)  (MPC_SHR_RND(value, 3))
  49
  50       // round at runtime to +/- 2^17 as a pre-shift before 32=32x32-multiply
  51       // samples are 18.14 fixed point. 30.2 after this shift, whereas the
  52       // 15.2 bits are significant (not including sign)
  53       #define MPC_V_PRESHIFT(X) MPC_SHR_RND(X, 12)
  54
  55       // in this configuration a post-shift by >>1 is needed after synthesis
  56    #else
  57       #if defined(CPU_ARM)
  58           // do not up-scale D-values to achieve higher speed in smull/mlal
  59           // operations. saves ~14/8 = 1.75 cycles per multiplication
  60           #define D(value)  (value)
  61
  62           // in this configuration a post-shift by >>16 is needed after synthesis
  63       #else
  64           // saturate to +/- 2^31 (= value << (31-17)), D-values are +/- 2^17
  65           #define D(value)  (value << (14))
  66       #endif
  67       // do not perform pre-shift
  68       #define MPC_V_PRESHIFT(X) (X)
  69    #endif
  70 #else
  71    // IMPORTANT: internal scaling is somehow strange for floating point, therefore we scale the coefficients Di_opt
  72    // by the correct amount to have proper scaled output
  73    #define D(value)  MAKE_MPC_SAMPLE((double)value*(double)(0x1000))
  74
  75    // do not perform pre-shift
  76    #define MPC_V_PRESHIFT(X) (X)
  77 #endif
  78
  79 // Di_opt coefficients are +/- 2^17 (pre-shifted by <<16)
  80 static const MPC_SAMPLE_FORMAT  Di_opt [512] ICONST_ATTR = {
  81 /*           0        1        2         3         4         5          6          7         8         9       10        11       12       13      14     15  */
  82 /*  0 */  D( 0), -D( 29),  D(213), -D( 459),  D(2037), -D(5153),  D( 6574), -D(37489), D(75038),  D(37489), D(6574),  D(5153), D(2037),  D(459), D(213), D(29),
  83 /*  1 */ -D( 1), -D( 31),  D(218), -D( 519),  D(2000), -D(5517),  D( 5959), -D(39336), D(74992),  D(35640), D(7134),  D(4788), D(2063),  D(401), D(208), D(26),
  84 /*  2 */ -D( 1), -D( 35),  D(222), -D( 581),  D(1952), -D(5879),  D( 5288), -D(41176), D(74856),  D(33791), D(7640),  D(4425), D(2080),  D(347), D(202), D(24),
  85 /*  3 */ -D( 1), -D( 38),  D(225), -D( 645),  D(1893), -D(6237),  D( 4561), -D(43006), D(74630),  D(31947), D(8092),  D(4063), D(2087),  D(294), D(196), D(21),
  86 /*  4 */ -D( 1), -D( 41),  D(227), -D( 711),  D(1822), -D(6589),  D( 3776), -D(44821), D(74313),  D(30112), D(8492),  D(3705), D(2085),  D(244), D(190), D(19),
  87 /*  5 */ -D( 1), -D( 45),  D(228), -D( 779),  D(1739), -D(6935),  D( 2935), -D(46617), D(73908),  D(28289), D(8840),  D(3351), D(2075),  D(197), D(183), D(17),
  88 /*  6 */ -D( 1), -D( 49),  D(228), -D( 848),  D(1644), -D(7271),  D( 2037), -D(48390), D(73415),  D(26482), D(9139),  D(3004), D(2057),  D(153), D(176), D(16),
  89 /*  7 */ -D( 2), -D( 53),  D(227), -D( 919),  D(1535), -D(7597),  D( 1082), -D(50137), D(72835),  D(24694), D(9389),  D(2663), D(2032),  D(111), D(169), D(14),
  90 /*  8 */ -D( 2), -D( 58),  D(224), -D( 991),  D(1414), -D(7910),  D(   70), -D(51853), D(72169),  D(22929), D(9592),  D(2330), D(2001),  D( 72), D(161), D(13),
  91 /*  9 */ -D( 2), -D( 63),  D(221), -D(1064),  D(1280), -D(8209), -D(  998), -D(53534), D(71420),  D(21189), D(9750),  D(2006), D(1962),  D( 36), D(154), D(11),
  92 /* 10 */ -D( 2), -D( 68),  D(215), -D(1137),  D(1131), -D(8491), -D( 2122), -D(55178), D(70590),  D(19478), D(9863),  D(1692), D(1919),  D(  2), D(147), D(10),
  93 /* 11 */ -D( 3), -D( 73),  D(208), -D(1210),  D( 970), -D(8755), -D( 3300), -D(56778), D(69679),  D(17799), D(9935),  D(1388), D(1870), -D( 29), D(139), D( 9),
  94 /* 12 */ -D( 3), -D( 79),  D(200), -D(1283),  D( 794), -D(8998), -D( 4533), -D(58333), D(68692),  D(16155), D(9966),  D(1095), D(1817), -D( 57), D(132), D( 8),
  95 /* 13 */ -D( 4), -D( 85),  D(189), -D(1356),  D( 605), -D(9219), -D( 5818), -D(59838), D(67629),  D(14548), D(9959),  D( 814), D(1759), -D( 83), D(125), D( 7),
  96 /* 14 */ -D( 4), -D( 91),  D(177), -D(1428),  D( 402), -D(9416), -D( 7154), -D(61289), D(66494),  D(12980), D(9916),  D( 545), D(1698), -D(106), D(117), D( 7),
  97 /* 15 */ -D( 5), -D( 97),  D(163), -D(1498),  D( 185), -D(9585), -D( 8540), -D(62684), D(65290),  D(11455), D(9838),  D( 288), D(1634), -D(127), D(111), D( 6),
  98 /* 16 */ -D( 5), -D(104),  D(146), -D(1567), -D(  45), -D(9727), -D( 9975), -D(64019), D(64019),  D( 9975), D(9727),  D(  45), D(1567), -D(146), D(104), D( 5),
  99 /* 17 */ -D( 6), -D(111),  D(127), -D(1634), -D( 288), -D(9838), -D(11455), -D(65290), D(62684),  D( 8540), D(9585), -D( 185), D(1498), -D(163), D( 97), D( 5),
 100 /* 18 */ -D( 7), -D(117),  D(106), -D(1698), -D( 545), -D(9916), -D(12980), -D(66494), D(61289),  D( 7154), D(9416), -D( 402), D(1428), -D(177), D( 91), D( 4),
 101 /* 19 */ -D( 7), -D(125),  D( 83), -D(1759), -D( 814), -D(9959), -D(14548), -D(67629), D(59838),  D( 5818), D(9219), -D( 605), D(1356), -D(189), D( 85), D( 4),
 102 /* 20 */ -D( 8), -D(132),  D( 57), -D(1817), -D(1095), -D(9966), -D(16155), -D(68692), D(58333),  D( 4533), D(8998), -D( 794), D(1283), -D(200), D( 79), D( 3),
 103 /* 21 */ -D( 9), -D(139),  D( 29), -D(1870), -D(1388), -D(9935), -D(17799), -D(69679), D(56778),  D( 3300), D(8755), -D( 970), D(1210), -D(208), D( 73), D( 3),
 104 /* 22 */ -D(10), -D(147), -D(  2), -D(1919), -D(1692), -D(9863), -D(19478), -D(70590), D(55178),  D( 2122), D(8491), -D(1131), D(1137), -D(215), D( 68), D( 2),
 105 /* 23 */ -D(11), -D(154), -D( 36), -D(1962), -D(2006), -D(9750), -D(21189), -D(71420), D(53534),  D(  998), D(8209), -D(1280), D(1064), -D(221), D( 63), D( 2),
 106 /* 24 */ -D(13), -D(161), -D( 72), -D(2001), -D(2330), -D(9592), -D(22929), -D(72169), D(51853), -D(   70), D(7910), -D(1414), D( 991), -D(224), D( 58), D( 2),
 107 /* 25 */ -D(14), -D(169), -D(111), -D(2032), -D(2663), -D(9389), -D(24694), -D(72835), D(50137), -D( 1082), D(7597), -D(1535), D( 919), -D(227), D( 53), D( 2),
 108 /* 26 */ -D(16), -D(176), -D(153), -D(2057), -D(3004), -D(9139), -D(26482), -D(73415), D(48390), -D( 2037), D(7271), -D(1644), D( 848), -D(228), D( 49), D( 1),
 109 /* 27 */ -D(17), -D(183), -D(197), -D(2075), -D(3351), -D(8840), -D(28289), -D(73908), D(46617), -D( 2935), D(6935), -D(1739), D( 779), -D(228), D( 45), D( 1),
 110 /* 28 */ -D(19), -D(190), -D(244), -D(2085), -D(3705), -D(8492), -D(30112), -D(74313), D(44821), -D( 3776), D(6589), -D(1822), D( 711), -D(227), D( 41), D( 1),
 111 /* 29 */ -D(21), -D(196), -D(294), -D(2087), -D(4063), -D(8092), -D(31947), -D(74630), D(43006), -D( 4561), D(6237), -D(1893), D( 645), -D(225), D( 38), D( 1),
 112 /* 30 */ -D(24), -D(202), -D(347), -D(2080), -D(4425), -D(7640), -D(33791), -D(74856), D(41176), -D( 5288), D(5879), -D(1952), D( 581), -D(222), D( 35), D( 1),
 113 /* 31 */ -D(26), -D(208), -D(401), -D(2063), -D(4788), -D(7134), -D(35640), -D(74992), D(39336), -D( 5959), D(5517), -D(2000), D( 519), -D(218), D( 31), D( 1)
 114 };
 115
 116 #undef  D
 117
 118 // needed to prevent from internal overflow in calculate_V (see below)
 119 #define OVERFLOW_FIX 2
 120
 121 // V-coefficients were expanded (<<) by V_COEFFICIENT_EXPAND
 122 #define V_COEFFICIENT_EXPAND 27
 123
 124 #if defined(MPC_FIXED_POINT)
 125    #if defined(OPTIMIZE_FOR_SPEED)
 126       // define 32=32x32-multiplication for DCT-coefficients with samples, vcoef will be pre-shifted on creation
 127       // samples are rounded to +/- 2^19 as pre-shift before 32=32x32-multiply
 128       #define MPC_MULTIPLY_V(sample, vcoef) ( MPC_SHR_RND(sample, 12) * vcoef )
 129
 130       // pre- and postscale are used to avoid internal overflow in synthesis calculation
 131       // samples are s15.0, v-coefs are 4.12 -> internal format is s19.12
 132       #define MPC_MULTIPLY_V_PRESCALE(sample, vcoef)  ( MPC_SHR_RND(sample, (12+OVERFLOW_FIX)) * vcoef )
 133       #define MPC_MULTIPLY_V_POSTSCALE(sample, vcoef) ( MPC_SHR_RND(sample, (12-OVERFLOW_FIX)) * vcoef )
 134       #define MPC_V_POSTSCALE(sample) (sample<<OVERFLOW_FIX)
 135
 136       // round to +/- 2^16 as pre-shift before 32=32x32-multiply
 137       #define MPC_MAKE_INVCOS(value) (MPC_SHR_RND(value, 15))
 138    #else
 139       // define 64=32x32-multiplication for DCT-coefficients with samples. Via usage of MPC_FRACT highly optimized assembler might be used
 140       // MULTIPLY_FRACT will do >>32 after multiplication, as V-coef were expanded by V_COEFFICIENT_EXPAND we'll correct this on the result.
 141       // Will loose 5bit accuracy on result in fract part without effect on final audio result
 142       #define MPC_MULTIPLY_V(sample, vcoef) ( (MPC_MULTIPLY_FRACT(sample, vcoef)) << (32-V_COEFFICIENT_EXPAND) )
 143
 144       // pre- and postscale are used to avoid internal overflow in synthesis calculation
 145       // samples are s15.14, v-coefs are 4.27 -> internal format is s19.12
 146       #define MPC_MULTIPLY_V_PRESCALE(sample, vcoef)  ( (MPC_MULTIPLY_FRACT(sample, vcoef)) << (32-V_COEFFICIENT_EXPAND-OVERFLOW_FIX) )
 147       #define MPC_MULTIPLY_V_POSTSCALE(sample, vcoef) ( (MPC_MULTIPLY_FRACT(sample, vcoef)) << (32-V_COEFFICIENT_EXPAND+OVERFLOW_FIX) )
 148       #define MPC_V_POSTSCALE(sample) (sample<<OVERFLOW_FIX)
 149
 150       // directly use accurate 32bit-coefficients
 151       #define MPC_MAKE_INVCOS(value) (value)
 152    #endif
 153 #else
 154    // for floating point use the standard multiplication macro
 155    #define MPC_MULTIPLY_V          (sample, vcoef) ( MPC_MULTIPLY(sample, vcoef) )
 156    #define MPC_MULTIPLY_V_PRESCALE (sample, vcoef) ( MPC_MULTIPLY(sample, vcoef) )
 157    #define MPC_MULTIPLY_V_POSTSCALE(sample, vcoef) ( MPC_MULTIPLY(sample, vcoef) )
 158    #define MPC_V_POSTSCALE(sample) (sample)
 159
 160    // downscale the accurate 32bit-coefficients and convert to float
 161    #define MPC_MAKE_INVCOS(value) MAKE_MPC_SAMPLE((double)value/(double)(1<<V_COEFFICIENT_EXPAND))
 162 #endif
 163
 164 // define constants for DCT-synthesis
 165 // INVCOSxx = (0.5 / cos(xx*PI/64)) << 27, <<27 to saturate to +/- 2^31
 166 #define INVCOS01 MPC_MAKE_INVCOS(  67189797)
 167 #define INVCOS02 MPC_MAKE_INVCOS(  67433575)
 168 #define INVCOS03 MPC_MAKE_INVCOS(  67843164)
 169 #define INVCOS04 MPC_MAKE_INVCOS(  68423604)
 170 #define INVCOS05 MPC_MAKE_INVCOS(  69182167)
 171 #define INVCOS06 MPC_MAKE_INVCOS(  70128577)
 172 #define INVCOS07 MPC_MAKE_INVCOS(  71275330)
 173 #define INVCOS08 MPC_MAKE_INVCOS(  72638111)
 174 #define INVCOS09 MPC_MAKE_INVCOS(  74236348)
 175 #define INVCOS10 MPC_MAKE_INVCOS(  76093940)
 176 #define INVCOS11 MPC_MAKE_INVCOS(  78240207)
 177 #define INVCOS12 MPC_MAKE_INVCOS(  80711144)
 178 #define INVCOS13 MPC_MAKE_INVCOS(  83551089)
 179 #define INVCOS14 MPC_MAKE_INVCOS(  86814950)
 180 #define INVCOS15 MPC_MAKE_INVCOS(  90571242)
 181 #define INVCOS16 MPC_MAKE_INVCOS(  94906266)
 182 #define INVCOS17 MPC_MAKE_INVCOS(  99929967)
 183 #define INVCOS18 MPC_MAKE_INVCOS( 105784321)
 184 #define INVCOS19 MPC_MAKE_INVCOS( 112655602)
 185 #define INVCOS20 MPC_MAKE_INVCOS( 120792764)
 186 #define INVCOS21 MPC_MAKE_INVCOS( 130535899)
 187 #define INVCOS22 MPC_MAKE_INVCOS( 142361749)
 188 #define INVCOS23 MPC_MAKE_INVCOS( 156959571)
 189 #define INVCOS24 MPC_MAKE_INVCOS( 175363913)
 190 #define INVCOS25 MPC_MAKE_INVCOS( 199201203)
 191 #define INVCOS26 MPC_MAKE_INVCOS( 231182936)
 192 #define INVCOS27 MPC_MAKE_INVCOS( 276190692)
 193 #define INVCOS28 MPC_MAKE_INVCOS( 343988688)
 194 #define INVCOS29 MPC_MAKE_INVCOS( 457361460)
 195 #define INVCOS30 MPC_MAKE_INVCOS( 684664578)
 196 #define INVCOS31 MPC_MAKE_INVCOS(1367679739)
 197
 198 void
 199 mpc_calculate_new_V ( const MPC_SAMPLE_FORMAT * Sample, MPC_SAMPLE_FORMAT * V )
 200 ICODE_ATTR_MPC_LARGE_IRAM;
 201
 202 void
 203 mpc_calculate_new_V ( const MPC_SAMPLE_FORMAT * Sample, MPC_SAMPLE_FORMAT * V )
 204 {
 205     // Calculating new V-buffer values for left channel
 206     // calculate new V-values (ISO-11172-3, p. 39)
 207     // based upon fast-MDCT algorithm by Byeong Gi Lee
 208     MPC_SAMPLE_FORMAT A[16];
 209     MPC_SAMPLE_FORMAT B[16];
 210     MPC_SAMPLE_FORMAT tmp;
 211
 212     A[ 0] = Sample[ 0] + Sample[31];
 213     A[ 1] = Sample[ 1] + Sample[30];
 214     A[ 2] = Sample[ 2] + Sample[29];
 215     A[ 3] = Sample[ 3] + Sample[28];
 216     A[ 4] = Sample[ 4] + Sample[27];
 217     A[ 5] = Sample[ 5] + Sample[26];
 218     A[ 6] = Sample[ 6] + Sample[25];
 219     A[ 7] = Sample[ 7] + Sample[24];
 220     A[ 8] = Sample[ 8] + Sample[23];
 221     A[ 9] = Sample[ 9] + Sample[22];
 222     A[10] = Sample[10] + Sample[21];
 223     A[11] = Sample[11] + Sample[20];
 224     A[12] = Sample[12] + Sample[19];
 225     A[13] = Sample[13] + Sample[18];
 226     A[14] = Sample[14] + Sample[17];
 227     A[15] = Sample[15] + Sample[16];
 228     // 16 adds
 229
 230     B[ 0] = A[ 0] + A[15];
 231     B[ 1] = A[ 1] + A[14];
 232     B[ 2] = A[ 2] + A[13];
 233     B[ 3] = A[ 3] + A[12];
 234     B[ 4] = A[ 4] + A[11];
 235     B[ 5] = A[ 5] + A[10];
 236     B[ 6] = A[ 6] + A[ 9];
 237     B[ 7] = A[ 7] + A[ 8];
 238     B[ 8] = MPC_MULTIPLY_V((A[ 0] - A[15]), INVCOS02);
 239     B[ 9] = MPC_MULTIPLY_V((A[ 1] - A[14]), INVCOS06);
 240     B[10] = MPC_MULTIPLY_V((A[ 2] - A[13]), INVCOS10);
 241     B[11] = MPC_MULTIPLY_V((A[ 3] - A[12]), INVCOS14);
 242     B[12] = MPC_MULTIPLY_V((A[ 4] - A[11]), INVCOS18);
 243     B[13] = MPC_MULTIPLY_V((A[ 5] - A[10]), INVCOS22);
 244     B[14] = MPC_MULTIPLY_V((A[ 6] - A[ 9]), INVCOS26);
 245     B[15] = MPC_MULTIPLY_V((A[ 7] - A[ 8]), INVCOS30);
 246     // 8 adds, 8 subs, 8 muls, 8 shifts
 247
 248     A[ 0] = B[ 0] + B[ 7];
 249     A[ 1] = B[ 1] + B[ 6];
 250     A[ 2] = B[ 2] + B[ 5];
 251     A[ 3] = B[ 3] + B[ 4];
 252     A[ 4] = MPC_MULTIPLY_V((B[ 0] - B[ 7]), INVCOS04);
 253     A[ 5] = MPC_MULTIPLY_V((B[ 1] - B[ 6]), INVCOS12);
 254     A[ 6] = MPC_MULTIPLY_V((B[ 2] - B[ 5]), INVCOS20);
 255     A[ 7] = MPC_MULTIPLY_V((B[ 3] - B[ 4]), INVCOS28);
 256     A[ 8] = B[ 8] + B[15];
 257     A[ 9] = B[ 9] + B[14];
 258     A[10] = B[10] + B[13];
 259     A[11] = B[11] + B[12];
 260     A[12] = MPC_MULTIPLY_V((B[ 8] - B[15]), INVCOS04);
 261     A[13] = MPC_MULTIPLY_V((B[ 9] - B[14]), INVCOS12);
 262     A[14] = MPC_MULTIPLY_V((B[10] - B[13]), INVCOS20);
 263     A[15] = MPC_MULTIPLY_V((B[11] - B[12]), INVCOS28);
 264     // 8 adds, 8 subs, 8 muls, 8 shifts
 265
 266     B[ 0] = A[ 0] + A[ 3];
 267     B[ 1] = A[ 1] + A[ 2];
 268     B[ 2] = MPC_MULTIPLY_V((A[ 0] - A[ 3]), INVCOS08);
 269     B[ 3] = MPC_MULTIPLY_V((A[ 1] - A[ 2]), INVCOS24);
 270     B[ 4] = A[ 4] + A[ 7];
 271     B[ 5] = A[ 5] + A[ 6];
 272     B[ 6] = MPC_MULTIPLY_V((A[ 4] - A[ 7]), INVCOS08);
 273     B[ 7] = MPC_MULTIPLY_V((A[ 5] - A[ 6]), INVCOS24);
 274     B[ 8] = A[ 8] + A[11];
 275     B[ 9] = A[ 9] + A[10];
 276     B[10] = MPC_MULTIPLY_V((A[ 8] - A[11]), INVCOS08);
 277     B[11] = MPC_MULTIPLY_V((A[ 9] - A[10]), INVCOS24);
 278     B[12] = A[12] + A[15];
 279     B[13] = A[13] + A[14];
 280     B[14] = MPC_MULTIPLY_V((A[12] - A[15]), INVCOS08);
 281     B[15] = MPC_MULTIPLY_V((A[13] - A[14]), INVCOS24);
 282     // 8 adds, 8 subs, 8 muls, 8 shifts
 283
 284     A[ 0] = B[ 0] + B[ 1];
 285     A[ 1] = MPC_MULTIPLY_V((B[ 0] - B[ 1]), INVCOS16);
 286     A[ 2] = B[ 2] + B[ 3];
 287     A[ 3] = MPC_MULTIPLY_V((B[ 2] - B[ 3]), INVCOS16);
 288     A[ 4] = B[ 4] + B[ 5];
 289     A[ 5] = MPC_MULTIPLY_V((B[ 4] - B[ 5]), INVCOS16);
 290     A[ 6] = B[ 6] + B[ 7];
 291     A[ 7] = MPC_MULTIPLY_V((B[ 6] - B[ 7]), INVCOS16);
 292     A[ 8] = B[ 8] + B[ 9];
 293     A[ 9] = MPC_MULTIPLY_V((B[ 8] - B[ 9]), INVCOS16);
 294     A[10] = B[10] + B[11];
 295     A[11] = MPC_MULTIPLY_V((B[10] - B[11]), INVCOS16);
 296     A[12] = B[12] + B[13];
 297     A[13] = MPC_MULTIPLY_V((B[12] - B[13]), INVCOS16);
 298     A[14] = B[14] + B[15];
 299     A[15] = MPC_MULTIPLY_V((B[14] - B[15]), INVCOS16);
 300     // 8 adds, 8 subs, 8 muls, 8 shifts
 301
 302     // multiple used expressions: -(A[12] + A[14] + A[15])
 303     V[48] = -A[ 0];
 304     V[ 0] =  A[ 1];
 305     V[40] = -A[ 2] - (V[ 8] = A[ 3]);
 306     V[36] = -((V[ 4] = A[ 5] + (V[12] = A[ 7])) + A[ 6]);
 307     V[44] = - A[ 4] - A[ 6] - A[ 7];
 308     V[ 6] = (V[10] = A[11] + (V[14] = A[15])) + A[13];
 309     V[38] = (V[34] = -(V[ 2] = A[ 9] + A[13] + A[15]) - A[14]) + A[ 9] - A[10] - A[11];
 310     V[46] = (tmp = -(A[12] + A[14] + A[15])) - A[ 8];
 311     V[42] = tmp - A[10] - A[11];
 312     // 9 adds, 9 subs
 313
 314     A[ 0] = MPC_MULTIPLY_V_PRESCALE((Sample[ 0] - Sample[31]), INVCOS01);
 315     A[ 1] = MPC_MULTIPLY_V_PRESCALE((Sample[ 1] - Sample[30]), INVCOS03);
 316     A[ 2] = MPC_MULTIPLY_V_PRESCALE((Sample[ 2] - Sample[29]), INVCOS05);
 317     A[ 3] = MPC_MULTIPLY_V_PRESCALE((Sample[ 3] - Sample[28]), INVCOS07);
 318     A[ 4] = MPC_MULTIPLY_V_PRESCALE((Sample[ 4] - Sample[27]), INVCOS09);
 319     A[ 5] = MPC_MULTIPLY_V_PRESCALE((Sample[ 5] - Sample[26]), INVCOS11);
 320     A[ 6] = MPC_MULTIPLY_V_PRESCALE((Sample[ 6] - Sample[25]), INVCOS13);
 321     A[ 7] = MPC_MULTIPLY_V_PRESCALE((Sample[ 7] - Sample[24]), INVCOS15);
 322     A[ 8] = MPC_MULTIPLY_V_PRESCALE((Sample[ 8] - Sample[23]), INVCOS17);
 323     A[ 9] = MPC_MULTIPLY_V_PRESCALE((Sample[ 9] - Sample[22]), INVCOS19);
 324     A[10] = MPC_MULTIPLY_V_PRESCALE((Sample[10] - Sample[21]), INVCOS21);
 325     A[11] = MPC_MULTIPLY_V_PRESCALE((Sample[11] - Sample[20]), INVCOS23);
 326     A[12] = MPC_MULTIPLY_V_PRESCALE((Sample[12] - Sample[19]), INVCOS25);
 327     A[13] = MPC_MULTIPLY_V_PRESCALE((Sample[13] - Sample[18]), INVCOS27);
 328     A[14] = MPC_MULTIPLY_V_PRESCALE((Sample[14] - Sample[17]), INVCOS29);
 329     A[15] = MPC_MULTIPLY_V_PRESCALE((Sample[15] - Sample[16]), INVCOS31);
 330     // 16 subs, 16 muls, 16 shifts
 331
 332     B[ 0] = A[ 0] + A[15];
 333     B[ 1] = A[ 1] + A[14];
 334     B[ 2] = A[ 2] + A[13];
 335     B[ 3] = A[ 3] + A[12];
 336     B[ 4] = A[ 4] + A[11];
 337     B[ 5] = A[ 5] + A[10];
 338     B[ 6] = A[ 6] + A[ 9];
 339     B[ 7] = A[ 7] + A[ 8];
 340     B[ 8] = MPC_MULTIPLY_V((A[ 0] - A[15]), INVCOS02);
 341     B[ 9] = MPC_MULTIPLY_V((A[ 1] - A[14]), INVCOS06);
 342     B[10] = MPC_MULTIPLY_V((A[ 2] - A[13]), INVCOS10);
 343     B[11] = MPC_MULTIPLY_V((A[ 3] - A[12]), INVCOS14);
 344     B[12] = MPC_MULTIPLY_V((A[ 4] - A[11]), INVCOS18);
 345     B[13] = MPC_MULTIPLY_V((A[ 5] - A[10]), INVCOS22);
 346     B[14] = MPC_MULTIPLY_V((A[ 6] - A[ 9]), INVCOS26);
 347     B[15] = MPC_MULTIPLY_V((A[ 7] - A[ 8]), INVCOS30);
 348     // 8 adds, 8 subs, 8 muls, 8 shift
 349
 350     A[ 0] = B[ 0] + B[ 7];
 351     A[ 1] = B[ 1] + B[ 6];
 352     A[ 2] = B[ 2] + B[ 5];
 353     A[ 3] = B[ 3] + B[ 4];
 354     A[ 4] = MPC_MULTIPLY_V((B[ 0] - B[ 7]), INVCOS04);
 355     A[ 5] = MPC_MULTIPLY_V((B[ 1] - B[ 6]), INVCOS12);
 356     A[ 6] = MPC_MULTIPLY_V((B[ 2] - B[ 5]), INVCOS20);
 357     A[ 7] = MPC_MULTIPLY_V((B[ 3] - B[ 4]), INVCOS28);
 358     A[ 8] = B[ 8] + B[15];
 359     A[ 9] = B[ 9] + B[14];
 360     A[10] = B[10] + B[13];
 361     A[11] = B[11] + B[12];
 362     A[12] = MPC_MULTIPLY_V((B[ 8] - B[15]), INVCOS04);
 363     A[13] = MPC_MULTIPLY_V((B[ 9] - B[14]), INVCOS12);
 364     A[14] = MPC_MULTIPLY_V((B[10] - B[13]), INVCOS20);
 365     A[15] = MPC_MULTIPLY_V((B[11] - B[12]), INVCOS28);
 366     // 8 adds, 8 subs, 8 muls, 8 shift
 367
 368     B[ 0] = A[ 0] + A[ 3];
 369     B[ 1] = A[ 1] + A[ 2];
 370     B[ 2] = MPC_MULTIPLY_V((A[ 0] - A[ 3]), INVCOS08);
 371     B[ 3] = MPC_MULTIPLY_V((A[ 1] - A[ 2]), INVCOS24);
 372     B[ 4] = A[ 4] + A[ 7];
 373     B[ 5] = A[ 5] + A[ 6];
 374     B[ 6] = MPC_MULTIPLY_V((A[ 4] - A[ 7]), INVCOS08);
 375     B[ 7] = MPC_MULTIPLY_V((A[ 5] - A[ 6]), INVCOS24);
 376     B[ 8] = A[ 8] + A[11];
 377     B[ 9] = A[ 9] + A[10];
 378     B[10] = MPC_MULTIPLY_V((A[ 8] - A[11]), INVCOS08);
 379     B[11] = MPC_MULTIPLY_V((A[ 9] - A[10]), INVCOS24);
 380     B[12] = A[12] + A[15];
 381     B[13] = A[13] + A[14];
 382     B[14] = MPC_MULTIPLY_V((A[12] - A[15]), INVCOS08);
 383     B[15] = MPC_MULTIPLY_V((A[13] - A[14]), INVCOS24);
 384     // 8 adds, 8 subs, 8 muls, 8 shift
 385
 386     A[ 0] = MPC_V_POSTSCALE((B[ 0] + B[ 1]));
 387     A[ 1] = MPC_MULTIPLY_V_POSTSCALE((B[ 0] - B[ 1]), INVCOS16);
 388     A[ 2] = MPC_V_POSTSCALE((B[ 2] + B[ 3]));
 389     A[ 3] = MPC_MULTIPLY_V_POSTSCALE((B[ 2] - B[ 3]), INVCOS16);
 390     A[ 4] = MPC_V_POSTSCALE((B[ 4] + B[ 5]));
 391     A[ 5] = MPC_MULTIPLY_V_POSTSCALE((B[ 4] - B[ 5]), INVCOS16);
 392     A[ 6] = MPC_V_POSTSCALE((B[ 6] + B[ 7]));
 393     A[ 7] = MPC_MULTIPLY_V_POSTSCALE((B[ 6] - B[ 7]), INVCOS16);
 394     A[ 8] = MPC_V_POSTSCALE((B[ 8] + B[ 9]));
 395     A[ 9] = MPC_MULTIPLY_V_POSTSCALE((B[ 8] - B[ 9]), INVCOS16);
 396     A[10] = MPC_V_POSTSCALE((B[10] + B[11]));
 397     A[11] = MPC_MULTIPLY_V_POSTSCALE((B[10] - B[11]), INVCOS16);
 398     A[12] = MPC_V_POSTSCALE((B[12] + B[13]));
 399     A[13] = MPC_MULTIPLY_V_POSTSCALE((B[12] - B[13]), INVCOS16);
 400     A[14] = MPC_V_POSTSCALE((B[14] + B[15]));
 401     A[15] = MPC_MULTIPLY_V_POSTSCALE((B[14] - B[15]), INVCOS16);
 402     // 8 adds, 8 subs, 8 muls, 8 shift
 403
 404     // multiple used expressions: A[ 4]+A[ 6]+A[ 7], A[ 9]+A[13]+A[15]
 405     V[ 5] = (V[11] = (V[13] = A[ 7] + (V[15] = A[15])) + A[11]) + A[ 5] + A[13];
 406     V[ 7] = (V[ 9] = A[ 3] + A[11] + A[15]) + A[13];
 407     V[33] = -(V[ 1] = A[ 1] + A[ 9] + A[13] + A[15]) - A[14];
 408     V[35] = -(V[ 3] = A[ 5] + A[ 7] + A[ 9] + A[13] + A[15]) - A[ 6] - A[14];
 409     V[37] = (tmp = -(A[10] + A[11] + A[13] + A[14] + A[15])) - A[ 5] - A[ 6] - A[ 7];
 410     V[39] = tmp - A[ 2] - A[ 3];
 411     V[41] = (tmp += A[13] - A[12]) - A[ 2] - A[ 3];
 412     V[43] = tmp - A[ 4] - A[ 6] - A[ 7];
 413     V[47] = (tmp = -(A[ 8] + A[12] + A[14] + A[15])) - A[ 0];
 414     V[45] = tmp - A[ 4] - A[ 6] - A[ 7];
 415     // 22 adds, 18 subs
 416
 417     V[32] = -(V[ 0] = MPC_V_PRESHIFT(V[ 0]));
 418     V[31] = -(V[ 1] = MPC_V_PRESHIFT(V[ 1]));
 419     V[30] = -(V[ 2] = MPC_V_PRESHIFT(V[ 2]));
 420     V[29] = -(V[ 3] = MPC_V_PRESHIFT(V[ 3]));
 421     V[28] = -(V[ 4] = MPC_V_PRESHIFT(V[ 4]));
 422     V[27] = -(V[ 5] = MPC_V_PRESHIFT(V[ 5]));
 423     V[26] = -(V[ 6] = MPC_V_PRESHIFT(V[ 6]));
 424     V[25] = -(V[ 7] = MPC_V_PRESHIFT(V[ 7]));
 425     V[24] = -(V[ 8] = MPC_V_PRESHIFT(V[ 8]));
 426     V[23] = -(V[ 9] = MPC_V_PRESHIFT(V[ 9]));
 427     V[22] = -(V[10] = MPC_V_PRESHIFT(V[10]));
 428     V[21] = -(V[11] = MPC_V_PRESHIFT(V[11]));
 429     V[20] = -(V[12] = MPC_V_PRESHIFT(V[12]));
 430     V[19] = -(V[13] = MPC_V_PRESHIFT(V[13]));
 431     V[18] = -(V[14] = MPC_V_PRESHIFT(V[14]));
 432     V[17] = -(V[15] = MPC_V_PRESHIFT(V[15]));
 433     // 16 adds, 16 shifts (OPTIMIZE_FOR_SPEED only)
 434
 435     V[63] =  (V[33] = MPC_V_PRESHIFT(V[33]));
 436     V[62] =  (V[34] = MPC_V_PRESHIFT(V[34]));
 437     V[61] =  (V[35] = MPC_V_PRESHIFT(V[35]));
 438     V[60] =  (V[36] = MPC_V_PRESHIFT(V[36]));
 439     V[59] =  (V[37] = MPC_V_PRESHIFT(V[37]));
 440     V[58] =  (V[38] = MPC_V_PRESHIFT(V[38]));
 441     V[57] =  (V[39] = MPC_V_PRESHIFT(V[39]));
 442     V[56] =  (V[40] = MPC_V_PRESHIFT(V[40]));
 443     V[55] =  (V[41] = MPC_V_PRESHIFT(V[41]));
 444     V[54] =  (V[42] = MPC_V_PRESHIFT(V[42]));
 445     V[53] =  (V[43] = MPC_V_PRESHIFT(V[43]));
 446     V[52] =  (V[44] = MPC_V_PRESHIFT(V[44]));
 447     V[51] =  (V[45] = MPC_V_PRESHIFT(V[45]));
 448     V[50] =  (V[46] = MPC_V_PRESHIFT(V[46]));
 449     V[49] =  (V[47] = MPC_V_PRESHIFT(V[47]));
 450     V[48] =  (V[48] = MPC_V_PRESHIFT(V[48]));
 451     // 16 adds, 16 shifts (OPTIMIZE_FOR_SPEED only)
 452
 453     // OPTIMIZE_FOR_SPEED total: 143 adds, 107 subs, 80 muls, 112 shifts
 454     //                    total: 111 adds, 107 subs, 80 muls,  80 shifts
 455 }
 456
 457 #if defined(CPU_ARM)
 458 extern void
 459 mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data,
 460                         const MPC_SAMPLE_FORMAT * V,
 461                         const MPC_SAMPLE_FORMAT * D);
 462 #else
 463 static void
 464 mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data,
 465                         const MPC_SAMPLE_FORMAT * V,
 466                         const MPC_SAMPLE_FORMAT * D)
 467 {
 468    mpc_int32_t k;
 469
 470    #if defined(OPTIMIZE_FOR_SPEED)
 471       // 32=32x32-multiply (FIXED_POINT)
 472       for ( k = 0; k < 32; k++, D += 16, V++ )
 473       {
 474          *Data = V[  0]*D[ 0] + V[ 96]*D[ 1] + V[128]*D[ 2] + V[224]*D[ 3]
 475                + V[256]*D[ 4] + V[352]*D[ 5] + V[384]*D[ 6] + V[480]*D[ 7]
 476                + V[512]*D[ 8] + V[608]*D[ 9] + V[640]*D[10] + V[736]*D[11]
 477                + V[768]*D[12] + V[864]*D[13] + V[896]*D[14] + V[992]*D[15];
 478          *Data >>= 1; // post shift to compensate for pre-shifting
 479          Data += 1;
 480          // total: 32 * (16 muls, 15 adds)
 481       }
 482    #else
 483       #if defined(CPU_COLDFIRE)
 484          // 64=32x32-multiply assembler for Coldfire
 485          for ( k = 0; k < 32; k++, D += 16, V++ )
 486          {
 487             asm volatile (
 488                "movem.l (%[D]), %%d0-%%d3                    \n\t"
 489                "move.l (%[V]), %%a5                          \n\t"
 490                "mac.l %%d0, %%a5, (96*4, %[V]), %%a5, %%acc0 \n\t"
 491                "mac.l %%d1, %%a5, (128*4, %[V]), %%a5, %%acc0\n\t"
 492                "mac.l %%d2, %%a5, (224*4, %[V]), %%a5, %%acc0\n\t"
 493                "mac.l %%d3, %%a5, (256*4, %[V]), %%a5, %%acc0\n\t"
 494                "movem.l (4*4, %[D]), %%d0-%%d3               \n\t"
 495                "mac.l %%d0, %%a5, (352*4, %[V]), %%a5, %%acc0\n\t"
 496                "mac.l %%d1, %%a5, (384*4, %[V]), %%a5, %%acc0\n\t"
 497                "mac.l %%d2, %%a5, (480*4, %[V]), %%a5, %%acc0\n\t"
 498                "mac.l %%d3, %%a5, (512*4, %[V]), %%a5, %%acc0\n\t"
 499                "movem.l (8*4, %[D]), %%d0-%%d3               \n\t"
 500                "mac.l %%d0, %%a5, (608*4, %[V]), %%a5, %%acc0\n\t"
 501                "mac.l %%d1, %%a5, (640*4, %[V]), %%a5, %%acc0\n\t"
 502                "mac.l %%d2, %%a5, (736*4, %[V]), %%a5, %%acc0\n\t"
 503                "mac.l %%d3, %%a5, (768*4, %[V]), %%a5, %%acc0\n\t"
 504                "movem.l (12*4, %[D]), %%d0-%%d3              \n\t"
 505                "mac.l %%d0, %%a5, (864*4, %[V]), %%a5, %%acc0\n\t"
 506                "mac.l %%d1, %%a5, (896*4, %[V]), %%a5, %%acc0\n\t"
 507                "mac.l %%d2, %%a5, (992*4, %[V]), %%a5, %%acc0\n\t"
 508                "mac.l %%d3, %%a5, %%acc0                     \n\t"
 509                "movclr.l %%acc0, %%d0                        \n\t"
 510                "lsl.l #1, %%d0                               \n\t"
 511                "move.l %%d0, (%[Data])+                      \n"
 512                : [Data] "+a" (Data)
 513                : [V] "a" (V), [D] "a" (D)
 514                : "d0", "d1", "d2", "d3", "a5");
 515          }
 516       #else
 517          // 64=64x64-multiply (FIXED_POINT) or float=float*float (!FIXED_POINT) in C
 518          for ( k = 0; k < 32; k++, D += 16, V++ )
 519          {
 520             *Data = MPC_MULTIPLY_EX(V[  0],D[ 0],30) + MPC_MULTIPLY_EX(V[ 96],D[ 1],30)
 521                   + MPC_MULTIPLY_EX(V[128],D[ 2],30) + MPC_MULTIPLY_EX(V[224],D[ 3],30)
 522                   + MPC_MULTIPLY_EX(V[256],D[ 4],30) + MPC_MULTIPLY_EX(V[352],D[ 5],30)
 523                   + MPC_MULTIPLY_EX(V[384],D[ 6],30) + MPC_MULTIPLY_EX(V[480],D[ 7],30)
 524                   + MPC_MULTIPLY_EX(V[512],D[ 8],30) + MPC_MULTIPLY_EX(V[608],D[ 9],30)
 525                   + MPC_MULTIPLY_EX(V[640],D[10],30) + MPC_MULTIPLY_EX(V[736],D[11],30)
 526                   + MPC_MULTIPLY_EX(V[768],D[12],30) + MPC_MULTIPLY_EX(V[864],D[13],30)
 527                   + MPC_MULTIPLY_EX(V[896],D[14],30) + MPC_MULTIPLY_EX(V[992],D[15],30);
 528             Data += 1;
 529             // total: 16 muls, 15 adds, 16 shifts
 530          }
 531       #endif
 532    #endif
 533 }
 534 #endif /* CPU_ARM */
 535
 536 static void
 537 mpc_full_synthesis_filter(MPC_SAMPLE_FORMAT *OutData, MPC_SAMPLE_FORMAT *V, const MPC_SAMPLE_FORMAT *Y)
 538 {
 539     mpc_uint32_t n;
 540
 541     if (NULL != OutData)
 542     {
 543         for ( n = 0; n < 36; n++, Y += 32, OutData += 32 )
 544         {
 545             V -= 64;
 546             mpc_calculate_new_V ( Y, V );
 547             mpc_decoder_windowing_D( OutData, V, Di_opt );
 548         }
 549      }
 550 }
 551
 552 void
 553 mpc_decoder_synthese_filter_float(mpc_decoder *d, MPC_SAMPLE_FORMAT *OutData)
 554 {
 555     /********* left channel ********/
 556     memmove(d->V_L + MPC_V_MEM, d->V_L, 960 * sizeof(MPC_SAMPLE_FORMAT) );
 557
 558     mpc_full_synthesis_filter(
 559         OutData,
 560         (MPC_SAMPLE_FORMAT *)(d->V_L + MPC_V_MEM),
 561         (MPC_SAMPLE_FORMAT *)(d->Y_L [0]));
 562
 563     /******** right channel ********/
 564     memmove(d->V_R + MPC_V_MEM, d->V_R, 960 * sizeof(MPC_SAMPLE_FORMAT) );
 565
 566     mpc_full_synthesis_filter(
 567         (OutData == NULL ? NULL : OutData + MPC_FRAME_LENGTH),
 568         (MPC_SAMPLE_FORMAT *)(d->V_R + MPC_V_MEM),
 569         (MPC_SAMPLE_FORMAT *)(d->Y_R [0]));
 570 }
 571
 572 /*******************************************/
 573 /*                                         */
 574 /*            dithered synthesis           */
 575 /*                                         */
 576 /*******************************************/
 577
 578 static const unsigned char Parity [256] ICONST_ATTR = {  // parity
 579     0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
 580     1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
 581     1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
 582     0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
 583     1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
 584     0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
 585     0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
 586     1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0
 587 };
 588
 589 /*
 590  *  This is a simple random number generator with good quality for audio purposes.
 591  *  It consists of two polycounters with opposite rotation direction and different
 592  *  periods. The periods are coprime, so the total period is the product of both.
 593  *
 594  *     -------------------------------------------------------------------------------------------------
 595  * +-> |31:30:29:28:27:26:25:24:23:22:21:20:19:18:17:16:15:14:13:12:11:10: 9: 8: 7: 6: 5: 4: 3: 2: 1: 0|
 596  * |   -------------------------------------------------------------------------------------------------
 597  * |                                                                          |  |  |  |     |        |
 598  * |                                                                          +--+--+--+-XOR-+--------+
 599  * |                                                                                      |
 600  * +--------------------------------------------------------------------------------------+
 601  *
 602  *     -------------------------------------------------------------------------------------------------
 603  *     |31:30:29:28:27:26:25:24:23:22:21:20:19:18:17:16:15:14:13:12:11:10: 9: 8: 7: 6: 5: 4: 3: 2: 1: 0| <-+
 604  *     -------------------------------------------------------------------------------------------------   |
 605  *       |  |           |  |                                                                               |
 606  *       +--+----XOR----+--+                                                                               |
 607  *                |                                                                                        |
 608  *                +----------------------------------------------------------------------------------------+
 609  *
 610  *
 611  *  The first has an period of 3*5*17*257*65537, the second of 7*47*73*178481,
 612  *  which gives a period of 18.410.713.077.675.721.215. The result is the
 613  *  XORed values of both generators.
 614  */
 615 mpc_uint32_t
 616 mpc_random_int(mpc_decoder *d)
 617 {
 618 #if 1
 619     mpc_uint32_t  t1, t2, t3, t4;
 620
 621     t3   = t1 = d->__r1;   t4   = t2 = d->__r2;  // Parity calculation is done via table lookup, this is also available
 622     t1  &= 0xF5;        t2 >>= 25;               // on CPUs without parity, can be implemented in C and avoid unpredictable
 623     t1   = Parity [t1]; t2  &= 0x63;             // jumps and slow rotate through the carry flag operations.
 624     t1 <<= 31;          t2   = Parity [t2];
 625
 626     return (d->__r1 = (t3 >> 1) | t1 ) ^ (d->__r2 = (t4 + t4) | t2 );
 627 #else
 628     return (d->__r1 = (d->__r1 >> 1) | ((mpc_uint32_t)Parity [d->__r1 & 0xF5] << 31) ) ^
 629         (d->__r2 = (d->__r2 << 1) |  (mpc_uint32_t)Parity [(d->__r2 >> 25) & 0x63] );
 630 #endif
 631 }