apps/codecs/libmusepack/synth_filter.c

   1 /*
   2   Copyright (c) 2005, The Musepack Development Team
   3   All rights reserved.
   4
   5   Redistribution and use in source and binary forms, with or without
   6   modification, are permitted provided that the following conditions are
   7   met:
   8
   9   * Redistributions of source code must retain the above copyright
  10   notice, this list of conditions and the following disclaimer.
  11
  12   * Redistributions in binary form must reproduce the above
  13   copyright notice, this list of conditions and the following
  14   disclaimer in the documentation and/or other materials provided
  15   with the distribution.
  16
  17   * Neither the name of the The Musepack Development Team nor the
  18   names of its contributors may be used to endorse or promote
  19   products derived from this software without specific prior
  20   written permission.
  21
  22   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  23   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  24   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  25   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  26   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  27   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  28   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  29   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  30   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  31   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  32   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  33 */
  34
  35 /// \file synth_filter.c
  36 /// Synthesis functions.
  37 /// \todo document me
  38
  39 #include "musepack.h"
  40 #include "internal.h"
  41
  42 /* C O N S T A N T S */
  43 #undef _
  44
  45 #if defined(MPC_FIXED_POINT)
  46    #if defined(OPTIMIZE_FOR_SPEED)
  47       // round at compile time to +/- 2^14 as a pre-shift before 32=32x32-multiply
  48       #define D(value)  (MPC_SHR_RND(value, 3))
  49
  50       // round at runtime to +/- 2^17 as a pre-shift before 32=32x32-multiply
  51       // samples are 18.14 fixed point. 30.2 after this shift, whereas the
  52       // 15.2 bits are significant (not including sign)
  53       #define MPC_V_PRESHIFT(X) MPC_SHR_RND(X, 12)
  54
  55       // in this configuration a post-shift by >>1 is needed after synthesis
  56    #else
  57       #if defined(CPU_ARM)
  58           // do not up-scale D-values to achieve higher speed in smull/mlal
  59           // operations. saves ~14/8 = 1.75 cycles per multiplication
  60           #define D(value)  (value)
  61
  62           // in this configuration a post-shift by >>16 is needed after synthesis
  63       #else
  64           // saturate to +/- 2^31 (= value << (31-17)), D-values are +/- 2^17
  65           #define D(value)  (value << (14))
  66       #endif
  67       // do not perform pre-shift
  68       #define MPC_V_PRESHIFT(X) (X)
  69    #endif
  70 #else
  71    // IMPORTANT: internal scaling is somehow strange for floating point, therefore we scale the coefficients Di_opt
  72    // by the correct amount to have proper scaled output
  73    #define D(value)  MAKE_MPC_SAMPLE((double)value*(double)(0x1000))
  74
  75    // do not perform pre-shift
  76    #define MPC_V_PRESHIFT(X) (X)
  77 #endif
  78
  79 // Di_opt coefficients are +/- 2^17 (pre-shifted by <<16)
  80 static const MPC_SAMPLE_FORMAT  Di_opt [512] ICONST_ATTR = {
  81 /*           0        1        2         3         4         5          6          7         8         9       10        11       12       13      14     15  */
  82 /*  0 */  D( 0), -D( 29), D( 213), -D( 459),  D(2037), -D(5153),  D( 6574), -D(37489), D(75038),  D(37489), D(6574),  D(5153), D(2037),  D(459), D(213), D(29),
  83 /*  1 */ -D( 1), -D( 31), D( 218), -D( 519),  D(2000), -D(5517),  D( 5959), -D(39336), D(74992),  D(35640), D(7134),  D(4788), D(2063),  D(401), D(208), D(26),
  84 /*  2 */ -D( 1), -D( 35), D( 222), -D( 581),  D(1952), -D(5879),  D( 5288), -D(41176), D(74856),  D(33791), D(7640),  D(4425), D(2080),  D(347), D(202), D(24),
  85 /*  3 */ -D( 1), -D( 38), D( 225), -D( 645),  D(1893), -D(6237),  D( 4561), -D(43006), D(74630),  D(31947), D(8092),  D(4063), D(2087),  D(294), D(196), D(21),
  86 /*  4 */ -D( 1), -D( 41), D( 227), -D( 711),  D(1822), -D(6589),  D( 3776), -D(44821), D(74313),  D(30112), D(8492),  D(3705), D(2085),  D(244), D(190), D(19),
  87 /*  5 */ -D( 1), -D( 45), D( 228), -D( 779),  D(1739), -D(6935),  D( 2935), -D(46617), D(73908),  D(28289), D(8840),  D(3351), D(2075),  D(197), D(183), D(17),
  88 /*  6 */ -D( 1), -D( 49), D( 228), -D( 848),  D(1644), -D(7271),  D( 2037), -D(48390), D(73415),  D(26482), D(9139),  D(3004), D(2057),  D(153), D(176), D(16),
  89 /*  7 */ -D( 2), -D( 53), D( 227), -D( 919),  D(1535), -D(7597),  D( 1082), -D(50137), D(72835),  D(24694), D(9389),  D(2663), D(2032),  D(111), D(169), D(14),
  90 /*  8 */ -D( 2), -D( 58), D( 224), -D( 991),  D(1414), -D(7910),  D(   70), -D(51853), D(72169),  D(22929), D(9592),  D(2330), D(2001),  D( 72), D(161), D(13),
  91 /*  9 */ -D( 2), -D( 63), D( 221), -D(1064),  D(1280), -D(8209), -D(  998), -D(53534), D(71420),  D(21189), D(9750),  D(2006), D(1962),  D( 36), D(154), D(11),
  92 /* 10 */ -D( 2), -D( 68), D( 215), -D(1137),  D(1131), -D(8491), -D( 2122), -D(55178), D(70590),  D(19478), D(9863),  D(1692), D(1919),  D(  2), D(147), D(10),
  93 /* 11 */ -D( 3), -D( 73), D( 208), -D(1210),  D( 970), -D(8755), -D( 3300), -D(56778), D(69679),  D(17799), D(9935),  D(1388), D(1870), -D( 29), D(139), D( 9),
  94 /* 12 */ -D( 3), -D( 79), D( 200), -D(1283),  D( 794), -D(8998), -D( 4533), -D(58333), D(68692),  D(16155), D(9966),  D(1095), D(1817), -D( 57), D(132), D( 8),
  95 /* 13 */ -D( 4), -D( 85), D( 189), -D(1356),  D( 605), -D(9219), -D( 5818), -D(59838), D(67629),  D(14548), D(9959),  D( 814), D(1759), -D( 83), D(125), D( 7),
  96 /* 14 */ -D( 4), -D( 91), D( 177), -D(1428),  D( 402), -D(9416), -D( 7154), -D(61289), D(66494),  D(12980), D(9916),  D( 545), D(1698), -D(106), D(117), D( 7),
  97 /* 15 */ -D( 5), -D( 97), D( 163), -D(1498),  D( 185), -D(9585), -D( 8540), -D(62684), D(65290),  D(11455), D(9838),  D( 288), D(1634), -D(127), D(111), D( 6),
  98 /* 16 */ -D( 5), -D(104), D( 146), -D(1567), -D(  45), -D(9727), -D( 9975), -D(64019), D(64019),  D( 9975), D(9727),  D(  45), D(1567), -D(146), D(104), D( 5),
  99 /* 17 */ -D( 6), -D(111), D( 127), -D(1634), -D( 288), -D(9838), -D(11455), -D(65290), D(62684),  D( 8540), D(9585), -D( 185), D(1498), -D(163), D( 97), D( 5),
 100 /* 18 */ -D( 7), -D(117), D( 106), -D(1698), -D( 545), -D(9916), -D(12980), -D(66494), D(61289),  D( 7154), D(9416), -D( 402), D(1428), -D(177), D( 91), D( 4),
 101 /* 19 */ -D( 7), -D(125), D(  83), -D(1759), -D( 814), -D(9959), -D(14548), -D(67629), D(59838),  D( 5818), D(9219), -D( 605), D(1356), -D(189), D( 85), D( 4),
 102 /* 20 */ -D( 8), -D(132), D(  57), -D(1817), -D(1095), -D(9966), -D(16155), -D(68692), D(58333),  D( 4533), D(8998), -D( 794), D(1283), -D(200), D( 79), D( 3),
 103 /* 21 */ -D( 9), -D(139), D(  29), -D(1870), -D(1388), -D(9935), -D(17799), -D(69679), D(56778),  D( 3300), D(8755), -D( 970), D(1210), -D(208), D( 73), D( 3),
 104 /* 22 */ -D(10), -D(147), -D(  2), -D(1919), -D(1692), -D(9863), -D(19478), -D(70590), D(55178),  D( 2122), D(8491), -D(1131), D(1137), -D(215), D( 68), D( 2),
 105 /* 23 */ -D(11), -D(154), -D( 36), -D(1962), -D(2006), -D(9750), -D(21189), -D(71420), D(53534),  D(  998), D(8209), -D(1280), D(1064), -D(221), D( 63), D( 2),
 106 /* 24 */ -D(13), -D(161), -D( 72), -D(2001), -D(2330), -D(9592), -D(22929), -D(72169), D(51853), -D(   70), D(7910), -D(1414), D( 991), -D(224), D( 58), D( 2),
 107 /* 25 */ -D(14), -D(169), -D(111), -D(2032), -D(2663), -D(9389), -D(24694), -D(72835), D(50137), -D( 1082), D(7597), -D(1535), D( 919), -D(227), D( 53), D( 2),
 108 /* 26 */ -D(16), -D(176), -D(153), -D(2057), -D(3004), -D(9139), -D(26482), -D(73415), D(48390), -D( 2037), D(7271), -D(1644), D( 848), -D(228), D( 49), D( 1),
 109 /* 27 */ -D(17), -D(183), -D(197), -D(2075), -D(3351), -D(8840), -D(28289), -D(73908), D(46617), -D( 2935), D(6935), -D(1739), D( 779), -D(228), D( 45), D( 1),
 110 /* 28 */ -D(19), -D(190), -D(244), -D(2085), -D(3705), -D(8492), -D(30112), -D(74313), D(44821), -D( 3776), D(6589), -D(1822), D( 711), -D(227), D( 41), D( 1),
 111 /* 29 */ -D(21), -D(196), -D(294), -D(2087), -D(4063), -D(8092), -D(31947), -D(74630), D(43006), -D( 4561), D(6237), -D(1893), D( 645), -D(225), D( 38), D( 1),
 112 /* 30 */ -D(24), -D(202), -D(347), -D(2080), -D(4425), -D(7640), -D(33791), -D(74856), D(41176), -D( 5288), D(5879), -D(1952), D( 581), -D(222), D( 35), D( 1),
 113 /* 31 */ -D(26), -D(208), -D(401), -D(2063), -D(4788), -D(7134), -D(35640), -D(74992), D(39336), -D( 5959), D(5517), -D(2000), D( 519), -D(218), D( 31), D( 1)
 114 };
 115
 116 #undef  D
 117
 118 // needed to prevent from internal overflow in calculate_V
 119 #define OVERFLOW_FIX 1
 120
 121 // V-coefficients were expanded (<<) by V_COEFFICIENT_EXPAND
 122 #define V_COEFFICIENT_EXPAND 27
 123
 124 #if defined(MPC_FIXED_POINT)
 125    #if defined(OPTIMIZE_FOR_SPEED)
 126       // define 32=32x32-multiplication for DCT-coefficients with samples, vcoef will be pre-shifted on creation
 127       // samples are rounded to +/- 2^19 as pre-shift before 32=32x32-multiply
 128       #define MPC_MULTIPLY_V(sample, vcoef) ( MPC_SHR_RND(sample, 12) * vcoef )
 129
 130       // pre- and postscale are used to avoid internal overflow in synthesis calculation
 131       #define MPC_MULTIPLY_V_PRESCALE(sample, vcoef)  ( MPC_SHR_RND(sample, (12+OVERFLOW_FIX)) * vcoef )
 132       #define MPC_MULTIPLY_V_POSTSCALE(sample, vcoef) ( MPC_SHR_RND(sample, (12-OVERFLOW_FIX)) * vcoef )
 133       #define MPC_V_POSTSCALE(sample) (sample<<OVERFLOW_FIX)
 134
 135       // round to +/- 2^16 as pre-shift before 32=32x32-multiply
 136       #define MPC_MAKE_INVCOS(value) (MPC_SHR_RND(value, 15))
 137    #else
 138       // define 64=32x32-multiplication for DCT-coefficients with samples. Via usage of MPC_FRACT highly optimized assembler might be used
 139       // MULTIPLY_FRACT will do >>32 after multiplication, as V-coef were expanded by V_COEFFICIENT_EXPAND we'll correct this on the result.
 140       // Will loose 5bit accuracy on result in fract part without effect on final audio result
 141       #define MPC_MULTIPLY_V(sample, vcoef) ( (MPC_MULTIPLY_FRACT(sample, vcoef)) << (32-V_COEFFICIENT_EXPAND) )
 142
 143       // pre- and postscale are used to avoid internal overflow in synthesis calculation
 144       #define MPC_MULTIPLY_V_PRESCALE(sample, vcoef)  ( (MPC_MULTIPLY_FRACT(sample, vcoef)) << (32-V_COEFFICIENT_EXPAND-OVERFLOW_FIX) )
 145       #define MPC_MULTIPLY_V_POSTSCALE(sample, vcoef) ( (MPC_MULTIPLY_FRACT(sample, vcoef)) << (32-V_COEFFICIENT_EXPAND+OVERFLOW_FIX) )
 146       #define MPC_V_POSTSCALE(sample) (sample<<OVERFLOW_FIX)
 147
 148       // directly use accurate 32bit-coefficients
 149       #define MPC_MAKE_INVCOS(value) (value)
 150    #endif
 151 #else
 152    // for floating point use the standard multiplication macro
 153    #define MPC_MULTIPLY_V          (sample, vcoef) ( MPC_MULTIPLY(sample, vcoef) )
 154    #define MPC_MULTIPLY_V_PRESCALE (sample, vcoef) ( MPC_MULTIPLY(sample, vcoef) )
 155    #define MPC_MULTIPLY_V_POSTSCALE(sample, vcoef) ( MPC_MULTIPLY(sample, vcoef) )
 156    #define MPC_V_POSTSCALE(sample) (sample)
 157
 158    // downscale the accurate 32bit-coefficients and convert to float
 159    #define MPC_MAKE_INVCOS(value) MAKE_MPC_SAMPLE((double)value/(double)(1<<V_COEFFICIENT_EXPAND))
 160 #endif
 161
 162 // define constants for DCT-synthesis
 163 // INVCOSxx = (0.5 / cos(xx*PI/64)) << 27, <<27 to saturate to +/- 2^31
 164 #define INVCOS01 MPC_MAKE_INVCOS(  67189797)
 165 #define INVCOS02 MPC_MAKE_INVCOS(  67433575)
 166 #define INVCOS03 MPC_MAKE_INVCOS(  67843164)
 167 #define INVCOS04 MPC_MAKE_INVCOS(  68423604)
 168 #define INVCOS05 MPC_MAKE_INVCOS(  69182167)
 169 #define INVCOS06 MPC_MAKE_INVCOS(  70128577)
 170 #define INVCOS07 MPC_MAKE_INVCOS(  71275330)
 171 #define INVCOS08 MPC_MAKE_INVCOS(  72638111)
 172 #define INVCOS09 MPC_MAKE_INVCOS(  74236348)
 173 #define INVCOS10 MPC_MAKE_INVCOS(  76093940)
 174 #define INVCOS11 MPC_MAKE_INVCOS(  78240207)
 175 #define INVCOS12 MPC_MAKE_INVCOS(  80711144)
 176 #define INVCOS13 MPC_MAKE_INVCOS(  83551089)
 177 #define INVCOS14 MPC_MAKE_INVCOS(  86814950)
 178 #define INVCOS15 MPC_MAKE_INVCOS(  90571242)
 179 #define INVCOS16 MPC_MAKE_INVCOS(  94906266)
 180 #define INVCOS17 MPC_MAKE_INVCOS(  99929967)
 181 #define INVCOS18 MPC_MAKE_INVCOS( 105784321)
 182 #define INVCOS19 MPC_MAKE_INVCOS( 112655602)
 183 #define INVCOS20 MPC_MAKE_INVCOS( 120792764)
 184 #define INVCOS21 MPC_MAKE_INVCOS( 130535899)
 185 #define INVCOS22 MPC_MAKE_INVCOS( 142361749)
 186 #define INVCOS23 MPC_MAKE_INVCOS( 156959571)
 187 #define INVCOS24 MPC_MAKE_INVCOS( 175363913)
 188 #define INVCOS25 MPC_MAKE_INVCOS( 199201203)
 189 #define INVCOS26 MPC_MAKE_INVCOS( 231182936)
 190 #define INVCOS27 MPC_MAKE_INVCOS( 276190692)
 191 #define INVCOS28 MPC_MAKE_INVCOS( 343988688)
 192 #define INVCOS29 MPC_MAKE_INVCOS( 457361460)
 193 #define INVCOS30 MPC_MAKE_INVCOS( 684664578)
 194 #define INVCOS31 MPC_MAKE_INVCOS(1367679739)
 195
 196 void
 197 mpc_calculate_new_V ( const MPC_SAMPLE_FORMAT * Sample, MPC_SAMPLE_FORMAT * V )
 198 ICODE_ATTR_MPC_LARGE_IRAM;
 199
 200 void
 201 mpc_calculate_new_V ( const MPC_SAMPLE_FORMAT * Sample, MPC_SAMPLE_FORMAT * V )
 202 {
 203     // Calculating new V-buffer values for left channel
 204     // calculate new V-values (ISO-11172-3, p. 39)
 205     // based upon fast-MDCT algorithm by Byeong Gi Lee
 206     MPC_SAMPLE_FORMAT A[16];
 207     MPC_SAMPLE_FORMAT B[16];
 208     MPC_SAMPLE_FORMAT tmp;
 209
 210     A[ 0] = Sample[ 0] + Sample[31];
 211     A[ 1] = Sample[ 1] + Sample[30];
 212     A[ 2] = Sample[ 2] + Sample[29];
 213     A[ 3] = Sample[ 3] + Sample[28];
 214     A[ 4] = Sample[ 4] + Sample[27];
 215     A[ 5] = Sample[ 5] + Sample[26];
 216     A[ 6] = Sample[ 6] + Sample[25];
 217     A[ 7] = Sample[ 7] + Sample[24];
 218     A[ 8] = Sample[ 8] + Sample[23];
 219     A[ 9] = Sample[ 9] + Sample[22];
 220     A[10] = Sample[10] + Sample[21];
 221     A[11] = Sample[11] + Sample[20];
 222     A[12] = Sample[12] + Sample[19];
 223     A[13] = Sample[13] + Sample[18];
 224     A[14] = Sample[14] + Sample[17];
 225     A[15] = Sample[15] + Sample[16];
 226     // 16 adds
 227
 228     B[ 0] = A[ 0] + A[15];
 229     B[ 1] = A[ 1] + A[14];
 230     B[ 2] = A[ 2] + A[13];
 231     B[ 3] = A[ 3] + A[12];
 232     B[ 4] = A[ 4] + A[11];
 233     B[ 5] = A[ 5] + A[10];
 234     B[ 6] = A[ 6] + A[ 9];
 235     B[ 7] = A[ 7] + A[ 8];;
 236     B[ 8] = MPC_MULTIPLY_V((A[ 0] - A[15]), INVCOS02);
 237     B[ 9] = MPC_MULTIPLY_V((A[ 1] - A[14]), INVCOS06);
 238     B[10] = MPC_MULTIPLY_V((A[ 2] - A[13]), INVCOS10);
 239     B[11] = MPC_MULTIPLY_V((A[ 3] - A[12]), INVCOS14);
 240     B[12] = MPC_MULTIPLY_V((A[ 4] - A[11]), INVCOS18);
 241     B[13] = MPC_MULTIPLY_V((A[ 5] - A[10]), INVCOS22);
 242     B[14] = MPC_MULTIPLY_V((A[ 6] - A[ 9]), INVCOS26);
 243     B[15] = MPC_MULTIPLY_V((A[ 7] - A[ 8]), INVCOS30);
 244     // 8 adds, 8 subs, 8 muls, 8 shifts
 245
 246     A[ 0] = B[ 0] + B[ 7];
 247     A[ 1] = B[ 1] + B[ 6];
 248     A[ 2] = B[ 2] + B[ 5];
 249     A[ 3] = B[ 3] + B[ 4];
 250     A[ 4] = MPC_MULTIPLY_V((B[ 0] - B[ 7]), INVCOS04);
 251     A[ 5] = MPC_MULTIPLY_V((B[ 1] - B[ 6]), INVCOS12);
 252     A[ 6] = MPC_MULTIPLY_V((B[ 2] - B[ 5]), INVCOS20);
 253     A[ 7] = MPC_MULTIPLY_V((B[ 3] - B[ 4]), INVCOS28);
 254     A[ 8] = B[ 8] + B[15];
 255     A[ 9] = B[ 9] + B[14];
 256     A[10] = B[10] + B[13];
 257     A[11] = B[11] + B[12];
 258     A[12] = MPC_MULTIPLY_V((B[ 8] - B[15]), INVCOS04);
 259     A[13] = MPC_MULTIPLY_V((B[ 9] - B[14]), INVCOS12);
 260     A[14] = MPC_MULTIPLY_V((B[10] - B[13]), INVCOS20);
 261     A[15] = MPC_MULTIPLY_V((B[11] - B[12]), INVCOS28);
 262     // 8 adds, 8 subs, 8 muls, 8 shifts
 263
 264     B[ 0] = A[ 0] + A[ 3];
 265     B[ 1] = A[ 1] + A[ 2];
 266     B[ 2] = MPC_MULTIPLY_V((A[ 0] - A[ 3]), INVCOS08);
 267     B[ 3] = MPC_MULTIPLY_V((A[ 1] - A[ 2]), INVCOS24);
 268     B[ 4] = A[ 4] + A[ 7];
 269     B[ 5] = A[ 5] + A[ 6];
 270     B[ 6] = MPC_MULTIPLY_V((A[ 4] - A[ 7]), INVCOS08);
 271     B[ 7] = MPC_MULTIPLY_V((A[ 5] - A[ 6]), INVCOS24);
 272     B[ 8] = A[ 8] + A[11];
 273     B[ 9] = A[ 9] + A[10];
 274     B[10] = MPC_MULTIPLY_V((A[ 8] - A[11]), INVCOS08);
 275     B[11] = MPC_MULTIPLY_V((A[ 9] - A[10]), INVCOS24);
 276     B[12] = A[12] + A[15];
 277     B[13] = A[13] + A[14];
 278     B[14] = MPC_MULTIPLY_V((A[12] - A[15]), INVCOS08);
 279     B[15] = MPC_MULTIPLY_V((A[13] - A[14]), INVCOS24);
 280     // 8 adds, 8 subs, 8 muls, 8 shifts
 281
 282     A[ 0] = B[ 0] + B[ 1];
 283     A[ 1] = MPC_MULTIPLY_V((B[ 0] - B[ 1]), INVCOS16);
 284     A[ 2] = B[ 2] + B[ 3];
 285     A[ 3] = MPC_MULTIPLY_V((B[ 2] - B[ 3]), INVCOS16);
 286     A[ 4] = B[ 4] + B[ 5];
 287     A[ 5] = MPC_MULTIPLY_V((B[ 4] - B[ 5]), INVCOS16);
 288     A[ 6] = B[ 6] + B[ 7];
 289     A[ 7] = MPC_MULTIPLY_V((B[ 6] - B[ 7]), INVCOS16);
 290     A[ 8] = B[ 8] + B[ 9];
 291     A[ 9] = MPC_MULTIPLY_V((B[ 8] - B[ 9]), INVCOS16);
 292     A[10] = B[10] + B[11];
 293     A[11] = MPC_MULTIPLY_V((B[10] - B[11]), INVCOS16);
 294     A[12] = B[12] + B[13];
 295     A[13] = MPC_MULTIPLY_V((B[12] - B[13]), INVCOS16);
 296     A[14] = B[14] + B[15];
 297     A[15] = MPC_MULTIPLY_V((B[14] - B[15]), INVCOS16);
 298     // 8 adds, 8 subs, 8 muls, 8 shifts
 299
 300     // multiple used expressions: -(A[12] + A[14] + A[15])
 301     V[48] = -A[ 0];
 302     V[ 0] =  A[ 1];
 303     V[40] = -A[ 2] - (V[ 8] = A[ 3]);
 304     V[36] = -((V[ 4] = A[ 5] + (V[12] = A[ 7])) + A[ 6]);
 305     V[44] = - A[ 4] - A[ 6] - A[ 7];
 306     V[ 6] = (V[10] = A[11] + (V[14] = A[15])) + A[13];
 307     V[38] = (V[34] = -(V[ 2] = A[ 9] + A[13] + A[15]) - A[14]) + A[ 9] - A[10] - A[11];
 308     V[46] = (tmp = -(A[12] + A[14] + A[15])) - A[ 8];
 309     V[42] = tmp - A[10] - A[11];
 310     // 9 adds, 9 subs
 311
 312     A[ 0] = MPC_MULTIPLY_V_PRESCALE((Sample[ 0] - Sample[31]), INVCOS01);
 313     A[ 1] = MPC_MULTIPLY_V_PRESCALE((Sample[ 1] - Sample[30]), INVCOS03);
 314     A[ 2] = MPC_MULTIPLY_V_PRESCALE((Sample[ 2] - Sample[29]), INVCOS05);
 315     A[ 3] = MPC_MULTIPLY_V_PRESCALE((Sample[ 3] - Sample[28]), INVCOS07);
 316     A[ 4] = MPC_MULTIPLY_V_PRESCALE((Sample[ 4] - Sample[27]), INVCOS09);
 317     A[ 5] = MPC_MULTIPLY_V_PRESCALE((Sample[ 5] - Sample[26]), INVCOS11);
 318     A[ 6] = MPC_MULTIPLY_V_PRESCALE((Sample[ 6] - Sample[25]), INVCOS13);
 319     A[ 7] = MPC_MULTIPLY_V_PRESCALE((Sample[ 7] - Sample[24]), INVCOS15);
 320     A[ 8] = MPC_MULTIPLY_V_PRESCALE((Sample[ 8] - Sample[23]), INVCOS17);
 321     A[ 9] = MPC_MULTIPLY_V_PRESCALE((Sample[ 9] - Sample[22]), INVCOS19);
 322     A[10] = MPC_MULTIPLY_V_PRESCALE((Sample[10] - Sample[21]), INVCOS21);
 323     A[11] = MPC_MULTIPLY_V_PRESCALE((Sample[11] - Sample[20]), INVCOS23);
 324     A[12] = MPC_MULTIPLY_V_PRESCALE((Sample[12] - Sample[19]), INVCOS25);
 325     A[13] = MPC_MULTIPLY_V_PRESCALE((Sample[13] - Sample[18]), INVCOS27);
 326     A[14] = MPC_MULTIPLY_V_PRESCALE((Sample[14] - Sample[17]), INVCOS29);
 327     A[15] = MPC_MULTIPLY_V_PRESCALE((Sample[15] - Sample[16]), INVCOS31);
 328     // 16 subs, 16 muls, 16 shifts
 329
 330     B[ 0] = A[ 0] + A[15];
 331     B[ 1] = A[ 1] + A[14];
 332     B[ 2] = A[ 2] + A[13];
 333     B[ 3] = A[ 3] + A[12];
 334     B[ 4] = A[ 4] + A[11];
 335     B[ 5] = A[ 5] + A[10];
 336     B[ 6] = A[ 6] + A[ 9];
 337     B[ 7] = A[ 7] + A[ 8];
 338     B[ 8] = MPC_MULTIPLY_V((A[ 0] - A[15]), INVCOS02);
 339     B[ 9] = MPC_MULTIPLY_V((A[ 1] - A[14]), INVCOS06);
 340     B[10] = MPC_MULTIPLY_V((A[ 2] - A[13]), INVCOS10);
 341     B[11] = MPC_MULTIPLY_V((A[ 3] - A[12]), INVCOS14);
 342     B[12] = MPC_MULTIPLY_V((A[ 4] - A[11]), INVCOS18);
 343     B[13] = MPC_MULTIPLY_V((A[ 5] - A[10]), INVCOS22);
 344     B[14] = MPC_MULTIPLY_V((A[ 6] - A[ 9]), INVCOS26);
 345     B[15] = MPC_MULTIPLY_V((A[ 7] - A[ 8]), INVCOS30);
 346     // 8 adds, 8 subs, 8 muls, 8 shift
 347
 348     A[ 0] = B[ 0] + B[ 7];
 349     A[ 1] = B[ 1] + B[ 6];
 350     A[ 2] = B[ 2] + B[ 5];
 351     A[ 3] = B[ 3] + B[ 4];
 352     A[ 4] = MPC_MULTIPLY_V((B[ 0] - B[ 7]), INVCOS04);
 353     A[ 5] = MPC_MULTIPLY_V((B[ 1] - B[ 6]), INVCOS12);
 354     A[ 6] = MPC_MULTIPLY_V((B[ 2] - B[ 5]), INVCOS20);
 355     A[ 7] = MPC_MULTIPLY_V((B[ 3] - B[ 4]), INVCOS28);
 356     A[ 8] = B[ 8] + B[15];
 357     A[ 9] = B[ 9] + B[14];
 358     A[10] = B[10] + B[13];
 359     A[11] = B[11] + B[12];
 360     A[12] = MPC_MULTIPLY_V((B[ 8] - B[15]), INVCOS04);
 361     A[13] = MPC_MULTIPLY_V((B[ 9] - B[14]), INVCOS12);
 362     A[14] = MPC_MULTIPLY_V((B[10] - B[13]), INVCOS20);
 363     A[15] = MPC_MULTIPLY_V((B[11] - B[12]), INVCOS28);
 364     // 8 adds, 8 subs, 8 muls, 8 shift
 365
 366     B[ 0] = A[ 0] + A[ 3];
 367     B[ 1] = A[ 1] + A[ 2];
 368     B[ 2] = MPC_MULTIPLY_V((A[ 0] - A[ 3]), INVCOS08);
 369     B[ 3] = MPC_MULTIPLY_V((A[ 1] - A[ 2]), INVCOS24);
 370     B[ 4] = A[ 4] + A[ 7];
 371     B[ 5] = A[ 5] + A[ 6];
 372     B[ 6] = MPC_MULTIPLY_V((A[ 4] - A[ 7]), INVCOS08);
 373     B[ 7] = MPC_MULTIPLY_V((A[ 5] - A[ 6]), INVCOS24);
 374     B[ 8] = A[ 8] + A[11];
 375     B[ 9] = A[ 9] + A[10];
 376     B[10] = MPC_MULTIPLY_V((A[ 8] - A[11]), INVCOS08);
 377     B[11] = MPC_MULTIPLY_V((A[ 9] - A[10]), INVCOS24);
 378     B[12] = A[12] + A[15];
 379     B[13] = A[13] + A[14];
 380     B[14] = MPC_MULTIPLY_V((A[12] - A[15]), INVCOS08);
 381     B[15] = MPC_MULTIPLY_V((A[13] - A[14]), INVCOS24);
 382     // 8 adds, 8 subs, 8 muls, 8 shift
 383
 384     A[ 0] = MPC_V_POSTSCALE((B[ 0] + B[ 1]));
 385     A[ 1] = MPC_MULTIPLY_V_POSTSCALE((B[ 0] - B[ 1]), INVCOS16);
 386     A[ 2] = MPC_V_POSTSCALE((B[ 2] + B[ 3]));
 387     A[ 3] = MPC_MULTIPLY_V_POSTSCALE((B[ 2] - B[ 3]), INVCOS16);
 388     A[ 4] = MPC_V_POSTSCALE((B[ 4] + B[ 5]));
 389     A[ 5] = MPC_MULTIPLY_V_POSTSCALE((B[ 4] - B[ 5]), INVCOS16);
 390     A[ 6] = MPC_V_POSTSCALE((B[ 6] + B[ 7]));
 391     A[ 7] = MPC_MULTIPLY_V_POSTSCALE((B[ 6] - B[ 7]), INVCOS16);
 392     A[ 8] = MPC_V_POSTSCALE((B[ 8] + B[ 9]));
 393     A[ 9] = MPC_MULTIPLY_V_POSTSCALE((B[ 8] - B[ 9]), INVCOS16);
 394     A[10] = MPC_V_POSTSCALE((B[10] + B[11]));
 395     A[11] = MPC_MULTIPLY_V_POSTSCALE((B[10] - B[11]), INVCOS16);
 396     A[12] = MPC_V_POSTSCALE((B[12] + B[13]));
 397     A[13] = MPC_MULTIPLY_V_POSTSCALE((B[12] - B[13]), INVCOS16);
 398     A[14] = MPC_V_POSTSCALE((B[14] + B[15]));
 399     A[15] = MPC_MULTIPLY_V_POSTSCALE((B[14] - B[15]), INVCOS16);
 400     // 8 adds, 8 subs, 8 muls, 8 shift
 401
 402     // multiple used expressions: A[ 4]+A[ 6]+A[ 7], A[ 9]+A[13]+A[15]
 403     V[ 5] = (V[11] = (V[13] = A[ 7] + (V[15] = A[15])) + A[11]) + A[ 5] + A[13];
 404     V[ 7] = (V[ 9] = A[ 3] + A[11] + A[15]) + A[13];
 405     V[33] = -(V[ 1] = A[ 1] + A[ 9] + A[13] + A[15]) - A[14];
 406     V[35] = -(V[ 3] = A[ 5] + A[ 7] + A[ 9] + A[13] + A[15]) - A[ 6] - A[14];
 407     V[37] = (tmp = -(A[10] + A[11] + A[13] + A[14] + A[15])) - A[ 5] - A[ 6] - A[ 7];
 408     V[39] = tmp - A[ 2] - A[ 3];
 409     V[41] = (tmp += A[13] - A[12]) - A[ 2] - A[ 3];
 410     V[43] = tmp - A[ 4] - A[ 6] - A[ 7];
 411     V[47] = (tmp = -(A[ 8] + A[12] + A[14] + A[15])) - A[ 0];
 412     V[45] = tmp - A[ 4] - A[ 6] - A[ 7];
 413     // 22 adds, 18 subs
 414
 415     V[32] = -(V[ 0] = MPC_V_PRESHIFT(V[ 0]));
 416     V[31] = -(V[ 1] = MPC_V_PRESHIFT(V[ 1]));
 417     V[30] = -(V[ 2] = MPC_V_PRESHIFT(V[ 2]));
 418     V[29] = -(V[ 3] = MPC_V_PRESHIFT(V[ 3]));
 419     V[28] = -(V[ 4] = MPC_V_PRESHIFT(V[ 4]));
 420     V[27] = -(V[ 5] = MPC_V_PRESHIFT(V[ 5]));
 421     V[26] = -(V[ 6] = MPC_V_PRESHIFT(V[ 6]));
 422     V[25] = -(V[ 7] = MPC_V_PRESHIFT(V[ 7]));
 423     V[24] = -(V[ 8] = MPC_V_PRESHIFT(V[ 8]));
 424     V[23] = -(V[ 9] = MPC_V_PRESHIFT(V[ 9]));
 425     V[22] = -(V[10] = MPC_V_PRESHIFT(V[10]));
 426     V[21] = -(V[11] = MPC_V_PRESHIFT(V[11]));
 427     V[20] = -(V[12] = MPC_V_PRESHIFT(V[12]));
 428     V[19] = -(V[13] = MPC_V_PRESHIFT(V[13]));
 429     V[18] = -(V[14] = MPC_V_PRESHIFT(V[14]));
 430     V[17] = -(V[15] = MPC_V_PRESHIFT(V[15]));
 431     // 16 adds, 16 shifts (OPTIMIZE_FOR_SPEED only)
 432
 433     V[63] =  (V[33] = MPC_V_PRESHIFT(V[33]));
 434     V[62] =  (V[34] = MPC_V_PRESHIFT(V[34]));
 435     V[61] =  (V[35] = MPC_V_PRESHIFT(V[35]));
 436     V[60] =  (V[36] = MPC_V_PRESHIFT(V[36]));
 437     V[59] =  (V[37] = MPC_V_PRESHIFT(V[37]));
 438     V[58] =  (V[38] = MPC_V_PRESHIFT(V[38]));
 439     V[57] =  (V[39] = MPC_V_PRESHIFT(V[39]));
 440     V[56] =  (V[40] = MPC_V_PRESHIFT(V[40]));
 441     V[55] =  (V[41] = MPC_V_PRESHIFT(V[41]));
 442     V[54] =  (V[42] = MPC_V_PRESHIFT(V[42]));
 443     V[53] =  (V[43] = MPC_V_PRESHIFT(V[43]));
 444     V[52] =  (V[44] = MPC_V_PRESHIFT(V[44]));
 445     V[51] =  (V[45] = MPC_V_PRESHIFT(V[45]));
 446     V[50] =  (V[46] = MPC_V_PRESHIFT(V[46]));
 447     V[49] =  (V[47] = MPC_V_PRESHIFT(V[47]));
 448     V[48] =  (V[48] = MPC_V_PRESHIFT(V[48]));
 449     // 16 adds, 16 shifts (OPTIMIZE_FOR_SPEED only)
 450
 451     // OPTIMIZE_FOR_SPEED total: 143 adds, 107 subs, 80 muls, 112 shifts
 452     //                    total: 111 adds, 107 subs, 80 muls,  80 shifts
 453 }
 454
 455 #if defined(CPU_ARM)
 456 extern void
 457 mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data,
 458                         const MPC_SAMPLE_FORMAT * V,
 459                         const MPC_SAMPLE_FORMAT * D);
 460 #else
 461 static void
 462 mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data,
 463                         const MPC_SAMPLE_FORMAT * V,
 464                         const MPC_SAMPLE_FORMAT * D)
 465 {
 466    mpc_int32_t k;
 467
 468    #if defined(OPTIMIZE_FOR_SPEED)
 469       // 32=32x32-multiply (FIXED_POINT)
 470       for ( k = 0; k < 32; k++, D += 16, V++ )
 471       {
 472          *Data = V[  0]*D[ 0] + V[ 96]*D[ 1] + V[128]*D[ 2] + V[224]*D[ 3]
 473                + V[256]*D[ 4] + V[352]*D[ 5] + V[384]*D[ 6] + V[480]*D[ 7]
 474                + V[512]*D[ 8] + V[608]*D[ 9] + V[640]*D[10] + V[736]*D[11]
 475                + V[768]*D[12] + V[864]*D[13] + V[896]*D[14] + V[992]*D[15];
 476          *Data >>= 1; // post shift to compensate for pre-shifting
 477          Data += 1;
 478          // total: 32 * (16 muls, 15 adds)
 479       }
 480    #else
 481       #if defined(CPU_COLDFIRE)
 482          // 64=32x32-multiply assembler for Coldfire
 483          for ( k = 0; k < 32; k++, D += 16, V++ )
 484          {
 485             asm volatile (
 486                "movem.l (%[D]), %%d0-%%d3                    \n\t"
 487                "move.l (%[V]), %%a5                          \n\t"
 488                "mac.l %%d0, %%a5, (96*4, %[V]), %%a5, %%acc0 \n\t"
 489                "mac.l %%d1, %%a5, (128*4, %[V]), %%a5, %%acc0\n\t"
 490                "mac.l %%d2, %%a5, (224*4, %[V]), %%a5, %%acc0\n\t"
 491                "mac.l %%d3, %%a5, (256*4, %[V]), %%a5, %%acc0\n\t"
 492                "movem.l (4*4, %[D]), %%d0-%%d3               \n\t"
 493                "mac.l %%d0, %%a5, (352*4, %[V]), %%a5, %%acc0\n\t"
 494                "mac.l %%d1, %%a5, (384*4, %[V]), %%a5, %%acc0\n\t"
 495                "mac.l %%d2, %%a5, (480*4, %[V]), %%a5, %%acc0\n\t"
 496                "mac.l %%d3, %%a5, (512*4, %[V]), %%a5, %%acc0\n\t"
 497                "movem.l (8*4, %[D]), %%d0-%%d3               \n\t"
 498                "mac.l %%d0, %%a5, (608*4, %[V]), %%a5, %%acc0\n\t"
 499                "mac.l %%d1, %%a5, (640*4, %[V]), %%a5, %%acc0\n\t"
 500                "mac.l %%d2, %%a5, (736*4, %[V]), %%a5, %%acc0\n\t"
 501                "mac.l %%d3, %%a5, (768*4, %[V]), %%a5, %%acc0\n\t"
 502                "movem.l (12*4, %[D]), %%d0-%%d3              \n\t"
 503                "mac.l %%d0, %%a5, (864*4, %[V]), %%a5, %%acc0\n\t"
 504                "mac.l %%d1, %%a5, (896*4, %[V]), %%a5, %%acc0\n\t"
 505                "mac.l %%d2, %%a5, (992*4, %[V]), %%a5, %%acc0\n\t"
 506                "mac.l %%d3, %%a5, %%acc0                     \n\t"
 507                "movclr.l %%acc0, %%d0                        \n\t"
 508                "lsl.l #1, %%d0                               \n\t"
 509                "move.l %%d0, (%[Data])+                      \n"
 510                : [Data] "+a" (Data)
 511                : [V] "a" (V), [D] "a" (D)
 512                : "d0", "d1", "d2", "d3", "a5");
 513          }
 514       #else
 515          // 64=64x64-multiply (FIXED_POINT) or float=float*float (!FIXED_POINT) in C
 516          for ( k = 0; k < 32; k++, D += 16, V++ )
 517          {
 518             *Data = MPC_MULTIPLY_EX(V[  0],D[ 0],30) + MPC_MULTIPLY_EX(V[ 96],D[ 1],30)
 519                   + MPC_MULTIPLY_EX(V[128],D[ 2],30) + MPC_MULTIPLY_EX(V[224],D[ 3],30)
 520                   + MPC_MULTIPLY_EX(V[256],D[ 4],30) + MPC_MULTIPLY_EX(V[352],D[ 5],30)
 521                   + MPC_MULTIPLY_EX(V[384],D[ 6],30) + MPC_MULTIPLY_EX(V[480],D[ 7],30)
 522                   + MPC_MULTIPLY_EX(V[512],D[ 8],30) + MPC_MULTIPLY_EX(V[608],D[ 9],30)
 523                   + MPC_MULTIPLY_EX(V[640],D[10],30) + MPC_MULTIPLY_EX(V[736],D[11],30)
 524                   + MPC_MULTIPLY_EX(V[768],D[12],30) + MPC_MULTIPLY_EX(V[864],D[13],30)
 525                   + MPC_MULTIPLY_EX(V[896],D[14],30) + MPC_MULTIPLY_EX(V[992],D[15],30);
 526             Data += 1;
 527             // total: 16 muls, 15 adds, 16 shifts
 528          }
 529       #endif
 530    #endif
 531 }
 532 #endif /* CPU_ARM */
 533
 534 static void
 535 mpc_full_synthesis_filter(MPC_SAMPLE_FORMAT *OutData, MPC_SAMPLE_FORMAT *V, const MPC_SAMPLE_FORMAT *Y)
 536 {
 537     mpc_uint32_t n;
 538
 539     if (NULL != OutData)
 540     {
 541         for ( n = 0; n < 36; n++, Y += 32, OutData += 32 )
 542         {
 543             V -= 64;
 544             mpc_calculate_new_V ( Y, V );
 545             mpc_decoder_windowing_D( OutData, V, Di_opt );
 546         }
 547      }
 548 }
 549
 550 void
 551 mpc_decoder_synthese_filter_float(mpc_decoder *d, MPC_SAMPLE_FORMAT *OutData)
 552 {
 553     /********* left channel ********/
 554     memmove(d->V_L + MPC_V_MEM, d->V_L, 960 * sizeof(MPC_SAMPLE_FORMAT) );
 555
 556     mpc_full_synthesis_filter(
 557         OutData,
 558         (MPC_SAMPLE_FORMAT *)(d->V_L + MPC_V_MEM),
 559         (MPC_SAMPLE_FORMAT *)(d->Y_L [0]));
 560
 561     /******** right channel ********/
 562     memmove(d->V_R + MPC_V_MEM, d->V_R, 960 * sizeof(MPC_SAMPLE_FORMAT) );
 563
 564     mpc_full_synthesis_filter(
 565         (OutData == NULL ? NULL : OutData + MPC_FRAME_LENGTH),
 566         (MPC_SAMPLE_FORMAT *)(d->V_R + MPC_V_MEM),
 567         (MPC_SAMPLE_FORMAT *)(d->Y_R [0]));
 568 }
 569
 570 /*******************************************/
 571 /*                                         */
 572 /*            dithered synthesis           */
 573 /*                                         */
 574 /*******************************************/
 575
 576 static const unsigned char Parity [256] ICONST_ATTR = {  // parity
 577     0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
 578     1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
 579     1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
 580     0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
 581     1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
 582     0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
 583     0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
 584     1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0
 585 };
 586
 587 /*
 588  *  This is a simple random number generator with good quality for audio purposes.
 589  *  It consists of two polycounters with opposite rotation direction and different
 590  *  periods. The periods are coprime, so the total period is the product of both.
 591  *
 592  *     -------------------------------------------------------------------------------------------------
 593  * +-> |31:30:29:28:27:26:25:24:23:22:21:20:19:18:17:16:15:14:13:12:11:10: 9: 8: 7: 6: 5: 4: 3: 2: 1: 0|
 594  * |   -------------------------------------------------------------------------------------------------
 595  * |                                                                          |  |  |  |     |        |
 596  * |                                                                          +--+--+--+-XOR-+--------+
 597  * |                                                                                      |
 598  * +--------------------------------------------------------------------------------------+
 599  *
 600  *     -------------------------------------------------------------------------------------------------
 601  *     |31:30:29:28:27:26:25:24:23:22:21:20:19:18:17:16:15:14:13:12:11:10: 9: 8: 7: 6: 5: 4: 3: 2: 1: 0| <-+
 602  *     -------------------------------------------------------------------------------------------------   |
 603  *       |  |           |  |                                                                               |
 604  *       +--+----XOR----+--+                                                                               |
 605  *                |                                                                                        |
 606  *                +----------------------------------------------------------------------------------------+
 607  *
 608  *
 609  *  The first has an period of 3*5*17*257*65537, the second of 7*47*73*178481,
 610  *  which gives a period of 18.410.713.077.675.721.215. The result is the
 611  *  XORed values of both generators.
 612  */
 613 mpc_uint32_t
 614 mpc_random_int(mpc_decoder *d)
 615 {
 616 #if 1
 617     mpc_uint32_t  t1, t2, t3, t4;
 618
 619     t3   = t1 = d->__r1;   t4   = t2 = d->__r2;  // Parity calculation is done via table lookup, this is also available
 620     t1  &= 0xF5;        t2 >>= 25;               // on CPUs without parity, can be implemented in C and avoid unpredictable
 621     t1   = Parity [t1]; t2  &= 0x63;             // jumps and slow rotate through the carry flag operations.
 622     t1 <<= 31;          t2   = Parity [t2];
 623
 624     return (d->__r1 = (t3 >> 1) | t1 ) ^ (d->__r2 = (t4 + t4) | t2 );
 625 #else
 626     return (d->__r1 = (d->__r1 >> 1) | ((mpc_uint32_t)Parity [d->__r1 & 0xF5] << 31) ) ^
 627         (d->__r2 = (d->__r2 << 1) |  (mpc_uint32_t)Parity [(d->__r2 >> 25) & 0x63] );
 628 #endif
 629 }