Resort operands in mul/smull/mla/smlal to use D[] as second multiplication operand...
[kugel-rb.git] / apps / codecs / libmusepack / synth_filter.c
blob9c8d27eed24dedd5472fc30ec0a7ee018ba79c28
1 /*
2 Copyright (c) 2005, The Musepack Development Team
3 All rights reserved.
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
9 * Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above
13 copyright notice, this list of conditions and the following
14 disclaimer in the documentation and/or other materials provided
15 with the distribution.
17 * Neither the name of the The Musepack Development Team nor the
18 names of its contributors may be used to endorse or promote
19 products derived from this software without specific prior
20 written permission.
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 /// \file synth_filter.c
36 /// Synthesis functions.
37 /// \todo document me
39 #include "musepack.h"
40 #include "internal.h"
42 /* C O N S T A N T S */
43 #undef _
45 #if defined(MPC_FIXED_POINT)
46 #if defined(OPTIMIZE_FOR_SPEED)
47 // round at compile time to +/- 2^14 as a pre-shift before 32=32x32-multiply
48 #define D(value) (MPC_SHR_RND(value, 3))
50 // round at runtime to +/- 2^17 as a pre-shift before 32=32x32-multiply
51 // samples are 18.14 fixed point. 30.2 after this shift, whereas the
52 // 15.2 bits are significant (not including sign)
53 #define MPC_V_PRESHIFT(X) MPC_SHR_RND(X, 12)
55 // in this configuration a post-shift by >>1 is needed after synthesis
56 #else
57 #if defined(CPU_ARM)
58 // do not up-scale D-values to achieve higher speed in smull/mlal
59 // operations. saves ~14/8 = 1.75 cycles per multiplication
60 #define D(value) (value)
62 // in this configuration a post-shift by >>16 is needed after synthesis
63 #else
64 // saturate to +/- 2^31 (= value << (31-17)), D-values are +/- 2^17
65 #define D(value) (value << (14))
66 #endif
67 // do not perform pre-shift
68 #define MPC_V_PRESHIFT(X) (X)
69 #endif
70 #else
71 // IMPORTANT: internal scaling is somehow strange for floating point, therefore we scale the coefficients Di_opt
72 // by the correct amount to have proper scaled output
73 #define D(value) MAKE_MPC_SAMPLE((double)value*(double)(0x1000))
75 // do not perform pre-shift
76 #define MPC_V_PRESHIFT(X) (X)
77 #endif
79 // Di_opt coefficients are +/- 2^17 (pre-shifted by <<16)
80 static const MPC_SAMPLE_FORMAT Di_opt [512] ICONST_ATTR = {
81 /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */
82 /* 0 */ D( 0), -D( 29), D( 213), -D( 459), D(2037), -D(5153), D( 6574), -D(37489), D(75038), D(37489), D(6574), D(5153), D(2037), D(459), D(213), D(29),
83 /* 1 */ -D( 1), -D( 31), D( 218), -D( 519), D(2000), -D(5517), D( 5959), -D(39336), D(74992), D(35640), D(7134), D(4788), D(2063), D(401), D(208), D(26),
84 /* 2 */ -D( 1), -D( 35), D( 222), -D( 581), D(1952), -D(5879), D( 5288), -D(41176), D(74856), D(33791), D(7640), D(4425), D(2080), D(347), D(202), D(24),
85 /* 3 */ -D( 1), -D( 38), D( 225), -D( 645), D(1893), -D(6237), D( 4561), -D(43006), D(74630), D(31947), D(8092), D(4063), D(2087), D(294), D(196), D(21),
86 /* 4 */ -D( 1), -D( 41), D( 227), -D( 711), D(1822), -D(6589), D( 3776), -D(44821), D(74313), D(30112), D(8492), D(3705), D(2085), D(244), D(190), D(19),
87 /* 5 */ -D( 1), -D( 45), D( 228), -D( 779), D(1739), -D(6935), D( 2935), -D(46617), D(73908), D(28289), D(8840), D(3351), D(2075), D(197), D(183), D(17),
88 /* 6 */ -D( 1), -D( 49), D( 228), -D( 848), D(1644), -D(7271), D( 2037), -D(48390), D(73415), D(26482), D(9139), D(3004), D(2057), D(153), D(176), D(16),
89 /* 7 */ -D( 2), -D( 53), D( 227), -D( 919), D(1535), -D(7597), D( 1082), -D(50137), D(72835), D(24694), D(9389), D(2663), D(2032), D(111), D(169), D(14),
90 /* 8 */ -D( 2), -D( 58), D( 224), -D( 991), D(1414), -D(7910), D( 70), -D(51853), D(72169), D(22929), D(9592), D(2330), D(2001), D( 72), D(161), D(13),
91 /* 9 */ -D( 2), -D( 63), D( 221), -D(1064), D(1280), -D(8209), -D( 998), -D(53534), D(71420), D(21189), D(9750), D(2006), D(1962), D( 36), D(154), D(11),
92 /* 10 */ -D( 2), -D( 68), D( 215), -D(1137), D(1131), -D(8491), -D( 2122), -D(55178), D(70590), D(19478), D(9863), D(1692), D(1919), D( 2), D(147), D(10),
93 /* 11 */ -D( 3), -D( 73), D( 208), -D(1210), D( 970), -D(8755), -D( 3300), -D(56778), D(69679), D(17799), D(9935), D(1388), D(1870), -D( 29), D(139), D( 9),
94 /* 12 */ -D( 3), -D( 79), D( 200), -D(1283), D( 794), -D(8998), -D( 4533), -D(58333), D(68692), D(16155), D(9966), D(1095), D(1817), -D( 57), D(132), D( 8),
95 /* 13 */ -D( 4), -D( 85), D( 189), -D(1356), D( 605), -D(9219), -D( 5818), -D(59838), D(67629), D(14548), D(9959), D( 814), D(1759), -D( 83), D(125), D( 7),
96 /* 14 */ -D( 4), -D( 91), D( 177), -D(1428), D( 402), -D(9416), -D( 7154), -D(61289), D(66494), D(12980), D(9916), D( 545), D(1698), -D(106), D(117), D( 7),
97 /* 15 */ -D( 5), -D( 97), D( 163), -D(1498), D( 185), -D(9585), -D( 8540), -D(62684), D(65290), D(11455), D(9838), D( 288), D(1634), -D(127), D(111), D( 6),
98 /* 16 */ -D( 5), -D(104), D( 146), -D(1567), -D( 45), -D(9727), -D( 9975), -D(64019), D(64019), D( 9975), D(9727), D( 45), D(1567), -D(146), D(104), D( 5),
99 /* 17 */ -D( 6), -D(111), D( 127), -D(1634), -D( 288), -D(9838), -D(11455), -D(65290), D(62684), D( 8540), D(9585), -D( 185), D(1498), -D(163), D( 97), D( 5),
100 /* 18 */ -D( 7), -D(117), D( 106), -D(1698), -D( 545), -D(9916), -D(12980), -D(66494), D(61289), D( 7154), D(9416), -D( 402), D(1428), -D(177), D( 91), D( 4),
101 /* 19 */ -D( 7), -D(125), D( 83), -D(1759), -D( 814), -D(9959), -D(14548), -D(67629), D(59838), D( 5818), D(9219), -D( 605), D(1356), -D(189), D( 85), D( 4),
102 /* 20 */ -D( 8), -D(132), D( 57), -D(1817), -D(1095), -D(9966), -D(16155), -D(68692), D(58333), D( 4533), D(8998), -D( 794), D(1283), -D(200), D( 79), D( 3),
103 /* 21 */ -D( 9), -D(139), D( 29), -D(1870), -D(1388), -D(9935), -D(17799), -D(69679), D(56778), D( 3300), D(8755), -D( 970), D(1210), -D(208), D( 73), D( 3),
104 /* 22 */ -D(10), -D(147), -D( 2), -D(1919), -D(1692), -D(9863), -D(19478), -D(70590), D(55178), D( 2122), D(8491), -D(1131), D(1137), -D(215), D( 68), D( 2),
105 /* 23 */ -D(11), -D(154), -D( 36), -D(1962), -D(2006), -D(9750), -D(21189), -D(71420), D(53534), D( 998), D(8209), -D(1280), D(1064), -D(221), D( 63), D( 2),
106 /* 24 */ -D(13), -D(161), -D( 72), -D(2001), -D(2330), -D(9592), -D(22929), -D(72169), D(51853), -D( 70), D(7910), -D(1414), D( 991), -D(224), D( 58), D( 2),
107 /* 25 */ -D(14), -D(169), -D(111), -D(2032), -D(2663), -D(9389), -D(24694), -D(72835), D(50137), -D( 1082), D(7597), -D(1535), D( 919), -D(227), D( 53), D( 2),
108 /* 26 */ -D(16), -D(176), -D(153), -D(2057), -D(3004), -D(9139), -D(26482), -D(73415), D(48390), -D( 2037), D(7271), -D(1644), D( 848), -D(228), D( 49), D( 1),
109 /* 27 */ -D(17), -D(183), -D(197), -D(2075), -D(3351), -D(8840), -D(28289), -D(73908), D(46617), -D( 2935), D(6935), -D(1739), D( 779), -D(228), D( 45), D( 1),
110 /* 28 */ -D(19), -D(190), -D(244), -D(2085), -D(3705), -D(8492), -D(30112), -D(74313), D(44821), -D( 3776), D(6589), -D(1822), D( 711), -D(227), D( 41), D( 1),
111 /* 29 */ -D(21), -D(196), -D(294), -D(2087), -D(4063), -D(8092), -D(31947), -D(74630), D(43006), -D( 4561), D(6237), -D(1893), D( 645), -D(225), D( 38), D( 1),
112 /* 30 */ -D(24), -D(202), -D(347), -D(2080), -D(4425), -D(7640), -D(33791), -D(74856), D(41176), -D( 5288), D(5879), -D(1952), D( 581), -D(222), D( 35), D( 1),
113 /* 31 */ -D(26), -D(208), -D(401), -D(2063), -D(4788), -D(7134), -D(35640), -D(74992), D(39336), -D( 5959), D(5517), -D(2000), D( 519), -D(218), D( 31), D( 1)
116 #undef D
118 // needed to prevent from internal overflow in calculate_V
119 #define OVERFLOW_FIX 1
121 // V-coefficients were expanded (<<) by V_COEFFICIENT_EXPAND
122 #define V_COEFFICIENT_EXPAND 27
124 #if defined(MPC_FIXED_POINT)
125 #if defined(OPTIMIZE_FOR_SPEED)
126 // define 32=32x32-multiplication for DCT-coefficients with samples, vcoef will be pre-shifted on creation
127 // samples are rounded to +/- 2^19 as pre-shift before 32=32x32-multiply
128 #define MPC_MULTIPLY_V(sample, vcoef) ( MPC_SHR_RND(sample, 12) * vcoef )
130 // pre- and postscale are used to avoid internal overflow in synthesis calculation
131 #define MPC_MULTIPLY_V_PRESCALE(sample, vcoef) ( MPC_SHR_RND(sample, (12+OVERFLOW_FIX)) * vcoef )
132 #define MPC_MULTIPLY_V_POSTSCALE(sample, vcoef) ( MPC_SHR_RND(sample, (12-OVERFLOW_FIX)) * vcoef )
133 #define MPC_V_POSTSCALE(sample) (sample<<OVERFLOW_FIX)
135 // round to +/- 2^16 as pre-shift before 32=32x32-multiply
136 #define MPC_MAKE_INVCOS(value) (MPC_SHR_RND(value, 15))
137 #else
138 // define 64=32x32-multiplication for DCT-coefficients with samples. Via usage of MPC_FRACT highly optimized assembler might be used
139 // MULTIPLY_FRACT will do >>32 after multiplication, as V-coef were expanded by V_COEFFICIENT_EXPAND we'll correct this on the result.
140 // Will loose 5bit accuracy on result in fract part without effect on final audio result
141 #define MPC_MULTIPLY_V(sample, vcoef) ( (MPC_MULTIPLY_FRACT(sample, vcoef)) << (32-V_COEFFICIENT_EXPAND) )
143 // pre- and postscale are used to avoid internal overflow in synthesis calculation
144 #define MPC_MULTIPLY_V_PRESCALE(sample, vcoef) ( (MPC_MULTIPLY_FRACT(sample, vcoef)) << (32-V_COEFFICIENT_EXPAND-OVERFLOW_FIX) )
145 #define MPC_MULTIPLY_V_POSTSCALE(sample, vcoef) ( (MPC_MULTIPLY_FRACT(sample, vcoef)) << (32-V_COEFFICIENT_EXPAND+OVERFLOW_FIX) )
146 #define MPC_V_POSTSCALE(sample) (sample<<OVERFLOW_FIX)
148 // directly use accurate 32bit-coefficients
149 #define MPC_MAKE_INVCOS(value) (value)
150 #endif
151 #else
152 // for floating point use the standard multiplication macro
153 #define MPC_MULTIPLY_V (sample, vcoef) ( MPC_MULTIPLY(sample, vcoef) )
154 #define MPC_MULTIPLY_V_PRESCALE (sample, vcoef) ( MPC_MULTIPLY(sample, vcoef) )
155 #define MPC_MULTIPLY_V_POSTSCALE(sample, vcoef) ( MPC_MULTIPLY(sample, vcoef) )
156 #define MPC_V_POSTSCALE(sample) (sample)
158 // downscale the accurate 32bit-coefficients and convert to float
159 #define MPC_MAKE_INVCOS(value) MAKE_MPC_SAMPLE((double)value/(double)(1<<V_COEFFICIENT_EXPAND))
160 #endif
162 // define constants for DCT-synthesis
163 // INVCOSxx = (0.5 / cos(xx*PI/64)) << 27, <<27 to saturate to +/- 2^31
164 #define INVCOS01 MPC_MAKE_INVCOS( 67189797)
165 #define INVCOS02 MPC_MAKE_INVCOS( 67433575)
166 #define INVCOS03 MPC_MAKE_INVCOS( 67843164)
167 #define INVCOS04 MPC_MAKE_INVCOS( 68423604)
168 #define INVCOS05 MPC_MAKE_INVCOS( 69182167)
169 #define INVCOS06 MPC_MAKE_INVCOS( 70128577)
170 #define INVCOS07 MPC_MAKE_INVCOS( 71275330)
171 #define INVCOS08 MPC_MAKE_INVCOS( 72638111)
172 #define INVCOS09 MPC_MAKE_INVCOS( 74236348)
173 #define INVCOS10 MPC_MAKE_INVCOS( 76093940)
174 #define INVCOS11 MPC_MAKE_INVCOS( 78240207)
175 #define INVCOS12 MPC_MAKE_INVCOS( 80711144)
176 #define INVCOS13 MPC_MAKE_INVCOS( 83551089)
177 #define INVCOS14 MPC_MAKE_INVCOS( 86814950)
178 #define INVCOS15 MPC_MAKE_INVCOS( 90571242)
179 #define INVCOS16 MPC_MAKE_INVCOS( 94906266)
180 #define INVCOS17 MPC_MAKE_INVCOS( 99929967)
181 #define INVCOS18 MPC_MAKE_INVCOS( 105784321)
182 #define INVCOS19 MPC_MAKE_INVCOS( 112655602)
183 #define INVCOS20 MPC_MAKE_INVCOS( 120792764)
184 #define INVCOS21 MPC_MAKE_INVCOS( 130535899)
185 #define INVCOS22 MPC_MAKE_INVCOS( 142361749)
186 #define INVCOS23 MPC_MAKE_INVCOS( 156959571)
187 #define INVCOS24 MPC_MAKE_INVCOS( 175363913)
188 #define INVCOS25 MPC_MAKE_INVCOS( 199201203)
189 #define INVCOS26 MPC_MAKE_INVCOS( 231182936)
190 #define INVCOS27 MPC_MAKE_INVCOS( 276190692)
191 #define INVCOS28 MPC_MAKE_INVCOS( 343988688)
192 #define INVCOS29 MPC_MAKE_INVCOS( 457361460)
193 #define INVCOS30 MPC_MAKE_INVCOS( 684664578)
194 #define INVCOS31 MPC_MAKE_INVCOS(1367679739)
196 void
197 mpc_calculate_new_V ( const MPC_SAMPLE_FORMAT * Sample, MPC_SAMPLE_FORMAT * V )
198 ICODE_ATTR_MPC_LARGE_IRAM;
200 void
201 mpc_calculate_new_V ( const MPC_SAMPLE_FORMAT * Sample, MPC_SAMPLE_FORMAT * V )
203 // Calculating new V-buffer values for left channel
204 // calculate new V-values (ISO-11172-3, p. 39)
205 // based upon fast-MDCT algorithm by Byeong Gi Lee
206 MPC_SAMPLE_FORMAT A[16];
207 MPC_SAMPLE_FORMAT B[16];
208 MPC_SAMPLE_FORMAT tmp;
210 A[ 0] = Sample[ 0] + Sample[31];
211 A[ 1] = Sample[ 1] + Sample[30];
212 A[ 2] = Sample[ 2] + Sample[29];
213 A[ 3] = Sample[ 3] + Sample[28];
214 A[ 4] = Sample[ 4] + Sample[27];
215 A[ 5] = Sample[ 5] + Sample[26];
216 A[ 6] = Sample[ 6] + Sample[25];
217 A[ 7] = Sample[ 7] + Sample[24];
218 A[ 8] = Sample[ 8] + Sample[23];
219 A[ 9] = Sample[ 9] + Sample[22];
220 A[10] = Sample[10] + Sample[21];
221 A[11] = Sample[11] + Sample[20];
222 A[12] = Sample[12] + Sample[19];
223 A[13] = Sample[13] + Sample[18];
224 A[14] = Sample[14] + Sample[17];
225 A[15] = Sample[15] + Sample[16];
226 // 16 adds
228 B[ 0] = A[ 0] + A[15];
229 B[ 1] = A[ 1] + A[14];
230 B[ 2] = A[ 2] + A[13];
231 B[ 3] = A[ 3] + A[12];
232 B[ 4] = A[ 4] + A[11];
233 B[ 5] = A[ 5] + A[10];
234 B[ 6] = A[ 6] + A[ 9];
235 B[ 7] = A[ 7] + A[ 8];;
236 B[ 8] = MPC_MULTIPLY_V((A[ 0] - A[15]), INVCOS02);
237 B[ 9] = MPC_MULTIPLY_V((A[ 1] - A[14]), INVCOS06);
238 B[10] = MPC_MULTIPLY_V((A[ 2] - A[13]), INVCOS10);
239 B[11] = MPC_MULTIPLY_V((A[ 3] - A[12]), INVCOS14);
240 B[12] = MPC_MULTIPLY_V((A[ 4] - A[11]), INVCOS18);
241 B[13] = MPC_MULTIPLY_V((A[ 5] - A[10]), INVCOS22);
242 B[14] = MPC_MULTIPLY_V((A[ 6] - A[ 9]), INVCOS26);
243 B[15] = MPC_MULTIPLY_V((A[ 7] - A[ 8]), INVCOS30);
244 // 8 adds, 8 subs, 8 muls, 8 shifts
246 A[ 0] = B[ 0] + B[ 7];
247 A[ 1] = B[ 1] + B[ 6];
248 A[ 2] = B[ 2] + B[ 5];
249 A[ 3] = B[ 3] + B[ 4];
250 A[ 4] = MPC_MULTIPLY_V((B[ 0] - B[ 7]), INVCOS04);
251 A[ 5] = MPC_MULTIPLY_V((B[ 1] - B[ 6]), INVCOS12);
252 A[ 6] = MPC_MULTIPLY_V((B[ 2] - B[ 5]), INVCOS20);
253 A[ 7] = MPC_MULTIPLY_V((B[ 3] - B[ 4]), INVCOS28);
254 A[ 8] = B[ 8] + B[15];
255 A[ 9] = B[ 9] + B[14];
256 A[10] = B[10] + B[13];
257 A[11] = B[11] + B[12];
258 A[12] = MPC_MULTIPLY_V((B[ 8] - B[15]), INVCOS04);
259 A[13] = MPC_MULTIPLY_V((B[ 9] - B[14]), INVCOS12);
260 A[14] = MPC_MULTIPLY_V((B[10] - B[13]), INVCOS20);
261 A[15] = MPC_MULTIPLY_V((B[11] - B[12]), INVCOS28);
262 // 8 adds, 8 subs, 8 muls, 8 shifts
264 B[ 0] = A[ 0] + A[ 3];
265 B[ 1] = A[ 1] + A[ 2];
266 B[ 2] = MPC_MULTIPLY_V((A[ 0] - A[ 3]), INVCOS08);
267 B[ 3] = MPC_MULTIPLY_V((A[ 1] - A[ 2]), INVCOS24);
268 B[ 4] = A[ 4] + A[ 7];
269 B[ 5] = A[ 5] + A[ 6];
270 B[ 6] = MPC_MULTIPLY_V((A[ 4] - A[ 7]), INVCOS08);
271 B[ 7] = MPC_MULTIPLY_V((A[ 5] - A[ 6]), INVCOS24);
272 B[ 8] = A[ 8] + A[11];
273 B[ 9] = A[ 9] + A[10];
274 B[10] = MPC_MULTIPLY_V((A[ 8] - A[11]), INVCOS08);
275 B[11] = MPC_MULTIPLY_V((A[ 9] - A[10]), INVCOS24);
276 B[12] = A[12] + A[15];
277 B[13] = A[13] + A[14];
278 B[14] = MPC_MULTIPLY_V((A[12] - A[15]), INVCOS08);
279 B[15] = MPC_MULTIPLY_V((A[13] - A[14]), INVCOS24);
280 // 8 adds, 8 subs, 8 muls, 8 shifts
282 A[ 0] = B[ 0] + B[ 1];
283 A[ 1] = MPC_MULTIPLY_V((B[ 0] - B[ 1]), INVCOS16);
284 A[ 2] = B[ 2] + B[ 3];
285 A[ 3] = MPC_MULTIPLY_V((B[ 2] - B[ 3]), INVCOS16);
286 A[ 4] = B[ 4] + B[ 5];
287 A[ 5] = MPC_MULTIPLY_V((B[ 4] - B[ 5]), INVCOS16);
288 A[ 6] = B[ 6] + B[ 7];
289 A[ 7] = MPC_MULTIPLY_V((B[ 6] - B[ 7]), INVCOS16);
290 A[ 8] = B[ 8] + B[ 9];
291 A[ 9] = MPC_MULTIPLY_V((B[ 8] - B[ 9]), INVCOS16);
292 A[10] = B[10] + B[11];
293 A[11] = MPC_MULTIPLY_V((B[10] - B[11]), INVCOS16);
294 A[12] = B[12] + B[13];
295 A[13] = MPC_MULTIPLY_V((B[12] - B[13]), INVCOS16);
296 A[14] = B[14] + B[15];
297 A[15] = MPC_MULTIPLY_V((B[14] - B[15]), INVCOS16);
298 // 8 adds, 8 subs, 8 muls, 8 shifts
300 // multiple used expressions: -(A[12] + A[14] + A[15])
301 V[48] = -A[ 0];
302 V[ 0] = A[ 1];
303 V[40] = -A[ 2] - (V[ 8] = A[ 3]);
304 V[36] = -((V[ 4] = A[ 5] + (V[12] = A[ 7])) + A[ 6]);
305 V[44] = - A[ 4] - A[ 6] - A[ 7];
306 V[ 6] = (V[10] = A[11] + (V[14] = A[15])) + A[13];
307 V[38] = (V[34] = -(V[ 2] = A[ 9] + A[13] + A[15]) - A[14]) + A[ 9] - A[10] - A[11];
308 V[46] = (tmp = -(A[12] + A[14] + A[15])) - A[ 8];
309 V[42] = tmp - A[10] - A[11];
310 // 9 adds, 9 subs
312 A[ 0] = MPC_MULTIPLY_V_PRESCALE((Sample[ 0] - Sample[31]), INVCOS01);
313 A[ 1] = MPC_MULTIPLY_V_PRESCALE((Sample[ 1] - Sample[30]), INVCOS03);
314 A[ 2] = MPC_MULTIPLY_V_PRESCALE((Sample[ 2] - Sample[29]), INVCOS05);
315 A[ 3] = MPC_MULTIPLY_V_PRESCALE((Sample[ 3] - Sample[28]), INVCOS07);
316 A[ 4] = MPC_MULTIPLY_V_PRESCALE((Sample[ 4] - Sample[27]), INVCOS09);
317 A[ 5] = MPC_MULTIPLY_V_PRESCALE((Sample[ 5] - Sample[26]), INVCOS11);
318 A[ 6] = MPC_MULTIPLY_V_PRESCALE((Sample[ 6] - Sample[25]), INVCOS13);
319 A[ 7] = MPC_MULTIPLY_V_PRESCALE((Sample[ 7] - Sample[24]), INVCOS15);
320 A[ 8] = MPC_MULTIPLY_V_PRESCALE((Sample[ 8] - Sample[23]), INVCOS17);
321 A[ 9] = MPC_MULTIPLY_V_PRESCALE((Sample[ 9] - Sample[22]), INVCOS19);
322 A[10] = MPC_MULTIPLY_V_PRESCALE((Sample[10] - Sample[21]), INVCOS21);
323 A[11] = MPC_MULTIPLY_V_PRESCALE((Sample[11] - Sample[20]), INVCOS23);
324 A[12] = MPC_MULTIPLY_V_PRESCALE((Sample[12] - Sample[19]), INVCOS25);
325 A[13] = MPC_MULTIPLY_V_PRESCALE((Sample[13] - Sample[18]), INVCOS27);
326 A[14] = MPC_MULTIPLY_V_PRESCALE((Sample[14] - Sample[17]), INVCOS29);
327 A[15] = MPC_MULTIPLY_V_PRESCALE((Sample[15] - Sample[16]), INVCOS31);
328 // 16 subs, 16 muls, 16 shifts
330 B[ 0] = A[ 0] + A[15];
331 B[ 1] = A[ 1] + A[14];
332 B[ 2] = A[ 2] + A[13];
333 B[ 3] = A[ 3] + A[12];
334 B[ 4] = A[ 4] + A[11];
335 B[ 5] = A[ 5] + A[10];
336 B[ 6] = A[ 6] + A[ 9];
337 B[ 7] = A[ 7] + A[ 8];
338 B[ 8] = MPC_MULTIPLY_V((A[ 0] - A[15]), INVCOS02);
339 B[ 9] = MPC_MULTIPLY_V((A[ 1] - A[14]), INVCOS06);
340 B[10] = MPC_MULTIPLY_V((A[ 2] - A[13]), INVCOS10);
341 B[11] = MPC_MULTIPLY_V((A[ 3] - A[12]), INVCOS14);
342 B[12] = MPC_MULTIPLY_V((A[ 4] - A[11]), INVCOS18);
343 B[13] = MPC_MULTIPLY_V((A[ 5] - A[10]), INVCOS22);
344 B[14] = MPC_MULTIPLY_V((A[ 6] - A[ 9]), INVCOS26);
345 B[15] = MPC_MULTIPLY_V((A[ 7] - A[ 8]), INVCOS30);
346 // 8 adds, 8 subs, 8 muls, 8 shift
348 A[ 0] = B[ 0] + B[ 7];
349 A[ 1] = B[ 1] + B[ 6];
350 A[ 2] = B[ 2] + B[ 5];
351 A[ 3] = B[ 3] + B[ 4];
352 A[ 4] = MPC_MULTIPLY_V((B[ 0] - B[ 7]), INVCOS04);
353 A[ 5] = MPC_MULTIPLY_V((B[ 1] - B[ 6]), INVCOS12);
354 A[ 6] = MPC_MULTIPLY_V((B[ 2] - B[ 5]), INVCOS20);
355 A[ 7] = MPC_MULTIPLY_V((B[ 3] - B[ 4]), INVCOS28);
356 A[ 8] = B[ 8] + B[15];
357 A[ 9] = B[ 9] + B[14];
358 A[10] = B[10] + B[13];
359 A[11] = B[11] + B[12];
360 A[12] = MPC_MULTIPLY_V((B[ 8] - B[15]), INVCOS04);
361 A[13] = MPC_MULTIPLY_V((B[ 9] - B[14]), INVCOS12);
362 A[14] = MPC_MULTIPLY_V((B[10] - B[13]), INVCOS20);
363 A[15] = MPC_MULTIPLY_V((B[11] - B[12]), INVCOS28);
364 // 8 adds, 8 subs, 8 muls, 8 shift
366 B[ 0] = A[ 0] + A[ 3];
367 B[ 1] = A[ 1] + A[ 2];
368 B[ 2] = MPC_MULTIPLY_V((A[ 0] - A[ 3]), INVCOS08);
369 B[ 3] = MPC_MULTIPLY_V((A[ 1] - A[ 2]), INVCOS24);
370 B[ 4] = A[ 4] + A[ 7];
371 B[ 5] = A[ 5] + A[ 6];
372 B[ 6] = MPC_MULTIPLY_V((A[ 4] - A[ 7]), INVCOS08);
373 B[ 7] = MPC_MULTIPLY_V((A[ 5] - A[ 6]), INVCOS24);
374 B[ 8] = A[ 8] + A[11];
375 B[ 9] = A[ 9] + A[10];
376 B[10] = MPC_MULTIPLY_V((A[ 8] - A[11]), INVCOS08);
377 B[11] = MPC_MULTIPLY_V((A[ 9] - A[10]), INVCOS24);
378 B[12] = A[12] + A[15];
379 B[13] = A[13] + A[14];
380 B[14] = MPC_MULTIPLY_V((A[12] - A[15]), INVCOS08);
381 B[15] = MPC_MULTIPLY_V((A[13] - A[14]), INVCOS24);
382 // 8 adds, 8 subs, 8 muls, 8 shift
384 A[ 0] = MPC_V_POSTSCALE((B[ 0] + B[ 1]));
385 A[ 1] = MPC_MULTIPLY_V_POSTSCALE((B[ 0] - B[ 1]), INVCOS16);
386 A[ 2] = MPC_V_POSTSCALE((B[ 2] + B[ 3]));
387 A[ 3] = MPC_MULTIPLY_V_POSTSCALE((B[ 2] - B[ 3]), INVCOS16);
388 A[ 4] = MPC_V_POSTSCALE((B[ 4] + B[ 5]));
389 A[ 5] = MPC_MULTIPLY_V_POSTSCALE((B[ 4] - B[ 5]), INVCOS16);
390 A[ 6] = MPC_V_POSTSCALE((B[ 6] + B[ 7]));
391 A[ 7] = MPC_MULTIPLY_V_POSTSCALE((B[ 6] - B[ 7]), INVCOS16);
392 A[ 8] = MPC_V_POSTSCALE((B[ 8] + B[ 9]));
393 A[ 9] = MPC_MULTIPLY_V_POSTSCALE((B[ 8] - B[ 9]), INVCOS16);
394 A[10] = MPC_V_POSTSCALE((B[10] + B[11]));
395 A[11] = MPC_MULTIPLY_V_POSTSCALE((B[10] - B[11]), INVCOS16);
396 A[12] = MPC_V_POSTSCALE((B[12] + B[13]));
397 A[13] = MPC_MULTIPLY_V_POSTSCALE((B[12] - B[13]), INVCOS16);
398 A[14] = MPC_V_POSTSCALE((B[14] + B[15]));
399 A[15] = MPC_MULTIPLY_V_POSTSCALE((B[14] - B[15]), INVCOS16);
400 // 8 adds, 8 subs, 8 muls, 8 shift
402 // multiple used expressions: A[ 4]+A[ 6]+A[ 7], A[ 9]+A[13]+A[15]
403 V[ 5] = (V[11] = (V[13] = A[ 7] + (V[15] = A[15])) + A[11]) + A[ 5] + A[13];
404 V[ 7] = (V[ 9] = A[ 3] + A[11] + A[15]) + A[13];
405 V[33] = -(V[ 1] = A[ 1] + A[ 9] + A[13] + A[15]) - A[14];
406 V[35] = -(V[ 3] = A[ 5] + A[ 7] + A[ 9] + A[13] + A[15]) - A[ 6] - A[14];
407 V[37] = (tmp = -(A[10] + A[11] + A[13] + A[14] + A[15])) - A[ 5] - A[ 6] - A[ 7];
408 V[39] = tmp - A[ 2] - A[ 3];
409 V[41] = (tmp += A[13] - A[12]) - A[ 2] - A[ 3];
410 V[43] = tmp - A[ 4] - A[ 6] - A[ 7];
411 V[47] = (tmp = -(A[ 8] + A[12] + A[14] + A[15])) - A[ 0];
412 V[45] = tmp - A[ 4] - A[ 6] - A[ 7];
413 // 22 adds, 18 subs
415 V[32] = -(V[ 0] = MPC_V_PRESHIFT(V[ 0]));
416 V[31] = -(V[ 1] = MPC_V_PRESHIFT(V[ 1]));
417 V[30] = -(V[ 2] = MPC_V_PRESHIFT(V[ 2]));
418 V[29] = -(V[ 3] = MPC_V_PRESHIFT(V[ 3]));
419 V[28] = -(V[ 4] = MPC_V_PRESHIFT(V[ 4]));
420 V[27] = -(V[ 5] = MPC_V_PRESHIFT(V[ 5]));
421 V[26] = -(V[ 6] = MPC_V_PRESHIFT(V[ 6]));
422 V[25] = -(V[ 7] = MPC_V_PRESHIFT(V[ 7]));
423 V[24] = -(V[ 8] = MPC_V_PRESHIFT(V[ 8]));
424 V[23] = -(V[ 9] = MPC_V_PRESHIFT(V[ 9]));
425 V[22] = -(V[10] = MPC_V_PRESHIFT(V[10]));
426 V[21] = -(V[11] = MPC_V_PRESHIFT(V[11]));
427 V[20] = -(V[12] = MPC_V_PRESHIFT(V[12]));
428 V[19] = -(V[13] = MPC_V_PRESHIFT(V[13]));
429 V[18] = -(V[14] = MPC_V_PRESHIFT(V[14]));
430 V[17] = -(V[15] = MPC_V_PRESHIFT(V[15]));
431 // 16 adds, 16 shifts (OPTIMIZE_FOR_SPEED only)
433 V[63] = (V[33] = MPC_V_PRESHIFT(V[33]));
434 V[62] = (V[34] = MPC_V_PRESHIFT(V[34]));
435 V[61] = (V[35] = MPC_V_PRESHIFT(V[35]));
436 V[60] = (V[36] = MPC_V_PRESHIFT(V[36]));
437 V[59] = (V[37] = MPC_V_PRESHIFT(V[37]));
438 V[58] = (V[38] = MPC_V_PRESHIFT(V[38]));
439 V[57] = (V[39] = MPC_V_PRESHIFT(V[39]));
440 V[56] = (V[40] = MPC_V_PRESHIFT(V[40]));
441 V[55] = (V[41] = MPC_V_PRESHIFT(V[41]));
442 V[54] = (V[42] = MPC_V_PRESHIFT(V[42]));
443 V[53] = (V[43] = MPC_V_PRESHIFT(V[43]));
444 V[52] = (V[44] = MPC_V_PRESHIFT(V[44]));
445 V[51] = (V[45] = MPC_V_PRESHIFT(V[45]));
446 V[50] = (V[46] = MPC_V_PRESHIFT(V[46]));
447 V[49] = (V[47] = MPC_V_PRESHIFT(V[47]));
448 V[48] = (V[48] = MPC_V_PRESHIFT(V[48]));
449 // 16 adds, 16 shifts (OPTIMIZE_FOR_SPEED only)
451 // OPTIMIZE_FOR_SPEED total: 143 adds, 107 subs, 80 muls, 112 shifts
452 // total: 111 adds, 107 subs, 80 muls, 80 shifts
455 #if defined(CPU_ARM)
456 extern void
457 mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data,
458 const MPC_SAMPLE_FORMAT * V,
459 const MPC_SAMPLE_FORMAT * D);
460 #else
461 static void
462 mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data,
463 const MPC_SAMPLE_FORMAT * V,
464 const MPC_SAMPLE_FORMAT * D)
466 mpc_int32_t k;
468 #if defined(OPTIMIZE_FOR_SPEED)
469 // 32=32x32-multiply (FIXED_POINT)
470 for ( k = 0; k < 32; k++, D += 16, V++ )
472 *Data = V[ 0]*D[ 0] + V[ 96]*D[ 1] + V[128]*D[ 2] + V[224]*D[ 3]
473 + V[256]*D[ 4] + V[352]*D[ 5] + V[384]*D[ 6] + V[480]*D[ 7]
474 + V[512]*D[ 8] + V[608]*D[ 9] + V[640]*D[10] + V[736]*D[11]
475 + V[768]*D[12] + V[864]*D[13] + V[896]*D[14] + V[992]*D[15];
476 *Data >>= 1; // post shift to compensate for pre-shifting
477 Data += 1;
478 // total: 32 * (16 muls, 15 adds)
480 #else
481 #if defined(CPU_COLDFIRE)
482 // 64=32x32-multiply assembler for Coldfire
483 for ( k = 0; k < 32; k++, D += 16, V++ )
485 asm volatile (
486 "movem.l (%[D]), %%d0-%%d3 \n\t"
487 "move.l (%[V]), %%a5 \n\t"
488 "mac.l %%d0, %%a5, (96*4, %[V]), %%a5, %%acc0 \n\t"
489 "mac.l %%d1, %%a5, (128*4, %[V]), %%a5, %%acc0\n\t"
490 "mac.l %%d2, %%a5, (224*4, %[V]), %%a5, %%acc0\n\t"
491 "mac.l %%d3, %%a5, (256*4, %[V]), %%a5, %%acc0\n\t"
492 "movem.l (4*4, %[D]), %%d0-%%d3 \n\t"
493 "mac.l %%d0, %%a5, (352*4, %[V]), %%a5, %%acc0\n\t"
494 "mac.l %%d1, %%a5, (384*4, %[V]), %%a5, %%acc0\n\t"
495 "mac.l %%d2, %%a5, (480*4, %[V]), %%a5, %%acc0\n\t"
496 "mac.l %%d3, %%a5, (512*4, %[V]), %%a5, %%acc0\n\t"
497 "movem.l (8*4, %[D]), %%d0-%%d3 \n\t"
498 "mac.l %%d0, %%a5, (608*4, %[V]), %%a5, %%acc0\n\t"
499 "mac.l %%d1, %%a5, (640*4, %[V]), %%a5, %%acc0\n\t"
500 "mac.l %%d2, %%a5, (736*4, %[V]), %%a5, %%acc0\n\t"
501 "mac.l %%d3, %%a5, (768*4, %[V]), %%a5, %%acc0\n\t"
502 "movem.l (12*4, %[D]), %%d0-%%d3 \n\t"
503 "mac.l %%d0, %%a5, (864*4, %[V]), %%a5, %%acc0\n\t"
504 "mac.l %%d1, %%a5, (896*4, %[V]), %%a5, %%acc0\n\t"
505 "mac.l %%d2, %%a5, (992*4, %[V]), %%a5, %%acc0\n\t"
506 "mac.l %%d3, %%a5, %%acc0 \n\t"
507 "movclr.l %%acc0, %%d0 \n\t"
508 "lsl.l #1, %%d0 \n\t"
509 "move.l %%d0, (%[Data])+ \n"
510 : [Data] "+a" (Data)
511 : [V] "a" (V), [D] "a" (D)
512 : "d0", "d1", "d2", "d3", "a5");
514 #else
515 // 64=64x64-multiply (FIXED_POINT) or float=float*float (!FIXED_POINT) in C
516 for ( k = 0; k < 32; k++, D += 16, V++ )
518 *Data = MPC_MULTIPLY_EX(V[ 0],D[ 0],30) + MPC_MULTIPLY_EX(V[ 96],D[ 1],30)
519 + MPC_MULTIPLY_EX(V[128],D[ 2],30) + MPC_MULTIPLY_EX(V[224],D[ 3],30)
520 + MPC_MULTIPLY_EX(V[256],D[ 4],30) + MPC_MULTIPLY_EX(V[352],D[ 5],30)
521 + MPC_MULTIPLY_EX(V[384],D[ 6],30) + MPC_MULTIPLY_EX(V[480],D[ 7],30)
522 + MPC_MULTIPLY_EX(V[512],D[ 8],30) + MPC_MULTIPLY_EX(V[608],D[ 9],30)
523 + MPC_MULTIPLY_EX(V[640],D[10],30) + MPC_MULTIPLY_EX(V[736],D[11],30)
524 + MPC_MULTIPLY_EX(V[768],D[12],30) + MPC_MULTIPLY_EX(V[864],D[13],30)
525 + MPC_MULTIPLY_EX(V[896],D[14],30) + MPC_MULTIPLY_EX(V[992],D[15],30);
526 Data += 1;
527 // total: 16 muls, 15 adds, 16 shifts
529 #endif
530 #endif
532 #endif /* CPU_ARM */
534 static void
535 mpc_full_synthesis_filter(MPC_SAMPLE_FORMAT *OutData, MPC_SAMPLE_FORMAT *V, const MPC_SAMPLE_FORMAT *Y)
537 mpc_uint32_t n;
539 if (NULL != OutData)
541 for ( n = 0; n < 36; n++, Y += 32, OutData += 32 )
543 V -= 64;
544 mpc_calculate_new_V ( Y, V );
545 mpc_decoder_windowing_D( OutData, V, Di_opt );
550 void
551 mpc_decoder_synthese_filter_float(mpc_decoder *d, MPC_SAMPLE_FORMAT *OutData)
553 /********* left channel ********/
554 memmove(d->V_L + MPC_V_MEM, d->V_L, 960 * sizeof(MPC_SAMPLE_FORMAT) );
556 mpc_full_synthesis_filter(
557 OutData,
558 (MPC_SAMPLE_FORMAT *)(d->V_L + MPC_V_MEM),
559 (MPC_SAMPLE_FORMAT *)(d->Y_L [0]));
561 /******** right channel ********/
562 memmove(d->V_R + MPC_V_MEM, d->V_R, 960 * sizeof(MPC_SAMPLE_FORMAT) );
564 mpc_full_synthesis_filter(
565 (OutData == NULL ? NULL : OutData + MPC_FRAME_LENGTH),
566 (MPC_SAMPLE_FORMAT *)(d->V_R + MPC_V_MEM),
567 (MPC_SAMPLE_FORMAT *)(d->Y_R [0]));
570 /*******************************************/
571 /* */
572 /* dithered synthesis */
573 /* */
574 /*******************************************/
576 static const unsigned char Parity [256] ICONST_ATTR = { // parity
577 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
578 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
579 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
580 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
581 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
582 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
583 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
584 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0
588 * This is a simple random number generator with good quality for audio purposes.
589 * It consists of two polycounters with opposite rotation direction and different
590 * periods. The periods are coprime, so the total period is the product of both.
592 * -------------------------------------------------------------------------------------------------
593 * +-> |31:30:29:28:27:26:25:24:23:22:21:20:19:18:17:16:15:14:13:12:11:10: 9: 8: 7: 6: 5: 4: 3: 2: 1: 0|
594 * | -------------------------------------------------------------------------------------------------
595 * | | | | | | |
596 * | +--+--+--+-XOR-+--------+
597 * | |
598 * +--------------------------------------------------------------------------------------+
600 * -------------------------------------------------------------------------------------------------
601 * |31:30:29:28:27:26:25:24:23:22:21:20:19:18:17:16:15:14:13:12:11:10: 9: 8: 7: 6: 5: 4: 3: 2: 1: 0| <-+
602 * ------------------------------------------------------------------------------------------------- |
603 * | | | | |
604 * +--+----XOR----+--+ |
605 * | |
606 * +----------------------------------------------------------------------------------------+
609 * The first has an period of 3*5*17*257*65537, the second of 7*47*73*178481,
610 * which gives a period of 18.410.713.077.675.721.215. The result is the
611 * XORed values of both generators.
613 mpc_uint32_t
614 mpc_random_int(mpc_decoder *d)
616 #if 1
617 mpc_uint32_t t1, t2, t3, t4;
619 t3 = t1 = d->__r1; t4 = t2 = d->__r2; // Parity calculation is done via table lookup, this is also available
620 t1 &= 0xF5; t2 >>= 25; // on CPUs without parity, can be implemented in C and avoid unpredictable
621 t1 = Parity [t1]; t2 &= 0x63; // jumps and slow rotate through the carry flag operations.
622 t1 <<= 31; t2 = Parity [t2];
624 return (d->__r1 = (t3 >> 1) | t1 ) ^ (d->__r2 = (t4 + t4) | t2 );
625 #else
626 return (d->__r1 = (d->__r1 >> 1) | ((mpc_uint32_t)Parity [d->__r1 & 0xF5] << 31) ) ^
627 (d->__r2 = (d->__r2 << 1) | (mpc_uint32_t)Parity [(d->__r2 >> 25) & 0x63] );
628 #endif