Correct order of odd and even row colours in the rowcolors command, and set colours...
[kugel-rb.git] / apps / codecs / libmusepack / synth_filter.c
blobd8196eb40f17cce1c60556326b3c4cd514a7f788
1 /*
2 Copyright (c) 2005, The Musepack Development Team
3 All rights reserved.
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
9 * Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above
13 copyright notice, this list of conditions and the following
14 disclaimer in the documentation and/or other materials provided
15 with the distribution.
17 * Neither the name of the The Musepack Development Team nor the
18 names of its contributors may be used to endorse or promote
19 products derived from this software without specific prior
20 written permission.
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 /// \file synth_filter.c
36 /// Synthesis functions.
37 /// \todo document me
39 #include "musepack.h"
40 #include "internal.h"
/* C O N S T A N T S */
#undef _

/*
 * D(value)          - wraps one literal entry of the Di_opt window table and
 *                     scales it for the selected build configuration.
 * MPC_V_PRESHIFT(X) - optional down-scaling applied to freshly computed
 *                     V-buffer values before they are stored.
 *
 * The macro parameter is parenthesised wherever it is combined with another
 * operator, so that an expression argument such as D(a + b) is scaled as a
 * whole instead of only its last operand.
 */
#if defined(MPC_FIXED_POINT)
    #if defined(OPTIMIZE_FOR_SPEED)
        // round at compile time to +/- 2^14 as a pre-shift before 32=32x32-multiply
        #define D(value)          (MPC_SHR_RND(value, 3))

        // round at runtime to +/- 2^17 as a pre-shift before 32=32x32-multiply
        // samples are 18.14 fixed point. 30.2 after this shift, whereas the
        // 15.2 bits are significant (not including sign)
        #define MPC_V_PRESHIFT(X) MPC_SHR_RND(X, 12)

        // in this configuration a post-shift by >>1 is needed after synthesis
    #else
        #if defined(CPU_ARM)
            // do not up-scale D-values to achieve higher speed in smull/mlal
            // operations. saves ~14/8 = 1.75 cycles per multiplication
            #define D(value)      (value)

            // in this configuration a post-shift by >>16 is needed after synthesis
        #else
            // saturate to +/- 2^31 (= value << (31-17)), D-values are +/- 2^17
            #define D(value)      ((value) << 14)
        #endif
        // do not perform pre-shift
        #define MPC_V_PRESHIFT(X) (X)
    #endif
#else
    // IMPORTANT: internal scaling is somehow strange for floating point, therefore we scale the coefficients Di_opt
    // by the correct amount to have proper scaled output
    #define D(value)          MAKE_MPC_SAMPLE((double)(value)*(double)(0x1000))

    // do not perform pre-shift
    #define MPC_V_PRESHIFT(X) (X)
#endif
79 // Di_opt coefficients are +/- 2^17 (pre-shifted by <<16)
80 static const MPC_SAMPLE_FORMAT Di_opt [512] ICONST_ATTR = {
81 /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */
82 /* 0 */ D( 0), -D( 29), D(213), -D( 459), D(2037), -D(5153), D( 6574), -D(37489), D(75038), D(37489), D(6574), D(5153), D(2037), D(459), D(213), D(29),
83 /* 1 */ -D( 1), -D( 31), D(218), -D( 519), D(2000), -D(5517), D( 5959), -D(39336), D(74992), D(35640), D(7134), D(4788), D(2063), D(401), D(208), D(26),
84 /* 2 */ -D( 1), -D( 35), D(222), -D( 581), D(1952), -D(5879), D( 5288), -D(41176), D(74856), D(33791), D(7640), D(4425), D(2080), D(347), D(202), D(24),
85 /* 3 */ -D( 1), -D( 38), D(225), -D( 645), D(1893), -D(6237), D( 4561), -D(43006), D(74630), D(31947), D(8092), D(4063), D(2087), D(294), D(196), D(21),
86 /* 4 */ -D( 1), -D( 41), D(227), -D( 711), D(1822), -D(6589), D( 3776), -D(44821), D(74313), D(30112), D(8492), D(3705), D(2085), D(244), D(190), D(19),
87 /* 5 */ -D( 1), -D( 45), D(228), -D( 779), D(1739), -D(6935), D( 2935), -D(46617), D(73908), D(28289), D(8840), D(3351), D(2075), D(197), D(183), D(17),
88 /* 6 */ -D( 1), -D( 49), D(228), -D( 848), D(1644), -D(7271), D( 2037), -D(48390), D(73415), D(26482), D(9139), D(3004), D(2057), D(153), D(176), D(16),
89 /* 7 */ -D( 2), -D( 53), D(227), -D( 919), D(1535), -D(7597), D( 1082), -D(50137), D(72835), D(24694), D(9389), D(2663), D(2032), D(111), D(169), D(14),
90 /* 8 */ -D( 2), -D( 58), D(224), -D( 991), D(1414), -D(7910), D( 70), -D(51853), D(72169), D(22929), D(9592), D(2330), D(2001), D( 72), D(161), D(13),
91 /* 9 */ -D( 2), -D( 63), D(221), -D(1064), D(1280), -D(8209), -D( 998), -D(53534), D(71420), D(21189), D(9750), D(2006), D(1962), D( 36), D(154), D(11),
92 /* 10 */ -D( 2), -D( 68), D(215), -D(1137), D(1131), -D(8491), -D( 2122), -D(55178), D(70590), D(19478), D(9863), D(1692), D(1919), D( 2), D(147), D(10),
93 /* 11 */ -D( 3), -D( 73), D(208), -D(1210), D( 970), -D(8755), -D( 3300), -D(56778), D(69679), D(17799), D(9935), D(1388), D(1870), -D( 29), D(139), D( 9),
94 /* 12 */ -D( 3), -D( 79), D(200), -D(1283), D( 794), -D(8998), -D( 4533), -D(58333), D(68692), D(16155), D(9966), D(1095), D(1817), -D( 57), D(132), D( 8),
95 /* 13 */ -D( 4), -D( 85), D(189), -D(1356), D( 605), -D(9219), -D( 5818), -D(59838), D(67629), D(14548), D(9959), D( 814), D(1759), -D( 83), D(125), D( 7),
96 /* 14 */ -D( 4), -D( 91), D(177), -D(1428), D( 402), -D(9416), -D( 7154), -D(61289), D(66494), D(12980), D(9916), D( 545), D(1698), -D(106), D(117), D( 7),
97 /* 15 */ -D( 5), -D( 97), D(163), -D(1498), D( 185), -D(9585), -D( 8540), -D(62684), D(65290), D(11455), D(9838), D( 288), D(1634), -D(127), D(111), D( 6),
98 /* 16 */ -D( 5), -D(104), D(146), -D(1567), -D( 45), -D(9727), -D( 9975), -D(64019), D(64019), D( 9975), D(9727), D( 45), D(1567), -D(146), D(104), D( 5),
99 /* 17 */ -D( 6), -D(111), D(127), -D(1634), -D( 288), -D(9838), -D(11455), -D(65290), D(62684), D( 8540), D(9585), -D( 185), D(1498), -D(163), D( 97), D( 5),
100 /* 18 */ -D( 7), -D(117), D(106), -D(1698), -D( 545), -D(9916), -D(12980), -D(66494), D(61289), D( 7154), D(9416), -D( 402), D(1428), -D(177), D( 91), D( 4),
101 /* 19 */ -D( 7), -D(125), D( 83), -D(1759), -D( 814), -D(9959), -D(14548), -D(67629), D(59838), D( 5818), D(9219), -D( 605), D(1356), -D(189), D( 85), D( 4),
102 /* 20 */ -D( 8), -D(132), D( 57), -D(1817), -D(1095), -D(9966), -D(16155), -D(68692), D(58333), D( 4533), D(8998), -D( 794), D(1283), -D(200), D( 79), D( 3),
103 /* 21 */ -D( 9), -D(139), D( 29), -D(1870), -D(1388), -D(9935), -D(17799), -D(69679), D(56778), D( 3300), D(8755), -D( 970), D(1210), -D(208), D( 73), D( 3),
104 /* 22 */ -D(10), -D(147), -D( 2), -D(1919), -D(1692), -D(9863), -D(19478), -D(70590), D(55178), D( 2122), D(8491), -D(1131), D(1137), -D(215), D( 68), D( 2),
105 /* 23 */ -D(11), -D(154), -D( 36), -D(1962), -D(2006), -D(9750), -D(21189), -D(71420), D(53534), D( 998), D(8209), -D(1280), D(1064), -D(221), D( 63), D( 2),
106 /* 24 */ -D(13), -D(161), -D( 72), -D(2001), -D(2330), -D(9592), -D(22929), -D(72169), D(51853), -D( 70), D(7910), -D(1414), D( 991), -D(224), D( 58), D( 2),
107 /* 25 */ -D(14), -D(169), -D(111), -D(2032), -D(2663), -D(9389), -D(24694), -D(72835), D(50137), -D( 1082), D(7597), -D(1535), D( 919), -D(227), D( 53), D( 2),
108 /* 26 */ -D(16), -D(176), -D(153), -D(2057), -D(3004), -D(9139), -D(26482), -D(73415), D(48390), -D( 2037), D(7271), -D(1644), D( 848), -D(228), D( 49), D( 1),
109 /* 27 */ -D(17), -D(183), -D(197), -D(2075), -D(3351), -D(8840), -D(28289), -D(73908), D(46617), -D( 2935), D(6935), -D(1739), D( 779), -D(228), D( 45), D( 1),
110 /* 28 */ -D(19), -D(190), -D(244), -D(2085), -D(3705), -D(8492), -D(30112), -D(74313), D(44821), -D( 3776), D(6589), -D(1822), D( 711), -D(227), D( 41), D( 1),
111 /* 29 */ -D(21), -D(196), -D(294), -D(2087), -D(4063), -D(8092), -D(31947), -D(74630), D(43006), -D( 4561), D(6237), -D(1893), D( 645), -D(225), D( 38), D( 1),
112 /* 30 */ -D(24), -D(202), -D(347), -D(2080), -D(4425), -D(7640), -D(33791), -D(74856), D(41176), -D( 5288), D(5879), -D(1952), D( 581), -D(222), D( 35), D( 1),
113 /* 31 */ -D(26), -D(208), -D(401), -D(2063), -D(4788), -D(7134), -D(35640), -D(74992), D(39336), -D( 5959), D(5517), -D(2000), D( 519), -D(218), D( 31), D( 1)
116 #undef D
// needed to prevent from internal overflow in calculate_V (see below)
#define OVERFLOW_FIX 2

// V-coefficients were expanded (<<) by V_COEFFICIENT_EXPAND
#define V_COEFFICIENT_EXPAND 27

/*
 * Multiplication helpers used by the DCT stages of mpc_calculate_new_V:
 *   MPC_MULTIPLY_V(sample, vcoef)           - plain sample * coefficient
 *   MPC_MULTIPLY_V_PRESCALE(sample, vcoef)  - same, result scaled down by OVERFLOW_FIX bits
 *   MPC_MULTIPLY_V_POSTSCALE(sample, vcoef) - same, result scaled up by OVERFLOW_FIX bits
 *   MPC_V_POSTSCALE(sample)                 - scale a sum up by OVERFLOW_FIX bits
 *   MPC_MAKE_INVCOS(value)                  - convert a <<27 coefficient literal to the
 *                                             format the multiply macros expect
 *
 * NOTE: a function-like macro needs its '(' directly after the macro name.
 * With a space in between, the parameter list becomes part of the replacement
 * text and every use of the macro fails to compile.
 */
#if defined(MPC_FIXED_POINT)
    #if defined(OPTIMIZE_FOR_SPEED)
        // define 32=32x32-multiplication for DCT-coefficients with samples, vcoef will be pre-shifted on creation
        // samples are rounded to +/- 2^19 as pre-shift before 32=32x32-multiply
        #define MPC_MULTIPLY_V(sample, vcoef) ( MPC_SHR_RND(sample, 12) * (vcoef) )

        // pre- and postscale are used to avoid internal overflow in synthesis calculation
        // samples are s15.0, v-coefs are 4.12 -> internal format is s19.12
        #define MPC_MULTIPLY_V_PRESCALE(sample, vcoef)  ( MPC_SHR_RND(sample, (12+OVERFLOW_FIX)) * (vcoef) )
        #define MPC_MULTIPLY_V_POSTSCALE(sample, vcoef) ( MPC_SHR_RND(sample, (12-OVERFLOW_FIX)) * (vcoef) )
        #define MPC_V_POSTSCALE(sample) ((sample) << OVERFLOW_FIX)

        // round to +/- 2^16 as pre-shift before 32=32x32-multiply
        #define MPC_MAKE_INVCOS(value) (MPC_SHR_RND(value, 15))
    #else
        // define 64=32x32-multiplication for DCT-coefficients with samples. Via usage of MPC_FRACT highly optimized assembler might be used
        // MULTIPLY_FRACT will do >>32 after multiplication, as V-coef were expanded by V_COEFFICIENT_EXPAND we'll correct this on the result.
        // Will lose 5 bits of accuracy on result in fract part without effect on final audio result
        #define MPC_MULTIPLY_V(sample, vcoef) ( (MPC_MULTIPLY_FRACT(sample, vcoef)) << (32-V_COEFFICIENT_EXPAND) )

        // pre- and postscale are used to avoid internal overflow in synthesis calculation
        // samples are s15.14, v-coefs are 4.27 -> internal format is s19.12
        #define MPC_MULTIPLY_V_PRESCALE(sample, vcoef)  ( (MPC_MULTIPLY_FRACT(sample, vcoef)) << (32-V_COEFFICIENT_EXPAND-OVERFLOW_FIX) )
        #define MPC_MULTIPLY_V_POSTSCALE(sample, vcoef) ( (MPC_MULTIPLY_FRACT(sample, vcoef)) << (32-V_COEFFICIENT_EXPAND+OVERFLOW_FIX) )
        #define MPC_V_POSTSCALE(sample) ((sample) << OVERFLOW_FIX)

        // directly use accurate 32bit-coefficients
        #define MPC_MAKE_INVCOS(value) (value)
    #endif
#else
    // for floating point use the standard multiplication macro
    #define MPC_MULTIPLY_V(sample, vcoef)           ( MPC_MULTIPLY(sample, vcoef) )
    #define MPC_MULTIPLY_V_PRESCALE(sample, vcoef)  ( MPC_MULTIPLY(sample, vcoef) )
    #define MPC_MULTIPLY_V_POSTSCALE(sample, vcoef) ( MPC_MULTIPLY(sample, vcoef) )
    #define MPC_V_POSTSCALE(sample) (sample)

    // downscale the accurate 32bit-coefficients and convert to float
    #define MPC_MAKE_INVCOS(value) MAKE_MPC_SAMPLE((double)(value)/(double)(1<<V_COEFFICIENT_EXPAND))
#endif
// define constants for DCT-synthesis
// INVCOSxx = (0.5 / cos(xx*PI/64)) << 27, <<27 to saturate to +/- 2^31
// Each literal is passed through MPC_MAKE_INVCOS, which converts it to the
// representation used by the selected build configuration.
#define INVCOS01 MPC_MAKE_INVCOS(  67189797)
#define INVCOS02 MPC_MAKE_INVCOS(  67433575)
#define INVCOS03 MPC_MAKE_INVCOS(  67843164)
#define INVCOS04 MPC_MAKE_INVCOS(  68423604)
#define INVCOS05 MPC_MAKE_INVCOS(  69182167)
#define INVCOS06 MPC_MAKE_INVCOS(  70128577)
#define INVCOS07 MPC_MAKE_INVCOS(  71275330)
#define INVCOS08 MPC_MAKE_INVCOS(  72638111)
#define INVCOS09 MPC_MAKE_INVCOS(  74236348)
#define INVCOS10 MPC_MAKE_INVCOS(  76093940)
#define INVCOS11 MPC_MAKE_INVCOS(  78240207)
#define INVCOS12 MPC_MAKE_INVCOS(  80711144)
#define INVCOS13 MPC_MAKE_INVCOS(  83551089)
#define INVCOS14 MPC_MAKE_INVCOS(  86814950)
#define INVCOS15 MPC_MAKE_INVCOS(  90571242)
#define INVCOS16 MPC_MAKE_INVCOS(  94906266)
#define INVCOS17 MPC_MAKE_INVCOS(  99929967)
#define INVCOS18 MPC_MAKE_INVCOS( 105784321)
#define INVCOS19 MPC_MAKE_INVCOS( 112655602)
#define INVCOS20 MPC_MAKE_INVCOS( 120792764)
#define INVCOS21 MPC_MAKE_INVCOS( 130535899)
#define INVCOS22 MPC_MAKE_INVCOS( 142361749)
#define INVCOS23 MPC_MAKE_INVCOS( 156959571)
#define INVCOS24 MPC_MAKE_INVCOS( 175363913)
#define INVCOS25 MPC_MAKE_INVCOS( 199201203)
#define INVCOS26 MPC_MAKE_INVCOS( 231182936)
#define INVCOS27 MPC_MAKE_INVCOS( 276190692)
#define INVCOS28 MPC_MAKE_INVCOS( 343988688)
#define INVCOS29 MPC_MAKE_INVCOS( 457361460)
#define INVCOS30 MPC_MAKE_INVCOS( 684664578)
#define INVCOS31 MPC_MAKE_INVCOS(1367679739)
198 void
199 mpc_calculate_new_V ( const MPC_SAMPLE_FORMAT * Sample, MPC_SAMPLE_FORMAT * V )
200 ICODE_ATTR_MPC_LARGE_IRAM;
202 void
203 mpc_calculate_new_V ( const MPC_SAMPLE_FORMAT * Sample, MPC_SAMPLE_FORMAT * V )
205 // Calculating new V-buffer values for left channel
206 // calculate new V-values (ISO-11172-3, p. 39)
207 // based upon fast-MDCT algorithm by Byeong Gi Lee
208 MPC_SAMPLE_FORMAT A[16];
209 MPC_SAMPLE_FORMAT B[16];
210 MPC_SAMPLE_FORMAT tmp;
212 A[ 0] = Sample[ 0] + Sample[31];
213 A[ 1] = Sample[ 1] + Sample[30];
214 A[ 2] = Sample[ 2] + Sample[29];
215 A[ 3] = Sample[ 3] + Sample[28];
216 A[ 4] = Sample[ 4] + Sample[27];
217 A[ 5] = Sample[ 5] + Sample[26];
218 A[ 6] = Sample[ 6] + Sample[25];
219 A[ 7] = Sample[ 7] + Sample[24];
220 A[ 8] = Sample[ 8] + Sample[23];
221 A[ 9] = Sample[ 9] + Sample[22];
222 A[10] = Sample[10] + Sample[21];
223 A[11] = Sample[11] + Sample[20];
224 A[12] = Sample[12] + Sample[19];
225 A[13] = Sample[13] + Sample[18];
226 A[14] = Sample[14] + Sample[17];
227 A[15] = Sample[15] + Sample[16];
228 // 16 adds
230 B[ 0] = A[ 0] + A[15];
231 B[ 1] = A[ 1] + A[14];
232 B[ 2] = A[ 2] + A[13];
233 B[ 3] = A[ 3] + A[12];
234 B[ 4] = A[ 4] + A[11];
235 B[ 5] = A[ 5] + A[10];
236 B[ 6] = A[ 6] + A[ 9];
237 B[ 7] = A[ 7] + A[ 8];
238 B[ 8] = MPC_MULTIPLY_V((A[ 0] - A[15]), INVCOS02);
239 B[ 9] = MPC_MULTIPLY_V((A[ 1] - A[14]), INVCOS06);
240 B[10] = MPC_MULTIPLY_V((A[ 2] - A[13]), INVCOS10);
241 B[11] = MPC_MULTIPLY_V((A[ 3] - A[12]), INVCOS14);
242 B[12] = MPC_MULTIPLY_V((A[ 4] - A[11]), INVCOS18);
243 B[13] = MPC_MULTIPLY_V((A[ 5] - A[10]), INVCOS22);
244 B[14] = MPC_MULTIPLY_V((A[ 6] - A[ 9]), INVCOS26);
245 B[15] = MPC_MULTIPLY_V((A[ 7] - A[ 8]), INVCOS30);
246 // 8 adds, 8 subs, 8 muls, 8 shifts
248 A[ 0] = B[ 0] + B[ 7];
249 A[ 1] = B[ 1] + B[ 6];
250 A[ 2] = B[ 2] + B[ 5];
251 A[ 3] = B[ 3] + B[ 4];
252 A[ 4] = MPC_MULTIPLY_V((B[ 0] - B[ 7]), INVCOS04);
253 A[ 5] = MPC_MULTIPLY_V((B[ 1] - B[ 6]), INVCOS12);
254 A[ 6] = MPC_MULTIPLY_V((B[ 2] - B[ 5]), INVCOS20);
255 A[ 7] = MPC_MULTIPLY_V((B[ 3] - B[ 4]), INVCOS28);
256 A[ 8] = B[ 8] + B[15];
257 A[ 9] = B[ 9] + B[14];
258 A[10] = B[10] + B[13];
259 A[11] = B[11] + B[12];
260 A[12] = MPC_MULTIPLY_V((B[ 8] - B[15]), INVCOS04);
261 A[13] = MPC_MULTIPLY_V((B[ 9] - B[14]), INVCOS12);
262 A[14] = MPC_MULTIPLY_V((B[10] - B[13]), INVCOS20);
263 A[15] = MPC_MULTIPLY_V((B[11] - B[12]), INVCOS28);
264 // 8 adds, 8 subs, 8 muls, 8 shifts
266 B[ 0] = A[ 0] + A[ 3];
267 B[ 1] = A[ 1] + A[ 2];
268 B[ 2] = MPC_MULTIPLY_V((A[ 0] - A[ 3]), INVCOS08);
269 B[ 3] = MPC_MULTIPLY_V((A[ 1] - A[ 2]), INVCOS24);
270 B[ 4] = A[ 4] + A[ 7];
271 B[ 5] = A[ 5] + A[ 6];
272 B[ 6] = MPC_MULTIPLY_V((A[ 4] - A[ 7]), INVCOS08);
273 B[ 7] = MPC_MULTIPLY_V((A[ 5] - A[ 6]), INVCOS24);
274 B[ 8] = A[ 8] + A[11];
275 B[ 9] = A[ 9] + A[10];
276 B[10] = MPC_MULTIPLY_V((A[ 8] - A[11]), INVCOS08);
277 B[11] = MPC_MULTIPLY_V((A[ 9] - A[10]), INVCOS24);
278 B[12] = A[12] + A[15];
279 B[13] = A[13] + A[14];
280 B[14] = MPC_MULTIPLY_V((A[12] - A[15]), INVCOS08);
281 B[15] = MPC_MULTIPLY_V((A[13] - A[14]), INVCOS24);
282 // 8 adds, 8 subs, 8 muls, 8 shifts
284 A[ 0] = B[ 0] + B[ 1];
285 A[ 1] = MPC_MULTIPLY_V((B[ 0] - B[ 1]), INVCOS16);
286 A[ 2] = B[ 2] + B[ 3];
287 A[ 3] = MPC_MULTIPLY_V((B[ 2] - B[ 3]), INVCOS16);
288 A[ 4] = B[ 4] + B[ 5];
289 A[ 5] = MPC_MULTIPLY_V((B[ 4] - B[ 5]), INVCOS16);
290 A[ 6] = B[ 6] + B[ 7];
291 A[ 7] = MPC_MULTIPLY_V((B[ 6] - B[ 7]), INVCOS16);
292 A[ 8] = B[ 8] + B[ 9];
293 A[ 9] = MPC_MULTIPLY_V((B[ 8] - B[ 9]), INVCOS16);
294 A[10] = B[10] + B[11];
295 A[11] = MPC_MULTIPLY_V((B[10] - B[11]), INVCOS16);
296 A[12] = B[12] + B[13];
297 A[13] = MPC_MULTIPLY_V((B[12] - B[13]), INVCOS16);
298 A[14] = B[14] + B[15];
299 A[15] = MPC_MULTIPLY_V((B[14] - B[15]), INVCOS16);
300 // 8 adds, 8 subs, 8 muls, 8 shifts
302 // multiple used expressions: -(A[12] + A[14] + A[15])
303 V[48] = -A[ 0];
304 V[ 0] = A[ 1];
305 V[40] = -A[ 2] - (V[ 8] = A[ 3]);
306 V[36] = -((V[ 4] = A[ 5] + (V[12] = A[ 7])) + A[ 6]);
307 V[44] = - A[ 4] - A[ 6] - A[ 7];
308 V[ 6] = (V[10] = A[11] + (V[14] = A[15])) + A[13];
309 V[38] = (V[34] = -(V[ 2] = A[ 9] + A[13] + A[15]) - A[14]) + A[ 9] - A[10] - A[11];
310 V[46] = (tmp = -(A[12] + A[14] + A[15])) - A[ 8];
311 V[42] = tmp - A[10] - A[11];
312 // 9 adds, 9 subs
314 A[ 0] = MPC_MULTIPLY_V_PRESCALE((Sample[ 0] - Sample[31]), INVCOS01);
315 A[ 1] = MPC_MULTIPLY_V_PRESCALE((Sample[ 1] - Sample[30]), INVCOS03);
316 A[ 2] = MPC_MULTIPLY_V_PRESCALE((Sample[ 2] - Sample[29]), INVCOS05);
317 A[ 3] = MPC_MULTIPLY_V_PRESCALE((Sample[ 3] - Sample[28]), INVCOS07);
318 A[ 4] = MPC_MULTIPLY_V_PRESCALE((Sample[ 4] - Sample[27]), INVCOS09);
319 A[ 5] = MPC_MULTIPLY_V_PRESCALE((Sample[ 5] - Sample[26]), INVCOS11);
320 A[ 6] = MPC_MULTIPLY_V_PRESCALE((Sample[ 6] - Sample[25]), INVCOS13);
321 A[ 7] = MPC_MULTIPLY_V_PRESCALE((Sample[ 7] - Sample[24]), INVCOS15);
322 A[ 8] = MPC_MULTIPLY_V_PRESCALE((Sample[ 8] - Sample[23]), INVCOS17);
323 A[ 9] = MPC_MULTIPLY_V_PRESCALE((Sample[ 9] - Sample[22]), INVCOS19);
324 A[10] = MPC_MULTIPLY_V_PRESCALE((Sample[10] - Sample[21]), INVCOS21);
325 A[11] = MPC_MULTIPLY_V_PRESCALE((Sample[11] - Sample[20]), INVCOS23);
326 A[12] = MPC_MULTIPLY_V_PRESCALE((Sample[12] - Sample[19]), INVCOS25);
327 A[13] = MPC_MULTIPLY_V_PRESCALE((Sample[13] - Sample[18]), INVCOS27);
328 A[14] = MPC_MULTIPLY_V_PRESCALE((Sample[14] - Sample[17]), INVCOS29);
329 A[15] = MPC_MULTIPLY_V_PRESCALE((Sample[15] - Sample[16]), INVCOS31);
330 // 16 subs, 16 muls, 16 shifts
332 B[ 0] = A[ 0] + A[15];
333 B[ 1] = A[ 1] + A[14];
334 B[ 2] = A[ 2] + A[13];
335 B[ 3] = A[ 3] + A[12];
336 B[ 4] = A[ 4] + A[11];
337 B[ 5] = A[ 5] + A[10];
338 B[ 6] = A[ 6] + A[ 9];
339 B[ 7] = A[ 7] + A[ 8];
340 B[ 8] = MPC_MULTIPLY_V((A[ 0] - A[15]), INVCOS02);
341 B[ 9] = MPC_MULTIPLY_V((A[ 1] - A[14]), INVCOS06);
342 B[10] = MPC_MULTIPLY_V((A[ 2] - A[13]), INVCOS10);
343 B[11] = MPC_MULTIPLY_V((A[ 3] - A[12]), INVCOS14);
344 B[12] = MPC_MULTIPLY_V((A[ 4] - A[11]), INVCOS18);
345 B[13] = MPC_MULTIPLY_V((A[ 5] - A[10]), INVCOS22);
346 B[14] = MPC_MULTIPLY_V((A[ 6] - A[ 9]), INVCOS26);
347 B[15] = MPC_MULTIPLY_V((A[ 7] - A[ 8]), INVCOS30);
348 // 8 adds, 8 subs, 8 muls, 8 shift
350 A[ 0] = B[ 0] + B[ 7];
351 A[ 1] = B[ 1] + B[ 6];
352 A[ 2] = B[ 2] + B[ 5];
353 A[ 3] = B[ 3] + B[ 4];
354 A[ 4] = MPC_MULTIPLY_V((B[ 0] - B[ 7]), INVCOS04);
355 A[ 5] = MPC_MULTIPLY_V((B[ 1] - B[ 6]), INVCOS12);
356 A[ 6] = MPC_MULTIPLY_V((B[ 2] - B[ 5]), INVCOS20);
357 A[ 7] = MPC_MULTIPLY_V((B[ 3] - B[ 4]), INVCOS28);
358 A[ 8] = B[ 8] + B[15];
359 A[ 9] = B[ 9] + B[14];
360 A[10] = B[10] + B[13];
361 A[11] = B[11] + B[12];
362 A[12] = MPC_MULTIPLY_V((B[ 8] - B[15]), INVCOS04);
363 A[13] = MPC_MULTIPLY_V((B[ 9] - B[14]), INVCOS12);
364 A[14] = MPC_MULTIPLY_V((B[10] - B[13]), INVCOS20);
365 A[15] = MPC_MULTIPLY_V((B[11] - B[12]), INVCOS28);
366 // 8 adds, 8 subs, 8 muls, 8 shift
368 B[ 0] = A[ 0] + A[ 3];
369 B[ 1] = A[ 1] + A[ 2];
370 B[ 2] = MPC_MULTIPLY_V((A[ 0] - A[ 3]), INVCOS08);
371 B[ 3] = MPC_MULTIPLY_V((A[ 1] - A[ 2]), INVCOS24);
372 B[ 4] = A[ 4] + A[ 7];
373 B[ 5] = A[ 5] + A[ 6];
374 B[ 6] = MPC_MULTIPLY_V((A[ 4] - A[ 7]), INVCOS08);
375 B[ 7] = MPC_MULTIPLY_V((A[ 5] - A[ 6]), INVCOS24);
376 B[ 8] = A[ 8] + A[11];
377 B[ 9] = A[ 9] + A[10];
378 B[10] = MPC_MULTIPLY_V((A[ 8] - A[11]), INVCOS08);
379 B[11] = MPC_MULTIPLY_V((A[ 9] - A[10]), INVCOS24);
380 B[12] = A[12] + A[15];
381 B[13] = A[13] + A[14];
382 B[14] = MPC_MULTIPLY_V((A[12] - A[15]), INVCOS08);
383 B[15] = MPC_MULTIPLY_V((A[13] - A[14]), INVCOS24);
384 // 8 adds, 8 subs, 8 muls, 8 shift
386 A[ 0] = MPC_V_POSTSCALE((B[ 0] + B[ 1]));
387 A[ 1] = MPC_MULTIPLY_V_POSTSCALE((B[ 0] - B[ 1]), INVCOS16);
388 A[ 2] = MPC_V_POSTSCALE((B[ 2] + B[ 3]));
389 A[ 3] = MPC_MULTIPLY_V_POSTSCALE((B[ 2] - B[ 3]), INVCOS16);
390 A[ 4] = MPC_V_POSTSCALE((B[ 4] + B[ 5]));
391 A[ 5] = MPC_MULTIPLY_V_POSTSCALE((B[ 4] - B[ 5]), INVCOS16);
392 A[ 6] = MPC_V_POSTSCALE((B[ 6] + B[ 7]));
393 A[ 7] = MPC_MULTIPLY_V_POSTSCALE((B[ 6] - B[ 7]), INVCOS16);
394 A[ 8] = MPC_V_POSTSCALE((B[ 8] + B[ 9]));
395 A[ 9] = MPC_MULTIPLY_V_POSTSCALE((B[ 8] - B[ 9]), INVCOS16);
396 A[10] = MPC_V_POSTSCALE((B[10] + B[11]));
397 A[11] = MPC_MULTIPLY_V_POSTSCALE((B[10] - B[11]), INVCOS16);
398 A[12] = MPC_V_POSTSCALE((B[12] + B[13]));
399 A[13] = MPC_MULTIPLY_V_POSTSCALE((B[12] - B[13]), INVCOS16);
400 A[14] = MPC_V_POSTSCALE((B[14] + B[15]));
401 A[15] = MPC_MULTIPLY_V_POSTSCALE((B[14] - B[15]), INVCOS16);
402 // 8 adds, 8 subs, 8 muls, 8 shift
404 // multiple used expressions: A[ 4]+A[ 6]+A[ 7], A[ 9]+A[13]+A[15]
405 V[ 5] = (V[11] = (V[13] = A[ 7] + (V[15] = A[15])) + A[11]) + A[ 5] + A[13];
406 V[ 7] = (V[ 9] = A[ 3] + A[11] + A[15]) + A[13];
407 V[33] = -(V[ 1] = A[ 1] + A[ 9] + A[13] + A[15]) - A[14];
408 V[35] = -(V[ 3] = A[ 5] + A[ 7] + A[ 9] + A[13] + A[15]) - A[ 6] - A[14];
409 V[37] = (tmp = -(A[10] + A[11] + A[13] + A[14] + A[15])) - A[ 5] - A[ 6] - A[ 7];
410 V[39] = tmp - A[ 2] - A[ 3];
411 V[41] = (tmp += A[13] - A[12]) - A[ 2] - A[ 3];
412 V[43] = tmp - A[ 4] - A[ 6] - A[ 7];
413 V[47] = (tmp = -(A[ 8] + A[12] + A[14] + A[15])) - A[ 0];
414 V[45] = tmp - A[ 4] - A[ 6] - A[ 7];
415 // 22 adds, 18 subs
417 V[32] = -(V[ 0] = MPC_V_PRESHIFT(V[ 0]));
418 V[31] = -(V[ 1] = MPC_V_PRESHIFT(V[ 1]));
419 V[30] = -(V[ 2] = MPC_V_PRESHIFT(V[ 2]));
420 V[29] = -(V[ 3] = MPC_V_PRESHIFT(V[ 3]));
421 V[28] = -(V[ 4] = MPC_V_PRESHIFT(V[ 4]));
422 V[27] = -(V[ 5] = MPC_V_PRESHIFT(V[ 5]));
423 V[26] = -(V[ 6] = MPC_V_PRESHIFT(V[ 6]));
424 V[25] = -(V[ 7] = MPC_V_PRESHIFT(V[ 7]));
425 V[24] = -(V[ 8] = MPC_V_PRESHIFT(V[ 8]));
426 V[23] = -(V[ 9] = MPC_V_PRESHIFT(V[ 9]));
427 V[22] = -(V[10] = MPC_V_PRESHIFT(V[10]));
428 V[21] = -(V[11] = MPC_V_PRESHIFT(V[11]));
429 V[20] = -(V[12] = MPC_V_PRESHIFT(V[12]));
430 V[19] = -(V[13] = MPC_V_PRESHIFT(V[13]));
431 V[18] = -(V[14] = MPC_V_PRESHIFT(V[14]));
432 V[17] = -(V[15] = MPC_V_PRESHIFT(V[15]));
433 // 16 adds, 16 shifts (OPTIMIZE_FOR_SPEED only)
435 V[63] = (V[33] = MPC_V_PRESHIFT(V[33]));
436 V[62] = (V[34] = MPC_V_PRESHIFT(V[34]));
437 V[61] = (V[35] = MPC_V_PRESHIFT(V[35]));
438 V[60] = (V[36] = MPC_V_PRESHIFT(V[36]));
439 V[59] = (V[37] = MPC_V_PRESHIFT(V[37]));
440 V[58] = (V[38] = MPC_V_PRESHIFT(V[38]));
441 V[57] = (V[39] = MPC_V_PRESHIFT(V[39]));
442 V[56] = (V[40] = MPC_V_PRESHIFT(V[40]));
443 V[55] = (V[41] = MPC_V_PRESHIFT(V[41]));
444 V[54] = (V[42] = MPC_V_PRESHIFT(V[42]));
445 V[53] = (V[43] = MPC_V_PRESHIFT(V[43]));
446 V[52] = (V[44] = MPC_V_PRESHIFT(V[44]));
447 V[51] = (V[45] = MPC_V_PRESHIFT(V[45]));
448 V[50] = (V[46] = MPC_V_PRESHIFT(V[46]));
449 V[49] = (V[47] = MPC_V_PRESHIFT(V[47]));
450 V[48] = (V[48] = MPC_V_PRESHIFT(V[48]));
451 // 16 adds, 16 shifts (OPTIMIZE_FOR_SPEED only)
453 // OPTIMIZE_FOR_SPEED total: 143 adds, 107 subs, 80 muls, 112 shifts
454 // total: 111 adds, 107 subs, 80 muls, 80 shifts
457 #if defined(CPU_ARM)
458 extern void
459 mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data,
460 const MPC_SAMPLE_FORMAT * V,
461 const MPC_SAMPLE_FORMAT * D);
462 #else
463 static void
464 mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data,
465 const MPC_SAMPLE_FORMAT * V,
466 const MPC_SAMPLE_FORMAT * D)
468 mpc_int32_t k;
470 #if defined(OPTIMIZE_FOR_SPEED)
471 // 32=32x32-multiply (FIXED_POINT)
472 for ( k = 0; k < 32; k++, D += 16, V++ )
474 *Data = V[ 0]*D[ 0] + V[ 96]*D[ 1] + V[128]*D[ 2] + V[224]*D[ 3]
475 + V[256]*D[ 4] + V[352]*D[ 5] + V[384]*D[ 6] + V[480]*D[ 7]
476 + V[512]*D[ 8] + V[608]*D[ 9] + V[640]*D[10] + V[736]*D[11]
477 + V[768]*D[12] + V[864]*D[13] + V[896]*D[14] + V[992]*D[15];
478 *Data >>= 1; // post shift to compensate for pre-shifting
479 Data += 1;
480 // total: 32 * (16 muls, 15 adds)
482 #else
483 #if defined(CPU_COLDFIRE)
484 // 64=32x32-multiply assembler for Coldfire
485 for ( k = 0; k < 32; k++, D += 16, V++ )
487 asm volatile (
488 "movem.l (%[D]), %%d0-%%d3 \n\t"
489 "move.l (%[V]), %%a5 \n\t"
490 "mac.l %%d0, %%a5, (96*4, %[V]), %%a5, %%acc0 \n\t"
491 "mac.l %%d1, %%a5, (128*4, %[V]), %%a5, %%acc0\n\t"
492 "mac.l %%d2, %%a5, (224*4, %[V]), %%a5, %%acc0\n\t"
493 "mac.l %%d3, %%a5, (256*4, %[V]), %%a5, %%acc0\n\t"
494 "movem.l (4*4, %[D]), %%d0-%%d3 \n\t"
495 "mac.l %%d0, %%a5, (352*4, %[V]), %%a5, %%acc0\n\t"
496 "mac.l %%d1, %%a5, (384*4, %[V]), %%a5, %%acc0\n\t"
497 "mac.l %%d2, %%a5, (480*4, %[V]), %%a5, %%acc0\n\t"
498 "mac.l %%d3, %%a5, (512*4, %[V]), %%a5, %%acc0\n\t"
499 "movem.l (8*4, %[D]), %%d0-%%d3 \n\t"
500 "mac.l %%d0, %%a5, (608*4, %[V]), %%a5, %%acc0\n\t"
501 "mac.l %%d1, %%a5, (640*4, %[V]), %%a5, %%acc0\n\t"
502 "mac.l %%d2, %%a5, (736*4, %[V]), %%a5, %%acc0\n\t"
503 "mac.l %%d3, %%a5, (768*4, %[V]), %%a5, %%acc0\n\t"
504 "movem.l (12*4, %[D]), %%d0-%%d3 \n\t"
505 "mac.l %%d0, %%a5, (864*4, %[V]), %%a5, %%acc0\n\t"
506 "mac.l %%d1, %%a5, (896*4, %[V]), %%a5, %%acc0\n\t"
507 "mac.l %%d2, %%a5, (992*4, %[V]), %%a5, %%acc0\n\t"
508 "mac.l %%d3, %%a5, %%acc0 \n\t"
509 "movclr.l %%acc0, %%d0 \n\t"
510 "lsl.l #1, %%d0 \n\t"
511 "move.l %%d0, (%[Data])+ \n"
512 : [Data] "+a" (Data)
513 : [V] "a" (V), [D] "a" (D)
514 : "d0", "d1", "d2", "d3", "a5");
516 #else
517 // 64=64x64-multiply (FIXED_POINT) or float=float*float (!FIXED_POINT) in C
518 for ( k = 0; k < 32; k++, D += 16, V++ )
520 *Data = MPC_MULTIPLY_EX(V[ 0],D[ 0],30) + MPC_MULTIPLY_EX(V[ 96],D[ 1],30)
521 + MPC_MULTIPLY_EX(V[128],D[ 2],30) + MPC_MULTIPLY_EX(V[224],D[ 3],30)
522 + MPC_MULTIPLY_EX(V[256],D[ 4],30) + MPC_MULTIPLY_EX(V[352],D[ 5],30)
523 + MPC_MULTIPLY_EX(V[384],D[ 6],30) + MPC_MULTIPLY_EX(V[480],D[ 7],30)
524 + MPC_MULTIPLY_EX(V[512],D[ 8],30) + MPC_MULTIPLY_EX(V[608],D[ 9],30)
525 + MPC_MULTIPLY_EX(V[640],D[10],30) + MPC_MULTIPLY_EX(V[736],D[11],30)
526 + MPC_MULTIPLY_EX(V[768],D[12],30) + MPC_MULTIPLY_EX(V[864],D[13],30)
527 + MPC_MULTIPLY_EX(V[896],D[14],30) + MPC_MULTIPLY_EX(V[992],D[15],30);
528 Data += 1;
529 // total: 16 muls, 15 adds, 16 shifts
531 #endif
532 #endif
534 #endif /* CPU_ARM */
536 static void
537 mpc_full_synthesis_filter(MPC_SAMPLE_FORMAT *OutData, MPC_SAMPLE_FORMAT *V, const MPC_SAMPLE_FORMAT *Y)
539 mpc_uint32_t n;
541 if (NULL != OutData)
543 for ( n = 0; n < 36; n++, Y += 32, OutData += 32 )
545 V -= 64;
546 mpc_calculate_new_V ( Y, V );
547 mpc_decoder_windowing_D( OutData, V, Di_opt );
552 void
553 mpc_decoder_synthese_filter_float(mpc_decoder *d, MPC_SAMPLE_FORMAT *OutData)
555 /********* left channel ********/
556 memmove(d->V_L + MPC_V_MEM, d->V_L, 960 * sizeof(MPC_SAMPLE_FORMAT) );
558 mpc_full_synthesis_filter(
559 OutData,
560 (MPC_SAMPLE_FORMAT *)(d->V_L + MPC_V_MEM),
561 (MPC_SAMPLE_FORMAT *)(d->Y_L [0]));
563 /******** right channel ********/
564 memmove(d->V_R + MPC_V_MEM, d->V_R, 960 * sizeof(MPC_SAMPLE_FORMAT) );
566 mpc_full_synthesis_filter(
567 (OutData == NULL ? NULL : OutData + MPC_FRAME_LENGTH),
568 (MPC_SAMPLE_FORMAT *)(d->V_R + MPC_V_MEM),
569 (MPC_SAMPLE_FORMAT *)(d->Y_R [0]));
572 /*******************************************/
573 /* */
574 /* dithered synthesis */
575 /* */
576 /*******************************************/
578 static const unsigned char Parity [256] ICONST_ATTR = { // parity
579 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
580 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
581 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
582 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
583 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
584 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
585 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
586 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0
590 * This is a simple random number generator with good quality for audio purposes.
591 * It consists of two polycounters with opposite rotation direction and different
592 * periods. The periods are coprime, so the total period is the product of both.
594 * -------------------------------------------------------------------------------------------------
595 * +-> |31:30:29:28:27:26:25:24:23:22:21:20:19:18:17:16:15:14:13:12:11:10: 9: 8: 7: 6: 5: 4: 3: 2: 1: 0|
596 * | -------------------------------------------------------------------------------------------------
597 * | | | | | | |
598 * | +--+--+--+-XOR-+--------+
599 * | |
600 * +--------------------------------------------------------------------------------------+
602 * -------------------------------------------------------------------------------------------------
603 * |31:30:29:28:27:26:25:24:23:22:21:20:19:18:17:16:15:14:13:12:11:10: 9: 8: 7: 6: 5: 4: 3: 2: 1: 0| <-+
604 * ------------------------------------------------------------------------------------------------- |
605 * | | | | |
606 * +--+----XOR----+--+ |
607 * | |
608 * +----------------------------------------------------------------------------------------+
611 * The first has an period of 3*5*17*257*65537, the second of 7*47*73*178481,
612 * which gives a period of 18.410.713.077.675.721.215. The result is the
613 * XORed values of both generators.
615 mpc_uint32_t
616 mpc_random_int(mpc_decoder *d)
618 #if 1
619 mpc_uint32_t t1, t2, t3, t4;
621 t3 = t1 = d->__r1; t4 = t2 = d->__r2; // Parity calculation is done via table lookup, this is also available
622 t1 &= 0xF5; t2 >>= 25; // on CPUs without parity, can be implemented in C and avoid unpredictable
623 t1 = Parity [t1]; t2 &= 0x63; // jumps and slow rotate through the carry flag operations.
624 t1 <<= 31; t2 = Parity [t2];
626 return (d->__r1 = (t3 >> 1) | t1 ) ^ (d->__r2 = (t4 + t4) | t2 );
627 #else
628 return (d->__r1 = (d->__r1 >> 1) | ((mpc_uint32_t)Parity [d->__r1 & 0xF5] << 31) ) ^
629 (d->__r2 = (d->__r2 << 1) | (mpc_uint32_t)Parity [(d->__r2 >> 25) & 0x63] );
630 #endif