lcd-m6sp.c: remove \r
[kugel-rb.git] / apps / codecs / libmusepack / synth_filter.c
blob0f415a4838be3181b3acb1bcd5217d5cbe3ac125
1 /*
2 Copyright (c) 2005, The Musepack Development Team
3 All rights reserved.
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
9 * Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above
13 copyright notice, this list of conditions and the following
14 disclaimer in the documentation and/or other materials provided
15 with the distribution.
17 * Neither the name of the The Musepack Development Team nor the
18 names of its contributors may be used to endorse or promote
19 products derived from this software without specific prior
20 written permission.
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 /// \file synth_filter.c
36 /// Synthesis functions.
37 /// \todo document me
38 #include <string.h>
39 #include "mpcdec.h"
40 #include "decoder.h"
41 #include "mpcdec_math.h"
42 #include "internal.h"
44 /* C O N S T A N T S */
45 #undef _
47 #if defined(MPC_FIXED_POINT)
48 #if defined(CPU_ARM)
49 // do not up-scale D-values to achieve higher speed in smull/mlal
50 // operations. saves ~14/8 = 1.75 cycles per multiplication
51 #define D(value) (value)
53 // in this configuration a post-shift by >>16 is needed after synthesis
54 #else
55 // saturate to +/- 2^31 (= value << (31-17)), D-values are +/- 2^17
56 #define D(value) (value << (14))
57 #endif
58 #else
59 // IMPORTANT: internal scaling is somehow strange for floating point, therefore we scale the coefficients Di_opt
60 // by the correct amount to have proper scaled output
61 #define D(value) MAKE_MPC_SAMPLE((double)value*(double)(0x1000))
62 #endif
64 // Di_opt coefficients are +/- 2^17 (pre-shifted by <<16)
65 static const MPC_SAMPLE_FORMAT Di_opt [512] ICONST_ATTR = {
66 /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */
67 /* 0 */ D( 0), -D( 29), D(213), -D( 459), D(2037), -D(5153), D( 6574), -D(37489), D(75038), D(37489), D(6574), D(5153), D(2037), D(459), D(213), D(29),
68 /* 1 */ -D( 1), -D( 31), D(218), -D( 519), D(2000), -D(5517), D( 5959), -D(39336), D(74992), D(35640), D(7134), D(4788), D(2063), D(401), D(208), D(26),
69 /* 2 */ -D( 1), -D( 35), D(222), -D( 581), D(1952), -D(5879), D( 5288), -D(41176), D(74856), D(33791), D(7640), D(4425), D(2080), D(347), D(202), D(24),
70 /* 3 */ -D( 1), -D( 38), D(225), -D( 645), D(1893), -D(6237), D( 4561), -D(43006), D(74630), D(31947), D(8092), D(4063), D(2087), D(294), D(196), D(21),
71 /* 4 */ -D( 1), -D( 41), D(227), -D( 711), D(1822), -D(6589), D( 3776), -D(44821), D(74313), D(30112), D(8492), D(3705), D(2085), D(244), D(190), D(19),
72 /* 5 */ -D( 1), -D( 45), D(228), -D( 779), D(1739), -D(6935), D( 2935), -D(46617), D(73908), D(28289), D(8840), D(3351), D(2075), D(197), D(183), D(17),
73 /* 6 */ -D( 1), -D( 49), D(228), -D( 848), D(1644), -D(7271), D( 2037), -D(48390), D(73415), D(26482), D(9139), D(3004), D(2057), D(153), D(176), D(16),
74 /* 7 */ -D( 2), -D( 53), D(227), -D( 919), D(1535), -D(7597), D( 1082), -D(50137), D(72835), D(24694), D(9389), D(2663), D(2032), D(111), D(169), D(14),
75 /* 8 */ -D( 2), -D( 58), D(224), -D( 991), D(1414), -D(7910), D( 70), -D(51853), D(72169), D(22929), D(9592), D(2330), D(2001), D( 72), D(161), D(13),
76 /* 9 */ -D( 2), -D( 63), D(221), -D(1064), D(1280), -D(8209), -D( 998), -D(53534), D(71420), D(21189), D(9750), D(2006), D(1962), D( 36), D(154), D(11),
77 /* 10 */ -D( 2), -D( 68), D(215), -D(1137), D(1131), -D(8491), -D( 2122), -D(55178), D(70590), D(19478), D(9863), D(1692), D(1919), D( 2), D(147), D(10),
78 /* 11 */ -D( 3), -D( 73), D(208), -D(1210), D( 970), -D(8755), -D( 3300), -D(56778), D(69679), D(17799), D(9935), D(1388), D(1870), -D( 29), D(139), D( 9),
79 /* 12 */ -D( 3), -D( 79), D(200), -D(1283), D( 794), -D(8998), -D( 4533), -D(58333), D(68692), D(16155), D(9966), D(1095), D(1817), -D( 57), D(132), D( 8),
80 /* 13 */ -D( 4), -D( 85), D(189), -D(1356), D( 605), -D(9219), -D( 5818), -D(59838), D(67629), D(14548), D(9959), D( 814), D(1759), -D( 83), D(125), D( 7),
81 /* 14 */ -D( 4), -D( 91), D(177), -D(1428), D( 402), -D(9416), -D( 7154), -D(61289), D(66494), D(12980), D(9916), D( 545), D(1698), -D(106), D(117), D( 7),
82 /* 15 */ -D( 5), -D( 97), D(163), -D(1498), D( 185), -D(9585), -D( 8540), -D(62684), D(65290), D(11455), D(9838), D( 288), D(1634), -D(127), D(111), D( 6),
83 /* 16 */ -D( 5), -D(104), D(146), -D(1567), -D( 45), -D(9727), -D( 9975), -D(64019), D(64019), D( 9975), D(9727), D( 45), D(1567), -D(146), D(104), D( 5),
84 /* 17 */ -D( 6), -D(111), D(127), -D(1634), -D( 288), -D(9838), -D(11455), -D(65290), D(62684), D( 8540), D(9585), -D( 185), D(1498), -D(163), D( 97), D( 5),
85 /* 18 */ -D( 7), -D(117), D(106), -D(1698), -D( 545), -D(9916), -D(12980), -D(66494), D(61289), D( 7154), D(9416), -D( 402), D(1428), -D(177), D( 91), D( 4),
86 /* 19 */ -D( 7), -D(125), D( 83), -D(1759), -D( 814), -D(9959), -D(14548), -D(67629), D(59838), D( 5818), D(9219), -D( 605), D(1356), -D(189), D( 85), D( 4),
87 /* 20 */ -D( 8), -D(132), D( 57), -D(1817), -D(1095), -D(9966), -D(16155), -D(68692), D(58333), D( 4533), D(8998), -D( 794), D(1283), -D(200), D( 79), D( 3),
88 /* 21 */ -D( 9), -D(139), D( 29), -D(1870), -D(1388), -D(9935), -D(17799), -D(69679), D(56778), D( 3300), D(8755), -D( 970), D(1210), -D(208), D( 73), D( 3),
89 /* 22 */ -D(10), -D(147), -D( 2), -D(1919), -D(1692), -D(9863), -D(19478), -D(70590), D(55178), D( 2122), D(8491), -D(1131), D(1137), -D(215), D( 68), D( 2),
90 /* 23 */ -D(11), -D(154), -D( 36), -D(1962), -D(2006), -D(9750), -D(21189), -D(71420), D(53534), D( 998), D(8209), -D(1280), D(1064), -D(221), D( 63), D( 2),
91 /* 24 */ -D(13), -D(161), -D( 72), -D(2001), -D(2330), -D(9592), -D(22929), -D(72169), D(51853), -D( 70), D(7910), -D(1414), D( 991), -D(224), D( 58), D( 2),
92 /* 25 */ -D(14), -D(169), -D(111), -D(2032), -D(2663), -D(9389), -D(24694), -D(72835), D(50137), -D( 1082), D(7597), -D(1535), D( 919), -D(227), D( 53), D( 2),
93 /* 26 */ -D(16), -D(176), -D(153), -D(2057), -D(3004), -D(9139), -D(26482), -D(73415), D(48390), -D( 2037), D(7271), -D(1644), D( 848), -D(228), D( 49), D( 1),
94 /* 27 */ -D(17), -D(183), -D(197), -D(2075), -D(3351), -D(8840), -D(28289), -D(73908), D(46617), -D( 2935), D(6935), -D(1739), D( 779), -D(228), D( 45), D( 1),
95 /* 28 */ -D(19), -D(190), -D(244), -D(2085), -D(3705), -D(8492), -D(30112), -D(74313), D(44821), -D( 3776), D(6589), -D(1822), D( 711), -D(227), D( 41), D( 1),
96 /* 29 */ -D(21), -D(196), -D(294), -D(2087), -D(4063), -D(8092), -D(31947), -D(74630), D(43006), -D( 4561), D(6237), -D(1893), D( 645), -D(225), D( 38), D( 1),
97 /* 30 */ -D(24), -D(202), -D(347), -D(2080), -D(4425), -D(7640), -D(33791), -D(74856), D(41176), -D( 5288), D(5879), -D(1952), D( 581), -D(222), D( 35), D( 1),
98 /* 31 */ -D(26), -D(208), -D(401), -D(2063), -D(4788), -D(7134), -D(35640), -D(74992), D(39336), -D( 5959), D(5517), -D(2000), D( 519), -D(218), D( 31), D( 1)
101 #undef D
// DCT32-coefficients were expanded (<<) by DCT32_COEFFICIENT_EXPAND
#define DCT32_COEFFICIENT_EXPAND 31

#if defined(MPC_FIXED_POINT)
   // Define the 64=32x32 multiplication of DCT coefficients with samples.
   // Going through MPC_MULTIPLY_FRACT allows highly optimized assembler to be
   // used. MPC_MULTIPLY_FRACT performs >>32 after the multiplication; as the
   // coefficients were expanded by DCT32_COEFFICIENT_EXPAND, the result is
   // corrected here with a left shift by (32 - DCT32_COEFFICIENT_EXPAND).
   // Will lose 4 bit accuracy on result in fract part without effect on
   // final audio result.
   #define MPC_DCT32_MUL(sample, coef)  (MPC_MULTIPLY_FRACT(sample,coef) << (32-DCT32_COEFFICIENT_EXPAND))
   #define MPC_DCT32_SHIFT(sample)      (sample)
#else
   // for floating point use the standard multiplication macro
   #define MPC_DCT32_MUL(sample, coef)  (MPC_MULTIPLY(sample, coef) )
   #define MPC_DCT32_SHIFT(sample)      (sample)
#endif
118 /******************************************************************************
119 * mpc_dct32(const int *in, int *out)
121 * mpc_dct32 is a dct32 with in[32]->dct[32] that contains the mirroring from
122 * dct[32] to the expected out[64]. The symmetry is
123 * out[16] = 0,
124 * out[ 0..15] = dct[ 0..15],
125 * out[32..17] = -dct[ 0..15],
126 * out[33..48] = -dct[16..31],
127 * out[63..48] = -dct[16..31].
128 * The cos-tab has the format s0.31.
129 *****************************************************************************/
130 void
131 mpc_dct32(const MPC_SAMPLE_FORMAT *in, MPC_SAMPLE_FORMAT *v)
132 ICODE_ATTR_MPC_LARGE_IRAM;
134 void
135 mpc_dct32(const MPC_SAMPLE_FORMAT *in, MPC_SAMPLE_FORMAT *v)
137 MPC_SAMPLE_FORMAT t0, t1, t2, t3, t4, t5, t6, t7;
138 MPC_SAMPLE_FORMAT t8, t9, t10, t11, t12, t13, t14, t15;
139 MPC_SAMPLE_FORMAT t16, t17, t18, t19, t20, t21, t22, t23;
140 MPC_SAMPLE_FORMAT t24, t25, t26, t27, t28, t29, t30, t31;
141 MPC_SAMPLE_FORMAT t32, t33, t34, t35, t36, t37, t38, t39;
142 MPC_SAMPLE_FORMAT t40, t41, t42, t43, t44, t45, t46, t47;
143 MPC_SAMPLE_FORMAT t48, t49, t50, t51, t52, t53, t54, t55;
144 MPC_SAMPLE_FORMAT t56, t57, t58, t59, t60, t61, t62, t63;
145 MPC_SAMPLE_FORMAT t64, t65, t66, t67, t68, t69, t70, t71;
146 MPC_SAMPLE_FORMAT t72, t73, t74, t75, t76, t77, t78, t79;
147 MPC_SAMPLE_FORMAT t80, t81, t82, t83, t84, t85, t86, t87;
148 MPC_SAMPLE_FORMAT t88, t89, t90, t91, t92, t93, t94, t95;
149 MPC_SAMPLE_FORMAT t96, t97, t98, t99, t100, t101, t102, t103;
150 MPC_SAMPLE_FORMAT t104, t105, t106, t107, t108, t109, t110, t111;
151 MPC_SAMPLE_FORMAT t112, t113, t114, t115, t116, t117, t118, t119;
152 MPC_SAMPLE_FORMAT t120, t121, t122, t123, t124, t125, t126, t127;
153 MPC_SAMPLE_FORMAT t128, t129, t130, t131, t132, t133, t134, t135;
154 MPC_SAMPLE_FORMAT t136, t137, t138, t139, t140, t141, t142, t143;
155 MPC_SAMPLE_FORMAT t144, t145, t146, t147, t148, t149, t150, t151;
156 MPC_SAMPLE_FORMAT t152, t153, t154, t155, t156, t157, t158, t159;
157 MPC_SAMPLE_FORMAT t160, t161, t162, t163, t164, t165, t166, t167;
158 MPC_SAMPLE_FORMAT t168, t169, t170, t171, t172, t173, t174, t175;
159 MPC_SAMPLE_FORMAT t176;
161 /* costab[i] = cos(PI / (2 * 32) * i) */
162 #define costab01 (0x7fd8878e) /* 0.998795456 */
163 #define costab02 (0x7f62368f) /* 0.995184727 */
164 #define costab03 (0x7e9d55fc) /* 0.989176510 */
165 #define costab04 (0x7d8a5f40) /* 0.980785280 */
166 #define costab05 (0x7c29fbee) /* 0.970031253 */
167 #define costab06 (0x7a7d055b) /* 0.956940336 */
168 #define costab07 (0x78848414) /* 0.941544065 */
169 #define costab08 (0x7641af3d) /* 0.923879533 */
170 #define costab09 (0x73b5ebd1) /* 0.903989293 */
171 #define costab10 (0x70e2cbc6) /* 0.881921264 */
172 #define costab11 (0x6dca0d14) /* 0.857728610 */
173 #define costab12 (0x6a6d98a4) /* 0.831469612 */
174 #define costab13 (0x66cf8120) /* 0.803207531 */
175 #define costab14 (0x62f201ac) /* 0.773010453 */
176 #define costab15 (0x5ed77c8a) /* 0.740951125 */
177 #define costab16 (0x5a82799a) /* 0.707106781 */
178 #define costab17 (0x55f5a4d2) /* 0.671558955 */
179 #define costab18 (0x5133cc94) /* 0.634393284 */
180 #define costab19 (0x4c3fdff4) /* 0.595699304 */
181 #define costab20 (0x471cece7) /* 0.555570233 */
182 #define costab21 (0x41ce1e65) /* 0.514102744 */
183 #define costab22 (0x3c56ba70) /* 0.471396737 */
184 #define costab23 (0x36ba2014) /* 0.427555093 */
185 #define costab24 (0x30fbc54d) /* 0.382683432 */
186 #define costab25 (0x2b1f34eb) /* 0.336889853 */
187 #define costab26 (0x25280c5e) /* 0.290284677 */
188 #define costab27 (0x1f19f97b) /* 0.242980180 */
189 #define costab28 (0x18f8b83c) /* 0.195090322 */
190 #define costab29 (0x12c8106f) /* 0.146730474 */
191 #define costab30 (0x0c8bd35e) /* 0.098017140 */
192 #define costab31 (0x0647d97c) /* 0.049067674 */
194 t0 = in[ 0] + in[31]; t16 = MPC_DCT32_MUL(in[ 0] - in[31], costab01);
195 t1 = in[15] + in[16]; t17 = MPC_DCT32_MUL(in[15] - in[16], costab31);
197 t41 = t16 + t17;
198 t59 = MPC_DCT32_MUL(t16 - t17, costab02);
199 t33 = t0 + t1;
200 t50 = MPC_DCT32_MUL(t0 - t1, costab02);
202 t2 = in[ 7] + in[24]; t18 = MPC_DCT32_MUL(in[ 7] - in[24], costab15);
203 t3 = in[ 8] + in[23]; t19 = MPC_DCT32_MUL(in[ 8] - in[23], costab17);
205 t42 = t18 + t19;
206 t60 = MPC_DCT32_MUL(t18 - t19, costab30);
207 t34 = t2 + t3;
208 t51 = MPC_DCT32_MUL(t2 - t3, costab30);
210 t4 = in[ 3] + in[28]; t20 = MPC_DCT32_MUL(in[ 3] - in[28], costab07);
211 t5 = in[12] + in[19]; t21 = MPC_DCT32_MUL(in[12] - in[19], costab25);
213 t43 = t20 + t21;
214 t61 = MPC_DCT32_MUL(t20 - t21, costab14);
215 t35 = t4 + t5;
216 t52 = MPC_DCT32_MUL(t4 - t5, costab14);
218 t6 = in[ 4] + in[27]; t22 = MPC_DCT32_MUL(in[ 4] - in[27], costab09);
219 t7 = in[11] + in[20]; t23 = MPC_DCT32_MUL(in[11] - in[20], costab23);
221 t44 = t22 + t23;
222 t62 = MPC_DCT32_MUL(t22 - t23, costab18);
223 t36 = t6 + t7;
224 t53 = MPC_DCT32_MUL(t6 - t7, costab18);
226 t8 = in[ 1] + in[30]; t24 = MPC_DCT32_MUL(in[ 1] - in[30], costab03);
227 t9 = in[14] + in[17]; t25 = MPC_DCT32_MUL(in[14] - in[17], costab29);
229 t45 = t24 + t25;
230 t63 = MPC_DCT32_MUL(t24 - t25, costab06);
231 t37 = t8 + t9;
232 t54 = MPC_DCT32_MUL(t8 - t9, costab06);
234 t10 = in[ 6] + in[25]; t26 = MPC_DCT32_MUL(in[ 6] - in[25], costab13);
235 t11 = in[ 9] + in[22]; t27 = MPC_DCT32_MUL(in[ 9] - in[22], costab19);
237 t46 = t26 + t27;
238 t64 = MPC_DCT32_MUL(t26 - t27, costab26);
239 t38 = t10 + t11;
240 t55 = MPC_DCT32_MUL(t10 - t11, costab26);
242 t12 = in[ 2] + in[29]; t28 = MPC_DCT32_MUL(in[ 2] - in[29], costab05);
243 t13 = in[13] + in[18]; t29 = MPC_DCT32_MUL(in[13] - in[18], costab27);
245 t47 = t28 + t29;
246 t65 = MPC_DCT32_MUL(t28 - t29, costab10);
247 t39 = t12 + t13;
248 t56 = MPC_DCT32_MUL(t12 - t13, costab10);
250 t14 = in[ 5] + in[26]; t30 = MPC_DCT32_MUL(in[ 5] - in[26], costab11);
251 t15 = in[10] + in[21]; t31 = MPC_DCT32_MUL(in[10] - in[21], costab21);
253 t48 = t30 + t31;
254 t66 = MPC_DCT32_MUL(t30 - t31, costab22);
255 t40 = t14 + t15;
256 t57 = MPC_DCT32_MUL(t14 - t15, costab22);
258 t69 = t33 + t34; t89 = MPC_DCT32_MUL(t33 - t34, costab04);
259 t70 = t35 + t36; t90 = MPC_DCT32_MUL(t35 - t36, costab28);
260 t71 = t37 + t38; t91 = MPC_DCT32_MUL(t37 - t38, costab12);
261 t72 = t39 + t40; t92 = MPC_DCT32_MUL(t39 - t40, costab20);
262 t73 = t41 + t42; t94 = MPC_DCT32_MUL(t41 - t42, costab04);
263 t74 = t43 + t44; t95 = MPC_DCT32_MUL(t43 - t44, costab28);
264 t75 = t45 + t46; t96 = MPC_DCT32_MUL(t45 - t46, costab12);
265 t76 = t47 + t48; t97 = MPC_DCT32_MUL(t47 - t48, costab20);
267 t78 = t50 + t51; t100 = MPC_DCT32_MUL(t50 - t51, costab04);
268 t79 = t52 + t53; t101 = MPC_DCT32_MUL(t52 - t53, costab28);
269 t80 = t54 + t55; t102 = MPC_DCT32_MUL(t54 - t55, costab12);
270 t81 = t56 + t57; t103 = MPC_DCT32_MUL(t56 - t57, costab20);
272 t83 = t59 + t60; t106 = MPC_DCT32_MUL(t59 - t60, costab04);
273 t84 = t61 + t62; t107 = MPC_DCT32_MUL(t61 - t62, costab28);
274 t85 = t63 + t64; t108 = MPC_DCT32_MUL(t63 - t64, costab12);
275 t86 = t65 + t66; t109 = MPC_DCT32_MUL(t65 - t66, costab20);
277 t113 = t69 + t70;
278 t114 = t71 + t72;
280 /* 0 */ v[48] = -MPC_DCT32_SHIFT(t113 + t114);
281 /* 16 */ v[32] = -(v[ 0] = MPC_DCT32_SHIFT(MPC_DCT32_MUL(t113 - t114, costab16)));
283 t115 = t73 + t74;
284 t116 = t75 + t76;
286 t32 = t115 + t116;
288 /* 1 */ v[49] = v[47] = -MPC_DCT32_SHIFT(t32);
290 t118 = t78 + t79;
291 t119 = t80 + t81;
293 t58 = t118 + t119;
295 /* 2 */ v[50] = v[46] = -MPC_DCT32_SHIFT(t58);
297 t121 = t83 + t84;
298 t122 = t85 + t86;
300 t67 = t121 + t122;
302 t49 = (t67 * 2) - t32;
304 /* 3 */ v[51] = v[45] = -MPC_DCT32_SHIFT(t49);
306 t125 = t89 + t90;
307 t126 = t91 + t92;
309 t93 = t125 + t126;
311 /* 4 */ v[52] = v[44] = -MPC_DCT32_SHIFT(t93);
313 t128 = t94 + t95;
314 t129 = t96 + t97;
316 t98 = t128 + t129;
318 t68 = (t98 * 2) - t49;
320 /* 5 */ v[53] = v[43] = -MPC_DCT32_SHIFT(t68);
322 t132 = t100 + t101;
323 t133 = t102 + t103;
325 t104 = t132 + t133;
327 t82 = (t104 * 2) - t58;
329 /* 6 */ v[54] = v[42] = -MPC_DCT32_SHIFT(t82);
331 t136 = t106 + t107;
332 t137 = t108 + t109;
334 t110 = t136 + t137;
336 t87 = (t110 * 2) - t67;
338 t77 = (t87 * 2) - t68;
340 /* 7 */ v[55] = v[41] = -MPC_DCT32_SHIFT(t77);
342 t141 = MPC_DCT32_MUL(t69 - t70, costab08);
343 t142 = MPC_DCT32_MUL(t71 - t72, costab24);
344 t143 = t141 + t142;
346 /* 8 */ v[56] = v[40] = -MPC_DCT32_SHIFT(t143);
347 /* 24 */ v[24] = -(v[ 8] = MPC_DCT32_SHIFT((MPC_DCT32_MUL(t141 - t142, costab16) * 2) - t143));
349 t144 = MPC_DCT32_MUL(t73 - t74, costab08);
350 t145 = MPC_DCT32_MUL(t75 - t76, costab24);
351 t146 = t144 + t145;
353 t88 = (t146 * 2) - t77;
355 /* 9 */ v[57] = v[39] = -MPC_DCT32_SHIFT(t88);
357 t148 = MPC_DCT32_MUL(t78 - t79, costab08);
358 t149 = MPC_DCT32_MUL(t80 - t81, costab24);
359 t150 = t148 + t149;
361 t105 = (t150 * 2) - t82;
363 /* 10 */ v[58] = v[38] = -MPC_DCT32_SHIFT(t105);
365 t152 = MPC_DCT32_MUL(t83 - t84, costab08);
366 t153 = MPC_DCT32_MUL(t85 - t86, costab24);
367 t154 = t152 + t153;
369 t111 = (t154 * 2) - t87;
371 t99 = (t111 * 2) - t88;
373 /* 11 */ v[59] = v[37] = -MPC_DCT32_SHIFT(t99);
375 t157 = MPC_DCT32_MUL(t89 - t90, costab08);
376 t158 = MPC_DCT32_MUL(t91 - t92, costab24);
377 t159 = t157 + t158;
379 t127 = (t159 * 2) - t93;
381 /* 12 */ v[60] = v[36] = -MPC_DCT32_SHIFT(t127);
383 t160 = (MPC_DCT32_MUL(t125 - t126, costab16) * 2) - t127;
385 /* 20 */ v[28] = -(v[ 4] = MPC_DCT32_SHIFT(t160));
386 /* 28 */ v[20] = -(v[12] = MPC_DCT32_SHIFT((((MPC_DCT32_MUL(t157 - t158, costab16) * 2) - t159) * 2) - t160));
388 t161 = MPC_DCT32_MUL(t94 - t95, costab08);
389 t162 = MPC_DCT32_MUL(t96 - t97, costab24);
390 t163 = t161 + t162;
392 t130 = (t163 * 2) - t98;
394 t112 = (t130 * 2) - t99;
396 /* 13 */ v[61] = v[35] = -MPC_DCT32_SHIFT(t112);
398 t164 = (MPC_DCT32_MUL(t128 - t129, costab16) * 2) - t130;
400 t166 = MPC_DCT32_MUL(t100 - t101, costab08);
401 t167 = MPC_DCT32_MUL(t102 - t103, costab24);
402 t168 = t166 + t167;
404 t134 = (t168 * 2) - t104;
406 t120 = (t134 * 2) - t105;
408 /* 14 */ v[62] = v[34] = -MPC_DCT32_SHIFT(t120);
410 t135 = (MPC_DCT32_MUL(t118 - t119, costab16) * 2) - t120;
412 /* 18 */ v[30] = -(v[ 2] = MPC_DCT32_SHIFT(t135));
414 t169 = (MPC_DCT32_MUL(t132 - t133, costab16) * 2) - t134;
416 t151 = (t169 * 2) - t135;
418 /* 22 */ v[26] = -(v[ 6] = MPC_DCT32_SHIFT(t151));
420 t170 = (((MPC_DCT32_MUL(t148 - t149, costab16) * 2) - t150) * 2) - t151;
422 /* 26 */ v[22] = -(v[10] = MPC_DCT32_SHIFT(t170));
423 /* 30 */ v[18] = -(v[14] = MPC_DCT32_SHIFT((((((MPC_DCT32_MUL(t166 - t167, costab16) * 2) - t168) * 2) - t169) * 2) - t170));
425 t171 = MPC_DCT32_MUL(t106 - t107, costab08);
426 t172 = MPC_DCT32_MUL(t108 - t109, costab24);
427 t173 = t171 + t172;
429 t138 = (t173 * 2) - t110;
431 t123 = (t138 * 2) - t111;
433 t139 = (MPC_DCT32_MUL(t121 - t122, costab16) * 2) - t123;
435 t117 = (t123 * 2) - t112;
437 /* 15 */ v[63] = v[33] =-MPC_DCT32_SHIFT(t117);
439 t124 = (MPC_DCT32_MUL(t115 - t116, costab16) * 2) - t117;
441 /* 17 */ v[31] = -(v[ 1] = MPC_DCT32_SHIFT(t124));
443 t131 = (t139 * 2) - t124;
445 /* 19 */ v[29] = -(v[ 3] = MPC_DCT32_SHIFT(t131));
447 t140 = (t164 * 2) - t131;
449 /* 21 */ v[27] = -(v[ 5] = MPC_DCT32_SHIFT(t140));
451 t174 = (MPC_DCT32_MUL(t136 - t137, costab16) * 2) - t138;
453 t155 = (t174 * 2) - t139;
455 t147 = (t155 * 2) - t140;
457 /* 23 */ v[25] = -(v[ 7] = MPC_DCT32_SHIFT(t147));
459 t156 = (((MPC_DCT32_MUL(t144 - t145, costab16) * 2) - t146) * 2) - t147;
461 /* 25 */ v[23] = -(v[ 9] = MPC_DCT32_SHIFT(t156));
463 t175 = (((MPC_DCT32_MUL(t152 - t153, costab16) * 2) - t154) * 2) - t155;
465 t165 = (t175 * 2) - t156;
467 /* 27 */ v[21] = -(v[11] = MPC_DCT32_SHIFT(t165));
469 t176 = (((((MPC_DCT32_MUL(t161 - t162, costab16) * 2) - t163) * 2) - t164) * 2) - t165;
471 /* 29 */ v[19] = -(v[13] = MPC_DCT32_SHIFT(t176));
472 /* 31 */ v[17] = -(v[15] = MPC_DCT32_SHIFT((((((((MPC_DCT32_MUL(t171 - t172, costab16) * 2) - t173) * 2) - t174) * 2) - t175) * 2) - t176));
475 #if defined(CPU_ARM)
476 extern void
477 mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data,
478 const MPC_SAMPLE_FORMAT * V,
479 const MPC_SAMPLE_FORMAT * D);
480 #else
481 static void
482 mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data,
483 const MPC_SAMPLE_FORMAT * V,
484 const MPC_SAMPLE_FORMAT * D)
486 mpc_int32_t k;
488 #if defined(CPU_COLDFIRE)
489 // 64=32x32-multiply assembler for Coldfire
490 for ( k = 0; k < 32; k++, D += 16, V++ )
492 asm volatile (
493 "movem.l (%[D]), %%d0-%%d3 \n\t"
494 "move.l (%[V]), %%a5 \n\t"
495 "mac.l %%d0, %%a5, (96*4, %[V]), %%a5, %%acc0 \n\t"
496 "mac.l %%d1, %%a5, (128*4, %[V]), %%a5, %%acc0\n\t"
497 "mac.l %%d2, %%a5, (224*4, %[V]), %%a5, %%acc0\n\t"
498 "mac.l %%d3, %%a5, (256*4, %[V]), %%a5, %%acc0\n\t"
499 "movem.l (4*4, %[D]), %%d0-%%d3 \n\t"
500 "mac.l %%d0, %%a5, (352*4, %[V]), %%a5, %%acc0\n\t"
501 "mac.l %%d1, %%a5, (384*4, %[V]), %%a5, %%acc0\n\t"
502 "mac.l %%d2, %%a5, (480*4, %[V]), %%a5, %%acc0\n\t"
503 "mac.l %%d3, %%a5, (512*4, %[V]), %%a5, %%acc0\n\t"
504 "movem.l (8*4, %[D]), %%d0-%%d3 \n\t"
505 "mac.l %%d0, %%a5, (608*4, %[V]), %%a5, %%acc0\n\t"
506 "mac.l %%d1, %%a5, (640*4, %[V]), %%a5, %%acc0\n\t"
507 "mac.l %%d2, %%a5, (736*4, %[V]), %%a5, %%acc0\n\t"
508 "mac.l %%d3, %%a5, (768*4, %[V]), %%a5, %%acc0\n\t"
509 "movem.l (12*4, %[D]), %%d0-%%d3 \n\t"
510 "mac.l %%d0, %%a5, (864*4, %[V]), %%a5, %%acc0\n\t"
511 "mac.l %%d1, %%a5, (896*4, %[V]), %%a5, %%acc0\n\t"
512 "mac.l %%d2, %%a5, (992*4, %[V]), %%a5, %%acc0\n\t"
513 "mac.l %%d3, %%a5, %%acc0 \n\t"
514 "movclr.l %%acc0, %%d0 \n\t"
515 "lsl.l #1, %%d0 \n\t"
516 "move.l %%d0, (%[Data])+ \n"
517 : [Data] "+a" (Data)
518 : [V] "a" (V), [D] "a" (D)
519 : "d0", "d1", "d2", "d3", "a5");
521 #else
522 // 64=64x64-multiply (FIXED_POINT) or float=float*float (!FIXED_POINT) in C
523 for ( k = 0; k < 32; k++, D += 16, V++ )
525 *Data = MPC_MULTIPLY_EX(V[ 0],D[ 0],30) + MPC_MULTIPLY_EX(V[ 96],D[ 1],30)
526 + MPC_MULTIPLY_EX(V[128],D[ 2],30) + MPC_MULTIPLY_EX(V[224],D[ 3],30)
527 + MPC_MULTIPLY_EX(V[256],D[ 4],30) + MPC_MULTIPLY_EX(V[352],D[ 5],30)
528 + MPC_MULTIPLY_EX(V[384],D[ 6],30) + MPC_MULTIPLY_EX(V[480],D[ 7],30)
529 + MPC_MULTIPLY_EX(V[512],D[ 8],30) + MPC_MULTIPLY_EX(V[608],D[ 9],30)
530 + MPC_MULTIPLY_EX(V[640],D[10],30) + MPC_MULTIPLY_EX(V[736],D[11],30)
531 + MPC_MULTIPLY_EX(V[768],D[12],30) + MPC_MULTIPLY_EX(V[864],D[13],30)
532 + MPC_MULTIPLY_EX(V[896],D[14],30) + MPC_MULTIPLY_EX(V[992],D[15],30);
533 Data += 1;
534 // total: 16 muls, 15 adds, 16 shifts
536 #endif /* COLDFIRE */
538 #endif /* CPU_ARM */
540 static void
541 mpc_full_synthesis_filter(MPC_SAMPLE_FORMAT *OutData, MPC_SAMPLE_FORMAT *V, const MPC_SAMPLE_FORMAT *Y)
543 mpc_uint32_t n;
545 if (NULL != OutData)
547 for ( n = 0; n < 36; n++, Y += 32, OutData += 32 )
549 V -= 64;
550 mpc_dct32(Y, V);
551 mpc_decoder_windowing_D( OutData, V, Di_opt );
556 void
557 mpc_decoder_synthese_filter_float(mpc_decoder *d, MPC_SAMPLE_FORMAT *OutData,
558 int num_channels)
560 (void)num_channels;
562 /********* left channel ********/
563 memmove(d->V_L + MPC_V_MEM, d->V_L, 960 * sizeof(MPC_SAMPLE_FORMAT) );
564 mpc_full_synthesis_filter(OutData,
565 (MPC_SAMPLE_FORMAT *)(d->V_L + MPC_V_MEM),
566 (MPC_SAMPLE_FORMAT *)(d->Y_L));
568 /******** right channel ********/
569 memmove(d->V_R + MPC_V_MEM, d->V_R, 960 * sizeof(MPC_SAMPLE_FORMAT) );
570 mpc_full_synthesis_filter((OutData == NULL ? NULL : OutData + MPC_FRAME_LENGTH),
571 (MPC_SAMPLE_FORMAT *)(d->V_R + MPC_V_MEM),
572 (MPC_SAMPLE_FORMAT *)(d->Y_R));
/*******************************************/
/*                                         */
/*            dithered synthesis           */
/*                                         */
/*******************************************/

/* Parity[i] is 1 when i (0..255) has an odd number of set bits, else 0. */
static const unsigned char Parity [256] = { // parity
    0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
    1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
    1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
    0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
    1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
    0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
    0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
    1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0
};
593 * This is a simple random number generator with good quality for audio purposes.
594 * It consists of two polycounters with opposite rotation direction and different
595 * periods. The periods are coprime, so the total period is the product of both.
597 * -------------------------------------------------------------------------------------------------
598 * +-> |31:30:29:28:27:26:25:24:23:22:21:20:19:18:17:16:15:14:13:12:11:10: 9: 8: 7: 6: 5: 4: 3: 2: 1: 0|
599 * | -------------------------------------------------------------------------------------------------
600 * | | | | | | |
601 * | +--+--+--+-XOR-+--------+
602 * | |
603 * +--------------------------------------------------------------------------------------+
605 * -------------------------------------------------------------------------------------------------
606 * |31:30:29:28:27:26:25:24:23:22:21:20:19:18:17:16:15:14:13:12:11:10: 9: 8: 7: 6: 5: 4: 3: 2: 1: 0| <-+
607 * ------------------------------------------------------------------------------------------------- |
608 * | | | | |
609 * +--+----XOR----+--+ |
610 * | |
611 * +----------------------------------------------------------------------------------------+
614 * The first has an period of 3*5*17*257*65537, the second of 7*47*73*178481,
615 * which gives a period of 18.410.713.077.675.721.215. The result is the
616 * XORed values of both generators.
618 mpc_uint32_t
619 mpc_random_int(mpc_decoder *d)
621 #if 1
622 mpc_uint32_t t1, t2, t3, t4;
624 t3 = t1 = d->__r1; t4 = t2 = d->__r2; // Parity calculation is done via table lookup, this is also available
625 t1 &= 0xF5; t2 >>= 25; // on CPUs without parity, can be implemented in C and avoid unpredictable
626 t1 = Parity [t1]; t2 &= 0x63; // jumps and slow rotate through the carry flag operations.
627 t1 <<= 31; t2 = Parity [t2];
629 return (d->__r1 = (t3 >> 1) | t1 ) ^ (d->__r2 = (t4 + t4) | t2 );
630 #else
631 return (d->__r1 = (d->__r1 >> 1) | ((mpc_uint32_t)Parity [d->__r1 & 0xF5] << 31) ) ^
632 (d->__r2 = (d->__r2 << 1) | (mpc_uint32_t)Parity [(d->__r2 >> 25) & 0x63] );
633 #endif