2 ** FAAD2 - Freeware Advanced Audio (AAC) Decoder including SBR decoding
3 ** Copyright (C) 2003-2004 M. Bakker, Ahead Software AG, http://www.nero.com
5 ** This program is free software; you can redistribute it and/or modify
6 ** it under the terms of the GNU General Public License as published by
7 ** the Free Software Foundation; either version 2 of the License, or
8 ** (at your option) any later version.
10 ** This program is distributed in the hope that it will be useful,
11 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 ** GNU General Public License for more details.
15 ** You should have received a copy of the GNU General Public License
16 ** along with this program; if not, write to the Free Software
17 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 ** Any non-GPL usage of this software or parts of this software is strictly
22 ** Commercial non-GPL licensing of this software is possible.
23 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
38 #include "sbr_qmf_c.h"
39 #include "sbr_syntax.h"
/* Scaling helpers applied at fixed points of the QMF analysis/synthesis
 * chains so that the overall filterbank gain matches the SBR spec.
 * NOTE(review): two conflicting definitions of each macro appear below;
 * the original #ifdef FIXED_POINT / #else / #endif guard lines are not
 * visible in this extracted view -- confirm against the full file. */
/* Fixed-point variants: scaling by arithmetic right shift. */
42 #define FAAD_SYNTHESIS_SCALE(X) ((X)>>1)
43 #define FAAD_ANALYSIS_SCALE1(X) ((X)>>4)
44 #define FAAD_ANALYSIS_SCALE2(X) ((X))
45 #define FAAD_ANALYSIS_SCALE3(X) ((X))
/* Floating-point variants of the same scale factors. */
47 #define FAAD_SYNTHESIS_SCALE(X) ((X)/64.0f)
48 #define FAAD_ANALYSIS_SCALE1(X) ((X))
49 #define FAAD_ANALYSIS_SCALE2(X) (2.0f*(X))
50 #define FAAD_ANALYSIS_SCALE3(X) ((X)/32.0f)
/* Allocate and zero-initialise the QMF analysis state for `channels`
 * channels.  The delay line `x` is sized 2 * channels * 10 because it is
 * kept as a double ringbuffer (two mirrored copies) so reads never wrap.
 * NOTE(review): the opening brace, the x_index initialisation implied by
 * the "ringbuffer index" comment, the return statement and the closing
 * brace are elided from this extracted view -- confirm against the full
 * file.  The malloc results are not checked before use here. */
53 qmfa_info
*qmfa_init(uint8_t channels
)
55 qmfa_info
*qmfa
= (qmfa_info
*)faad_malloc(sizeof(qmfa_info
));
57 /* x is implemented as double ringbuffer */
58 qmfa
->x
= (real_t
*)faad_malloc(2 * channels
* 10 * sizeof(real_t
));
59 memset(qmfa
->x
, 0, 2 * channels
* 10 * sizeof(real_t
));
61 /* ringbuffer index */
64 qmfa
->channels
= channels
;
/* Release the QMF analysis delay line.
 * NOTE(review): the braces and the free of the `qmfa` struct itself are
 * outside this extracted view -- confirm against the full file.  The
 * NULL guard before faad_free is redundant if faad_free follows free()
 * semantics, but is preserved as-is. */
69 void qmfa_end(qmfa_info
*qmfa
)
73 if (qmfa
->x
) faad_free(qmfa
->x
);
/* 32-band QMF analysis filterbank: splits `input` time samples into
 * complex subband samples X[l + offset][0..63] for each of the
 * sbr->numTimeSlotsRate time slots.  Per slot: 32 new samples enter the
 * double ringbuffer x, a 320-tap window (qmf_c) is folded into u[64],
 * and a 32-point transform (DCT-III in the low-power path, DCT-IV kernel
 * in the high-quality path) produces the subband values.
 * NOTE(review): several original lines are elided in this view -- the
 * function's opening brace, the declarations of `n` and `in` (input read
 * index), the `pX` row pointer used in the HQ path, inner-loop braces,
 * the x_index decrement preceding the wrap check, and the #ifdef
 * SBR_LOW_POWER line matched by the #else/#endif below.  Confirm against
 * the full file before editing. */
78 void sbr_qmf_analysis_32(sbr_info
*sbr
, qmfa_info
*qmfa
, const real_t
*input
,
79 qmf_t X
[MAX_NTSR
][64], uint8_t offset
, uint8_t kx
)
81 real_t u
[64] MEM_ALIGN_ATTR
;
83 real_t real
[32] MEM_ALIGN_ATTR
;
84 real_t imag
[32] MEM_ALIGN_ATTR
;
86 real_t y
[32] MEM_ALIGN_ATTR
;
90 uint32_t l
, idx0
, idx1
;
93 for (l
= 0; l
< sbr
->numTimeSlotsRate
; l
++)
97 /* shift input buffer x */
98 /* input buffer is not shifted anymore, x is implemented as double ringbuffer */
99 //memmove(qmfa->x + 32, qmfa->x, (320-32)*sizeof(real_t));
101 /* add new samples to input buffer x */
/* Samples are stored in reverse order into both halves of the double
 * ringbuffer (offset 320 apart); loop is unrolled by 4. */
102 idx0
= qmfa
->x_index
+ 31; idx1
= idx0
+ 320;
103 for (n
= 0; n
< 32; n
+=4)
105 qmfa
->x
[idx0
--] = qmfa
->x
[idx1
--] = (input
[in
++]);
106 qmfa
->x
[idx0
--] = qmfa
->x
[idx1
--] = (input
[in
++]);
107 qmfa
->x
[idx0
--] = qmfa
->x
[idx1
--] = (input
[in
++]);
108 qmfa
->x
[idx0
--] = qmfa
->x
[idx1
--] = (input
[in
++]);
111 /* window and summation to create array u */
/* Each u[n] sums 5 taps spaced 64 samples apart; qmf_c is strided to
 * pick the matching filter coefficients. */
112 for (n
= 0; n
< 32; n
++)
114 idx0
= qmfa
->x_index
+ n
; idx1
= n
* 20;
115 u
[n
] = FAAD_ANALYSIS_SCALE1(
116 MUL_F(qmfa
->x
[idx0
], qmf_c
[idx1
]) +
117 MUL_F(qmfa
->x
[idx0
+ 64], qmf_c
[idx1
+ 2]) +
118 MUL_F(qmfa
->x
[idx0
+ 128], qmf_c
[idx1
+ 4]) +
119 MUL_F(qmfa
->x
[idx0
+ 192], qmf_c
[idx1
+ 6]) +
120 MUL_F(qmfa
->x
[idx0
+ 256], qmf_c
[idx1
+ 8]));
/* Second half of u uses a wrapped coefficient index (n*20 - 639). */
122 for (n
= 32; n
< 64; n
++)
124 idx0
= qmfa
->x_index
+ n
; idx1
= n
* 20 - 639;
125 u
[n
] = FAAD_ANALYSIS_SCALE1(
126 MUL_F(qmfa
->x
[idx0
], qmf_c
[idx1
]) +
127 MUL_F(qmfa
->x
[idx0
+ 64], qmf_c
[idx1
+ 2]) +
128 MUL_F(qmfa
->x
[idx0
+ 128], qmf_c
[idx1
+ 4]) +
129 MUL_F(qmfa
->x
[idx0
+ 192], qmf_c
[idx1
+ 6]) +
130 MUL_F(qmfa
->x
[idx0
+ 256], qmf_c
[idx1
+ 8]));
133 /* update ringbuffer index */
/* NOTE(review): the decrement of x_index (orig. line 134) is elided
 * here; only the wrap-around reset is visible. */
135 if (qmfa
->x_index
< 0)
136 qmfa
->x_index
= (320-32);
138 /* calculate 32 subband samples by introducing X */
/* Low-power path: fold u into y and run a 32-point DCT-III; only the
 * real parts of X are produced. */
141 for (n
= 1; n
< 16; n
++)
142 y
[n
] = u
[n
+48] + u
[48-n
];
143 for (n
= 16; n
< 32; n
++)
144 y
[n
] = -u
[n
-16] + u
[48-n
];
146 DCT3_32_unscaled(u
, y
);
148 for (n
= 0; n
< 32; n
++)
152 QMF_RE(X
[l
+ offset
][n
]) = FAAD_ANALYSIS_SCALE2(u
[n
]);
/* NOTE(review): presumably the branch for subbands >= kx (orig. lines
 * 149-155, partly elided) zeroes the remaining bands -- confirm. */
154 QMF_RE(X
[l
+ offset
][n
]) = 0;
157 #else /* #ifdef SBR_LOW_POWER */
/* High-quality path: complex-valued subbands via a DCT-IV kernel. */
159 // Reordering of data moved from DCT_IV to here
160 idx0
= 30; idx1
= 63;
161 imag
[31] = u
[ 1]; real
[ 0] = u
[ 0];
162 for (n
= 1; n
< 31; n
+=3)
164 imag
[idx0
--] = u
[n
+1]; real
[n
] = -u
[idx1
--];
165 imag
[idx0
--] = u
[n
+2]; real
[n
+1] = -u
[idx1
--];
166 imag
[idx0
--] = u
[n
+3]; real
[n
+2] = -u
[idx1
--];
168 imag
[ 0] = u
[32]; real
[31] = -u
[33];
170 // dct4_kernel is DCT_IV without reordering which is done before and after FFT
171 dct4_kernel(real
, imag
);
173 // Reordering of data moved from DCT_IV to here
/* Only subbands below kx carry signal; pX is presumably X[l + offset]
 * declared on an elided line -- TODO confirm. */
174 /* Step 1: Calculate all non-zero pairs */
176 for (n
= 0; n
< kx
/2; n
++) {
177 idx0
= 2*n
; idx1
= idx0
+ 1;
178 QMF_RE(pX
[idx0
]) = FAAD_ANALYSIS_SCALE2( real
[n
]);
179 QMF_IM(pX
[idx0
]) = FAAD_ANALYSIS_SCALE2( imag
[n
]);
180 QMF_RE(pX
[idx1
]) = FAAD_ANALYSIS_SCALE2(-imag
[31-n
]);
181 QMF_IM(pX
[idx1
]) = FAAD_ANALYSIS_SCALE2(-real
[31-n
]);
183 /* Step 2: Calculate a single pair with half zero'ed */
/* Handles the odd-kx middle pair: real/imag kept, partner zeroed. */
185 idx0
= 2*n
; idx1
= idx0
+ 1;
186 QMF_RE(pX
[idx0
]) = FAAD_ANALYSIS_SCALE2( real
[n
]);
187 QMF_IM(pX
[idx0
]) = FAAD_ANALYSIS_SCALE2( imag
[n
]);
188 QMF_RE(pX
[idx1
]) = QMF_IM(pX
[idx1
]) = 0;
191 /* Step 3: All other are zero'ed */
192 for (; n
< 16; n
++) {
193 idx0
= 2*n
; idx1
= idx0
+ 1;
194 QMF_RE(pX
[idx0
]) = QMF_IM(pX
[idx0
]) = 0;
195 QMF_RE(pX
[idx1
]) = QMF_IM(pX
[idx1
]) = 0;
197 #endif /* #ifdef SBR_LOW_POWER */
/* Allocate and zero-initialise the QMF synthesis state for `channels`
 * channels.  The output delay line `v` is sized 2 * channels * 20
 * because it is a double ringbuffer (two mirrored copies).
 * NOTE(review): the opening brace, the v_index initialisation, the
 * return statement and the closing brace are elided from this extracted
 * view -- confirm against the full file.  malloc results are not
 * checked before use here. */
201 qmfs_info
*qmfs_init(uint8_t channels
)
203 qmfs_info
*qmfs
= (qmfs_info
*)faad_malloc(sizeof(qmfs_info
));
205 /* v is a double ringbuffer */
206 qmfs
->v
= (real_t
*)faad_malloc(2 * channels
* 20 * sizeof(real_t
));
207 memset(qmfs
->v
, 0, 2 * channels
* 20 * sizeof(real_t
));
211 qmfs
->channels
= channels
;
/* Release the QMF synthesis delay line.
 * NOTE(review): the braces and the free of the `qmfs` struct itself are
 * outside this extracted view -- confirm against the full file. */
216 void qmfs_end(qmfs_info
*qmfs
)
220 if (qmfs
->v
) faad_free(qmfs
->v
);
/* 32-band QMF synthesis filterbank (low-power path, real-valued
 * subbands only): converts subband samples X[l][0..31] back into 32
 * time-domain samples per slot, written to `output` via a 10-tap
 * windowed sum over the double ringbuffer v.
 * NOTE(review): this view elides the tail of the parameter list (the
 * `output` pointer), the declaration of `l`, braces, and the v_index
 * decrement that precedes the wrap check at the bottom -- confirm
 * against the full file. */
227 void sbr_qmf_synthesis_32(sbr_info
*sbr
, qmfs_info
*qmfs
, qmf_t X
[MAX_NTSR
][64],
230 real_t x
[16] MEM_ALIGN_ATTR
;
231 real_t y
[16] MEM_ALIGN_ATTR
;
232 int16_t n
, k
, out
= 0;
235 /* qmf subsample l */
236 for (l
= 0; l
< sbr
->numTimeSlotsRate
; l
++)
239 /* we are not shifting v, it is a double ringbuffer */
240 //memmove(qmfs->v + 64, qmfs->v, (640-64)*sizeof(real_t));
242 /* calculate 64 samples */
/* Butterfly the subbands into sum (x) and difference (y) halves. */
243 for (k
= 0; k
< 16; k
++)
245 y
[k
] = FAAD_ANALYSIS_SCALE3((QMF_RE(X
[l
][k
]) - QMF_RE(X
[l
][31-k
])));
246 x
[k
] = FAAD_ANALYSIS_SCALE3((QMF_RE(X
[l
][k
]) + QMF_RE(X
[l
][31-k
])));
/* NOTE(review): the DCT2_16 call for y (orig. ~line 251) appears to be
 * elided from this view -- confirm against the full file. */
250 DCT2_16_unscaled(x
, x
);
/* Interleave transformed halves into both copies of the ringbuffer. */
254 for (n
= 8; n
< 24; n
++)
256 qmfs
->v
[qmfs
->v_index
+ n
*2 ] = qmfs
->v
[qmfs
->v_index
+ 640 + n
*2 ] = x
[n
-8];
257 qmfs
->v
[qmfs
->v_index
+ n
*2+1] = qmfs
->v
[qmfs
->v_index
+ 640 + n
*2+1] = y
[n
-8];
/* Mirror the leading 16 entries and antisymmetrically extend around
 * position 48, exploiting the transform's symmetry. */
259 for (n
= 0; n
< 16; n
++)
261 qmfs
->v
[qmfs
->v_index
+ n
] = qmfs
->v
[qmfs
->v_index
+ 640 + n
] = qmfs
->v
[qmfs
->v_index
+ 32-n
];
263 qmfs
->v
[qmfs
->v_index
+ 48] = qmfs
->v
[qmfs
->v_index
+ 640 + 48] = 0;
264 for (n
= 1; n
< 16; n
++)
266 qmfs
->v
[qmfs
->v_index
+ 48+n
] = qmfs
->v
[qmfs
->v_index
+ 640 + 48+n
] = -qmfs
->v
[qmfs
->v_index
+ 48-n
];
269 /* calculate 32 output samples and window */
/* 10-tap polyphase window: taps spaced 96/128 apart in v, coefficients
 * strided by 20 (2*k*10) in qmf_c. */
270 for (k
= 0; k
< 32; k
++)
272 output
[out
++] = MUL_F(qmfs
->v
[qmfs
->v_index
+ k
], qmf_c
[ 2*k
*10]) +
273 MUL_F(qmfs
->v
[qmfs
->v_index
+ 96 + k
], qmf_c
[1 + 2*k
*10]) +
274 MUL_F(qmfs
->v
[qmfs
->v_index
+ 128 + k
], qmf_c
[2 + 2*k
*10]) +
275 MUL_F(qmfs
->v
[qmfs
->v_index
+ 224 + k
], qmf_c
[3 + 2*k
*10]) +
276 MUL_F(qmfs
->v
[qmfs
->v_index
+ 256 + k
], qmf_c
[4 + 2*k
*10]) +
277 MUL_F(qmfs
->v
[qmfs
->v_index
+ 352 + k
], qmf_c
[5 + 2*k
*10]) +
278 MUL_F(qmfs
->v
[qmfs
->v_index
+ 384 + k
], qmf_c
[6 + 2*k
*10]) +
279 MUL_F(qmfs
->v
[qmfs
->v_index
+ 480 + k
], qmf_c
[7 + 2*k
*10]) +
280 MUL_F(qmfs
->v
[qmfs
->v_index
+ 512 + k
], qmf_c
[8 + 2*k
*10]) +
281 MUL_F(qmfs
->v
[qmfs
->v_index
+ 608 + k
], qmf_c
[9 + 2*k
*10]);
284 /* update the ringbuffer index */
/* NOTE(review): the `qmfs->v_index -= 64;` step (orig. line 285) is
 * elided in this view; only the wrap reset is visible. */
286 if (qmfs
->v_index
< 0)
287 qmfs
->v_index
= (640-64);
/* 64-band QMF synthesis filterbank (low-power path, real-valued
 * subbands only): converts X[l][0..63] into 64 time-domain samples per
 * slot.  Same structure as the 32-band variant but with a 1280-sample
 * double ringbuffer and unstrided qmf_c indexing (k*10).
 * NOTE(review): this view elides the tail of the parameter list (the
 * `output` pointer), the declaration of `l`, and braces -- confirm
 * against the full file. */
291 void sbr_qmf_synthesis_64(sbr_info
*sbr
, qmfs_info
*qmfs
, qmf_t X
[MAX_NTSR
][64],
294 real_t x
[64] MEM_ALIGN_ATTR
;
295 real_t y
[64] MEM_ALIGN_ATTR
;
296 int16_t n
, k
, out
= 0;
300 /* qmf subsample l */
301 for (l
= 0; l
< sbr
->numTimeSlotsRate
; l
++)
304 /* we are not shifting v, it is a double ringbuffer */
305 //memmove(qmfs->v + 128, qmfs->v, (1280-128)*sizeof(real_t));
307 /* calculate 128 samples */
/* Butterfly the subbands into sum (x) and difference (y) halves. */
308 for (k
= 0; k
< 32; k
++)
310 y
[k
] = FAAD_ANALYSIS_SCALE3((QMF_RE(X
[l
][k
]) - QMF_RE(X
[l
][63-k
])));
311 x
[k
] = FAAD_ANALYSIS_SCALE3((QMF_RE(X
[l
][k
]) + QMF_RE(X
[l
][63-k
])));
/* NOTE(review): the DCT2_32 call for y (orig. ~line 316) appears to be
 * elided from this view -- confirm against the full file. */
315 DCT2_32_unscaled(x
, x
);
/* Interleave transformed halves into both copies of the ringbuffer. */
319 for (n
= 16; n
< 48; n
++)
321 qmfs
->v
[qmfs
->v_index
+ n
*2] = qmfs
->v
[qmfs
->v_index
+ 1280 + n
*2 ] = x
[n
-16];
322 qmfs
->v
[qmfs
->v_index
+ n
*2+1] = qmfs
->v
[qmfs
->v_index
+ 1280 + n
*2+1] = y
[n
-16];
/* Mirror the leading 32 entries and antisymmetrically extend around
 * position 96, exploiting the transform's symmetry. */
324 for (n
= 0; n
< 32; n
++)
326 qmfs
->v
[qmfs
->v_index
+ n
] = qmfs
->v
[qmfs
->v_index
+ 1280 + n
] = qmfs
->v
[qmfs
->v_index
+ 64-n
];
328 qmfs
->v
[qmfs
->v_index
+ 96] = qmfs
->v
[qmfs
->v_index
+ 1280 + 96] = 0;
329 for (n
= 1; n
< 32; n
++)
331 qmfs
->v
[qmfs
->v_index
+ 96+n
] = qmfs
->v
[qmfs
->v_index
+ 1280 + 96+n
] = -qmfs
->v
[qmfs
->v_index
+ 96-n
];
334 /* calculate 64 output samples and window */
/* 10-tap polyphase window over v; taps alternate spacing 192/64. */
335 for (k
= 0; k
< 64; k
++)
337 output
[out
++] = MUL_F(qmfs
->v
[qmfs
->v_index
+ k
], qmf_c
[ k
*10]) +
338 MUL_F(qmfs
->v
[qmfs
->v_index
+ 192 + k
], qmf_c
[1 + k
*10]) +
339 MUL_F(qmfs
->v
[qmfs
->v_index
+ 256 + k
], qmf_c
[2 + k
*10]) +
340 MUL_F(qmfs
->v
[qmfs
->v_index
+ 256 + 192 + k
], qmf_c
[3 + k
*10]) +
341 MUL_F(qmfs
->v
[qmfs
->v_index
+ 512 + k
], qmf_c
[4 + k
*10]) +
342 MUL_F(qmfs
->v
[qmfs
->v_index
+ 512 + 192 + k
], qmf_c
[5 + k
*10]) +
343 MUL_F(qmfs
->v
[qmfs
->v_index
+ 768 + k
], qmf_c
[6 + k
*10]) +
344 MUL_F(qmfs
->v
[qmfs
->v_index
+ 768 + 192 + k
], qmf_c
[7 + k
*10]) +
345 MUL_F(qmfs
->v
[qmfs
->v_index
+ 1024 + k
], qmf_c
[8 + k
*10]) +
346 MUL_F(qmfs
->v
[qmfs
->v_index
+ 1024 + 192 + k
], qmf_c
[9 + k
*10]);
349 /* update the ringbuffer index */
/* Step back one frame (128 samples); wrap to the top copy when the
 * index would go negative. */
350 qmfs
->v_index
-= 128;
351 if (qmfs
->v_index
< 0)
352 qmfs
->v_index
= (1280-128);
355 #else /* #ifdef SBR_LOW_POWER */
/* Complex pre-twiddle factors for the 32-band HQ synthesis filterbank,
 * consumed by FAAD_CMPLX_PRETWIDDLE_SUB/ADD below.  The 32 entries
 * numerically match { cos(t), -sin(t) } with t = (2k+1)*pi/256 --
 * NOTE(review): derivation inferred from the values; confirm against
 * the SBR filterbank specification.
 * NOTE(review): the opening/closing braces of the initialiser list are
 * elided from this extracted view. */
357 static const complex_t qmf32_pre_twiddle
[] =
359 { FRAC_CONST(0.999924701839145), FRAC_CONST(-0.012271538285720) },
360 { FRAC_CONST(0.999322384588350), FRAC_CONST(-0.036807222941359) },
361 { FRAC_CONST(0.998118112900149), FRAC_CONST(-0.061320736302209) },
362 { FRAC_CONST(0.996312612182778), FRAC_CONST(-0.085797312344440) },
363 { FRAC_CONST(0.993906970002356), FRAC_CONST(-0.110222207293883) },
364 { FRAC_CONST(0.990902635427780), FRAC_CONST(-0.134580708507126) },
365 { FRAC_CONST(0.987301418157858), FRAC_CONST(-0.158858143333861) },
366 { FRAC_CONST(0.983105487431216), FRAC_CONST(-0.183039887955141) },
367 { FRAC_CONST(0.978317370719628), FRAC_CONST(-0.207111376192219) },
368 { FRAC_CONST(0.972939952205560), FRAC_CONST(-0.231058108280671) },
369 { FRAC_CONST(0.966976471044852), FRAC_CONST(-0.254865659604515) },
370 { FRAC_CONST(0.960430519415566), FRAC_CONST(-0.278519689385053) },
371 { FRAC_CONST(0.953306040354194), FRAC_CONST(-0.302005949319228) },
372 { FRAC_CONST(0.945607325380521), FRAC_CONST(-0.325310292162263) },
373 { FRAC_CONST(0.937339011912575), FRAC_CONST(-0.348418680249435) },
374 { FRAC_CONST(0.928506080473216), FRAC_CONST(-0.371317193951838) },
375 { FRAC_CONST(0.919113851690058), FRAC_CONST(-0.393992040061048) },
376 { FRAC_CONST(0.909167983090522), FRAC_CONST(-0.416429560097637) },
377 { FRAC_CONST(0.898674465693954), FRAC_CONST(-0.438616238538528) },
378 { FRAC_CONST(0.887639620402854), FRAC_CONST(-0.460538710958240) },
379 { FRAC_CONST(0.876070094195407), FRAC_CONST(-0.482183772079123) },
380 { FRAC_CONST(0.863972856121587), FRAC_CONST(-0.503538383725718) },
381 { FRAC_CONST(0.851355193105265), FRAC_CONST(-0.524589682678469) },
382 { FRAC_CONST(0.838224705554838), FRAC_CONST(-0.545324988422046) },
383 { FRAC_CONST(0.824589302785025), FRAC_CONST(-0.565731810783613) },
384 { FRAC_CONST(0.810457198252595), FRAC_CONST(-0.585797857456439) },
385 { FRAC_CONST(0.795836904608884), FRAC_CONST(-0.605511041404326) },
386 { FRAC_CONST(0.780737228572094), FRAC_CONST(-0.624859488142386) },
387 { FRAC_CONST(0.765167265622459), FRAC_CONST(-0.643831542889791) },
388 { FRAC_CONST(0.749136394523459), FRAC_CONST(-0.662415777590172) },
389 { FRAC_CONST(0.732654271672413), FRAC_CONST(-0.680600997795453) },
390 { FRAC_CONST(0.715730825283819), FRAC_CONST(-0.698376249408973) }
/* Complex multiply of subband sample X[l][k] by qmf32_pre_twiddle[k]:
 * SUB yields the real part (re*re - im*im), ADD the imaginary part
 * (im*re + re*im).  Both macros implicitly use the loop variable `l`
 * and array `X` from the enclosing scope, so they are only valid inside
 * the HQ sbr_qmf_synthesis_32 loop below. */
393 #define FAAD_CMPLX_PRETWIDDLE_SUB(k) \
394 (MUL_F(QMF_RE(X[l][k]), RE(qmf32_pre_twiddle[k])) - \
395 MUL_F(QMF_IM(X[l][k]), IM(qmf32_pre_twiddle[k])))
/* Imaginary-part counterpart of FAAD_CMPLX_PRETWIDDLE_SUB. */
397 #define FAAD_CMPLX_PRETWIDDLE_ADD(k) \
398 (MUL_F(QMF_IM(X[l][k]), RE(qmf32_pre_twiddle[k])) + \
399 MUL_F(QMF_RE(X[l][k]), IM(qmf32_pre_twiddle[k])))
/* 32-band QMF synthesis filterbank (high-quality path, complex
 * subbands): pre-twiddles X[l][k], transforms, scatters the result
 * symmetrically into the 640-sample double ringbuffer v, then windows
 * 32 output samples per slot with the 10-tap polyphase filter qmf_c.
 * NOTE(review): this view elides the tail of the parameter list (the
 * `output` pointer), the declarations of `l` and the initialisation of
 * `k` before the unrolled pre-twiddle loop, the transform call between
 * the pre-twiddle and the scatter (orig. ~lines 425-429), braces, and
 * the v_index decrement before the wrap check -- confirm against the
 * full file. */
401 void sbr_qmf_synthesis_32(sbr_info
*sbr
, qmfs_info
*qmfs
, qmf_t X
[MAX_NTSR
][64],
404 real_t x1
[32] MEM_ALIGN_ATTR
;
405 real_t x2
[32] MEM_ALIGN_ATTR
;
406 int32_t n
, k
, idx0
, idx1
, out
= 0;
409 /* qmf subsample l */
410 for (l
= 0; l
< sbr
->numTimeSlotsRate
; l
++)
413 /* buffer is not shifted, we are using a ringbuffer */
414 //memmove(qmfs->v + 64, qmfs->v, (640-64)*sizeof(real_t));
416 /* calculate 64 samples */
417 /* complex pre-twiddle */
/* Unrolled by 4; k presumably starts at 0 on an elided line. */
420 x1
[k
] = FAAD_CMPLX_PRETWIDDLE_SUB(k
); x2
[k
] = FAAD_CMPLX_PRETWIDDLE_ADD(k
); k
++;
421 x1
[k
] = FAAD_CMPLX_PRETWIDDLE_SUB(k
); x2
[k
] = FAAD_CMPLX_PRETWIDDLE_ADD(k
); k
++;
422 x1
[k
] = FAAD_CMPLX_PRETWIDDLE_SUB(k
); x2
[k
] = FAAD_CMPLX_PRETWIDDLE_ADD(k
); k
++;
423 x1
[k
] = FAAD_CMPLX_PRETWIDDLE_SUB(k
); x2
[k
] = FAAD_CMPLX_PRETWIDDLE_ADD(k
); k
++;
/* Scatter -x1+x2 forward from the buffer start and x1+x2 backward from
 * position 63, mirrored into both ringbuffer copies (offset 640). */
430 idx0
= qmfs
->v_index
;
431 idx1
= qmfs
->v_index
+ 63;
432 for (n
= 0; n
< 32; n
+=2)
434 qmfs
->v
[idx0
] = qmfs
->v
[idx0
+ 640] = -x1
[n
] + x2
[n
]; idx0
++;
435 qmfs
->v
[idx1
] = qmfs
->v
[idx1
+ 640] = x1
[n
] + x2
[n
]; idx1
--;
436 qmfs
->v
[idx0
] = qmfs
->v
[idx0
+ 640] = -x1
[n
+1] + x2
[n
+1]; idx0
++;
437 qmfs
->v
[idx1
] = qmfs
->v
[idx1
+ 640] = x1
[n
+1] + x2
[n
+1]; idx1
--;
440 /* calculate 32 output samples and window */
/* 10-tap polyphase window; coefficients strided by 20 (2*k*10). */
441 for (k
= 0; k
< 32; k
++)
443 idx0
= qmfs
->v_index
+ k
; idx1
= 2*k
*10;
444 output
[out
++] = FAAD_SYNTHESIS_SCALE(
445 MUL_F(qmfs
->v
[idx0
], qmf_c
[idx1
]) +
446 MUL_F(qmfs
->v
[idx0
+ 96], qmf_c
[idx1
+1]) +
447 MUL_F(qmfs
->v
[idx0
+ 128], qmf_c
[idx1
+2]) +
448 MUL_F(qmfs
->v
[idx0
+ 224], qmf_c
[idx1
+3]) +
449 MUL_F(qmfs
->v
[idx0
+ 256], qmf_c
[idx1
+4]) +
450 MUL_F(qmfs
->v
[idx0
+ 352], qmf_c
[idx1
+5]) +
451 MUL_F(qmfs
->v
[idx0
+ 384], qmf_c
[idx1
+6]) +
452 MUL_F(qmfs
->v
[idx0
+ 480], qmf_c
[idx1
+7]) +
453 MUL_F(qmfs
->v
[idx0
+ 512], qmf_c
[idx1
+8]) +
454 MUL_F(qmfs
->v
[idx0
+ 608], qmf_c
[idx1
+9]));
457 /* update ringbuffer index */
/* NOTE(review): the `qmfs->v_index -= 64;` step (orig. line 458) is
 * elided in this view; only the wrap reset is visible. */
459 if (qmfs
->v_index
< 0)
460 qmfs
->v_index
= (640 - 64);
/* 64-band QMF synthesis filterbank (high-quality path, complex
 * subbands): de-interleaves X[l] into two 32-point DCT-IV kernels,
 * scatters the combined results symmetrically into the 2560-entry
 * double ringbuffer v, then windows 64 output samples per slot.
 * Contains hand-written inner loops for ARM (smull/smlal) and ColdFire
 * (EMAC mac.l) plus a generic C fallback.
 * NOTE(review): this view elides the tail of the parameter list (the
 * `output` pointer), the declarations of `l` and `pX`, the `asm
 * volatile (` openers and #if CPU_ARM / #else / input-operand lines of
 * the inline-asm sections, braces, and the idx0 setup (2*k*10?) used by
 * the C fallback -- confirm against the full file. */
464 void sbr_qmf_synthesis_64(sbr_info
*sbr
, qmfs_info
*qmfs
, qmf_t X
[MAX_NTSR
][64],
467 real_t real1
[32] MEM_ALIGN_ATTR
;
468 real_t imag1
[32] MEM_ALIGN_ATTR
;
469 real_t real2
[32] MEM_ALIGN_ATTR
;
470 real_t imag2
[32] MEM_ALIGN_ATTR
;
472 real_t
*p_buf_1
, *p_buf_3
;
473 int32_t n
, k
, idx0
, idx1
, out
= 0;
476 /* qmf subsample l */
477 for (l
= 0; l
< sbr
->numTimeSlotsRate
; l
++)
480 /* buffer is not shifted, we use double ringbuffer */
481 //memmove(qmfs->v + 128, qmfs->v, (1280-128)*sizeof(real_t));
483 /* calculate 128 samples */
/* Split even/odd subbands of pX (presumably X[l], declared on an
 * elided line -- TODO confirm) into two real/imag pairs with the
 * second pair index-reversed. */
485 for (k
= 0; k
< 32; k
++)
487 idx0
= 2*k
; idx1
= idx0
+1;
488 real1
[ k
] = QMF_RE(pX
[idx0
]); imag2
[ k
] = QMF_IM(pX
[idx0
]);
489 imag1
[31-k
] = QMF_RE(pX
[idx1
]); real2
[31-k
] = QMF_IM(pX
[idx1
]);
492 // dct4_kernel is DCT_IV without reordering which is done before and after FFT
493 dct4_kernel(real1
, imag1
);
494 dct4_kernel(real2
, imag2
);
/* Scatter sums/differences forward from index 0 and backward from
 * index 127 into both ringbuffer copies (offset 1280). */
496 p_buf_1
= qmfs
->v
+ qmfs
->v_index
;
497 p_buf_3
= p_buf_1
+ 1280;
499 idx0
= 0; idx1
= 127;
500 for (n
= 0; n
< 32; n
++)
502 p_buf_1
[idx0
] = p_buf_3
[idx0
] = real2
[ n
] - real1
[ n
]; idx0
++;
503 p_buf_1
[idx1
] = p_buf_3
[idx1
] = real2
[ n
] + real1
[ n
]; idx1
--;
504 p_buf_1
[idx0
] = p_buf_3
[idx0
] = imag2
[31-n
] + imag1
[31-n
]; idx0
++;
505 p_buf_1
[idx1
] = p_buf_3
[idx1
] = imag2
[31-n
] - imag1
[31-n
]; idx1
--;
508 p_buf_1
= qmfs
->v
+ qmfs
->v_index
;
510 /* calculate 64 output samples and window */
/* ARM path: 10 multiply-accumulates per sample with a 64-bit smull/
 * smlal accumulator; only the high word (r6) is stored, i.e. the
 * product is taken >> 32.  Taps at offsets 0,192,256,448,512,704,768,
 * 960,1024,1216 words from pbuf, coefficients streamed from qtab. */
512 const real_t
*qtab
= qmf_c
;
513 real_t
*pbuf
= p_buf_1
;
514 for (k
= 0; k
< 64; k
++, pbuf
++)
516 real_t
*pout
= &output
[out
++];
518 "ldmia %[qtab]!, { r0-r3 } \n\t"
519 "ldr r4, [%[pbuf]] \n\t"
520 "ldr r7, [%[pbuf], #192*4] \n\t"
521 "smull r5, r6, r4, r0 \n\t"
522 "ldr r4, [%[pbuf], #256*4] \n\t"
523 "smlal r5, r6, r7, r1 \n\t"
524 "ldr r7, [%[pbuf], #448*4] \n\t"
525 "smlal r5, r6, r4, r2 \n\t"
526 "ldr r4, [%[pbuf], #512*4] \n\t"
527 "smlal r5, r6, r7, r3 \n\t"
529 "ldmia %[qtab]!, { r0-r3 } \n\t"
530 "ldr r7, [%[pbuf], #704*4] \n\t"
531 "smlal r5, r6, r4, r0 \n\t"
532 "ldr r4, [%[pbuf], #768*4] \n\t"
533 "smlal r5, r6, r7, r1 \n\t"
534 "ldr r7, [%[pbuf], #960*4] \n\t"
535 "smlal r5, r6, r4, r2 \n\t"
536 "mov r2, #1024*4 \n\t"
538 "ldmia %[qtab]!, { r0-r1 } \n\t"
539 "ldr r4, [%[pbuf], r2] \n\t"
540 "smlal r5, r6, r7, r3 \n\t"
541 "mov r2, #1216*4 \n\t"
542 "ldr r7, [%[pbuf], r2] \n\t"
543 "smlal r5, r6, r4, r0 \n\t"
544 "smlal r5, r6, r7, r1 \n\t"
546 "str r6, [%[pout]] \n"
548 : [pbuf
] "r" (pbuf
), [pout
] "r" (pout
)
549 : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "memory");
551 #elif defined CPU_COLDFIRE
/* ColdFire path: same 10 taps via the EMAC accumulator (%acc0); each
 * mac.l also prefetches the next buffer sample into d5. */
552 const real_t
*qtab
= qmf_c
;
553 real_t
*pbuf
= p_buf_1
;
554 for (k
= 0; k
< 64; k
++, pbuf
++)
556 real_t
*pout
= &output
[out
++];
558 "move.l (%[pbuf]), %%d5 \n"
560 "movem.l (%[qtab]), %%d0-%%d4 \n"
561 "mac.l %%d0, %%d5, (192*4, %[pbuf]), %%d5, %%acc0 \n"
562 "mac.l %%d1, %%d5, (256*4, %[pbuf]), %%d5, %%acc0 \n"
563 "mac.l %%d2, %%d5, (448*4, %[pbuf]), %%d5, %%acc0 \n"
564 "mac.l %%d3, %%d5, (512*4, %[pbuf]), %%d5, %%acc0 \n"
565 "mac.l %%d4, %%d5, (704*4, %[pbuf]), %%d5, %%acc0 \n"
566 "lea.l (20, %[qtab]), %[qtab] \n"
568 "movem.l (%[qtab]), %%d0-%%d4 \n"
569 "mac.l %%d0, %%d5, (768*4, %[pbuf]), %%d5, %%acc0 \n"
570 "mac.l %%d1, %%d5, (960*4, %[pbuf]), %%d5, %%acc0 \n"
571 "mac.l %%d2, %%d5, (1024*4, %[pbuf]), %%d5, %%acc0 \n"
572 "mac.l %%d3, %%d5, (1216*4, %[pbuf]), %%d5, %%acc0 \n"
573 "mac.l %%d4, %%d5, %%acc0 \n"
574 "lea.l (20, %[qtab]), %[qtab] \n"
576 "movclr.l %%acc0, %%d0 \n"
577 "move.l %%d0, (%[pout]) \n"
581 : "d0", "d1", "d2", "d3", "d4", "d5", "memory");
/* Generic C fallback: same taps/coefficients via MUL_F.
 * NOTE(review): the per-iteration setup of idx0 (orig. ~lines 585-586)
 * is elided in this view -- confirm against the full file. */
584 for (k
= 0; k
< 64; k
++)
587 output
[out
++] = FAAD_SYNTHESIS_SCALE(
588 MUL_F(p_buf_1
[k
], qmf_c
[idx0
]) +
589 MUL_F(p_buf_1
[k
+ 192 ], qmf_c
[idx0
+1]) +
590 MUL_F(p_buf_1
[k
+ 256 ], qmf_c
[idx0
+2]) +
591 MUL_F(p_buf_1
[k
+ 256+192], qmf_c
[idx0
+3]) +
592 MUL_F(p_buf_1
[k
+ 512 ], qmf_c
[idx0
+4]) +
593 MUL_F(p_buf_1
[k
+ 512+192], qmf_c
[idx0
+5]) +
594 MUL_F(p_buf_1
[k
+ 768 ], qmf_c
[idx0
+6]) +
595 MUL_F(p_buf_1
[k
+ 768+192], qmf_c
[idx0
+7]) +
596 MUL_F(p_buf_1
[k
+1024 ], qmf_c
[idx0
+8]) +
597 MUL_F(p_buf_1
[k
+1024+192], qmf_c
[idx0
+9]));
601 /* update ringbuffer index */
/* Step back one frame (128 samples); wrap when the index would go
 * negative. */
602 qmfs
->v_index
-= 128;
603 if (qmfs
->v_index
< 0)
604 qmfs
->v_index
= (1280 - 128);
607 #endif /* #ifdef SBR_LOW_POWER */
609 #endif /* #ifdef SBR_DEC */