Fix compilation with newer versions of DUMB
[alure.git] / src / decoders / minimp3.h
blob a878b12db0aefb17cc113d27a66ab48706b143c9
#ifndef MINIMP3_H
#define MINIMP3_H
/*
    https://github.com/lieff/minimp3
    To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide.
    This software is distributed without any warranty.
    See <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#include <stdint.h>

/* 1152 samples times 2; presumably the per-frame sample maximum for two channels. */
#define MINIMP3_MAX_SAMPLES_PER_FRAME (1152*2)

/* Description of one frame; passed by pointer to mp3dec_decode_frame(). */
typedef struct
{
    int frame_bytes, channels, hz, layer, bitrate_kbps;
} mp3dec_frame_info_t;

/* Decoder state; pass to mp3dec_init() and mp3dec_decode_frame(). */
typedef struct
{
    float mdct_overlap[2][9*32], qmf_state[15*2*32];
    int reserv, free_format_bytes;
    unsigned char header[4], reserv_buf[511];
} mp3dec_t;

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

void mp3dec_init(mp3dec_t *dec);
/* Output sample type: int16_t by default, float when MINIMP3_FLOAT_OUTPUT is defined. */
#ifndef MINIMP3_FLOAT_OUTPUT
typedef int16_t mp3d_sample_t;
#else /* MINIMP3_FLOAT_OUTPUT */
typedef float mp3d_sample_t;
void mp3dec_f32_to_s16(const float *in, int16_t *out, int num_samples);
#endif /* MINIMP3_FLOAT_OUTPUT */
int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, mp3d_sample_t *pcm, mp3dec_frame_info_t *info);

#ifdef __cplusplus
}
#endif /* __cplusplus */

#endif /* MINIMP3_H */
43 #if defined(MINIMP3_IMPLEMENTATION) && !defined(_MINIMP3_IMPLEMENTATION_GUARD)
44 #define _MINIMP3_IMPLEMENTATION_GUARD
#include <stdlib.h>
#include <string.h>

#define MAX_FREE_FORMAT_FRAME_SIZE  2304    /* more than ISO spec's */
#ifndef MAX_FRAME_SYNC_MATCHES
#define MAX_FRAME_SYNC_MATCHES      10
#endif /* MAX_FRAME_SYNC_MATCHES */

#define MAX_L3_FRAME_PAYLOAD_BYTES  MAX_FREE_FORMAT_FRAME_SIZE /* MUST be >= 320000/8/32000*1152 = 1440 */

#define MAX_BITRESERVOIR_BYTES      511
#define SHORT_BLOCK_TYPE            2
#define STOP_BLOCK_TYPE             3
#define MODE_MONO                   3
#define MODE_JOINT_STEREO           1
#define HDR_SIZE                    4
/* Field accessors for a frame header `h` (indexable as bytes h[1]..h[3]). */
#define HDR_IS_MONO(h)              ((((h)[3]) & 0xC0) == 0xC0)
#define HDR_IS_MS_STEREO(h)         ((((h)[3]) & 0xE0) == 0x60)
#define HDR_IS_FREE_FORMAT(h)       ((((h)[2]) & 0xF0) == 0)
#define HDR_IS_CRC(h)               (!(((h)[1]) & 1))
#define HDR_TEST_PADDING(h)         (((h)[2]) & 0x2)
#define HDR_TEST_MPEG1(h)           (((h)[1]) & 0x8)
#define HDR_TEST_NOT_MPEG25(h)      (((h)[1]) & 0x10)
#define HDR_TEST_I_STEREO(h)        (((h)[3]) & 0x10)
#define HDR_TEST_MS_STEREO(h)       (((h)[3]) & 0x20)
#define HDR_GET_STEREO_MODE(h)      ((((h)[3]) >> 6) & 3)
#define HDR_GET_STEREO_MODE_EXT(h)  ((((h)[3]) >> 4) & 3)
#define HDR_GET_LAYER(h)            ((((h)[1]) >> 1) & 3)
#define HDR_GET_BITRATE(h)          (((h)[2]) >> 4)
#define HDR_GET_SAMPLE_RATE(h)      ((((h)[2]) >> 2) & 3)
#define HDR_GET_MY_SAMPLE_RATE(h)   (HDR_GET_SAMPLE_RATE(h) + ((((h)[1] >> 3) & 1) + (((h)[1] >> 4) & 1))*3)
#define HDR_IS_FRAME_576(h)         (((h)[1] & 14) == 2)
#define HDR_IS_LAYER_1(h)           (((h)[1] & 6) == 6)

/* Parenthesized so the negative constant is safe in any expression context. */
#define BITS_DEQUANTIZER_OUT        (-1)
#define MAX_SCF                     (255 + BITS_DEQUANTIZER_OUT*4 - 210)
#define MAX_SCFI                    ((MAX_SCF + 3) & ~3)

/* NOTE: both macros evaluate their arguments more than once. */
#define MINIMP3_MIN(a, b)           ((a) > (b) ? (b) : (a))
#define MINIMP3_MAX(a, b)           ((a) < (b) ? (b) : (a))
/*
 * SIMD back-end selection.
 * Defines HAVE_SIMD (0 or 1) and, when a back-end is available, the V* vector
 * helper macros, the f4 vector type and have_simd().
 */
#if !defined(MINIMP3_NO_SIMD)

#if !defined(MINIMP3_ONLY_SIMD) && (defined(_M_X64) || defined(_M_ARM64) || defined(__x86_64__) || defined(__aarch64__))
/* x64 always have SSE2, arm64 always have neon, no need for generic code */
#define MINIMP3_ONLY_SIMD
#endif /* SIMD checks... */

#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || ((defined(__i386__) || defined(__x86_64__)) && defined(__SSE2__))
#if defined(_MSC_VER)
#include <intrin.h>
#endif /* defined(_MSC_VER) */
#include <immintrin.h>
#define HAVE_SSE 1
#define HAVE_SIMD 1
#define VSTORE _mm_storeu_ps
#define VLD _mm_loadu_ps
#define VSET _mm_set1_ps
#define VADD _mm_add_ps
#define VSUB _mm_sub_ps
#define VMUL _mm_mul_ps
#define VMAC(a, x, y) _mm_add_ps(a, _mm_mul_ps(x, y))
#define VMSB(a, x, y) _mm_sub_ps(a, _mm_mul_ps(x, y))
#define VMUL_S(x, s)  _mm_mul_ps(x, _mm_set1_ps(s))
#define VREV(x) _mm_shuffle_ps(x, x, _MM_SHUFFLE(0, 1, 2, 3))
typedef __m128 f4;
#if defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD)
#define minimp3_cpuid __cpuid
#else /* defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD) */
/* Executes CPUID for leaf InfoType and stores eax/ebx/ecx/edx in CPUInfo[0..3]. */
static __inline__ __attribute__((always_inline)) void minimp3_cpuid(int CPUInfo[], const int InfoType)
{
#if defined(__PIC__)
    /* ebx/rbx is saved and restored around cpuid in PIC builds. */
    __asm__ __volatile__(
#if defined(__x86_64__)
        "push %%rbx\n"
        "cpuid\n"
        "xchgl %%ebx, %1\n"
        "pop  %%rbx\n"
#else /* defined(__x86_64__) */
        "xchgl %%ebx, %1\n"
        "cpuid\n"
        "xchgl %%ebx, %1\n"
#endif /* defined(__x86_64__) */
        : "=a" (CPUInfo[0]), "=r" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3])
        : "a" (InfoType));
#else /* defined(__PIC__) */
    __asm__ __volatile__(
        "cpuid"
        : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3])
        : "a" (InfoType));
#endif /* defined(__PIC__)*/
}
#endif /* defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD) */
/*
 * Returns nonzero when the SIMD code path may be used.
 * With MINIMP3_ONLY_SIMD this is always 1; otherwise the CPUID probe result is
 * cached in a function-local static (0 = not probed yet, 1 = probed without
 * SSE2, larger = SSE2 present).
 */
static int have_simd(void)
{
#ifdef MINIMP3_ONLY_SIMD
    return 1;
#else /* MINIMP3_ONLY_SIMD */
    static int g_have_simd;
    int CPUInfo[4];
#ifdef MINIMP3_TEST
    /* Test hook: report "no SIMD" after the first calls so both paths get exercised. */
    static int g_counter;
    if (g_counter++ > 100)
        return 0;
#endif /* MINIMP3_TEST */
    if (g_have_simd)
        goto end;
    minimp3_cpuid(CPUInfo, 0);
    g_have_simd = 1;
    if (CPUInfo[0] > 0)
    {
        minimp3_cpuid(CPUInfo, 1);
        g_have_simd = (CPUInfo[3] & (1 << 26)) + 1; /* SSE2 */
    }
end:
    return g_have_simd - 1;
#endif /* MINIMP3_ONLY_SIMD */
}
#elif defined(__ARM_NEON) || defined(__aarch64__)
#include <arm_neon.h>
#define HAVE_SIMD 1
#define VSTORE vst1q_f32
#define VLD vld1q_f32
#define VSET vmovq_n_f32
#define VADD vaddq_f32
#define VSUB vsubq_f32
#define VMUL vmulq_f32
#define VMAC(a, x, y) vmlaq_f32(a, x, y)
#define VMSB(a, x, y) vmlsq_f32(a, x, y)
#define VMUL_S(x, s)  vmulq_f32(x, vmovq_n_f32(s))
#define VREV(x) vcombine_f32(vget_high_f32(vrev64q_f32(x)), vget_low_f32(vrev64q_f32(x)))
typedef float32x4_t f4;
/* NEON build: always reports SIMD as available. */
static int have_simd(void)
{   /* TODO: detect neon for !MINIMP3_ONLY_SIMD */
    return 1;
}
#else /* SIMD checks... */
#define HAVE_SIMD 0
#ifdef MINIMP3_ONLY_SIMD
#error MINIMP3_ONLY_SIMD used, but SSE/NEON not enabled
#endif /* MINIMP3_ONLY_SIMD */
#endif /* SIMD checks... */
#else /* !defined(MINIMP3_NO_SIMD) */
#define HAVE_SIMD 0
#endif /* !defined(MINIMP3_NO_SIMD) */
/* Bit reader state: source bytes plus current position and end limit, both in bits. */
typedef struct
{
    const uint8_t *buf;
    int pos, limit;
} bs_t;
/* Layer I/II per-frame data: scalefactors, band counts, bit allocation and scalefactor codes. */
typedef struct
{
    float scf[3*64];
    uint8_t total_bands, stereo_bands, bitalloc[64], scfcod[64];
} L12_scale_info;
/*
 * One entry of a Layer I/II sub-band allocation table: offset into the
 * bit-allocation code table, code width in bits, and the number of
 * consecutive bands the entry applies to.
 */
typedef struct
{
    uint8_t tab_offset, code_tab_width, band_count;
} L12_subband_alloc_t;
/* Layer III side information for one granule/channel, as filled in by L3_read_side_info(). */
typedef struct
{
    const uint8_t *sfbtab;
    uint16_t part_23_length, big_values, scalefac_compress;
    uint8_t global_gain, block_type, mixed_block_flag, n_long_sfb, n_short_sfb;
    uint8_t table_select[3], region_count[3], subblock_gain[3];
    uint8_t preflag, scalefac_scale, count1_table, scfsi;
} L3_gr_info_t;
218 typedef struct
220 bs_t bs;
221 uint8_t maindata[MAX_BITRESERVOIR_BYTES + MAX_L3_FRAME_PAYLOAD_BYTES];
222 L3_gr_info_t gr_info[4];
223 float grbuf[2][576], scf[40], syn[18 + 15][2*32];
224 uint8_t ist_pos[2][39];
225 } mp3dec_scratch_t;
227 static void bs_init(bs_t *bs, const uint8_t *data, int bytes)
229 bs->buf = data;
230 bs->pos = 0;
231 bs->limit = bytes*8;
234 static uint32_t get_bits(bs_t *bs, int n)
236 uint32_t next, cache = 0, s = bs->pos & 7;
237 int shl = n + s;
238 const uint8_t *p = bs->buf + (bs->pos >> 3);
239 if ((bs->pos += n) > bs->limit)
240 return 0;
241 next = *p++ & (255 >> s);
242 while ((shl -= 8) > 0)
244 cache |= next << shl;
245 next = *p++;
247 return cache | (next >> -shl);
/*
 * Returns nonzero when h passes the header sanity checks: first byte 0xff,
 * second byte with high nibble 0xf or equal to 0xe2/0xe3, layer field nonzero,
 * bitrate field not 15 and sample-rate field not 3.
 */
static int hdr_valid(const uint8_t *h)
{
    return h[0] == 0xff &&
        ((h[1] & 0xF0) == 0xf0 || (h[1] & 0xFE) == 0xe2) &&
        (HDR_GET_LAYER(h) != 0) &&
        (HDR_GET_BITRATE(h) != 15) &&
        (HDR_GET_SAMPLE_RATE(h) != 3);
}
/*
 * Returns nonzero when h2 is a valid header that agrees with h1 in byte 1
 * (ignoring its lowest bit), in the sample-rate bits of byte 2, and in
 * whether the frame is free-format.
 */
static int hdr_compare(const uint8_t *h1, const uint8_t *h2)
{
    return hdr_valid(h2) &&
        ((h1[1] ^ h2[1]) & 0xFE) == 0 &&
        ((h1[2] ^ h2[2]) & 0x0C) == 0 &&
        !(HDR_IS_FREE_FORMAT(h1) ^ HDR_IS_FREE_FORMAT(h2));
}
/*
 * Returns the bitrate in kbps for header h (0 for bitrate index 0).
 * The table stores half the rate so entries fit in uint8_t; it is indexed by
 * [MPEG1 flag][layer field - 1][bitrate index].
 */
static unsigned hdr_bitrate_kbps(const uint8_t *h)
{
    static const uint8_t halfrate[2][3][15] = {
        { { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 }, { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 }, { 0,16,24,28,32,40,48,56,64,72,80,88,96,112,128 } },
        { { 0,16,20,24,28,32,40,48,56,64,80,96,112,128,160 }, { 0,16,24,28,32,40,48,56,64,80,96,112,128,160,192 }, { 0,16,32,48,64,80,96,112,128,144,160,176,192,208,224 } },
    };
    return 2*halfrate[!!HDR_TEST_MPEG1(h)][HDR_GET_LAYER(h) - 1][HDR_GET_BITRATE(h)];
}
/*
 * Returns the sample rate in Hz: the base rate selected by the sample-rate
 * field, halved once when the MPEG1 bit is clear and once more when the
 * "not MPEG2.5" bit is clear. The field must be 0..2 (see hdr_valid()).
 */
static unsigned hdr_sample_rate_hz(const uint8_t *h)
{
    static const unsigned g_hz[3] = { 44100, 48000, 32000 };
    return g_hz[HDR_GET_SAMPLE_RATE(h)] >> (int)!HDR_TEST_MPEG1(h) >> (int)!HDR_TEST_NOT_MPEG25(h);
}
/* Returns samples per frame: 384 for Layer I, otherwise 1152, halved when HDR_IS_FRAME_576 holds. */
static unsigned hdr_frame_samples(const uint8_t *h)
{
    return HDR_IS_LAYER_1(h) ? 384 : (1152 >> (int)HDR_IS_FRAME_576(h));
}
/*
 * Returns the frame size in bytes computed from the header (without padding).
 * When the computed size is 0 (bitrate index 0), free_format_size is returned
 * instead.
 */
static int hdr_frame_bytes(const uint8_t *h, int free_format_size)
{
    int frame_bytes = hdr_frame_samples(h)*hdr_bitrate_kbps(h)*125/hdr_sample_rate_hz(h);
    if (HDR_IS_LAYER_1(h))
    {
        frame_bytes &= ~3; /* slot align */
    }
    return frame_bytes ? frame_bytes : free_format_size;
}
/* Returns the padding size in bytes: 0 without the padding bit, else 4 for Layer I and 1 otherwise. */
static int hdr_padding(const uint8_t *h)
{
    return HDR_TEST_PADDING(h) ? (HDR_IS_LAYER_1(h) ? 4 : 1) : 0;
}
302 #ifndef MINIMP3_ONLY_MP3
303 static const L12_subband_alloc_t *L12_subband_alloc_table(const uint8_t *hdr, L12_scale_info *sci)
305 const L12_subband_alloc_t *alloc;
306 int mode = HDR_GET_STEREO_MODE(hdr);
307 int nbands, stereo_bands = (mode == MODE_MONO) ? 0 : (mode == MODE_JOINT_STEREO) ? (HDR_GET_STEREO_MODE_EXT(hdr) << 2) + 4 : 32;
309 if (HDR_IS_LAYER_1(hdr))
311 static const L12_subband_alloc_t g_alloc_L1[] = { { 76, 4, 32 } };
312 alloc = g_alloc_L1;
313 nbands = 32;
314 } else if (!HDR_TEST_MPEG1(hdr))
316 static const L12_subband_alloc_t g_alloc_L2M2[] = { { 60, 4, 4 }, { 44, 3, 7 }, { 44, 2, 19 } };
317 alloc = g_alloc_L2M2;
318 nbands = 30;
319 } else
321 static const L12_subband_alloc_t g_alloc_L2M1[] = { { 0, 4, 3 }, { 16, 4, 8 }, { 32, 3, 12 }, { 40, 2, 7 } };
322 int sample_rate_idx = HDR_GET_SAMPLE_RATE(hdr);
323 unsigned kbps = hdr_bitrate_kbps(hdr) >> (int)(mode != MODE_MONO);
324 if (!kbps) /* free-format */
326 kbps = 192;
329 alloc = g_alloc_L2M1;
330 nbands = 27;
331 if (kbps < 56)
333 static const L12_subband_alloc_t g_alloc_L2M1_lowrate[] = { { 44, 4, 2 }, { 44, 3, 10 } };
334 alloc = g_alloc_L2M1_lowrate;
335 nbands = sample_rate_idx == 2 ? 12 : 8;
336 } else if (kbps >= 96 && sample_rate_idx != 1)
338 nbands = 30;
342 sci->total_bands = (uint8_t)nbands;
343 sci->stereo_bands = (uint8_t)MINIMP3_MIN(stereo_bands, nbands);
345 return alloc;
348 static void L12_read_scalefactors(bs_t *bs, uint8_t *pba, uint8_t *scfcod, int bands, float *scf)
350 static const float g_deq_L12[18*3] = {
351 #define DQ(x) 9.53674316e-07f/x, 7.56931807e-07f/x, 6.00777173e-07f/x
352 DQ(3),DQ(7),DQ(15),DQ(31),DQ(63),DQ(127),DQ(255),DQ(511),DQ(1023),DQ(2047),DQ(4095),DQ(8191),DQ(16383),DQ(32767),DQ(65535),DQ(3),DQ(5),DQ(9)
354 int i, m;
355 for (i = 0; i < bands; i++)
357 float s = 0;
358 int ba = *pba++;
359 int mask = ba ? 4 + ((19 >> scfcod[i]) & 3) : 0;
360 for (m = 4; m; m >>= 1)
362 if (mask & m)
364 int b = get_bits(bs, 6);
365 s = g_deq_L12[ba*3 - 6 + b % 3]*(1 << 21 >> b/3);
367 *scf++ = s;
372 static void L12_read_scale_info(const uint8_t *hdr, bs_t *bs, L12_scale_info *sci)
374 static const uint8_t g_bitalloc_code_tab[] = {
375 0,17, 3, 4, 5,6,7, 8,9,10,11,12,13,14,15,16,
376 0,17,18, 3,19,4,5, 6,7, 8, 9,10,11,12,13,16,
377 0,17,18, 3,19,4,5,16,
378 0,17,18,16,
379 0,17,18,19, 4,5,6, 7,8, 9,10,11,12,13,14,15,
380 0,17,18, 3,19,4,5, 6,7, 8, 9,10,11,12,13,14,
381 0, 2, 3, 4, 5,6,7, 8,9,10,11,12,13,14,15,16
383 const L12_subband_alloc_t *subband_alloc = L12_subband_alloc_table(hdr, sci);
385 int i, k = 0, ba_bits = 0;
386 const uint8_t *ba_code_tab = g_bitalloc_code_tab;
388 for (i = 0; i < sci->total_bands; i++)
390 uint8_t ba;
391 if (i == k)
393 k += subband_alloc->band_count;
394 ba_bits = subband_alloc->code_tab_width;
395 ba_code_tab = g_bitalloc_code_tab + subband_alloc->tab_offset;
396 subband_alloc++;
398 ba = ba_code_tab[get_bits(bs, ba_bits)];
399 sci->bitalloc[2*i] = ba;
400 if (i < sci->stereo_bands)
402 ba = ba_code_tab[get_bits(bs, ba_bits)];
404 sci->bitalloc[2*i + 1] = sci->stereo_bands ? ba : 0;
407 for (i = 0; i < 2*sci->total_bands; i++)
409 sci->scfcod[i] = sci->bitalloc[i] ? HDR_IS_LAYER_1(hdr) ? 2 : get_bits(bs, 2) : 6;
412 L12_read_scalefactors(bs, sci->bitalloc, sci->scfcod, sci->total_bands*2, sci->scf);
414 for (i = sci->stereo_bands; i < sci->total_bands; i++)
416 sci->bitalloc[2*i + 1] = 0;
420 static int L12_dequantize_granule(float *grbuf, bs_t *bs, L12_scale_info *sci, int group_size)
422 int i, j, k, choff = 576;
423 for (j = 0; j < 4; j++)
425 float *dst = grbuf + group_size*j;
426 for (i = 0; i < 2*sci->total_bands; i++)
428 int ba = sci->bitalloc[i];
429 if (ba != 0)
431 if (ba < 17)
433 int half = (1 << (ba - 1)) - 1;
434 for (k = 0; k < group_size; k++)
436 dst[k] = (float)((int)get_bits(bs, ba) - half);
438 } else
440 unsigned mod = (2 << (ba - 17)) + 1; /* 3, 5, 9 */
441 unsigned code = get_bits(bs, mod + 2 - (mod >> 3)); /* 5, 7, 10 */
442 for (k = 0; k < group_size; k++, code /= mod)
444 dst[k] = (float)((int)(code % mod - mod/2));
448 dst += choff;
449 choff = 18 - choff;
452 return group_size*4;
455 static void L12_apply_scf_384(L12_scale_info *sci, const float *scf, float *dst)
457 int i, k;
458 memcpy(dst + 576 + sci->stereo_bands*18, dst + sci->stereo_bands*18, (sci->total_bands - sci->stereo_bands)*18*sizeof(float));
459 for (i = 0; i < sci->total_bands; i++, dst += 18, scf += 6)
461 for (k = 0; k < 12; k++)
463 dst[k + 0] *= scf[0];
464 dst[k + 576] *= scf[3];
468 #endif /* MINIMP3_ONLY_MP3 */
470 static int L3_read_side_info(bs_t *bs, L3_gr_info_t *gr, const uint8_t *hdr)
472 static const uint8_t g_scf_long[8][23] = {
473 { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },
474 { 12,12,12,12,12,12,16,20,24,28,32,40,48,56,64,76,90,2,2,2,2,2,0 },
475 { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },
476 { 6,6,6,6,6,6,8,10,12,14,16,18,22,26,32,38,46,54,62,70,76,36,0 },
477 { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },
478 { 4,4,4,4,4,4,6,6,8,8,10,12,16,20,24,28,34,42,50,54,76,158,0 },
479 { 4,4,4,4,4,4,6,6,6,8,10,12,16,18,22,28,34,40,46,54,54,192,0 },
480 { 4,4,4,4,4,4,6,6,8,10,12,16,20,24,30,38,46,56,68,84,102,26,0 }
482 static const uint8_t g_scf_short[8][40] = {
483 { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
484 { 8,8,8,8,8,8,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 },
485 { 4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 },
486 { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,32,32,32,44,44,44,12,12,12,0 },
487 { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
488 { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,22,22,22,30,30,30,56,56,56,0 },
489 { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,10,10,10,12,12,12,14,14,14,16,16,16,20,20,20,26,26,26,66,66,66,0 },
490 { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,12,12,12,16,16,16,20,20,20,26,26,26,34,34,34,42,42,42,12,12,12,0 }
492 static const uint8_t g_scf_mixed[8][40] = {
493 { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
494 { 12,12,12,4,4,4,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 },
495 { 6,6,6,6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 },
496 { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,32,32,32,44,44,44,12,12,12,0 },
497 { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
498 { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,22,22,22,30,30,30,56,56,56,0 },
499 { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,6,6,6,10,10,10,12,12,12,14,14,14,16,16,16,20,20,20,26,26,26,66,66,66,0 },
500 { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,8,8,8,12,12,12,16,16,16,20,20,20,26,26,26,34,34,34,42,42,42,12,12,12,0 }
503 unsigned tables, scfsi = 0;
504 int main_data_begin, part_23_sum = 0;
505 int sr_idx = HDR_GET_MY_SAMPLE_RATE(hdr); sr_idx -= (sr_idx != 0);
506 int gr_count = HDR_IS_MONO(hdr) ? 1 : 2;
508 if (HDR_TEST_MPEG1(hdr))
510 gr_count *= 2;
511 main_data_begin = get_bits(bs, 9);
512 scfsi = get_bits(bs, 7 + gr_count);
513 } else
515 main_data_begin = get_bits(bs, 8 + gr_count) >> gr_count;
520 if (HDR_IS_MONO(hdr))
522 scfsi <<= 4;
524 gr->part_23_length = (uint16_t)get_bits(bs, 12);
525 part_23_sum += gr->part_23_length;
526 gr->big_values = (uint16_t)get_bits(bs, 9);
527 if (gr->big_values > 288)
529 return -1;
531 gr->global_gain = (uint8_t)get_bits(bs, 8);
532 gr->scalefac_compress = (uint16_t)get_bits(bs, HDR_TEST_MPEG1(hdr) ? 4 : 9);
533 gr->sfbtab = g_scf_long[sr_idx];
534 gr->n_long_sfb = 22;
535 gr->n_short_sfb = 0;
536 if (get_bits(bs, 1))
538 gr->block_type = (uint8_t)get_bits(bs, 2);
539 if (!gr->block_type)
541 return -1;
543 gr->mixed_block_flag = (uint8_t)get_bits(bs, 1);
544 gr->region_count[0] = 7;
545 gr->region_count[1] = 255;
546 if (gr->block_type == SHORT_BLOCK_TYPE)
548 scfsi &= 0x0F0F;
549 if (!gr->mixed_block_flag)
551 gr->region_count[0] = 8;
552 gr->sfbtab = g_scf_short[sr_idx];
553 gr->n_long_sfb = 0;
554 gr->n_short_sfb = 39;
555 } else
557 gr->sfbtab = g_scf_mixed[sr_idx];
558 gr->n_long_sfb = HDR_TEST_MPEG1(hdr) ? 8 : 6;
559 gr->n_short_sfb = 30;
562 tables = get_bits(bs, 10);
563 tables <<= 5;
564 gr->subblock_gain[0] = (uint8_t)get_bits(bs, 3);
565 gr->subblock_gain[1] = (uint8_t)get_bits(bs, 3);
566 gr->subblock_gain[2] = (uint8_t)get_bits(bs, 3);
567 } else
569 gr->block_type = 0;
570 gr->mixed_block_flag = 0;
571 tables = get_bits(bs, 15);
572 gr->region_count[0] = (uint8_t)get_bits(bs, 4);
573 gr->region_count[1] = (uint8_t)get_bits(bs, 3);
574 gr->region_count[2] = 255;
576 gr->table_select[0] = (uint8_t)(tables >> 10);
577 gr->table_select[1] = (uint8_t)((tables >> 5) & 31);
578 gr->table_select[2] = (uint8_t)((tables) & 31);
579 gr->preflag = HDR_TEST_MPEG1(hdr) ? get_bits(bs, 1) : (gr->scalefac_compress >= 500);
580 gr->scalefac_scale = (uint8_t)get_bits(bs, 1);
581 gr->count1_table = (uint8_t)get_bits(bs, 1);
582 gr->scfsi = (uint8_t)((scfsi >> 12) & 15);
583 scfsi <<= 4;
584 gr++;
585 } while(--gr_count);
587 if (part_23_sum + bs->pos > bs->limit + main_data_begin*8)
589 return -1;
592 return main_data_begin;
595 static void L3_read_scalefactors(uint8_t *scf, uint8_t *ist_pos, const uint8_t *scf_size, const uint8_t *scf_count, bs_t *bitbuf, int scfsi)
597 int i, k;
598 for (i = 0; i < 4 && scf_count[i]; i++, scfsi *= 2)
600 int cnt = scf_count[i];
601 if (scfsi & 8)
603 memcpy(scf, ist_pos, cnt);
604 } else
606 int bits = scf_size[i];
607 if (!bits)
609 memset(scf, 0, cnt);
610 memset(ist_pos, 0, cnt);
611 } else
613 int max_scf = (scfsi < 0) ? (1 << bits) - 1 : -1;
614 for (k = 0; k < cnt; k++)
616 int s = get_bits(bitbuf, bits);
617 ist_pos[k] = (s == max_scf ? -1 : s);
618 scf[k] = s;
622 ist_pos += cnt;
623 scf += cnt;
625 scf[0] = scf[1] = scf[2] = 0;
/*
 * Scales y down by a power of two given in quarter steps: each unit of exp_q2
 * multiplies y by roughly 2^(-1/4) (g_expfrac holds 2^-30 times the four
 * fractional factors). The exponent is consumed in chunks of at most 30*4 so
 * the integer shift stays within range.
 */
static float L3_ldexp_q2(float y, int exp_q2)
{
    static const float g_expfrac[4] = { 9.31322575e-10f,7.83145814e-10f,6.58544508e-10f,5.53767716e-10f };
    int e;
    do
    {
        e = MINIMP3_MIN(30*4, exp_q2);
        y *= g_expfrac[e & 3]*(1 << 30 >> (e >> 2));
    } while ((exp_q2 -= e) > 0);
    return y;
}
640 static void L3_decode_scalefactors(const uint8_t *hdr, uint8_t *ist_pos, bs_t *bs, const L3_gr_info_t *gr, float *scf, int ch)
642 static const uint8_t g_scf_partitions[3][28] = {
643 { 6,5,5, 5,6,5,5,5,6,5, 7,3,11,10,0,0, 7, 7, 7,0, 6, 6,6,3, 8, 8,5,0 },
644 { 8,9,6,12,6,9,9,9,6,9,12,6,15,18,0,0, 6,15,12,0, 6,12,9,6, 6,18,9,0 },
645 { 9,9,6,12,9,9,9,9,9,9,12,6,18,18,0,0,12,12,12,0,12, 9,9,6,15,12,9,0 }
647 const uint8_t *scf_partition = g_scf_partitions[!!gr->n_short_sfb + !gr->n_long_sfb];
648 uint8_t scf_size[4], iscf[40];
649 int i, scf_shift = gr->scalefac_scale + 1, gain_exp, scfsi = gr->scfsi;
650 float gain;
652 if (HDR_TEST_MPEG1(hdr))
654 static const uint8_t g_scfc_decode[16] = { 0,1,2,3, 12,5,6,7, 9,10,11,13, 14,15,18,19 };
655 int part = g_scfc_decode[gr->scalefac_compress];
656 scf_size[1] = scf_size[0] = (uint8_t)(part >> 2);
657 scf_size[3] = scf_size[2] = (uint8_t)(part & 3);
658 } else
660 static const uint8_t g_mod[6*4] = { 5,5,4,4,5,5,4,1,4,3,1,1,5,6,6,1,4,4,4,1,4,3,1,1 };
661 int k, modprod, sfc, ist = HDR_TEST_I_STEREO(hdr) && ch;
662 sfc = gr->scalefac_compress >> ist;
663 for (k = ist*3*4; sfc >= 0; sfc -= modprod, k += 4)
665 for (modprod = 1, i = 3; i >= 0; i--)
667 scf_size[i] = (uint8_t)(sfc / modprod % g_mod[k + i]);
668 modprod *= g_mod[k + i];
671 scf_partition += k;
672 scfsi = -16;
674 L3_read_scalefactors(iscf, ist_pos, scf_size, scf_partition, bs, scfsi);
676 if (gr->n_short_sfb)
678 int sh = 3 - scf_shift;
679 for (i = 0; i < gr->n_short_sfb; i += 3)
681 iscf[gr->n_long_sfb + i + 0] += gr->subblock_gain[0] << sh;
682 iscf[gr->n_long_sfb + i + 1] += gr->subblock_gain[1] << sh;
683 iscf[gr->n_long_sfb + i + 2] += gr->subblock_gain[2] << sh;
685 } else if (gr->preflag)
687 static const uint8_t g_preamp[10] = { 1,1,1,1,2,2,3,3,3,2 };
688 for (i = 0; i < 10; i++)
690 iscf[11 + i] += g_preamp[i];
694 gain_exp = gr->global_gain + BITS_DEQUANTIZER_OUT*4 - 210 - (HDR_IS_MS_STEREO(hdr) ? 2 : 0);
695 gain = L3_ldexp_q2(1 << (MAX_SCFI/4), MAX_SCFI - gain_exp);
696 for (i = 0; i < (int)(gr->n_long_sfb + gr->n_short_sfb); i++)
698 scf[i] = L3_ldexp_q2(gain, iscf[i] << scf_shift);
/*
 * Signed x^(4/3) lookup table: entries 0..15 hold the values for x = 0, -1 .. -15,
 * entries 16..144 hold the values for x = 0 .. 128.
 */
static const float g_pow43[129 + 16] = {
    0,-1,-2.519842f,-4.326749f,-6.349604f,-8.549880f,-10.902724f,-13.390518f,-16.000000f,-18.720754f,-21.544347f,-24.463781f,-27.473142f,-30.567351f,-33.741992f,-36.993181f,
    0,1,2.519842f,4.326749f,6.349604f,8.549880f,10.902724f,13.390518f,16.000000f,18.720754f,21.544347f,24.463781f,27.473142f,30.567351f,33.741992f,36.993181f,40.317474f,43.711787f,47.173345f,50.699631f,54.288352f,57.937408f,61.644865f,65.408941f,69.227979f,73.100443f,77.024898f,81.000000f,85.024491f,89.097188f,93.216975f,97.382800f,101.593667f,105.848633f,110.146801f,114.487321f,118.869381f,123.292209f,127.755065f,132.257246f,136.798076f,141.376907f,145.993119f,150.646117f,155.335327f,160.060199f,164.820202f,169.614826f,174.443577f,179.305980f,184.201575f,189.129918f,194.090580f,199.083145f,204.107210f,209.162385f,214.248292f,219.364564f,224.510845f,229.686789f,234.892058f,240.126328f,245.389280f,250.680604f,256.000000f,261.347174f,266.721841f,272.123723f,277.552547f,283.008049f,288.489971f,293.998060f,299.532071f,305.091761f,310.676898f,316.287249f,321.922592f,327.582707f,333.267377f,338.976394f,344.709550f,350.466646f,356.247482f,362.051866f,367.879608f,373.730522f,379.604427f,385.501143f,391.420496f,397.362314f,403.326427f,409.312672f,415.320884f,421.350905f,427.402579f,433.475750f,439.570269f,445.685987f,451.822757f,457.980436f,464.158883f,470.357960f,476.577530f,482.817459f,489.077615f,495.357868f,501.658090f,507.978156f,514.317941f,520.677324f,527.056184f,533.454404f,539.871867f,546.308458f,552.764065f,559.238575f,565.731879f,572.243870f,578.774440f,585.323483f,591.890898f,598.476581f,605.080431f,611.702349f,618.342238f,625.000000f,631.675540f,638.368763f,645.079578f
};

/*
 * Returns x^(4/3) (sign-preserving for the small negative x the table covers).
 * Values below 129 come straight from g_pow43. Larger x is reduced to a table
 * entry times a power-of-two factor (x/8 scaled by 16, or x/64 scaled by 256)
 * and corrected with a second-order polynomial in the rounding remainder.
 */
static float L3_pow_43(int x)
{
    float frac;
    int sign, mult = 256;

    if (x < 129)
    {
        return g_pow43[16 + x];
    }

    if (x < 1024)
    {
        mult = 16;
        x <<= 3;
    }

    /* sign is 64 when x rounds up to the next multiple of 64, else 0. */
    sign = 2*x & 64;
    frac = (float)((x & 63) - sign) / ((x & ~63) + sign);
    return g_pow43[16 + ((x + sign) >> 6)]*(1.f + frac*((4.f/3) + frac*(2.f/9)))*mult;
}
728 static void L3_huffman(float *dst, bs_t *bs, const L3_gr_info_t *gr_info, const float *scf, int layer3gr_limit)
730 static const int16_t tabs[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
731 785,785,785,785,784,784,784,784,513,513,513,513,513,513,513,513,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,
732 -255,1313,1298,1282,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,290,288,
733 -255,1313,1298,1282,769,769,769,769,529,529,529,529,529,529,529,529,528,528,528,528,528,528,528,528,512,512,512,512,512,512,512,512,290,288,
734 -253,-318,-351,-367,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,819,818,547,547,275,275,275,275,561,560,515,546,289,274,288,258,
735 -254,-287,1329,1299,1314,1312,1057,1057,1042,1042,1026,1026,784,784,784,784,529,529,529,529,529,529,529,529,769,769,769,769,768,768,768,768,563,560,306,306,291,259,
736 -252,-413,-477,-542,1298,-575,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-383,-399,1107,1092,1106,1061,849,849,789,789,1104,1091,773,773,1076,1075,341,340,325,309,834,804,577,577,532,532,516,516,832,818,803,816,561,561,531,531,515,546,289,289,288,258,
737 -252,-429,-493,-559,1057,1057,1042,1042,529,529,529,529,529,529,529,529,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,-382,1077,-415,1106,1061,1104,849,849,789,789,1091,1076,1029,1075,834,834,597,581,340,340,339,324,804,833,532,532,832,772,818,803,817,787,816,771,290,290,290,290,288,258,
738 -253,-349,-414,-447,-463,1329,1299,-479,1314,1312,1057,1057,1042,1042,1026,1026,785,785,785,785,784,784,784,784,769,769,769,769,768,768,768,768,-319,851,821,-335,836,850,805,849,341,340,325,336,533,533,579,579,564,564,773,832,578,548,563,516,321,276,306,291,304,259,
739 -251,-572,-733,-830,-863,-879,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,1396,1351,1381,1366,1395,1335,1380,-559,1334,1138,1138,1063,1063,1350,1392,1031,1031,1062,1062,1364,1363,1120,1120,1333,1348,881,881,881,881,375,374,359,373,343,358,341,325,791,791,1123,1122,-703,1105,1045,-719,865,865,790,790,774,774,1104,1029,338,293,323,308,-799,-815,833,788,772,818,803,816,322,292,307,320,561,531,515,546,289,274,288,258,
740 -251,-525,-605,-685,-765,-831,-846,1298,1057,1057,1312,1282,785,785,785,785,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,1399,1398,1383,1367,1382,1396,1351,-511,1381,1366,1139,1139,1079,1079,1124,1124,1364,1349,1363,1333,882,882,882,882,807,807,807,807,1094,1094,1136,1136,373,341,535,535,881,775,867,822,774,-591,324,338,-671,849,550,550,866,864,609,609,293,336,534,534,789,835,773,-751,834,804,308,307,833,788,832,772,562,562,547,547,305,275,560,515,290,290,
741 -252,-397,-477,-557,-622,-653,-719,-735,-750,1329,1299,1314,1057,1057,1042,1042,1312,1282,1024,1024,785,785,785,785,784,784,784,784,769,769,769,769,-383,1127,1141,1111,1126,1140,1095,1110,869,869,883,883,1079,1109,882,882,375,374,807,868,838,881,791,-463,867,822,368,263,852,837,836,-543,610,610,550,550,352,336,534,534,865,774,851,821,850,805,593,533,579,564,773,832,578,578,548,548,577,577,307,276,306,291,516,560,259,259,
742 -250,-2107,-2507,-2764,-2909,-2974,-3007,-3023,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-767,-1052,-1213,-1277,-1358,-1405,-1469,-1535,-1550,-1582,-1614,-1647,-1662,-1694,-1726,-1759,-1774,-1807,-1822,-1854,-1886,1565,-1919,-1935,-1951,-1967,1731,1730,1580,1717,-1983,1729,1564,-1999,1548,-2015,-2031,1715,1595,-2047,1714,-2063,1610,-2079,1609,-2095,1323,1323,1457,1457,1307,1307,1712,1547,1641,1700,1699,1594,1685,1625,1442,1442,1322,1322,-780,-973,-910,1279,1278,1277,1262,1276,1261,1275,1215,1260,1229,-959,974,974,989,989,-943,735,478,478,495,463,506,414,-1039,1003,958,1017,927,942,987,957,431,476,1272,1167,1228,-1183,1256,-1199,895,895,941,941,1242,1227,1212,1135,1014,1014,490,489,503,487,910,1013,985,925,863,894,970,955,1012,847,-1343,831,755,755,984,909,428,366,754,559,-1391,752,486,457,924,997,698,698,983,893,740,740,908,877,739,739,667,667,953,938,497,287,271,271,683,606,590,712,726,574,302,302,738,736,481,286,526,725,605,711,636,724,696,651,589,681,666,710,364,467,573,695,466,466,301,465,379,379,709,604,665,679,316,316,634,633,436,436,464,269,424,394,452,332,438,363,347,408,393,448,331,422,362,407,392,421,346,406,391,376,375,359,1441,1306,-2367,1290,-2383,1337,-2399,-2415,1426,1321,-2431,1411,1336,-2447,-2463,-2479,1169,1169,1049,1049,1424,1289,1412,1352,1319,-2495,1154,1154,1064,1064,1153,1153,416,390,360,404,403,389,344,374,373,343,358,372,327,357,342,311,356,326,1395,1394,1137,1137,1047,1047,1365,1392,1287,1379,1334,1364,1349,1378,1318,1363,792,792,792,792,1152,1152,1032,1032,1121,1121,1046,1046,1120,1120,1030,1030,-2895,1106,1061,1104,849,849,789,789,1091,1076,1029,1090,1060,1075,833,833,309,324,532,532,832,772,818,803,561,561,531,560,515,546,289,274,288,258,
743 -250,-1179,-1579,-1836,-1996,-2124,-2253,-2333,-2413,-2477,-2542,-2574,-2607,-2622,-2655,1314,1313,1298,1312,1282,785,785,785,785,1040,1040,1025,1025,768,768,768,768,-766,-798,-830,-862,-895,-911,-927,-943,-959,-975,-991,-1007,-1023,-1039,-1055,-1070,1724,1647,-1103,-1119,1631,1767,1662,1738,1708,1723,-1135,1780,1615,1779,1599,1677,1646,1778,1583,-1151,1777,1567,1737,1692,1765,1722,1707,1630,1751,1661,1764,1614,1736,1676,1763,1750,1645,1598,1721,1691,1762,1706,1582,1761,1566,-1167,1749,1629,767,766,751,765,494,494,735,764,719,749,734,763,447,447,748,718,477,506,431,491,446,476,461,505,415,430,475,445,504,399,460,489,414,503,383,474,429,459,502,502,746,752,488,398,501,473,413,472,486,271,480,270,-1439,-1455,1357,-1471,-1487,-1503,1341,1325,-1519,1489,1463,1403,1309,-1535,1372,1448,1418,1476,1356,1462,1387,-1551,1475,1340,1447,1402,1386,-1567,1068,1068,1474,1461,455,380,468,440,395,425,410,454,364,467,466,464,453,269,409,448,268,432,1371,1473,1432,1417,1308,1460,1355,1446,1459,1431,1083,1083,1401,1416,1458,1445,1067,1067,1370,1457,1051,1051,1291,1430,1385,1444,1354,1415,1400,1443,1082,1082,1173,1113,1186,1066,1185,1050,-1967,1158,1128,1172,1097,1171,1081,-1983,1157,1112,416,266,375,400,1170,1142,1127,1065,793,793,1169,1033,1156,1096,1141,1111,1155,1080,1126,1140,898,898,808,808,897,897,792,792,1095,1152,1032,1125,1110,1139,1079,1124,882,807,838,881,853,791,-2319,867,368,263,822,852,837,866,806,865,-2399,851,352,262,534,534,821,836,594,594,549,549,593,593,533,533,848,773,579,579,564,578,548,563,276,276,577,576,306,291,516,560,305,305,275,259,
744 -251,-892,-2058,-2620,-2828,-2957,-3023,-3039,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,-559,1530,-575,-591,1528,1527,1407,1526,1391,1023,1023,1023,1023,1525,1375,1268,1268,1103,1103,1087,1087,1039,1039,1523,-604,815,815,815,815,510,495,509,479,508,463,507,447,431,505,415,399,-734,-782,1262,-815,1259,1244,-831,1258,1228,-847,-863,1196,-879,1253,987,987,748,-767,493,493,462,477,414,414,686,669,478,446,461,445,474,429,487,458,412,471,1266,1264,1009,1009,799,799,-1019,-1276,-1452,-1581,-1677,-1757,-1821,-1886,-1933,-1997,1257,1257,1483,1468,1512,1422,1497,1406,1467,1496,1421,1510,1134,1134,1225,1225,1466,1451,1374,1405,1252,1252,1358,1480,1164,1164,1251,1251,1238,1238,1389,1465,-1407,1054,1101,-1423,1207,-1439,830,830,1248,1038,1237,1117,1223,1148,1236,1208,411,426,395,410,379,269,1193,1222,1132,1235,1221,1116,976,976,1192,1162,1177,1220,1131,1191,963,963,-1647,961,780,-1663,558,558,994,993,437,408,393,407,829,978,813,797,947,-1743,721,721,377,392,844,950,828,890,706,706,812,859,796,960,948,843,934,874,571,571,-1919,690,555,689,421,346,539,539,944,779,918,873,932,842,903,888,570,570,931,917,674,674,-2575,1562,-2591,1609,-2607,1654,1322,1322,1441,1441,1696,1546,1683,1593,1669,1624,1426,1426,1321,1321,1639,1680,1425,1425,1305,1305,1545,1668,1608,1623,1667,1592,1638,1666,1320,1320,1652,1607,1409,1409,1304,1304,1288,1288,1664,1637,1395,1395,1335,1335,1622,1636,1394,1394,1319,1319,1606,1621,1392,1392,1137,1137,1137,1137,345,390,360,375,404,373,1047,-2751,-2767,-2783,1062,1121,1046,-2799,1077,-2815,1106,1061,789,789,1105,1104,263,355,310,340,325,354,352,262,339,324,1091,1076,1029,1090,1060,1075,833,833,788,788,1088,1028,818,818,803,803,561,561,531,531,816,771,546,546,289,274,288,258,
745 -253,-317,-381,-446,-478,-509,1279,1279,-811,-1179,-1451,-1756,-1900,-2028,-2189,-2253,-2333,-2414,-2445,-2511,-2526,1313,1298,-2559,1041,1041,1040,1040,1025,1025,1024,1024,1022,1007,1021,991,1020,975,1019,959,687,687,1018,1017,671,671,655,655,1016,1015,639,639,758,758,623,623,757,607,756,591,755,575,754,559,543,543,1009,783,-575,-621,-685,-749,496,-590,750,749,734,748,974,989,1003,958,988,973,1002,942,987,957,972,1001,926,986,941,971,956,1000,910,985,925,999,894,970,-1071,-1087,-1102,1390,-1135,1436,1509,1451,1374,-1151,1405,1358,1480,1420,-1167,1507,1494,1389,1342,1465,1435,1450,1326,1505,1310,1493,1373,1479,1404,1492,1464,1419,428,443,472,397,736,526,464,464,486,457,442,471,484,482,1357,1449,1434,1478,1388,1491,1341,1490,1325,1489,1463,1403,1309,1477,1372,1448,1418,1433,1476,1356,1462,1387,-1439,1475,1340,1447,1402,1474,1324,1461,1371,1473,269,448,1432,1417,1308,1460,-1711,1459,-1727,1441,1099,1099,1446,1386,1431,1401,-1743,1289,1083,1083,1160,1160,1458,1445,1067,1067,1370,1457,1307,1430,1129,1129,1098,1098,268,432,267,416,266,400,-1887,1144,1187,1082,1173,1113,1186,1066,1050,1158,1128,1143,1172,1097,1171,1081,420,391,1157,1112,1170,1142,1127,1065,1169,1049,1156,1096,1141,1111,1155,1080,1126,1154,1064,1153,1140,1095,1048,-2159,1125,1110,1137,-2175,823,823,1139,1138,807,807,384,264,368,263,868,838,853,791,867,822,852,837,866,806,865,790,-2319,851,821,836,352,262,850,805,849,-2399,533,533,835,820,336,261,578,548,563,577,532,532,832,772,562,562,547,547,305,275,560,515,290,290,288,258 };
746 static const uint8_t tab32[] = { 130,162,193,209,44,28,76,140,9,9,9,9,9,9,9,9,190,254,222,238,126,94,157,157,109,61,173,205 };
747 static const uint8_t tab33[] = { 252,236,220,204,188,172,156,140,124,108,92,76,60,44,28,12 };
748 static const int16_t tabindex[2*16] = { 0,32,64,98,0,132,180,218,292,364,426,538,648,746,0,1126,1460,1460,1460,1460,1460,1460,1460,1460,1842,1842,1842,1842,1842,1842,1842,1842 };
749 static const uint8_t g_linbits[] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,6,8,10,13,4,5,6,7,8,9,11,13 };
751 #define PEEK_BITS(n) (bs_cache >> (32 - n))
752 #define FLUSH_BITS(n) { bs_cache <<= (n); bs_sh += (n); }
753 #define CHECK_BITS while (bs_sh >= 0) { bs_cache |= (uint32_t)*bs_next_ptr++ << bs_sh; bs_sh -= 8; }
754 #define BSPOS ((bs_next_ptr - bs->buf)*8 - 24 + bs_sh)
756 float one = 0.0f;
757 int ireg = 0, big_val_cnt = gr_info->big_values;
758 const uint8_t *sfb = gr_info->sfbtab;
759 const uint8_t *bs_next_ptr = bs->buf + bs->pos/8;
760 uint32_t bs_cache = (((bs_next_ptr[0]*256u + bs_next_ptr[1])*256u + bs_next_ptr[2])*256u + bs_next_ptr[3]) << (bs->pos & 7);
761 int pairs_to_decode, np, bs_sh = (bs->pos & 7) - 8;
762 bs_next_ptr += 4;
764 while (big_val_cnt > 0)
766 int tab_num = gr_info->table_select[ireg];
767 int sfb_cnt = gr_info->region_count[ireg++];
768 const int16_t *codebook = tabs + tabindex[tab_num];
769 int linbits = g_linbits[tab_num];
772 np = *sfb++ / 2;
773 pairs_to_decode = MINIMP3_MIN(big_val_cnt, np);
774 one = *scf++;
777 int j, w = 5;
778 int leaf = codebook[PEEK_BITS(w)];
779 while (leaf < 0)
781 FLUSH_BITS(w);
782 w = leaf & 7;
783 leaf = codebook[PEEK_BITS(w) - (leaf >> 3)];
785 FLUSH_BITS(leaf >> 8);
787 for (j = 0; j < 2; j++, dst++, leaf >>= 4)
789 int lsb = leaf & 0x0F;
790 if (lsb == 15 && linbits)
792 lsb += PEEK_BITS(linbits);
793 FLUSH_BITS(linbits);
794 CHECK_BITS;
795 *dst = one*L3_pow_43(lsb)*((int32_t)bs_cache < 0 ? -1: 1);
796 } else
798 *dst = g_pow43[16 + lsb - 16*(bs_cache >> 31)]*one;
800 FLUSH_BITS(lsb ? 1 : 0);
802 CHECK_BITS;
803 } while (--pairs_to_decode);
804 } while ((big_val_cnt -= np) > 0 && --sfb_cnt >= 0);
807 for (np = 1 - big_val_cnt;; dst += 4)
809 const uint8_t *codebook_count1 = (gr_info->count1_table) ? tab33 : tab32;
810 int leaf = codebook_count1[PEEK_BITS(4)];
811 if (!(leaf & 8))
813 leaf = codebook_count1[(leaf >> 3) + (bs_cache << 4 >> (32 - (leaf & 3)))];
815 FLUSH_BITS(leaf & 7);
816 if (BSPOS > layer3gr_limit)
818 break;
820 #define RELOAD_SCALEFACTOR if (!--np) { np = *sfb++/2; if (!np) break; one = *scf++; }
821 #define DEQ_COUNT1(s) if (leaf & (128 >> s)) { dst[s] = ((int32_t)bs_cache < 0) ? -one : one; FLUSH_BITS(1) }
822 RELOAD_SCALEFACTOR;
823 DEQ_COUNT1(0);
824 DEQ_COUNT1(1);
825 RELOAD_SCALEFACTOR;
826 DEQ_COUNT1(2);
827 DEQ_COUNT1(3);
828 CHECK_BITS;
831 bs->pos = layer3gr_limit;
/*
 * In-place sum/difference butterfly over two channels.
 * The first channel starts at `left`; the second is taken to start 576 floats
 * later in the same buffer. For each of the first n samples the pair (a, b)
 * is replaced by (a + b, a - b).
 */
static void L3_midside_stereo(float *left, int n)
{
    int i = 0;
    float *right = left + 576;
#if HAVE_SIMD
    /* Vector path handles groups of four; the scalar loop below finishes the tail. */
    if (have_simd()) for (; i < n - 3; i += 4)
    {
        f4 vl = VLD(left + i);
        f4 vr = VLD(right + i);
        VSTORE(left + i, VADD(vl, vr));
        VSTORE(right + i, VSUB(vl, vr));
    }
#endif /* HAVE_SIMD */
    for (; i < n; i++)
    {
        float a = left[i];
        float b = right[i];
        left[i] = a + b;
        right[i] = a - b;
    }
}
/*
 * Derive two channels from one band of n samples.
 * Each sample left[i] is scaled by kr into left[i + 576] and by kl in place.
 * The second channel must be written first because it reads the unscaled
 * left[i].
 */
static void L3_intensity_stereo_band(float *left, int n, float kl, float kr)
{
    int i;
    for (i = 0; i < n; i++)
    {
        left[i + 576] = left[i]*kr;
        left[i] = left[i]*kl;
    }
}
/*
 * For each of the three interleaved band groups (band index modulo 3), find
 * the highest band index that contains a non-zero sample in `right`.
 * sfb[i] gives the width of band i; samples are inspected in pairs, so widths
 * are expected to be even. Groups with no non-zero band are reported as -1.
 */
static void L3_stereo_top_band(const float *right, const uint8_t *sfb, int nbands, int max_band[3])
{
    int i, k;

    max_band[0] = max_band[1] = max_band[2] = -1;

    for (i = 0; i < nbands; i++)
    {
        for (k = 0; k < sfb[i]; k += 2)
        {
            if (right[k] != 0 || right[k + 1] != 0)
            {
                max_band[i % 3] = i;
                break;
            }
        }
        right += sfb[i];
    }
}
/*
 * Walk the band-width table `sfb` (terminated by a zero width) and apply
 * per-band stereo processing to `left` and the channel 576 floats after it.
 *
 * A band above its group's max_band whose position ist_pos[i] is below the
 * limit (7 when HDR_TEST_MPEG1(hdr), otherwise 64) is rebuilt with
 * L3_intensity_stereo_band(). Any other band goes through L3_midside_stereo()
 * when HDR_TEST_MS_STEREO(hdr) is set, and is left untouched otherwise.
 */
static void L3_stereo_process(float *left, const uint8_t *ist_pos, const uint8_t *sfb, const uint8_t *hdr, int max_band[3], int mpeg2_sh)
{
    static const float g_pan[7*2] = { 0,1,0.21132487f,0.78867513f,0.36602540f,0.63397460f,0.5f,0.5f,0.63397460f,0.36602540f,0.78867513f,0.21132487f,1,0 };
    unsigned i, max_pos = HDR_TEST_MPEG1(hdr) ? 7 : 64;

    for (i = 0; sfb[i]; i++)
    {
        unsigned ipos = ist_pos[i];
        if ((int)i > max_band[i % 3] && ipos < max_pos)
        {
            /* Gains get an extra sqrt(2) factor when the MS flag is also set. */
            float kl, kr, s = HDR_TEST_MS_STEREO(hdr) ? 1.41421356f : 1;
            if (HDR_TEST_MPEG1(hdr))
            {
                kl = g_pan[2*ipos];
                kr = g_pan[2*ipos + 1];
            } else
            {
                kl = 1;
                kr = L3_ldexp_q2(1, (ipos + 1) >> 1 << mpeg2_sh);
                if (ipos & 1)
                {
                    /* Odd positions swap which channel is attenuated. */
                    kl = kr;
                    kr = 1;
                }
            }
            L3_intensity_stereo_band(left, sfb[i], kl*s, kr*s);
        } else if (HDR_TEST_MS_STEREO(hdr))
        {
            L3_midside_stereo(left, sfb[i]);
        }
        left += sfb[i];
    }
}
920 static void L3_intensity_stereo(float *left, uint8_t *ist_pos, const L3_gr_info_t *gr, const uint8_t *hdr)
922 int max_band[3], n_sfb = gr->n_long_sfb + gr->n_short_sfb;
923 int i, max_blocks = gr->n_short_sfb ? 3 : 1;
925 L3_stereo_top_band(left + 576, gr->sfbtab, n_sfb, max_band);
926 if (gr->n_long_sfb)
928 max_band[0] = max_band[1] = max_band[2] = MINIMP3_MAX(MINIMP3_MAX(max_band[0], max_band[1]), max_band[2]);
930 for (i = 0; i < max_blocks; i++)
932 int default_pos = HDR_TEST_MPEG1(hdr) ? 3 : 0;
933 int itop = n_sfb - max_blocks + i;
934 int prev = itop - max_blocks;
935 ist_pos[itop] = max_band[i] >= prev ? default_pos : ist_pos[prev];
937 L3_stereo_process(left, ist_pos, gr->sfbtab, hdr, max_band, gr[1].scalefac_compress & 1);
/*
 * Interleave three consecutive runs of samples per band.
 * The band width `len` is read from sfb, stepping three entries at a time and
 * stopping at a zero width. For each band the 3*len input samples
 * (run0, run1, run2) are written to `scratch` as run0[0], run1[0], run2[0],
 * run0[1], ... The result is then copied back over the start of grbuf.
 */
static void L3_reorder(float *grbuf, float *scratch, const uint8_t *sfb)
{
    int i, len;
    float *src = grbuf, *dst = scratch;

    for (;0 != (len = *sfb); sfb += 3, src += 2*len)
    {
        /* The inner loop advances src by len; the outer step skips the other two runs. */
        for (i = 0; i < len; i++, src++)
        {
            *dst++ = src[0*len];
            *dst++ = src[1*len];
            *dst++ = src[2*len];
        }
    }
    memcpy(grbuf, scratch, (dst - scratch)*sizeof(float));
}
/*
 * Apply an 8-point butterfly across each of `nbands` boundaries between
 * adjacent 18-sample blocks of grbuf. For boundary b, samples 18+i (upper)
 * and 17-i (lower) of the current block pair are rotated with the
 * coefficient pairs in g_aa.
 */
static void L3_antialias(float *grbuf, int nbands)
{
    static const float g_aa[2][8] = {
        {0.85749293f,0.88174200f,0.94962865f,0.98331459f,0.99551782f,0.99916056f,0.99989920f,0.99999316f},
        {0.51449576f,0.47173197f,0.31337745f,0.18191320f,0.09457419f,0.04096558f,0.01419856f,0.00369997f}
    };

    for (; nbands > 0; nbands--, grbuf += 18)
    {
        int i = 0;
#if HAVE_SIMD
        if (have_simd()) for (; i < 8; i += 4)
        {
            f4 vu = VLD(grbuf + 18 + i);
            f4 vd = VLD(grbuf + 14 - i);
            f4 vc0 = VLD(g_aa[0] + i);
            f4 vc1 = VLD(g_aa[1] + i);
            /* Lower samples are stored in descending order relative to i, so reverse the lanes. */
            vd = VREV(vd);
            VSTORE(grbuf + 18 + i, VSUB(VMUL(vu, vc0), VMUL(vd, vc1)));
            vd = VADD(VMUL(vu, vc1), VMUL(vd, vc0));
            VSTORE(grbuf + 14 - i, VREV(vd));
        }
#endif /* HAVE_SIMD */
#ifndef MINIMP3_ONLY_SIMD
        for(; i < 8; i++)
        {
            float u = grbuf[18 + i];
            float d = grbuf[17 - i];
            grbuf[18 + i] = u*g_aa[0][i] - d*g_aa[1][i];
            grbuf[17 - i] = u*g_aa[1][i] + d*g_aa[0][i];
        }
#endif /* MINIMP3_ONLY_SIMD */
    }
}
/*
 * In-place 9-point transform of y[0..8] (named as a DCT-III by the function).
 * Even-indexed and odd-indexed inputs are processed separately and then
 * combined by sums and differences into the nine outputs.
 */
static void L3_dct3_9(float *y)
{
    float s0, s1, s2, s3, s4, s5, s6, s7, s8, t0, t2, t4;

    /* Even-indexed inputs. */
    s0 = y[0]; s2 = y[2]; s4 = y[4]; s6 = y[6]; s8 = y[8];
    t0 = s0 + s6*0.5f;
    s0 -= s6;
    t4 = (s4 + s2)*0.93969262f;
    t2 = (s8 + s2)*0.76604444f;
    s6 = (s4 - s8)*0.17364818f;
    s4 += s8 - s2;

    s2 = s0 - s4*0.5f;
    y[4] = s4 + s0;
    s8 = t0 - t2 + s6;
    s0 = t0 - t4 + t2;
    s4 = t0 + t4 - s6;

    /* Odd-indexed inputs. */
    s1 = y[1]; s3 = y[3]; s5 = y[5]; s7 = y[7];

    s3 *= 0.86602540f;
    t0 = (s5 + s1)*0.98480775f;
    t4 = (s5 - s7)*0.34202014f;
    t2 = (s1 + s7)*0.64278761f;
    s1 = (s1 - s5 - s7)*0.86602540f;

    s5 = t0 - s3 - t2;
    s7 = t4 - s3 - t0;
    s3 = t4 + s3 - t2;

    /* Combine the two halves; y[4] was already written above. */
    y[0] = s4 - s7;
    y[1] = s2 + s1;
    y[2] = s0 - s3;
    y[3] = s8 + s5;
    y[5] = s8 - s5;
    y[6] = s0 + s3;
    y[7] = s2 - s1;
    y[8] = s4 + s7;
}
/*
 * Process `nbands` consecutive 18-sample blocks of grbuf in place.
 * Each block is folded into two 9-point vectors, passed through L3_dct3_9(),
 * rotated by g_twid9, and combined with the 9 saved values in `overlap` using
 * `window` (18 coefficients: indices 0..8 and 9..17 are used as two halves).
 * `overlap` is updated with the values to carry into the next call.
 */
static void L3_imdct36(float *grbuf, float *overlap, const float *window, int nbands)
{
    int i, j;
    static const float g_twid9[18] = {
        0.73727734f,0.79335334f,0.84339145f,0.88701083f,0.92387953f,0.95371695f,0.97629601f,0.99144486f,0.99904822f,0.67559021f,0.60876143f,0.53729961f,0.46174861f,0.38268343f,0.30070580f,0.21643961f,0.13052619f,0.04361938f
    };

    for (j = 0; j < nbands; j++, grbuf += 18, overlap += 9)
    {
        float co[9], si[9];
        co[0] = -grbuf[0];
        si[0] = grbuf[17];
        for (i = 0; i < 4; i++)
        {
            si[8 - 2*i] =   grbuf[4*i + 1] - grbuf[4*i + 2];
            co[1 + 2*i] =   grbuf[4*i + 1] + grbuf[4*i + 2];
            si[7 - 2*i] =   grbuf[4*i + 4] - grbuf[4*i + 3];
            co[2 + 2*i] = -(grbuf[4*i + 3] + grbuf[4*i + 4]);
        }
        L3_dct3_9(co);
        L3_dct3_9(si);

        si[1] = -si[1];
        si[3] = -si[3];
        si[5] = -si[5];
        si[7] = -si[7];

        i = 0;

#if HAVE_SIMD
        /* Vector path covers i = 0..7; the scalar loop below handles the rest. */
        if (have_simd()) for (; i < 8; i += 4)
        {
            f4 vovl = VLD(overlap + i);
            f4 vc = VLD(co + i);
            f4 vs = VLD(si + i);
            f4 vr0 = VLD(g_twid9 + i);
            f4 vr1 = VLD(g_twid9 + 9 + i);
            f4 vw0 = VLD(window + i);
            f4 vw1 = VLD(window + 9 + i);
            f4 vsum = VADD(VMUL(vc, vr1), VMUL(vs, vr0));
            VSTORE(overlap + i, VSUB(VMUL(vc, vr0), VMUL(vs, vr1)));
            VSTORE(grbuf + i, VSUB(VMUL(vovl, vw0), VMUL(vsum, vw1)));
            vsum = VADD(VMUL(vovl, vw1), VMUL(vsum, vw0));
            VSTORE(grbuf + 14 - i, VREV(vsum));
        }
#endif /* HAVE_SIMD */
        for (; i < 9; i++)
        {
            float ovl = overlap[i];
            float sum = co[i]*g_twid9[9 + i] + si[i]*g_twid9[0 + i];
            overlap[i] = co[i]*g_twid9[0 + i] - si[i]*g_twid9[9 + i];
            grbuf[i] = ovl*window[0 + i] - sum*window[9 + i];
            grbuf[17 - i] = ovl*window[9 + i] + sum*window[0 + i];
        }
    }
}
/*
 * 3-point transform: writes three outputs to dst from inputs x0, x1, x2.
 *   dst[1] = x0 + x2
 *   dst[0] = (x0 - x2/2) + x1*0.86602540
 *   dst[2] = (x0 - x2/2) - x1*0.86602540
 */
static void L3_idct3(float x0, float x1, float x2, float *dst)
{
    float m1 = x1*0.86602540f;
    float a1 = x0 - x2*0.5f;
    dst[1] = x0 + x2;
    dst[0] = a1 + m1;
    dst[2] = a1 - m1;
}
/*
 * Short-block transform for one window.
 * Reads six inputs from x at stride 3 (x[0], x[3], ..., x[15]), writes six
 * outputs to dst, and updates the three carried values in `overlap`.
 * g_twid3 serves both as the rotation (indices i and 3+i) and as the window
 * (indices 2-i and 5-i).
 */
static void L3_imdct12(float *x, float *dst, float *overlap)
{
    static const float g_twid3[6] = { 0.79335334f,0.92387953f,0.99144486f, 0.60876143f,0.38268343f,0.13052619f };
    float co[3], si[3];
    int i;

    L3_idct3(-x[0], x[6] + x[3], x[12] + x[9], co);
    L3_idct3(x[15], x[12] - x[9], x[6] - x[3], si);
    si[1] = -si[1];

    for (i = 0; i < 3; i++)
    {
        float ovl = overlap[i];
        float sum = co[i]*g_twid3[3 + i] + si[i]*g_twid3[0 + i];
        overlap[i] = co[i]*g_twid3[0 + i] - si[i]*g_twid3[3 + i];
        dst[i] = ovl*g_twid3[2 - i] - sum*g_twid3[5 - i];
        dst[5 - i] = ovl*g_twid3[5 - i] + sum*g_twid3[2 - i];
    }
}
/*
 * Short-block path for `nbands` consecutive 18-sample blocks.
 * For each block: the first six outputs come straight from the saved overlap,
 * then three L3_imdct12() passes (input offsets 0, 1, 2) fill grbuf[6..11],
 * grbuf[12..17] and finally overlap[0..5]. All three passes share
 * overlap[6..8] as their carried state.
 */
static void L3_imdct_short(float *grbuf, float *overlap, int nbands)
{
    for (;nbands > 0; nbands--, overlap += 9, grbuf += 18)
    {
        float tmp[18];
        /* Work from a copy because grbuf is overwritten while it is still being read. */
        memcpy(tmp, grbuf, sizeof(tmp));
        memcpy(grbuf, overlap, 6*sizeof(float));
        L3_imdct12(tmp, grbuf + 6, overlap + 6);
        L3_imdct12(tmp + 1, grbuf + 12, overlap + 6);
        L3_imdct12(tmp + 2, overlap, overlap + 6);
    }
}
/*
 * Negate every odd-indexed sample in every odd-numbered 18-sample block of a
 * 576-sample buffer (blocks 1, 3, ..., 31).
 */
static void L3_change_sign(float *grbuf)
{
    int b, i;
    for (b = 0, grbuf += 18; b < 32; b += 2, grbuf += 36)
        for (i = 1; i < 18; i += 2)
            grbuf[i] = -grbuf[i];
}
1139 static void L3_imdct_gr(float *grbuf, float *overlap, unsigned block_type, unsigned n_long_bands)
1141 static const float g_mdct_window[2][18] = {
1142 { 0.99904822f,0.99144486f,0.97629601f,0.95371695f,0.92387953f,0.88701083f,0.84339145f,0.79335334f,0.73727734f,0.04361938f,0.13052619f,0.21643961f,0.30070580f,0.38268343f,0.46174861f,0.53729961f,0.60876143f,0.67559021f },
1143 { 1,1,1,1,1,1,0.99144486f,0.92387953f,0.79335334f,0,0,0,0,0,0,0.13052619f,0.38268343f,0.60876143f }
1145 if (n_long_bands)
1147 L3_imdct36(grbuf, overlap, g_mdct_window[0], n_long_bands);
1148 grbuf += 18*n_long_bands;
1149 overlap += 9*n_long_bands;
1151 if (block_type == SHORT_BLOCK_TYPE)
1152 L3_imdct_short(grbuf, overlap, 32 - n_long_bands);
1153 else
1154 L3_imdct36(grbuf, overlap, g_mdct_window[block_type == STOP_BLOCK_TYPE], 32 - n_long_bands);
1157 static void L3_save_reservoir(mp3dec_t *h, mp3dec_scratch_t *s)
1159 int pos = (s->bs.pos + 7)/8u;
1160 int remains = s->bs.limit/8u - pos;
1161 if (remains > MAX_BITRESERVOIR_BYTES)
1163 pos += remains - MAX_BITRESERVOIR_BYTES;
1164 remains = MAX_BITRESERVOIR_BYTES;
1166 if (remains > 0)
1168 memmove(h->reserv_buf, s->maindata + pos, remains);
1170 h->reserv = remains;
1173 static int L3_restore_reservoir(mp3dec_t *h, bs_t *bs, mp3dec_scratch_t *s, int main_data_begin)
1175 int frame_bytes = (bs->limit - bs->pos)/8;
1176 int bytes_have = MINIMP3_MIN(h->reserv, main_data_begin);
1177 memcpy(s->maindata, h->reserv_buf + MINIMP3_MAX(0, h->reserv - main_data_begin), MINIMP3_MIN(h->reserv, main_data_begin));
1178 memcpy(s->maindata + bytes_have, bs->buf + bs->pos/8, frame_bytes);
1179 bs_init(&s->bs, s->maindata, bytes_have + frame_bytes);
1180 return h->reserv >= main_data_begin;
1183 static void L3_decode(mp3dec_t *h, mp3dec_scratch_t *s, L3_gr_info_t *gr_info, int nch)
1185 int ch;
1187 for (ch = 0; ch < nch; ch++)
1189 int layer3gr_limit = s->bs.pos + gr_info[ch].part_23_length;
1190 L3_decode_scalefactors(h->header, s->ist_pos[ch], &s->bs, gr_info + ch, s->scf, ch);
1191 L3_huffman(s->grbuf[ch], &s->bs, gr_info + ch, s->scf, layer3gr_limit);
1194 if (HDR_TEST_I_STEREO(h->header))
1196 L3_intensity_stereo(s->grbuf[0], s->ist_pos[1], gr_info, h->header);
1197 } else if (HDR_IS_MS_STEREO(h->header))
1199 L3_midside_stereo(s->grbuf[0], 576);
1202 for (ch = 0; ch < nch; ch++, gr_info++)
1204 int aa_bands = 31;
1205 int n_long_bands = (gr_info->mixed_block_flag ? 2 : 0) << (int)(HDR_GET_MY_SAMPLE_RATE(h->header) == 2);
1207 if (gr_info->n_short_sfb)
1209 aa_bands = n_long_bands - 1;
1210 L3_reorder(s->grbuf[ch] + n_long_bands*18, s->syn[0], gr_info->sfbtab + gr_info->n_long_sfb);
1213 L3_antialias(s->grbuf[ch], aa_bands);
1214 L3_imdct_gr(s->grbuf[ch], h->mdct_overlap[ch], gr_info->block_type, n_long_bands);
1215 L3_change_sign(s->grbuf[ch]);
/*
 * In-place 32-point transform across blocks (named as a DCT-II by the
 * function). For each column k in [0, n), the 32 values grbuf[k + 18*j],
 * j = 0..31, are replaced by their transform. The SIMD path processes four
 * columns per iteration and stores only two lanes when fewer than four
 * columns remain (k > n - 3).
 */
static void mp3d_DCT_II(float *grbuf, int n)
{
    static const float g_sec[24] = {
        10.19000816f,0.50060302f,0.50241929f,3.40760851f,0.50547093f,0.52249861f,2.05778098f,0.51544732f,0.56694406f,1.48416460f,0.53104258f,0.64682180f,1.16943991f,0.55310392f,0.78815460f,0.97256821f,0.58293498f,1.06067765f,0.83934963f,0.62250412f,1.72244716f,0.74453628f,0.67480832f,5.10114861f
    };
    int i, k = 0;
#if HAVE_SIMD
    if (have_simd()) for (; k < n; k += 4)
    {
        f4 t[4][8], *x;
        float *y = grbuf + k;

        /* First stage: fold 32 inputs into four rows of eight (x walks row 0; x[8], x[16], x[24] reach rows 1..3). */
        for (x = t[0], i = 0; i < 8; i++, x++)
        {
            f4 x0 = VLD(&y[i*18]);
            f4 x1 = VLD(&y[(15 - i)*18]);
            f4 x2 = VLD(&y[(16 + i)*18]);
            f4 x3 = VLD(&y[(31 - i)*18]);
            f4 t0 = VADD(x0, x3);
            f4 t1 = VADD(x1, x2);
            f4 t2 = VMUL_S(VSUB(x1, x2), g_sec[3*i + 0]);
            f4 t3 = VMUL_S(VSUB(x0, x3), g_sec[3*i + 1]);
            x[0] = VADD(t0, t1);
            x[8] = VMUL_S(VSUB(t0, t1), g_sec[3*i + 2]);
            x[16] = VADD(t3, t2);
            x[24] = VMUL_S(VSUB(t3, t2), g_sec[3*i + 2]);
        }
        /* Second stage: 8-point transform on each of the four rows. */
        for (x = t[0], i = 0; i < 4; i++, x += 8)
        {
            f4 x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7], xt;
            xt = VSUB(x0, x7); x0 = VADD(x0, x7);
            x7 = VSUB(x1, x6); x1 = VADD(x1, x6);
            x6 = VSUB(x2, x5); x2 = VADD(x2, x5);
            x5 = VSUB(x3, x4); x3 = VADD(x3, x4);
            x4 = VSUB(x0, x3); x0 = VADD(x0, x3);
            x3 = VSUB(x1, x2); x1 = VADD(x1, x2);
            x[0] = VADD(x0, x1);
            x[4] = VMUL_S(VSUB(x0, x1), 0.70710677f);
            x5 = VADD(x5, x6);
            x6 = VMUL_S(VADD(x6, x7), 0.70710677f);
            x7 = VADD(x7, xt);
            x3 = VMUL_S(VADD(x3, x4), 0.70710677f);
            x5 = VSUB(x5, VMUL_S(x7, 0.198912367f)); /* rotate by PI/8 */
            x7 = VADD(x7, VMUL_S(x5, 0.382683432f));
            x5 = VSUB(x5, VMUL_S(x7, 0.198912367f));
            x0 = VSUB(xt, x6); xt = VADD(xt, x6);
            x[1] = VMUL_S(VADD(xt, x7), 0.50979561f);
            x[2] = VMUL_S(VADD(x4, x3), 0.54119611f);
            x[3] = VMUL_S(VSUB(x0, x5), 0.60134488f);
            x[5] = VMUL_S(VADD(x0, x5), 0.89997619f);
            x[6] = VMUL_S(VSUB(x4, x3), 1.30656302f);
            x[7] = VMUL_S(VSUB(xt, x7), 2.56291556f);
        }

        if (k > n - 3)
        {
            /* Fewer than four columns left: store only the low two lanes. */
#if HAVE_SSE
#define VSAVE2(i, v) _mm_storel_pi((__m64 *)(void*)&y[i*18], v)
#else /* HAVE_SSE */
#define VSAVE2(i, v) vst1_f32((float32_t *)&y[i*18],  vget_low_f32(v))
#endif /* HAVE_SSE */
            for (i = 0; i < 7; i++, y += 4*18)
            {
                f4 s = VADD(t[3][i], t[3][i + 1]);
                VSAVE2(0, t[0][i]);
                VSAVE2(1, VADD(t[2][i], s));
                VSAVE2(2, VADD(t[1][i], t[1][i + 1]));
                VSAVE2(3, VADD(t[2][1 + i], s));
            }
            VSAVE2(0, t[0][7]);
            VSAVE2(1, VADD(t[2][7], t[3][7]));
            VSAVE2(2, t[1][7]);
            VSAVE2(3, t[3][7]);
        } else
        {
#define VSAVE4(i, v) VSTORE(&y[i*18], v)
            for (i = 0; i < 7; i++, y += 4*18)
            {
                f4 s = VADD(t[3][i], t[3][i + 1]);
                VSAVE4(0, t[0][i]);
                VSAVE4(1, VADD(t[2][i], s));
                VSAVE4(2, VADD(t[1][i], t[1][i + 1]));
                VSAVE4(3, VADD(t[2][1 + i], s));
            }
            VSAVE4(0, t[0][7]);
            VSAVE4(1, VADD(t[2][7], t[3][7]));
            VSAVE4(2, t[1][7]);
            VSAVE4(3, t[3][7]);
        }
    } else
#endif /* HAVE_SIMD */
#ifdef MINIMP3_ONLY_SIMD
    {}
#else /* MINIMP3_ONLY_SIMD */
    for (; k < n; k++)
    {
        float t[4][8], *x, *y = grbuf + k;

        /* First stage: fold 32 inputs into four rows of eight (x walks row 0; x[8], x[16], x[24] reach rows 1..3). */
        for (x = t[0], i = 0; i < 8; i++, x++)
        {
            float x0 = y[i*18];
            float x1 = y[(15 - i)*18];
            float x2 = y[(16 + i)*18];
            float x3 = y[(31 - i)*18];
            float t0 = x0 + x3;
            float t1 = x1 + x2;
            float t2 = (x1 - x2)*g_sec[3*i + 0];
            float t3 = (x0 - x3)*g_sec[3*i + 1];
            x[0] = t0 + t1;
            x[8] = (t0 - t1)*g_sec[3*i + 2];
            x[16] = t3 + t2;
            x[24] = (t3 - t2)*g_sec[3*i + 2];
        }
        /* Second stage: 8-point transform on each of the four rows. */
        for (x = t[0], i = 0; i < 4; i++, x += 8)
        {
            float x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7], xt;
            xt = x0 - x7; x0 += x7;
            x7 = x1 - x6; x1 += x6;
            x6 = x2 - x5; x2 += x5;
            x5 = x3 - x4; x3 += x4;
            x4 = x0 - x3; x0 += x3;
            x3 = x1 - x2; x1 += x2;
            x[0] = x0 + x1;
            x[4] = (x0 - x1)*0.70710677f;
            x5 =  x5 + x6;
            x6 = (x6 + x7)*0.70710677f;
            x7 =  x7 + xt;
            x3 = (x3 + x4)*0.70710677f;
            x5 -= x7*0.198912367f;  /* rotate by PI/8 */
            x7 += x5*0.382683432f;
            x5 -= x7*0.198912367f;
            x0 = xt - x6; xt += x6;
            x[1] = (xt + x7)*0.50979561f;
            x[2] = (x4 + x3)*0.54119611f;
            x[3] = (x0 - x5)*0.60134488f;
            x[5] = (x0 + x5)*0.89997619f;
            x[6] = (x4 - x3)*1.30656302f;
            x[7] = (xt - x7)*2.56291556f;
        }

        /* Final stage: combine the rows and write four outputs per step. */
        for (i = 0; i < 7; i++, y += 4*18)
        {
            y[0*18] = t[0][i];
            y[1*18] = t[2][i] + t[3][i] + t[3][i + 1];
            y[2*18] = t[1][i] + t[1][i + 1];
            y[3*18] = t[2][i + 1] + t[3][i] + t[3][i + 1];
        }
        y[0*18] = t[0][7];
        y[1*18] = t[2][7] + t[3][7];
        y[2*18] = t[1][7];
        y[3*18] = t[3][7];
    }
#endif /* MINIMP3_ONLY_SIMD */
}
1374 #ifndef MINIMP3_FLOAT_OUTPUT
1375 static int16_t mp3d_scale_pcm(float sample)
1377 if (sample >= 32766.5) return (int16_t) 32767;
1378 if (sample <= -32767.5) return (int16_t)-32768;
1379 int16_t s = (int16_t)(sample + .5f);
1380 s -= (s < 0); /* away from zero, to be compliant */
1381 return s;
1383 #else /* MINIMP3_FLOAT_OUTPUT */
1384 static float mp3d_scale_pcm(float sample)
1386 return sample*(1.f/32768.f);
1388 #endif /* MINIMP3_FLOAT_OUTPUT */
1390 static void mp3d_synth_pair(mp3d_sample_t *pcm, int nch, const float *z)
1392 float a;
1393 a = (z[14*64] - z[ 0]) * 29;
1394 a += (z[ 1*64] + z[13*64]) * 213;
1395 a += (z[12*64] - z[ 2*64]) * 459;
1396 a += (z[ 3*64] + z[11*64]) * 2037;
1397 a += (z[10*64] - z[ 4*64]) * 5153;
1398 a += (z[ 5*64] + z[ 9*64]) * 6574;
1399 a += (z[ 8*64] - z[ 6*64]) * 37489;
1400 a += z[ 7*64] * 75038;
1401 pcm[0] = mp3d_scale_pcm(a);
1403 z += 2;
1404 a = z[14*64] * 104;
1405 a += z[12*64] * 1567;
1406 a += z[10*64] * 9727;
1407 a += z[ 8*64] * 64019;
1408 a += z[ 6*64] * -9975;
1409 a += z[ 4*64] * -45;
1410 a += z[ 2*64] * 146;
1411 a += z[ 0*64] * -5;
1412 pcm[16*nch] = mp3d_scale_pcm(a);
1415 static void mp3d_synth(float *xl, mp3d_sample_t *dstl, int nch, float *lins)
1417 int i;
1418 float *xr = xl + 576*(nch - 1);
1419 mp3d_sample_t *dstr = dstl + (nch - 1);
1421 static const float g_win[] = {
1422 -1,26,-31,208,218,401,-519,2063,2000,4788,-5517,7134,5959,35640,-39336,74992,
1423 -1,24,-35,202,222,347,-581,2080,1952,4425,-5879,7640,5288,33791,-41176,74856,
1424 -1,21,-38,196,225,294,-645,2087,1893,4063,-6237,8092,4561,31947,-43006,74630,
1425 -1,19,-41,190,227,244,-711,2085,1822,3705,-6589,8492,3776,30112,-44821,74313,
1426 -1,17,-45,183,228,197,-779,2075,1739,3351,-6935,8840,2935,28289,-46617,73908,
1427 -1,16,-49,176,228,153,-848,2057,1644,3004,-7271,9139,2037,26482,-48390,73415,
1428 -2,14,-53,169,227,111,-919,2032,1535,2663,-7597,9389,1082,24694,-50137,72835,
1429 -2,13,-58,161,224,72,-991,2001,1414,2330,-7910,9592,70,22929,-51853,72169,
1430 -2,11,-63,154,221,36,-1064,1962,1280,2006,-8209,9750,-998,21189,-53534,71420,
1431 -2,10,-68,147,215,2,-1137,1919,1131,1692,-8491,9863,-2122,19478,-55178,70590,
1432 -3,9,-73,139,208,-29,-1210,1870,970,1388,-8755,9935,-3300,17799,-56778,69679,
1433 -3,8,-79,132,200,-57,-1283,1817,794,1095,-8998,9966,-4533,16155,-58333,68692,
1434 -4,7,-85,125,189,-83,-1356,1759,605,814,-9219,9959,-5818,14548,-59838,67629,
1435 -4,7,-91,117,177,-106,-1428,1698,402,545,-9416,9916,-7154,12980,-61289,66494,
1436 -5,6,-97,111,163,-127,-1498,1634,185,288,-9585,9838,-8540,11455,-62684,65290
1438 float *zlin = lins + 15*64;
1439 const float *w = g_win;
1441 zlin[4*15] = xl[18*16];
1442 zlin[4*15 + 1] = xr[18*16];
1443 zlin[4*15 + 2] = xl[0];
1444 zlin[4*15 + 3] = xr[0];
1446 zlin[4*31] = xl[1 + 18*16];
1447 zlin[4*31 + 1] = xr[1 + 18*16];
1448 zlin[4*31 + 2] = xl[1];
1449 zlin[4*31 + 3] = xr[1];
1451 mp3d_synth_pair(dstr, nch, lins + 4*15 + 1);
1452 mp3d_synth_pair(dstr + 32*nch, nch, lins + 4*15 + 64 + 1);
1453 mp3d_synth_pair(dstl, nch, lins + 4*15);
1454 mp3d_synth_pair(dstl + 32*nch, nch, lins + 4*15 + 64);
1456 #if HAVE_SIMD
1457 if (have_simd()) for (i = 14; i >= 0; i--)
1459 #define VLOAD(k) f4 w0 = VSET(*w++); f4 w1 = VSET(*w++); f4 vz = VLD(&zlin[4*i - 64*k]); f4 vy = VLD(&zlin[4*i - 64*(15 - k)]);
1460 #define V0(k) { VLOAD(k) b = VADD(VMUL(vz, w1), VMUL(vy, w0)) ; a = VSUB(VMUL(vz, w0), VMUL(vy, w1)); }
1461 #define V1(k) { VLOAD(k) b = VADD(b, VADD(VMUL(vz, w1), VMUL(vy, w0))); a = VADD(a, VSUB(VMUL(vz, w0), VMUL(vy, w1))); }
1462 #define V2(k) { VLOAD(k) b = VADD(b, VADD(VMUL(vz, w1), VMUL(vy, w0))); a = VADD(a, VSUB(VMUL(vy, w1), VMUL(vz, w0))); }
1463 f4 a, b;
1464 zlin[4*i] = xl[18*(31 - i)];
1465 zlin[4*i + 1] = xr[18*(31 - i)];
1466 zlin[4*i + 2] = xl[1 + 18*(31 - i)];
1467 zlin[4*i + 3] = xr[1 + 18*(31 - i)];
1468 zlin[4*i + 64] = xl[1 + 18*(1 + i)];
1469 zlin[4*i + 64 + 1] = xr[1 + 18*(1 + i)];
1470 zlin[4*i - 64 + 2] = xl[18*(1 + i)];
1471 zlin[4*i - 64 + 3] = xr[18*(1 + i)];
1473 V0(0) V2(1) V1(2) V2(3) V1(4) V2(5) V1(6) V2(7)
1476 #ifndef MINIMP3_FLOAT_OUTPUT
1477 #if HAVE_SSE
1478 static const f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f };
1479 static const f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f };
1480 __m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, g_max), g_min)),
1481 _mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, g_max), g_min)));
1482 dstr[(15 - i)*nch] = _mm_extract_epi16(pcm8, 1);
1483 dstr[(17 + i)*nch] = _mm_extract_epi16(pcm8, 5);
1484 dstl[(15 - i)*nch] = _mm_extract_epi16(pcm8, 0);
1485 dstl[(17 + i)*nch] = _mm_extract_epi16(pcm8, 4);
1486 dstr[(47 - i)*nch] = _mm_extract_epi16(pcm8, 3);
1487 dstr[(49 + i)*nch] = _mm_extract_epi16(pcm8, 7);
1488 dstl[(47 - i)*nch] = _mm_extract_epi16(pcm8, 2);
1489 dstl[(49 + i)*nch] = _mm_extract_epi16(pcm8, 6);
1490 #else /* HAVE_SSE */
1491 int16x4_t pcma, pcmb;
1492 a = VADD(a, VSET(0.5f));
1493 b = VADD(b, VSET(0.5f));
1494 pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, VSET(0)))));
1495 pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, VSET(0)))));
1496 vst1_lane_s16(dstr + (15 - i)*nch, pcma, 1);
1497 vst1_lane_s16(dstr + (17 + i)*nch, pcmb, 1);
1498 vst1_lane_s16(dstl + (15 - i)*nch, pcma, 0);
1499 vst1_lane_s16(dstl + (17 + i)*nch, pcmb, 0);
1500 vst1_lane_s16(dstr + (47 - i)*nch, pcma, 3);
1501 vst1_lane_s16(dstr + (49 + i)*nch, pcmb, 3);
1502 vst1_lane_s16(dstl + (47 - i)*nch, pcma, 2);
1503 vst1_lane_s16(dstl + (49 + i)*nch, pcmb, 2);
1504 #endif /* HAVE_SSE */
1506 #else /* MINIMP3_FLOAT_OUTPUT */
1508 static const f4 g_scale = { 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f };
1509 a = VMUL(a, g_scale);
1510 b = VMUL(b, g_scale);
1511 #if HAVE_SSE
1512 _mm_store_ss(dstr + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1)));
1513 _mm_store_ss(dstr + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(1, 1, 1, 1)));
1514 _mm_store_ss(dstl + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0)));
1515 _mm_store_ss(dstl + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(0, 0, 0, 0)));
1516 _mm_store_ss(dstr + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 3, 3, 3)));
1517 _mm_store_ss(dstr + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 3, 3, 3)));
1518 _mm_store_ss(dstl + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2)));
1519 _mm_store_ss(dstl + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 2, 2, 2)));
1520 #else /* HAVE_SSE */
1521 vst1q_lane_f32(dstr + (15 - i)*nch, a, 1);
1522 vst1q_lane_f32(dstr + (17 + i)*nch, b, 1);
1523 vst1q_lane_f32(dstl + (15 - i)*nch, a, 0);
1524 vst1q_lane_f32(dstl + (17 + i)*nch, b, 0);
1525 vst1q_lane_f32(dstr + (47 - i)*nch, a, 3);
1526 vst1q_lane_f32(dstr + (49 + i)*nch, b, 3);
1527 vst1q_lane_f32(dstl + (47 - i)*nch, a, 2);
1528 vst1q_lane_f32(dstl + (49 + i)*nch, b, 2);
1529 #endif /* HAVE_SSE */
1530 #endif /* MINIMP3_FLOAT_OUTPUT */
1532 } else
1533 #endif /* HAVE_SIMD */
1534 #ifdef MINIMP3_ONLY_SIMD
1536 #else /* MINIMP3_ONLY_SIMD */
1537 for (i = 14; i >= 0; i--)
1539 #define LOAD(k) float w0 = *w++; float w1 = *w++; float *vz = &zlin[4*i - k*64]; float *vy = &zlin[4*i - (15 - k)*64];
1540 #define S0(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] = vz[j]*w1 + vy[j]*w0, a[j] = vz[j]*w0 - vy[j]*w1; }
1541 #define S1(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j]*w1 + vy[j]*w0, a[j] += vz[j]*w0 - vy[j]*w1; }
1542 #define S2(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j]*w1 + vy[j]*w0, a[j] += vy[j]*w1 - vz[j]*w0; }
1543 float a[4], b[4];
1545 zlin[4*i] = xl[18*(31 - i)];
1546 zlin[4*i + 1] = xr[18*(31 - i)];
1547 zlin[4*i + 2] = xl[1 + 18*(31 - i)];
1548 zlin[4*i + 3] = xr[1 + 18*(31 - i)];
1549 zlin[4*(i + 16)] = xl[1 + 18*(1 + i)];
1550 zlin[4*(i + 16) + 1] = xr[1 + 18*(1 + i)];
1551 zlin[4*(i - 16) + 2] = xl[18*(1 + i)];
1552 zlin[4*(i - 16) + 3] = xr[18*(1 + i)];
1554 S0(0) S2(1) S1(2) S2(3) S1(4) S2(5) S1(6) S2(7)
1556 dstr[(15 - i)*nch] = mp3d_scale_pcm(a[1]);
1557 dstr[(17 + i)*nch] = mp3d_scale_pcm(b[1]);
1558 dstl[(15 - i)*nch] = mp3d_scale_pcm(a[0]);
1559 dstl[(17 + i)*nch] = mp3d_scale_pcm(b[0]);
1560 dstr[(47 - i)*nch] = mp3d_scale_pcm(a[3]);
1561 dstr[(49 + i)*nch] = mp3d_scale_pcm(b[3]);
1562 dstl[(47 - i)*nch] = mp3d_scale_pcm(a[2]);
1563 dstl[(49 + i)*nch] = mp3d_scale_pcm(b[2]);
1565 #endif /* MINIMP3_ONLY_SIMD */
1568 static void mp3d_synth_granule(float *qmf_state, float *grbuf, int nbands, int nch, mp3d_sample_t *pcm, float *lins)
1570 int i;
1571 for (i = 0; i < nch; i++)
1573 mp3d_DCT_II(grbuf + 576*i, nbands);
1576 memcpy(lins, qmf_state, sizeof(float)*15*64);
1578 for (i = 0; i < nbands; i += 2)
1580 mp3d_synth(grbuf + i, pcm + 32*nch*i, nch, lins + i*64);
1582 #ifndef MINIMP3_NONSTANDARD_BUT_LOGICAL
1583 if (nch == 1)
1585 for (i = 0; i < 15*64; i += 2)
1587 qmf_state[i] = lins[nbands*64 + i];
1589 } else
1590 #endif /* MINIMP3_NONSTANDARD_BUT_LOGICAL */
1592 memcpy(qmf_state, lins + nbands*64, sizeof(float)*15*64);
1596 static int mp3d_match_frame(const uint8_t *hdr, int mp3_bytes, int frame_bytes)
1598 int i, nmatch;
1599 for (i = 0, nmatch = 0; nmatch < MAX_FRAME_SYNC_MATCHES; nmatch++)
1601 i += hdr_frame_bytes(hdr + i, frame_bytes) + hdr_padding(hdr + i);
1602 if (i + HDR_SIZE > mp3_bytes)
1603 return nmatch > 0;
1604 if (!hdr_compare(hdr, hdr + i))
1605 return 0;
1607 return 1;
1610 static int mp3d_find_frame(const uint8_t *mp3, int mp3_bytes, int *free_format_bytes, int *ptr_frame_bytes)
1612 int i, k;
1613 for (i = 0; i < mp3_bytes - HDR_SIZE; i++, mp3++)
1615 if (hdr_valid(mp3))
1617 int frame_bytes = hdr_frame_bytes(mp3, *free_format_bytes);
1618 int frame_and_padding = frame_bytes + hdr_padding(mp3);
1620 for (k = HDR_SIZE; !frame_bytes && k < MAX_FREE_FORMAT_FRAME_SIZE && i + 2*k < mp3_bytes - HDR_SIZE; k++)
1622 if (hdr_compare(mp3, mp3 + k))
1624 int fb = k - hdr_padding(mp3);
1625 int nextfb = fb + hdr_padding(mp3 + k);
1626 if (i + k + nextfb + HDR_SIZE > mp3_bytes || !hdr_compare(mp3, mp3 + k + nextfb))
1627 continue;
1628 frame_and_padding = k;
1629 frame_bytes = fb;
1630 *free_format_bytes = fb;
1633 if ((frame_bytes && i + frame_and_padding <= mp3_bytes &&
1634 mp3d_match_frame(mp3, mp3_bytes - i, frame_bytes)) ||
1635 (!i && frame_and_padding == mp3_bytes))
1637 *ptr_frame_bytes = frame_and_padding;
1638 return i;
1640 *free_format_bytes = 0;
1643 *ptr_frame_bytes = 0;
1644 return i;
1647 void mp3dec_init(mp3dec_t *dec)
1649 dec->header[0] = 0;
1652 int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, mp3d_sample_t *pcm, mp3dec_frame_info_t *info)
1654 int i = 0, igr, frame_size = 0, success = 1;
1655 const uint8_t *hdr;
1656 bs_t bs_frame[1];
1657 mp3dec_scratch_t scratch;
1659 if (mp3_bytes > 4 && dec->header[0] == 0xff && hdr_compare(dec->header, mp3))
1661 frame_size = hdr_frame_bytes(mp3, dec->free_format_bytes) + hdr_padding(mp3);
1662 if (frame_size != mp3_bytes && (frame_size + HDR_SIZE > mp3_bytes || !hdr_compare(mp3, mp3 + frame_size)))
1664 frame_size = 0;
1667 if (!frame_size)
1669 memset(dec, 0, sizeof(mp3dec_t));
1670 i = mp3d_find_frame(mp3, mp3_bytes, &dec->free_format_bytes, &frame_size);
1671 if (!frame_size || i + frame_size > mp3_bytes)
1673 info->frame_bytes = i;
1674 return 0;
1678 hdr = mp3 + i;
1679 memcpy(dec->header, hdr, HDR_SIZE);
1680 info->frame_bytes = i + frame_size;
1681 info->channels = HDR_IS_MONO(hdr) ? 1 : 2;
1682 info->hz = hdr_sample_rate_hz(hdr);
1683 info->layer = 4 - HDR_GET_LAYER(hdr);
1684 info->bitrate_kbps = hdr_bitrate_kbps(hdr);
1686 if (!pcm)
1688 return hdr_frame_samples(hdr);
1691 bs_init(bs_frame, hdr + HDR_SIZE, frame_size - HDR_SIZE);
1692 if (HDR_IS_CRC(hdr))
1694 get_bits(bs_frame, 16);
1697 if (info->layer == 3)
1699 int main_data_begin = L3_read_side_info(bs_frame, scratch.gr_info, hdr);
1700 if (main_data_begin < 0 || bs_frame->pos > bs_frame->limit)
1702 mp3dec_init(dec);
1703 return 0;
1705 success = L3_restore_reservoir(dec, bs_frame, &scratch, main_data_begin);
1706 if (success)
1708 for (igr = 0; igr < (HDR_TEST_MPEG1(hdr) ? 2 : 1); igr++, pcm += 576*info->channels)
1710 memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
1711 L3_decode(dec, &scratch, scratch.gr_info + igr*info->channels, info->channels);
1712 mp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 18, info->channels, pcm, scratch.syn[0]);
1715 L3_save_reservoir(dec, &scratch);
1716 } else
1718 #ifdef MINIMP3_ONLY_MP3
1719 return 0;
1720 #else /* MINIMP3_ONLY_MP3 */
1721 L12_scale_info sci[1];
1722 L12_read_scale_info(hdr, bs_frame, sci);
1724 memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
1725 for (i = 0, igr = 0; igr < 3; igr++)
1727 if (12 == (i += L12_dequantize_granule(scratch.grbuf[0] + i, bs_frame, sci, info->layer | 1)))
1729 i = 0;
1730 L12_apply_scf_384(sci, sci->scf + igr, scratch.grbuf[0]);
1731 mp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 12, info->channels, pcm, scratch.syn[0]);
1732 memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
1733 pcm += 384*info->channels;
1735 if (bs_frame->pos > bs_frame->limit)
1737 mp3dec_init(dec);
1738 return 0;
1741 #endif /* MINIMP3_ONLY_MP3 */
1743 return success*hdr_frame_samples(dec->header);
1746 #ifdef MINIMP3_FLOAT_OUTPUT
/*
 * Convert float samples to signed 16-bit PCM.
 *
 * Each input is multiplied by 32768 and saturated to [-32768, 32767].
 *
 * in:          num_samples input floats.
 * out:         receives num_samples int16_t values.
 * num_samples: sample count; nothing is read or written when it is <= 0.
 *
 * When HAVE_SIMD is set, samples are processed eight at a time and the
 * remainder (num_samples % 8) goes through the scalar loop.
 */
void mp3dec_f32_to_s16(const float *in, int16_t *out, int num_samples)
{
    if(num_samples > 0)
    {
        int i = 0;
#if HAVE_SIMD
        /* Largest multiple of 8 not exceeding num_samples. */
        int aligned_count = num_samples & ~7;

        for(;i < aligned_count;i+=8)
        {
            static const f4 g_scale = { 32768.0f, 32768.0f, 32768.0f, 32768.0f };
            f4 a = VMUL(VLD(&in[i  ]), g_scale);
            f4 b = VMUL(VLD(&in[i+4]), g_scale);
#if HAVE_SSE
            static const f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f };
            static const f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f };
            /* Clamp in float, convert to int32, then pack both halves into eight int16 lanes. */
            __m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, g_max), g_min)),
                                           _mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, g_max), g_min)));
            out[i  ] = _mm_extract_epi16(pcm8, 0);
            out[i+1] = _mm_extract_epi16(pcm8, 1);
            out[i+2] = _mm_extract_epi16(pcm8, 2);
            out[i+3] = _mm_extract_epi16(pcm8, 3);
            out[i+4] = _mm_extract_epi16(pcm8, 4);
            out[i+5] = _mm_extract_epi16(pcm8, 5);
            out[i+6] = _mm_extract_epi16(pcm8, 6);
            out[i+7] = _mm_extract_epi16(pcm8, 7);
#else /* HAVE_SSE */
            int16x4_t pcma, pcmb;
            a = VADD(a, VSET(0.5f));
            b = VADD(b, VSET(0.5f));
            /* The "less than zero" compare mask is added as an integer, i.e. -1 in negative lanes. */
            pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, VSET(0)))));
            pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, VSET(0)))));
            vst1_lane_s16(out+i  , pcma, 0);
            vst1_lane_s16(out+i+1, pcma, 1);
            vst1_lane_s16(out+i+2, pcma, 2);
            vst1_lane_s16(out+i+3, pcma, 3);
            vst1_lane_s16(out+i+4, pcmb, 0);
            vst1_lane_s16(out+i+5, pcmb, 1);
            vst1_lane_s16(out+i+6, pcmb, 2);
            vst1_lane_s16(out+i+7, pcmb, 3);
#endif /* HAVE_SSE */
        }
#endif /* HAVE_SIMD */
        for(; i < num_samples; i++)
        {
            float sample = in[i] * 32768.0f;
            if (sample >=  32766.5)
                out[i] = (int16_t) 32767;
            else if (sample <= -32767.5)
                out[i] = (int16_t)-32768;
            else
            {
                /*
                 * Add 0.5, truncate toward zero, then subtract 1 from
                 * negative results. NOTE(review): scaled values in
                 * (-1.5, -0.5) truncate to 0 before the adjustment and so
                 * yield 0; kept exactly as in the original code.
                 */
                int16_t s = (int16_t)(sample + .5f);
                s -= (s < 0);   /* away from zero, to be compliant */
                out[i] = s;
            }
        }
    }
}
1806 #endif /* MINIMP3_FLOAT_OUTPUT */
1807 #endif /* MINIMP3_IMPLEMENTATION && !_MINIMP3_IMPLEMENTATION_GUARD */