Fix for MiniMP3 with NEON intrinsics
[alure.git] / src / decoders / minimp3.h
blob46825c8d2ec56ddde27f086ef2feb5268c9dd5cb
1 #ifndef MINIMP3_H
2 #define MINIMP3_H
3 /*
4 https://github.com/lieff/minimp3
5 To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide.
6 This software is distributed without any warranty.
7 See <http://creativecommons.org/publicdomain/zero/1.0/>.
8 */
/* Size used for PCM output buffers; presumably 1152 samples x 2 channels (confirm against callers). */
#define MINIMP3_MAX_SAMPLES_PER_FRAME (1152*2)

/* Description of one frame, filled in through the `info` argument of the decode calls. */
typedef struct
{
    int frame_bytes, channels, hz, layer, bitrate_kbps;
} mp3dec_frame_info_t;

/* Decoder state carried from one frame to the next. */
typedef struct
{
    float mdct_overlap[2][9*32], qmf_state[15*2*32];
    int reserv, free_format_bytes;
    unsigned char header[4], reserv_buf[511];
} mp3dec_t;

#ifdef __cplusplus
extern "C" {
#endif

void mp3dec_init(mp3dec_t *dec);
int mp3dec_decode_frame(mp3dec_t *dec, const unsigned char *mp3, int mp3_bytes, float *pcm, mp3dec_frame_info_t *info);
int mp3dec_decode_frame_s16(mp3dec_t *dec, const unsigned char *mp3, int mp3_bytes, short *pcm, mp3dec_frame_info_t *info);

#ifdef __cplusplus
}
#endif
36 #ifdef MINIMP3_IMPLEMENTATION
38 #include <stdlib.h>
39 #include <string.h>
40 #include <stdint.h>
#define MAX_FREE_FORMAT_FRAME_SIZE  2304    /* more than ISO spec's */
#define MAX_FRAME_SYNC_MATCHES      10

#define MAX_L3_FRAME_PAYLOAD_BYTES  MAX_FREE_FORMAT_FRAME_SIZE /* MUST be >= 320000/8/32000*1152 = 1440 */

#define MAX_BITRESERVOIR_BYTES      511
#define SHORT_BLOCK_TYPE            2
#define STOP_BLOCK_TYPE             3
#define MODE_MONO                   3
#define MODE_JOINT_STEREO           1
#define HDR_SIZE                    4

/*
 * Accessors for the 4-byte frame header `h` (any expression yielding a
 * pointer to at least HDR_SIZE bytes). The argument is parenthesized before
 * indexing so that expressions such as `buf + i` are handled correctly.
 */
#define HDR_IS_MONO(h)              ((((h)[3]) & 0xC0) == 0xC0)
#define HDR_IS_MS_STEREO(h)         ((((h)[3]) & 0xE0) == 0x60)
#define HDR_IS_FREE_FORMAT(h)       ((((h)[2]) & 0xF0) == 0)
#define HDR_IS_CRC(h)               (!(((h)[1]) & 1))
#define HDR_TEST_PADDING(h)         (((h)[2]) & 0x2)
#define HDR_TEST_MPEG1(h)           (((h)[1]) & 0x8)
#define HDR_TEST_NOT_MPEG25(h)      (((h)[1]) & 0x10)
#define HDR_TEST_I_STEREO(h)        (((h)[3]) & 0x10)
#define HDR_TEST_MS_STEREO(h)       (((h)[3]) & 0x20)
#define HDR_GET_STEREO_MODE(h)      ((((h)[3]) >> 6) & 3)
#define HDR_GET_STEREO_MODE_EXT(h)  ((((h)[3]) >> 4) & 3)
#define HDR_GET_LAYER(h)            ((((h)[1]) >> 1) & 3)
#define HDR_GET_BITRATE(h)          (((h)[2]) >> 4)
#define HDR_GET_SAMPLE_RATE(h)      ((((h)[2]) >> 2) & 3)
#define HDR_GET_MY_SAMPLE_RATE(h)   (HDR_GET_SAMPLE_RATE(h) + ((((h)[1] >> 3) & 1) + (((h)[1] >> 4) & 1))*3)
#define HDR_IS_FRAME_576(h)         ((((h)[1]) & 14) == 2)
#define HDR_IS_LAYER_1(h)           ((((h)[1]) & 6) == 6)

/* Negative literal is parenthesized so it is safe in any surrounding expression. */
#define BITS_DEQUANTIZER_OUT        (-1)
#define MAX_SCF                     (255 + BITS_DEQUANTIZER_OUT*4 - 210)
#define MAX_SCFI                    ((MAX_SCF + 3) & ~3)

/* NOTE: both macros evaluate their arguments more than once. */
#define MINIMP3_MIN(a, b)           ((a) > (b) ? (b) : (a))
#define MINIMP3_MAX(a, b)           ((a) < (b) ? (b) : (a))
/*
 * SIMD configuration.
 *
 * Defines HAVE_SIMD (0 or 1). When it is 1, also defines the 4-float vector
 * type `f4`, the V* wrapper macros over SSE or NEON intrinsics, and
 * have_simd(), which returns non-zero when the vector path may be used.
 * Define MINIMP3_NO_SIMD to force HAVE_SIMD to 0.
 */
#if !defined(MINIMP3_NO_SIMD)

#if !defined(MINIMP3_ONLY_SIMD) && (defined(_M_X64) || defined(_M_ARM64) || defined(__x86_64__) || defined(__aarch64__))
/* x64 always have SSE2, arm64 always have neon, no need for generic code */
#define MINIMP3_ONLY_SIMD
#endif

#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || ((defined(__i386__) || defined(__x86_64__)) && defined(__SSE2__))
#if defined(_MSC_VER)
#include <intrin.h>
#endif
#include <immintrin.h>
#define HAVE_SSE 1
#define HAVE_SIMD 1
#define VSTORE _mm_storeu_ps
#define VLD _mm_loadu_ps
#define VSET _mm_set1_ps
#define VADD _mm_add_ps
#define VSUB _mm_sub_ps
#define VMUL _mm_mul_ps
#define VMAC(a, x, y) _mm_add_ps(a, _mm_mul_ps(x, y))
#define VMSB(a, x, y) _mm_sub_ps(a, _mm_mul_ps(x, y))
#define VMUL_S(x, s)  _mm_mul_ps(x, _mm_set1_ps(s))
#define VREV(x) _mm_shuffle_ps(x, x, _MM_SHUFFLE(0, 1, 2, 3))
typedef __m128 f4;
#if defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD)
#define minimp3_cpuid __cpuid
#else
/* Executes CPUID leaf `InfoType` and stores EAX, EBX, ECX, EDX in CPUInfo[0..3]. */
static __inline__ __attribute__((always_inline)) void minimp3_cpuid(int CPUInfo[], const int InfoType)
{
#if defined(__PIC__)
    /* EBX/RBX is saved and restored around CPUID in position-independent builds. */
    __asm__ __volatile__(
#if defined(__x86_64__)
        "push %%rbx\n"
        "cpuid\n"
        "xchgl %%ebx, %1\n"
        "pop %%rbx\n"
#else
        "xchgl %%ebx, %1\n"
        "cpuid\n"
        "xchgl %%ebx, %1\n"
#endif
        : "=a" (CPUInfo[0]), "=r" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3])
        : "a" (InfoType));
#else
    __asm__ __volatile__(
        "cpuid"
        : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3])
        : "a" (InfoType));
#endif
}
#endif
/*
 * Returns non-zero when SSE2 may be used.
 * With MINIMP3_ONLY_SIMD the answer is always 1; otherwise CPUID is queried
 * once and the result is cached in a function-local static.
 */
static int have_simd(void)
{
#ifdef MINIMP3_ONLY_SIMD
    return 1;
#else
    /* 0 = not probed yet; otherwise (detected flag + 1). */
    static int g_have_simd;
    int CPUInfo[4];
#ifdef MINIMP3_TEST
    static int g_counter;
    if (g_counter++ > 100)
        return 0;
#endif
    if (!g_have_simd)
    {
        minimp3_cpuid(CPUInfo, 0);
        g_have_simd = 1;
        if (CPUInfo[0] > 0)
        {
            minimp3_cpuid(CPUInfo, 1);
            g_have_simd = (CPUInfo[3] & (1 << 26)) + 1; /* SSE2 */
        }
    }
    return g_have_simd - 1;
#endif
}
#elif defined(__ARM_NEON) || defined(__aarch64__)
#include <arm_neon.h>
#define HAVE_SIMD 1
#define VSTORE vst1q_f32
#define VLD vld1q_f32
#define VSET vmovq_n_f32
#define VADD vaddq_f32
#define VSUB vsubq_f32
#define VMUL vmulq_f32
#define VMAC(a, x, y) vmlaq_f32(a, x, y)
#define VMSB(a, x, y) vmlsq_f32(a, x, y)
#define VMUL_S(x, s)  vmulq_f32(x, vmovq_n_f32(s))
#define VREV(x) vcombine_f32(vget_high_f32(vrev64q_f32(x)), vget_low_f32(vrev64q_f32(x)))
typedef float32x4_t f4;
/* Always reports NEON as available. */
static int have_simd(void)
{   /* TODO: detect neon for !MINIMP3_ONLY_SIMD */
    return 1;
}
#else
#define HAVE_SIMD 0
#ifdef MINIMP3_ONLY_SIMD
#error MINIMP3_ONLY_SIMD used, but SSE/NEON not enabled
#endif
#endif

#else

#define HAVE_SIMD 0

#endif
/* Bit reader: `pos` and `limit` are bit offsets into `buf`. */
typedef struct
{
    const uint8_t *buf;
    int pos, limit;
} bs_t;

/* Layer I/II per-frame scale information. */
typedef struct
{
    float scf[3*64];
    uint8_t total_bands, stereo_bands, bitalloc[64], scfcod[64];
} L12_scale_info;

/* One run of Layer I/II subbands sharing a bit-allocation code table. */
typedef struct
{
    uint8_t tab_offset, code_tab_width, band_count;
} L12_subband_alloc_t;

/* Layer III side information for one granule of one channel. */
typedef struct
{
    const uint8_t *sfbtab;
    uint16_t part_23_length, big_values, scalefac_compress;
    uint8_t global_gain, block_type, mixed_block_flag, n_long_sfb, n_short_sfb;
    uint8_t table_select[3], region_count[3], subblock_gain[3];
    uint8_t preflag, scalefac_scale, count1_table, scfsi;
} L3_gr_info_t;
212 typedef struct
214 bs_t bs;
215 uint8_t maindata[MAX_BITRESERVOIR_BYTES + MAX_L3_FRAME_PAYLOAD_BYTES];
216 L3_gr_info_t gr_info[4];
217 float grbuf[2][576], scf[40], syn[18 + 15][2*32];
218 uint8_t ist_pos[2][39];
219 } mp3dec_scratch_t;
221 static void bs_init(bs_t *bs, const uint8_t *data, int bytes)
223 bs->buf = data;
224 bs->pos = 0;
225 bs->limit = bytes*8;
228 static uint32_t get_bits(bs_t *bs, int n)
230 uint32_t next, cache = 0, s = bs->pos & 7;
231 int shl = n + s;
232 const uint8_t *p = bs->buf + (bs->pos >> 3);
233 if ((bs->pos += n) > bs->limit)
234 return 0;
235 next = *p++ & (255 >> s);
236 while ((shl -= 8) > 0)
238 cache |= next << shl;
239 next = *p++;
241 return cache | (next >> -shl);
/*
 * Returns non-zero when the 4 bytes at `h` pass the header sanity checks:
 * first byte 0xff, second byte 0xFx or 0xE2/0xE3, and the layer, bitrate and
 * sample-rate fields not equal to 0, 15 and 3 respectively.
 */
static int hdr_valid(const uint8_t *h)
{
    return h[0] == 0xff &&
        ((h[1] & 0xF0) == 0xf0 || (h[1] & 0xFE) == 0xe2) &&
        (HDR_GET_LAYER(h) != 0) &&
        (HDR_GET_BITRATE(h) != 15) &&
        (HDR_GET_SAMPLE_RATE(h) != 3);
}
/*
 * Returns non-zero when `h2` is a valid header that matches `h1` in byte 1
 * (ignoring its lowest bit), in the sample-rate field of byte 2, and in
 * whether the stream is free-format.
 */
static int hdr_compare(const uint8_t *h1, const uint8_t *h2)
{
    return hdr_valid(h2) &&
        ((h1[1] ^ h2[1]) & 0xFE) == 0 &&
        ((h1[2] ^ h2[2]) & 0x0C) == 0 &&
        !(HDR_IS_FREE_FORMAT(h1) ^ HDR_IS_FREE_FORMAT(h2));
}
/*
 * Returns the bitrate in kbps selected by header `h` (0 for index 0).
 * The table stores half the rate so each entry fits in a uint8_t; it is
 * indexed by [MPEG1 flag][layer field - 1][bitrate index].
 * The header must already have passed hdr_valid(): layer field 0 or bitrate
 * index 15 would index outside the table.
 */
static unsigned hdr_bitrate_kbps(const uint8_t *h)
{
    static const uint8_t halfrate[2][3][15] = {
        { { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 }, { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 }, { 0,16,24,28,32,40,48,56,64,72,80,88,96,112,128 } },
        { { 0,16,20,24,28,32,40,48,56,64,80,96,112,128,160 }, { 0,16,24,28,32,40,48,56,64,80,96,112,128,160,192 }, { 0,16,32,48,64,80,96,112,128,144,160,176,192,208,224 } },
    };
    return 2*halfrate[!!HDR_TEST_MPEG1(h)][HDR_GET_LAYER(h) - 1][HDR_GET_BITRATE(h)];
}
/*
 * Returns the sample rate in Hz for header `h`: the base rate picked by the
 * sample-rate field, halved when the MPEG1 bit is clear and halved again
 * when the "not MPEG 2.5" bit is clear.
 * The sample-rate field must not be 3 (rejected by hdr_valid()).
 */
static unsigned hdr_sample_rate_hz(const uint8_t *h)
{
    static const unsigned g_hz[3] = { 44100, 48000, 32000 };
    return g_hz[HDR_GET_SAMPLE_RATE(h)] >> (int)!HDR_TEST_MPEG1(h) >> (int)!HDR_TEST_NOT_MPEG25(h);
}
/* Returns the samples per frame: 384 for Layer I, otherwise 1152, or 576 when HDR_IS_FRAME_576 holds. */
static unsigned hdr_frame_samples(const uint8_t *h)
{
    return HDR_IS_LAYER_1(h) ? 384 : (1152 >> (int)HDR_IS_FRAME_576(h));
}
/*
 * Returns the frame size in bytes (without padding) derived from header `h`.
 * When the computed size is 0 (bitrate index 0), `free_format_size` is
 * returned instead.
 */
static int hdr_frame_bytes(const uint8_t *h, int free_format_size)
{
    int frame_bytes = hdr_frame_samples(h)*hdr_bitrate_kbps(h)*125/hdr_sample_rate_hz(h);
    if (HDR_IS_LAYER_1(h))
    {
        frame_bytes &= ~3; /* slot align */
    }
    return frame_bytes ? frame_bytes : free_format_size;
}
/* Returns the padding in bytes signalled by header `h`: 0 if the padding bit is clear, else 4 for Layer I and 1 otherwise. */
static int hdr_padding(const uint8_t *h)
{
    return HDR_TEST_PADDING(h) ? (HDR_IS_LAYER_1(h) ? 4 : 1) : 0;
}
296 #ifndef MINIMP3_ONLY_MP3
297 static const L12_subband_alloc_t *L12_subband_alloc_table(const uint8_t *hdr, L12_scale_info *sci)
299 const L12_subband_alloc_t *alloc;
300 int mode = HDR_GET_STEREO_MODE(hdr);
301 int nbands, stereo_bands = (mode == MODE_MONO) ? 0 : (mode == MODE_JOINT_STEREO) ? (HDR_GET_STEREO_MODE_EXT(hdr) << 2) + 4 : 32;
303 if (HDR_IS_LAYER_1(hdr))
305 static const L12_subband_alloc_t g_alloc_L1[] = { { 76, 4, 32 } };
306 alloc = g_alloc_L1;
307 nbands = 32;
308 } else if (!HDR_TEST_MPEG1(hdr))
310 static const L12_subband_alloc_t g_alloc_L2M2[] = { { 60, 4, 4 }, { 44, 3, 7 }, { 44, 2, 19 } };
311 alloc = g_alloc_L2M2;
312 nbands = 30;
313 } else
315 static const L12_subband_alloc_t g_alloc_L2M1[] = { { 0, 4, 3 }, { 16, 4, 8 }, { 32, 3, 12 }, { 40, 2, 7 } };
316 int sample_rate_idx = HDR_GET_SAMPLE_RATE(hdr);
317 unsigned kbps = hdr_bitrate_kbps(hdr) >> (int)(mode != MODE_MONO);
318 if (!kbps) /* free-format */
320 kbps = 192;
323 alloc = g_alloc_L2M1;
324 nbands = 27;
325 if (kbps < 56)
327 static const L12_subband_alloc_t g_alloc_L2M1_lowrate[] = { { 44, 4, 2 }, { 44, 3, 10 } };
328 alloc = g_alloc_L2M1_lowrate;
329 nbands = sample_rate_idx == 2 ? 12 : 8;
330 } else if (kbps >= 96 && sample_rate_idx != 1)
332 nbands = 30;
336 sci->total_bands = (uint8_t)nbands;
337 sci->stereo_bands = (uint8_t)MINIMP3_MIN(stereo_bands, nbands);
339 return alloc;
342 static void L12_read_scalefactors(bs_t *bs, uint8_t *pba, uint8_t *scfcod, int bands, float *scf)
344 static const float g_deq_L12[18*3] = {
345 #define DQ(x) 9.53674316e-07f/x, 7.56931807e-07f/x, 6.00777173e-07f/x
346 DQ(3),DQ(7),DQ(15),DQ(31),DQ(63),DQ(127),DQ(255),DQ(511),DQ(1023),DQ(2047),DQ(4095),DQ(8191),DQ(16383),DQ(32767),DQ(65535),DQ(3),DQ(5),DQ(9)
348 int i, m;
349 for (i = 0; i < bands; i++)
351 float s = 0;
352 int ba = *pba++;
353 int mask = ba ? 4 + ((19 >> scfcod[i]) & 3) : 0;
354 for (m = 4; m; m >>= 1)
356 if (mask & m)
358 int b = get_bits(bs, 6);
359 s = g_deq_L12[ba*3 - 6 + b % 3]*(1 << 21 >> b/3);
361 *scf++ = s;
366 static void L12_read_scale_info(const uint8_t *hdr, bs_t *bs, L12_scale_info *sci)
368 static const uint8_t g_bitalloc_code_tab[] = {
369 0,17, 3, 4, 5,6,7, 8,9,10,11,12,13,14,15,16,
370 0,17,18, 3,19,4,5, 6,7, 8, 9,10,11,12,13,16,
371 0,17,18, 3,19,4,5,16,
372 0,17,18,16,
373 0,17,18,19, 4,5,6, 7,8, 9,10,11,12,13,14,15,
374 0,17,18, 3,19,4,5, 6,7, 8, 9,10,11,12,13,14,
375 0, 2, 3, 4, 5,6,7, 8,9,10,11,12,13,14,15,16
377 const L12_subband_alloc_t *subband_alloc = L12_subband_alloc_table(hdr, sci);
379 int i, k = 0, ba_bits = 0;
380 const uint8_t *ba_code_tab = g_bitalloc_code_tab;
382 for (i = 0; i < sci->total_bands; i++)
384 uint8_t ba;
385 if (i == k)
387 k += subband_alloc->band_count;
388 ba_bits = subband_alloc->code_tab_width;
389 ba_code_tab = g_bitalloc_code_tab + subband_alloc->tab_offset;
390 subband_alloc++;
392 ba = ba_code_tab[get_bits(bs, ba_bits)];
393 sci->bitalloc[2*i] = ba;
394 if (i < sci->stereo_bands)
396 ba = ba_code_tab[get_bits(bs, ba_bits)];
398 sci->bitalloc[2*i + 1] = sci->stereo_bands ? ba : 0;
401 for (i = 0; i < 2*sci->total_bands; i++)
403 sci->scfcod[i] = sci->bitalloc[i] ? HDR_IS_LAYER_1(hdr) ? 2 : get_bits(bs, 2) : 6;
406 L12_read_scalefactors(bs, sci->bitalloc, sci->scfcod, sci->total_bands*2, sci->scf);
408 for (i = sci->stereo_bands; i < sci->total_bands; i++)
410 sci->bitalloc[2*i + 1] = 0;
414 static int L12_dequantize_granule(float *grbuf, bs_t *bs, L12_scale_info *sci, int group_size)
416 int i, j, k, choff = 576;
417 for (j = 0; j < 4; j++)
419 float *dst = grbuf + group_size*j;
420 for (i = 0; i < 2*sci->total_bands; i++)
422 int ba = sci->bitalloc[i];
423 if (ba != 0)
425 if (ba < 17)
427 int half = (1 << (ba - 1)) - 1;
428 for (k = 0; k < group_size; k++)
430 dst[k] = (float)((int)get_bits(bs, ba) - half);
432 } else
434 unsigned mod = (2 << (ba - 17)) + 1; /* 3, 5, 9 */
435 unsigned code = get_bits(bs, mod + 2 - (mod >> 3)); /* 5, 7, 10 */
436 for (k = 0; k < group_size; k++, code /= mod)
438 dst[k] = (float)((int)(code % mod - mod/2));
442 dst += choff;
443 choff = 18 - choff;
446 return group_size*4;
449 static void L12_apply_scf_384(L12_scale_info *sci, const float *scf, float *dst)
451 int i, k;
452 memcpy(dst + 576 + sci->stereo_bands*18, dst + sci->stereo_bands*18, (sci->total_bands - sci->stereo_bands)*18*sizeof(float));
453 for (i = 0; i < sci->total_bands; i++, dst += 18, scf += 6)
455 for (k = 0; k < 12; k++)
457 dst[k + 0] *= scf[0];
458 dst[k + 576] *= scf[3];
462 #endif
464 static int L3_read_side_info(bs_t *bs, L3_gr_info_t *gr, const uint8_t *hdr)
466 static const uint8_t g_scf_long[8][23] = {
467 { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },
468 { 12,12,12,12,12,12,16,20,24,28,32,40,48,56,64,76,90,2,2,2,2,2,0 },
469 { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },
470 { 6,6,6,6,6,6,8,10,12,14,16,18,22,26,32,38,46,54,62,70,76,36,0 },
471 { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },
472 { 4,4,4,4,4,4,6,6,8,8,10,12,16,20,24,28,34,42,50,54,76,158,0 },
473 { 4,4,4,4,4,4,6,6,6,8,10,12,16,18,22,28,34,40,46,54,54,192,0 },
474 { 4,4,4,4,4,4,6,6,8,10,12,16,20,24,30,38,46,56,68,84,102,26,0 }
476 static const uint8_t g_scf_short[8][40] = {
477 { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
478 { 8,8,8,8,8,8,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 },
479 { 4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 },
480 { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,32,32,32,44,44,44,12,12,12,0 },
481 { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
482 { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,22,22,22,30,30,30,56,56,56,0 },
483 { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,10,10,10,12,12,12,14,14,14,16,16,16,20,20,20,26,26,26,66,66,66,0 },
484 { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,12,12,12,16,16,16,20,20,20,26,26,26,34,34,34,42,42,42,12,12,12,0 }
486 static const uint8_t g_scf_mixed[8][40] = {
487 { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
488 { 12,12,12,4,4,4,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 },
489 { 6,6,6,6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 },
490 { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,32,32,32,44,44,44,12,12,12,0 },
491 { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
492 { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,22,22,22,30,30,30,56,56,56,0 },
493 { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,6,6,6,10,10,10,12,12,12,14,14,14,16,16,16,20,20,20,26,26,26,66,66,66,0 },
494 { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,8,8,8,12,12,12,16,16,16,20,20,20,26,26,26,34,34,34,42,42,42,12,12,12,0 }
497 unsigned tables, scfsi = 0;
498 int main_data_begin, part_23_sum = 0;
499 int sr_idx = HDR_GET_MY_SAMPLE_RATE(hdr); sr_idx -= (sr_idx != 0);
500 int gr_count = HDR_IS_MONO(hdr) ? 1 : 2;
502 if (HDR_TEST_MPEG1(hdr))
504 gr_count *= 2;
505 main_data_begin = get_bits(bs, 9);
506 scfsi = get_bits(bs, 7 + gr_count);
507 } else
509 main_data_begin = get_bits(bs, 8 + gr_count) >> gr_count;
514 if (HDR_IS_MONO(hdr))
516 scfsi <<= 4;
518 gr->part_23_length = (uint16_t)get_bits(bs, 12);
519 part_23_sum += gr->part_23_length;
520 gr->big_values = (uint16_t)get_bits(bs, 9);
521 if (gr->big_values > 288)
523 return -1;
525 gr->global_gain = (uint8_t)get_bits(bs, 8);
526 gr->scalefac_compress = (uint16_t)get_bits(bs, HDR_TEST_MPEG1(hdr) ? 4 : 9);
527 gr->sfbtab = g_scf_long[sr_idx];
528 gr->n_long_sfb = 22;
529 gr->n_short_sfb = 0;
530 if (get_bits(bs, 1))
532 gr->block_type = (uint8_t)get_bits(bs, 2);
533 if (!gr->block_type)
535 return -1;
537 gr->mixed_block_flag = (uint8_t)get_bits(bs, 1);
538 gr->region_count[0] = 7;
539 gr->region_count[1] = 255;
540 if (gr->block_type == SHORT_BLOCK_TYPE)
542 scfsi &= 0x0F0F;
543 if (!gr->mixed_block_flag)
545 gr->region_count[0] = 8;
546 gr->sfbtab = g_scf_short[sr_idx];
547 gr->n_long_sfb = 0;
548 gr->n_short_sfb = 39;
549 } else
551 gr->sfbtab = g_scf_mixed[sr_idx];
552 gr->n_long_sfb = HDR_TEST_MPEG1(hdr) ? 8 : 6;
553 gr->n_short_sfb = 30;
556 tables = get_bits(bs, 10);
557 tables <<= 5;
558 gr->subblock_gain[0] = (uint8_t)get_bits(bs, 3);
559 gr->subblock_gain[1] = (uint8_t)get_bits(bs, 3);
560 gr->subblock_gain[2] = (uint8_t)get_bits(bs, 3);
561 } else
563 gr->block_type = 0;
564 gr->mixed_block_flag = 0;
565 tables = get_bits(bs, 15);
566 gr->region_count[0] = (uint8_t)get_bits(bs, 4);
567 gr->region_count[1] = (uint8_t)get_bits(bs, 3);
568 gr->region_count[2] = 255;
570 gr->table_select[0] = (uint8_t)(tables >> 10);
571 gr->table_select[1] = (uint8_t)((tables >> 5) & 31);
572 gr->table_select[2] = (uint8_t)((tables) & 31);
573 gr->preflag = HDR_TEST_MPEG1(hdr) ? get_bits(bs, 1) : (gr->scalefac_compress >= 500);
574 gr->scalefac_scale = (uint8_t)get_bits(bs, 1);
575 gr->count1_table = (uint8_t)get_bits(bs, 1);
576 gr->scfsi = (uint8_t)((scfsi >> 12) & 15);
577 scfsi <<= 4;
578 gr++;
579 } while(--gr_count);
581 if (part_23_sum + bs->pos > bs->limit + main_data_begin*8)
583 return -1;
586 return main_data_begin;
589 static void L3_read_scalefactors(uint8_t *scf, uint8_t *ist_pos, const uint8_t *scf_size, const uint8_t *scf_count, bs_t *bitbuf, int scfsi)
591 int i, k;
592 for (i = 0; i < 4 && scf_count[i]; i++, scfsi *= 2)
594 int cnt = scf_count[i];
595 if (scfsi & 8)
597 memcpy(scf, ist_pos, cnt);
598 } else
600 int bits = scf_size[i];
601 if (!bits)
603 memset(scf, 0, cnt);
604 memset(ist_pos, 0, cnt);
605 } else
607 int max_scf = (scfsi < 0) ? (1 << bits) - 1 : -1;
608 for (k = 0; k < cnt; k++)
610 int s = get_bits(bitbuf, bits);
611 ist_pos[k] = (s == max_scf ? -1 : s);
612 scf[k] = s;
616 ist_pos += cnt;
617 scf += cnt;
619 scf[0] = scf[1] = scf[2] = 0;
/*
 * Returns y * 2^(-exp_q2/4), i.e. scales `y` down by an exponent given in
 * quarter steps. `exp_q2` must be non-negative (a negative value would make
 * the shift count negative). The exponent is consumed in chunks of at most
 * 120 so the integer shift stays within 30 bits.
 */
static float L3_ldexp_q2(float y, int exp_q2)
{
    /* 2^-30 multiplied by 2^(-k/4) for k = 0..3. */
    static const float g_expfrac[4] = { 9.31322575e-10f,7.83145814e-10f,6.58544508e-10f,5.53767716e-10f };
    int e;
    do
    {
        e = (exp_q2 < 30*4) ? exp_q2 : 30*4;
        y *= g_expfrac[e & 3]*(1 << 30 >> (e >> 2));
    } while ((exp_q2 -= e) > 0);
    return y;
}
634 static void L3_decode_scalefactors(const uint8_t *hdr, uint8_t *ist_pos, bs_t *bs, const L3_gr_info_t *gr, float *scf, int ch)
636 static const uint8_t g_scf_partitions[3][28] = {
637 { 6,5,5, 5,6,5,5,5,6,5, 7,3,11,10,0,0, 7, 7, 7,0, 6, 6,6,3, 8, 8,5,0 },
638 { 8,9,6,12,6,9,9,9,6,9,12,6,15,18,0,0, 6,15,12,0, 6,12,9,6, 6,18,9,0 },
639 { 9,9,6,12,9,9,9,9,9,9,12,6,18,18,0,0,12,12,12,0,12, 9,9,6,15,12,9,0 }
641 const uint8_t *scf_partition = g_scf_partitions[!!gr->n_short_sfb + !gr->n_long_sfb];
642 uint8_t scf_size[4], iscf[40];
643 int i, scf_shift = gr->scalefac_scale + 1, gain_exp, scfsi = gr->scfsi;
644 float gain;
646 if (HDR_TEST_MPEG1(hdr))
648 static const uint8_t g_scfc_decode[16] = { 0,1,2,3, 12,5,6,7, 9,10,11,13, 14,15,18,19 };
649 int part = g_scfc_decode[gr->scalefac_compress];
650 scf_size[1] = scf_size[0] = (uint8_t)(part >> 2);
651 scf_size[3] = scf_size[2] = (uint8_t)(part & 3);
652 } else
654 static const uint8_t g_mod[6*4] = { 5,5,4,4,5,5,4,1,4,3,1,1,5,6,6,1,4,4,4,1,4,3,1,1 };
655 int k, modprod, sfc, ist = HDR_TEST_I_STEREO(hdr) && ch;
656 sfc = gr->scalefac_compress >> ist;
657 for (k = ist*3*4; sfc >= 0; sfc -= modprod, k += 4)
659 for (modprod = 1, i = 3; i >= 0; i--)
661 scf_size[i] = (uint8_t)(sfc / modprod % g_mod[k + i]);
662 modprod *= g_mod[k + i];
665 scf_partition += k;
666 scfsi = -16;
668 L3_read_scalefactors(iscf, ist_pos, scf_size, scf_partition, bs, scfsi);
670 if (gr->n_short_sfb)
672 int sh = 3 - scf_shift;
673 for (i = 0; i < gr->n_short_sfb; i += 3)
675 iscf[gr->n_long_sfb + i + 0] += gr->subblock_gain[0] << sh;
676 iscf[gr->n_long_sfb + i + 1] += gr->subblock_gain[1] << sh;
677 iscf[gr->n_long_sfb + i + 2] += gr->subblock_gain[2] << sh;
679 } else if (gr->preflag)
681 static const uint8_t g_preamp[10] = { 1,1,1,1,2,2,3,3,3,2 };
682 for (i = 0; i < 10; i++)
684 iscf[11 + i] += g_preamp[i];
688 gain_exp = gr->global_gain + BITS_DEQUANTIZER_OUT*4 - 210 - (HDR_IS_MS_STEREO(hdr) ? 2 : 0);
689 gain = L3_ldexp_q2(1 << (MAX_SCFI/4), MAX_SCFI - gain_exp);
690 for (i = 0; i < (int)(gr->n_long_sfb + gr->n_short_sfb); i++)
692 scf[i] = L3_ldexp_q2(gain, iscf[i] << scf_shift);
696 static const float g_pow43[129 + 16] = {
697 0,-1,-2.519842f,-4.326749f,-6.349604f,-8.549880f,-10.902724f,-13.390518f,-16.000000f,-18.720754f,-21.544347f,-24.463781f,-27.473142f,-30.567351f,-33.741992f,-36.993181f,
698 0,1,2.519842f,4.326749f,6.349604f,8.549880f,10.902724f,13.390518f,16.000000f,18.720754f,21.544347f,24.463781f,27.473142f,30.567351f,33.741992f,36.993181f,40.317474f,43.711787f,47.173345f,50.699631f,54.288352f,57.937408f,61.644865f,65.408941f,69.227979f,73.100443f,77.024898f,81.000000f,85.024491f,89.097188f,93.216975f,97.382800f,101.593667f,105.848633f,110.146801f,114.487321f,118.869381f,123.292209f,127.755065f,132.257246f,136.798076f,141.376907f,145.993119f,150.646117f,155.335327f,160.060199f,164.820202f,169.614826f,174.443577f,179.305980f,184.201575f,189.129918f,194.090580f,199.083145f,204.107210f,209.162385f,214.248292f,219.364564f,224.510845f,229.686789f,234.892058f,240.126328f,245.389280f,250.680604f,256.000000f,261.347174f,266.721841f,272.123723f,277.552547f,283.008049f,288.489971f,293.998060f,299.532071f,305.091761f,310.676898f,316.287249f,321.922592f,327.582707f,333.267377f,338.976394f,344.709550f,350.466646f,356.247482f,362.051866f,367.879608f,373.730522f,379.604427f,385.501143f,391.420496f,397.362314f,403.326427f,409.312672f,415.320884f,421.350905f,427.402579f,433.475750f,439.570269f,445.685987f,451.822757f,457.980436f,464.158883f,470.357960f,476.577530f,482.817459f,489.077615f,495.357868f,501.658090f,507.978156f,514.317941f,520.677324f,527.056184f,533.454404f,539.871867f,546.308458f,552.764065f,559.238575f,565.731879f,572.243870f,578.774440f,585.323483f,591.890898f,598.476581f,605.080431f,611.702349f,618.342238f,625.000000f,631.675540f,638.368763f,645.079578f
701 static float L3_pow_43(int x)
703 float frac;
704 int sign, mult = 256;
706 if (x < 129)
708 return g_pow43[16 + x];
711 if (x < 1024)
713 mult = 16;
714 x <<= 3;
717 sign = 2*x & 64;
718 frac = (float)((x & 63) - sign) / ((x & ~63) + sign);
719 return g_pow43[16 + ((x + sign) >> 6)]*(1.f + frac*((4.f/3) + frac*(2.f/9)))*mult;
722 static void L3_huffman(float *dst, bs_t *bs, const L3_gr_info_t *gr_info, const float *scf, int layer3gr_limit)
724 static const int16_t tabs[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
725 785,785,785,785,784,784,784,784,513,513,513,513,513,513,513,513,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,
726 -255,1313,1298,1282,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,290,288,
727 -255,1313,1298,1282,769,769,769,769,529,529,529,529,529,529,529,529,528,528,528,528,528,528,528,528,512,512,512,512,512,512,512,512,290,288,
728 -253,-318,-351,-367,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,819,818,547,547,275,275,275,275,561,560,515,546,289,274,288,258,
729 -254,-287,1329,1299,1314,1312,1057,1057,1042,1042,1026,1026,784,784,784,784,529,529,529,529,529,529,529,529,769,769,769,769,768,768,768,768,563,560,306,306,291,259,
730 -252,-413,-477,-542,1298,-575,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-383,-399,1107,1092,1106,1061,849,849,789,789,1104,1091,773,773,1076,1075,341,340,325,309,834,804,577,577,532,532,516,516,832,818,803,816,561,561,531,531,515,546,289,289,288,258,
731 -252,-429,-493,-559,1057,1057,1042,1042,529,529,529,529,529,529,529,529,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,-382,1077,-415,1106,1061,1104,849,849,789,789,1091,1076,1029,1075,834,834,597,581,340,340,339,324,804,833,532,532,832,772,818,803,817,787,816,771,290,290,290,290,288,258,
732 -253,-349,-414,-447,-463,1329,1299,-479,1314,1312,1057,1057,1042,1042,1026,1026,785,785,785,785,784,784,784,784,769,769,769,769,768,768,768,768,-319,851,821,-335,836,850,805,849,341,340,325,336,533,533,579,579,564,564,773,832,578,548,563,516,321,276,306,291,304,259,
733 -251,-572,-733,-830,-863,-879,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,1396,1351,1381,1366,1395,1335,1380,-559,1334,1138,1138,1063,1063,1350,1392,1031,1031,1062,1062,1364,1363,1120,1120,1333,1348,881,881,881,881,375,374,359,373,343,358,341,325,791,791,1123,1122,-703,1105,1045,-719,865,865,790,790,774,774,1104,1029,338,293,323,308,-799,-815,833,788,772,818,803,816,322,292,307,320,561,531,515,546,289,274,288,258,
734 -251,-525,-605,-685,-765,-831,-846,1298,1057,1057,1312,1282,785,785,785,785,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,1399,1398,1383,1367,1382,1396,1351,-511,1381,1366,1139,1139,1079,1079,1124,1124,1364,1349,1363,1333,882,882,882,882,807,807,807,807,1094,1094,1136,1136,373,341,535,535,881,775,867,822,774,-591,324,338,-671,849,550,550,866,864,609,609,293,336,534,534,789,835,773,-751,834,804,308,307,833,788,832,772,562,562,547,547,305,275,560,515,290,290,
735 -252,-397,-477,-557,-622,-653,-719,-735,-750,1329,1299,1314,1057,1057,1042,1042,1312,1282,1024,1024,785,785,785,785,784,784,784,784,769,769,769,769,-383,1127,1141,1111,1126,1140,1095,1110,869,869,883,883,1079,1109,882,882,375,374,807,868,838,881,791,-463,867,822,368,263,852,837,836,-543,610,610,550,550,352,336,534,534,865,774,851,821,850,805,593,533,579,564,773,832,578,578,548,548,577,577,307,276,306,291,516,560,259,259,
736 -250,-2107,-2507,-2764,-2909,-2974,-3007,-3023,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-767,-1052,-1213,-1277,-1358,-1405,-1469,-1535,-1550,-1582,-1614,-1647,-1662,-1694,-1726,-1759,-1774,-1807,-1822,-1854,-1886,1565,-1919,-1935,-1951,-1967,1731,1730,1580,1717,-1983,1729,1564,-1999,1548,-2015,-2031,1715,1595,-2047,1714,-2063,1610,-2079,1609,-2095,1323,1323,1457,1457,1307,1307,1712,1547,1641,1700,1699,1594,1685,1625,1442,1442,1322,1322,-780,-973,-910,1279,1278,1277,1262,1276,1261,1275,1215,1260,1229,-959,974,974,989,989,-943,735,478,478,495,463,506,414,-1039,1003,958,1017,927,942,987,957,431,476,1272,1167,1228,-1183,1256,-1199,895,895,941,941,1242,1227,1212,1135,1014,1014,490,489,503,487,910,1013,985,925,863,894,970,955,1012,847,-1343,831,755,755,984,909,428,366,754,559,-1391,752,486,457,924,997,698,698,983,893,740,740,908,877,739,739,667,667,953,938,497,287,271,271,683,606,590,712,726,574,302,302,738,736,481,286,526,725,605,711,636,724,696,651,589,681,666,710,364,467,573,695,466,466,301,465,379,379,709,604,665,679,316,316,634,633,436,436,464,269,424,394,452,332,438,363,347,408,393,448,331,422,362,407,392,421,346,406,391,376,375,359,1441,1306,-2367,1290,-2383,1337,-2399,-2415,1426,1321,-2431,1411,1336,-2447,-2463,-2479,1169,1169,1049,1049,1424,1289,1412,1352,1319,-2495,1154,1154,1064,1064,1153,1153,416,390,360,404,403,389,344,374,373,343,358,372,327,357,342,311,356,326,1395,1394,1137,1137,1047,1047,1365,1392,1287,1379,1334,1364,1349,1378,1318,1363,792,792,792,792,1152,1152,1032,1032,1121,1121,1046,1046,1120,1120,1030,1030,-2895,1106,1061,1104,849,849,789,789,1091,1076,1029,1090,1060,1075,833,833,309,324,532,532,832,772,818,803,561,561,531,560,515,546,289,274,288,258,
737 -250,-1179,-1579,-1836,-1996,-2124,-2253,-2333,-2413,-2477,-2542,-2574,-2607,-2622,-2655,1314,1313,1298,1312,1282,785,785,785,785,1040,1040,1025,1025,768,768,768,768,-766,-798,-830,-862,-895,-911,-927,-943,-959,-975,-991,-1007,-1023,-1039,-1055,-1070,1724,1647,-1103,-1119,1631,1767,1662,1738,1708,1723,-1135,1780,1615,1779,1599,1677,1646,1778,1583,-1151,1777,1567,1737,1692,1765,1722,1707,1630,1751,1661,1764,1614,1736,1676,1763,1750,1645,1598,1721,1691,1762,1706,1582,1761,1566,-1167,1749,1629,767,766,751,765,494,494,735,764,719,749,734,763,447,447,748,718,477,506,431,491,446,476,461,505,415,430,475,445,504,399,460,489,414,503,383,474,429,459,502,502,746,752,488,398,501,473,413,472,486,271,480,270,-1439,-1455,1357,-1471,-1487,-1503,1341,1325,-1519,1489,1463,1403,1309,-1535,1372,1448,1418,1476,1356,1462,1387,-1551,1475,1340,1447,1402,1386,-1567,1068,1068,1474,1461,455,380,468,440,395,425,410,454,364,467,466,464,453,269,409,448,268,432,1371,1473,1432,1417,1308,1460,1355,1446,1459,1431,1083,1083,1401,1416,1458,1445,1067,1067,1370,1457,1051,1051,1291,1430,1385,1444,1354,1415,1400,1443,1082,1082,1173,1113,1186,1066,1185,1050,-1967,1158,1128,1172,1097,1171,1081,-1983,1157,1112,416,266,375,400,1170,1142,1127,1065,793,793,1169,1033,1156,1096,1141,1111,1155,1080,1126,1140,898,898,808,808,897,897,792,792,1095,1152,1032,1125,1110,1139,1079,1124,882,807,838,881,853,791,-2319,867,368,263,822,852,837,866,806,865,-2399,851,352,262,534,534,821,836,594,594,549,549,593,593,533,533,848,773,579,579,564,578,548,563,276,276,577,576,306,291,516,560,305,305,275,259,
738 -251,-892,-2058,-2620,-2828,-2957,-3023,-3039,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,-559,1530,-575,-591,1528,1527,1407,1526,1391,1023,1023,1023,1023,1525,1375,1268,1268,1103,1103,1087,1087,1039,1039,1523,-604,815,815,815,815,510,495,509,479,508,463,507,447,431,505,415,399,-734,-782,1262,-815,1259,1244,-831,1258,1228,-847,-863,1196,-879,1253,987,987,748,-767,493,493,462,477,414,414,686,669,478,446,461,445,474,429,487,458,412,471,1266,1264,1009,1009,799,799,-1019,-1276,-1452,-1581,-1677,-1757,-1821,-1886,-1933,-1997,1257,1257,1483,1468,1512,1422,1497,1406,1467,1496,1421,1510,1134,1134,1225,1225,1466,1451,1374,1405,1252,1252,1358,1480,1164,1164,1251,1251,1238,1238,1389,1465,-1407,1054,1101,-1423,1207,-1439,830,830,1248,1038,1237,1117,1223,1148,1236,1208,411,426,395,410,379,269,1193,1222,1132,1235,1221,1116,976,976,1192,1162,1177,1220,1131,1191,963,963,-1647,961,780,-1663,558,558,994,993,437,408,393,407,829,978,813,797,947,-1743,721,721,377,392,844,950,828,890,706,706,812,859,796,960,948,843,934,874,571,571,-1919,690,555,689,421,346,539,539,944,779,918,873,932,842,903,888,570,570,931,917,674,674,-2575,1562,-2591,1609,-2607,1654,1322,1322,1441,1441,1696,1546,1683,1593,1669,1624,1426,1426,1321,1321,1639,1680,1425,1425,1305,1305,1545,1668,1608,1623,1667,1592,1638,1666,1320,1320,1652,1607,1409,1409,1304,1304,1288,1288,1664,1637,1395,1395,1335,1335,1622,1636,1394,1394,1319,1319,1606,1621,1392,1392,1137,1137,1137,1137,345,390,360,375,404,373,1047,-2751,-2767,-2783,1062,1121,1046,-2799,1077,-2815,1106,1061,789,789,1105,1104,263,355,310,340,325,354,352,262,339,324,1091,1076,1029,1090,1060,1075,833,833,788,788,1088,1028,818,818,803,803,561,561,531,531,816,771,546,546,289,274,288,258,
739 -253,-317,-381,-446,-478,-509,1279,1279,-811,-1179,-1451,-1756,-1900,-2028,-2189,-2253,-2333,-2414,-2445,-2511,-2526,1313,1298,-2559,1041,1041,1040,1040,1025,1025,1024,1024,1022,1007,1021,991,1020,975,1019,959,687,687,1018,1017,671,671,655,655,1016,1015,639,639,758,758,623,623,757,607,756,591,755,575,754,559,543,543,1009,783,-575,-621,-685,-749,496,-590,750,749,734,748,974,989,1003,958,988,973,1002,942,987,957,972,1001,926,986,941,971,956,1000,910,985,925,999,894,970,-1071,-1087,-1102,1390,-1135,1436,1509,1451,1374,-1151,1405,1358,1480,1420,-1167,1507,1494,1389,1342,1465,1435,1450,1326,1505,1310,1493,1373,1479,1404,1492,1464,1419,428,443,472,397,736,526,464,464,486,457,442,471,484,482,1357,1449,1434,1478,1388,1491,1341,1490,1325,1489,1463,1403,1309,1477,1372,1448,1418,1433,1476,1356,1462,1387,-1439,1475,1340,1447,1402,1474,1324,1461,1371,1473,269,448,1432,1417,1308,1460,-1711,1459,-1727,1441,1099,1099,1446,1386,1431,1401,-1743,1289,1083,1083,1160,1160,1458,1445,1067,1067,1370,1457,1307,1430,1129,1129,1098,1098,268,432,267,416,266,400,-1887,1144,1187,1082,1173,1113,1186,1066,1050,1158,1128,1143,1172,1097,1171,1081,420,391,1157,1112,1170,1142,1127,1065,1169,1049,1156,1096,1141,1111,1155,1080,1126,1154,1064,1153,1140,1095,1048,-2159,1125,1110,1137,-2175,823,823,1139,1138,807,807,384,264,368,263,868,838,853,791,867,822,852,837,866,806,865,790,-2319,851,821,836,352,262,850,805,849,-2399,533,533,835,820,336,261,578,548,563,577,532,532,832,772,562,562,547,547,305,275,560,515,290,290,288,258 };
740 static const uint8_t tab32[] = { 130,162,193,209,44,28,76,140,9,9,9,9,9,9,9,9,190,254,222,238,126,94,157,157,109,61,173,205 };
741 static const uint8_t tab33[] = { 252,236,220,204,188,172,156,140,124,108,92,76,60,44,28,12 };
742 static const int16_t tabindex[2*16] = { 0,32,64,98,0,132,180,218,292,364,426,538,648,746,0,1126,1460,1460,1460,1460,1460,1460,1460,1460,1842,1842,1842,1842,1842,1842,1842,1842 };
743 static const uint8_t g_linbits[] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,6,8,10,13,4,5,6,7,8,9,11,13 };
745 #define PEEK_BITS(n) (bs_cache >> (32 - n))
746 #define FLUSH_BITS(n) { bs_cache <<= (n); bs_sh += (n); }
747 #define CHECK_BITS while (bs_sh >= 0) { bs_cache |= (uint32_t)*bs_next_ptr++ << bs_sh; bs_sh -= 8; }
748 #define BSPOS ((bs_next_ptr - bs->buf)*8 - 24 + bs_sh)
750 float one = 0.0f;
751 int ireg = 0, big_val_cnt = gr_info->big_values;
752 const uint8_t *sfb = gr_info->sfbtab;
753 const uint8_t *bs_next_ptr = bs->buf + bs->pos/8;
754 uint32_t bs_cache = (((bs_next_ptr[0]*256u + bs_next_ptr[1])*256u + bs_next_ptr[2])*256u + bs_next_ptr[3]) << (bs->pos & 7);
755 int pairs_to_decode, np, bs_sh = (bs->pos & 7) - 8;
756 bs_next_ptr += 4;
758 while (big_val_cnt > 0)
760 int tab_num = gr_info->table_select[ireg];
761 int sfb_cnt = gr_info->region_count[ireg++];
762 const short *codebook = tabs + tabindex[tab_num];
763 int linbits = g_linbits[tab_num];
766 np = *sfb++ / 2;
767 pairs_to_decode = MINIMP3_MIN(big_val_cnt, np);
768 one = *scf++;
771 int j, w = 5;
772 int leaf = codebook[PEEK_BITS(w)];
773 while (leaf < 0)
775 FLUSH_BITS(w);
776 w = leaf & 7;
777 leaf = codebook[PEEK_BITS(w) - (leaf >> 3)];
779 FLUSH_BITS(leaf >> 8);
781 for (j = 0; j < 2; j++, dst++, leaf >>= 4)
783 int lsb = leaf & 0x0F;
784 if (lsb == 15 && linbits)
786 lsb += PEEK_BITS(linbits);
787 FLUSH_BITS(linbits);
788 CHECK_BITS;
789 *dst = one*L3_pow_43(lsb)*((int32_t)bs_cache < 0 ? -1: 1);
790 } else
792 *dst = g_pow43[16 + lsb - 16*(bs_cache >> 31)]*one;
794 FLUSH_BITS(lsb ? 1 : 0);
796 CHECK_BITS;
797 } while (--pairs_to_decode);
798 } while ((big_val_cnt -= np) > 0 && --sfb_cnt >= 0);
801 for (np = 1 - big_val_cnt;; dst += 4)
803 const uint8_t *codebook_count1 = (gr_info->count1_table) ? tab33 : tab32;
804 int leaf = codebook_count1[PEEK_BITS(4)];
805 if (!(leaf & 8))
807 leaf = codebook_count1[(leaf >> 3) + (bs_cache << 4 >> (32 - (leaf & 3)))];
809 FLUSH_BITS(leaf & 7);
810 if (BSPOS > layer3gr_limit)
812 break;
814 #define RELOAD_SCALEFACTOR if (!--np) { np = *sfb++/2; if (!np) break; one = *scf++; }
815 #define DEQ_COUNT1(s) if (leaf & (128 >> s)) { dst[s] = ((int32_t)bs_cache < 0) ? -one : one; FLUSH_BITS(1) }
816 RELOAD_SCALEFACTOR;
817 DEQ_COUNT1(0);
818 DEQ_COUNT1(1);
819 RELOAD_SCALEFACTOR;
820 DEQ_COUNT1(2);
821 DEQ_COUNT1(3);
822 CHECK_BITS;
825 bs->pos = layer3gr_limit;
/*
 * Convert a mid/side coded channel pair to left/right, in place.
 *
 * left: first channel; the second channel is read and written 576 floats
 *       further on (left + 576).
 * n:    number of samples to convert in each channel.
 *
 * For every sample the first channel becomes (a + b) and the second (a - b).
 */
static void L3_midside_stereo(float *left, int n)
{
    int i = 0;
    float *right = left + 576;
#if HAVE_SIMD
    /* Vector path handles four samples at a time; the scalar loop finishes the rest. */
    if (have_simd()) for (; i < n - 3; i += 4)
    {
        f4 vl = VLD(left + i);
        f4 vr = VLD(right + i);
        VSTORE(left + i, VADD(vl, vr));
        VSTORE(right + i, VSUB(vl, vr));
    }
#endif
    for (; i < n; i++)
    {
        float a = left[i];
        float b = right[i];
        left[i] = a + b;
        right[i] = a - b;
    }
}
/*
 * Expand one band of n samples into two channels using fixed gains.
 *
 * The source samples are read from left[0..n-1]; the second channel
 * (left + 576) receives sample*kr and the first channel is then scaled
 * in place by kl.
 */
static void L3_intensity_stereo_band(float *left, int n, float kl, float kr)
{
    int i;
    for (i = 0; i < n; i++)
    {
        /* Write the second channel first: it needs the unscaled source value. */
        left[i + 576] = left[i]*kr;
        left[i] = left[i]*kl;
    }
}
/*
 * Find, for each of the three interleaved band groups (band index modulo 3),
 * the highest band index whose samples in `right` are not all zero.
 *
 * right:    samples, laid out band after band.
 * sfb:      width (in samples) of each band.
 * nbands:   number of bands to scan.
 * max_band: output; each entry is -1 if no band of that group has data.
 *
 * Samples are examined in pairs, so right[k + 1] is read for every even k
 * below the band width.
 */
static void L3_stereo_top_band(const float *right, const uint8_t *sfb, int nbands, int max_band[3])
{
    int i, k;

    max_band[0] = max_band[1] = max_band[2] = -1;

    for (i = 0; i < nbands; i++)
    {
        for (k = 0; k < sfb[i]; k += 2)
        {
            if (right[k] != 0 || right[k + 1] != 0)
            {
                max_band[i % 3] = i;
                break;
            }
        }
        right += sfb[i];
    }
}
/*
 * Apply joint-stereo processing band by band.
 *
 * left:     first channel; the second channel lives at left + 576.
 * ist_pos:  per-band intensity position value.
 * sfb:      band widths, terminated by a zero entry.
 * hdr:      frame header bytes (queried through the HDR_TEST_* macros).
 * max_band: per group (band index modulo 3) highest band holding data;
 *           only bands above it are candidates for intensity stereo.
 * mpeg2_sh: extra shift applied to the exponent in the non-MPEG1 case.
 *
 * A band gets intensity stereo when its index exceeds max_band[i % 3] and
 * its position is below the limit (7 for MPEG1, 64 otherwise); any other
 * band gets mid/side conversion if the header selects MS stereo.
 */
static void L3_stereo_process(float *left, const uint8_t *ist_pos, const uint8_t *sfb, const uint8_t *hdr, int max_band[3], int mpeg2_sh)
{
    static const float g_pan[7*2] = { 0,1,0.21132487f,0.78867513f,0.36602540f,0.63397460f,0.5f,0.5f,0.63397460f,0.36602540f,0.78867513f,0.21132487f,1,0 };
    unsigned i, max_pos = HDR_TEST_MPEG1(hdr) ? 7 : 64;

    for (i = 0; sfb[i]; i++)
    {
        unsigned ipos = ist_pos[i];
        if ((int)i > max_band[i % 3] && ipos < max_pos)
        {
            /* With MS stereo also enabled the gains are scaled by sqrt(2). */
            float kl, kr, s = HDR_TEST_MS_STEREO(hdr) ? 1.41421356f : 1;
            if (HDR_TEST_MPEG1(hdr))
            {
                kl = g_pan[2*ipos];
                kr = g_pan[2*ipos + 1];
            } else
            {
                kl = 1;
                kr = L3_ldexp_q2(1, (ipos + 1) >> 1 << mpeg2_sh);
                if (ipos & 1)
                {
                    /* Odd positions attenuate the first channel instead of the second. */
                    kl = kr;
                    kr = 1;
                }
            }
            L3_intensity_stereo_band(left, sfb[i], kl*s, kr*s);
        } else if (HDR_TEST_MS_STEREO(hdr))
        {
            L3_midside_stereo(left, sfb[i]);
        }
        left += sfb[i];
    }
}
914 static void L3_intensity_stereo(float *left, uint8_t *ist_pos, const L3_gr_info_t *gr, const uint8_t *hdr)
916 int max_band[3], n_sfb = gr->n_long_sfb + gr->n_short_sfb;
917 int i, max_blocks = gr->n_short_sfb ? 3 : 1;
919 L3_stereo_top_band(left + 576, gr->sfbtab, n_sfb, max_band);
920 if (gr->n_long_sfb)
922 max_band[0] = max_band[1] = max_band[2] = MINIMP3_MAX(MINIMP3_MAX(max_band[0], max_band[1]), max_band[2]);
924 for (i = 0; i < max_blocks; i++)
926 int default_pos = HDR_TEST_MPEG1(hdr) ? 3 : 0;
927 int itop = n_sfb - max_blocks + i;
928 int prev = itop - max_blocks;
929 ist_pos[itop] = max_band[i] >= prev ? default_pos : ist_pos[prev];
931 L3_stereo_process(left, ist_pos, gr->sfbtab, hdr, max_band, gr[1].scalefac_compress & 1);
/*
 * Interleave short-block samples in place.
 *
 * sfb lists band widths in groups of three and ends with a zero entry; only
 * the first width of each group is read. For each group the three
 * consecutive runs of `len` samples in grbuf are interleaved sample by
 * sample (a0 b0 c0 a1 b1 c1 ...). The result is built in `scratch` and then
 * copied back over grbuf.
 */
static void L3_reorder(float *grbuf, float *scratch, const uint8_t *sfb)
{
    int i, len;
    float *src = grbuf, *dst = scratch;

    /* The inner loop advances src by len; the extra 2*len skips the other two runs. */
    for (;0 != (len = *sfb); sfb += 3, src += 2*len)
    {
        for (i = 0; i < len; i++, src++)
        {
            *dst++ = src[0*len];
            *dst++ = src[1*len];
            *dst++ = src[2*len];
        }
    }
    memcpy(grbuf, scratch, (dst - scratch)*sizeof(float));
}
/*
 * Run the alias-reduction butterflies across `nbands` band boundaries.
 *
 * Bands are 18 samples wide. At each boundary the first eight samples of the
 * upper band (grbuf[18 + i]) are combined with the last eight of the lower
 * band (grbuf[17 - i]) using the coefficient pairs in g_aa. Nothing is done
 * when nbands <= 0.
 */
static void L3_antialias(float *grbuf, int nbands)
{
    static const float g_aa[2][8] = {
        {0.85749293f,0.88174200f,0.94962865f,0.98331459f,0.99551782f,0.99916056f,0.99989920f,0.99999316f},
        {0.51449576f,0.47173197f,0.31337745f,0.18191320f,0.09457419f,0.04096558f,0.01419856f,0.00369997f}
    };

    for (; nbands > 0; nbands--, grbuf += 18)
    {
        int i = 0;
#if HAVE_SIMD
        if (have_simd()) for (; i < 8; i += 4)
        {
            f4 vu = VLD(grbuf + 18 + i);
            f4 vd = VLD(grbuf + 14 - i);
            f4 vc0 = VLD(g_aa[0] + i);
            f4 vc1 = VLD(g_aa[1] + i);
            /* The lower band is walked backwards, so reverse its lanes around the arithmetic. */
            vd = VREV(vd);
            VSTORE(grbuf + 18 + i, VSUB(VMUL(vu, vc0), VMUL(vd, vc1)));
            vd = VADD(VMUL(vu, vc1), VMUL(vd, vc0));
            VSTORE(grbuf + 14 - i, VREV(vd));
        }
#endif
#ifndef MINIMP3_ONLY_SIMD
        for(; i < 8; i++)
        {
            float u = grbuf[18 + i];
            float d = grbuf[17 - i];
            grbuf[18 + i] = u*g_aa[0][i] - d*g_aa[1][i];
            grbuf[17 - i] = u*g_aa[1][i] + d*g_aa[0][i];
        }
#endif
    }
}
/*
 * In-place 9-point transform used by the 36-point IMDCT.
 *
 * y: nine input values, overwritten with the nine outputs. The even-indexed
 *    and odd-indexed inputs are processed separately and combined at the end.
 */
static void L3_dct3_9(float *y)
{
    float s0, s1, s2, s3, s4, s5, s6, s7, s8, t0, t2, t4;

    /* Even-indexed inputs. */
    s0 = y[0]; s2 = y[2]; s4 = y[4]; s6 = y[6]; s8 = y[8];
    t0 = s0 + s6*0.5f;
    s0 -= s6;
    t4 = (s4 + s2)*0.93969262f;
    t2 = (s8 + s2)*0.76604444f;
    s6 = (s4 - s8)*0.17364818f;
    s4 += s8 - s2;

    s2 = s0 - s4*0.5f;
    y[4] = s4 + s0;
    s8 = t0 - t2 + s6;
    s0 = t0 - t4 + t2;
    s4 = t0 + t4 - s6;

    /* Odd-indexed inputs. */
    s1 = y[1]; s3 = y[3]; s5 = y[5]; s7 = y[7];

    s3 *= 0.86602540f;
    t0 = (s5 + s1)*0.98480775f;
    t4 = (s5 - s7)*0.34202014f;
    t2 = (s1 + s7)*0.64278761f;
    s1 = (s1 - s5 - s7)*0.86602540f;

    s5 = t0 - s3 - t2;
    s7 = t4 - s3 - t0;
    s3 = t4 + s3 - t2;

    /* Combine both halves; y[4] was already written above. */
    y[0] = s4 - s7;
    y[1] = s2 + s1;
    y[2] = s0 - s3;
    y[3] = s8 + s5;
    y[5] = s8 - s5;
    y[6] = s0 + s3;
    y[7] = s2 - s1;
    y[8] = s4 + s7;
}
/*
 * Long-block inverse MDCT with windowing and overlap-add, for `nbands` bands.
 *
 * grbuf:   18 samples per band, transformed in place.
 * overlap: 9 values per band carried over from the previous call; read for
 *          the overlap-add and replaced with this call's tail.
 * window:  18 window coefficients, used as two halves of 9.
 */
static void L3_imdct36(float *grbuf, float *overlap, const float *window, int nbands)
{
    int i, j;
    static const float g_twid9[18] = {
        0.73727734f,0.79335334f,0.84339145f,0.88701083f,0.92387953f,0.95371695f,0.97629601f,0.99144486f,0.99904822f,0.67559021f,0.60876143f,0.53729961f,0.46174861f,0.38268343f,0.30070580f,0.21643961f,0.13052619f,0.04361938f
    };

    for (j = 0; j < nbands; j++, grbuf += 18, overlap += 9)
    {
        float co[9], si[9];
        /* Fold the 18 inputs into two 9-point sequences. */
        co[0] = -grbuf[0];
        si[0] = grbuf[17];
        for (i = 0; i < 4; i++)
        {
            si[8 - 2*i] =   grbuf[4*i + 1] - grbuf[4*i + 2];
            co[1 + 2*i] =   grbuf[4*i + 1] + grbuf[4*i + 2];
            si[7 - 2*i] =   grbuf[4*i + 4] - grbuf[4*i + 3];
            co[2 + 2*i] = -(grbuf[4*i + 3] + grbuf[4*i + 4]);
        }
        L3_dct3_9(co);
        L3_dct3_9(si);

        si[1] = -si[1];
        si[3] = -si[3];
        si[5] = -si[5];
        si[7] = -si[7];

        i = 0;

#if HAVE_SIMD
        /* Vector path covers i = 0..7; the scalar loop below handles i = 8. */
        if (have_simd()) for (; i < 8; i += 4)
        {
            f4 vovl = VLD(overlap + i);
            f4 vc = VLD(co + i);
            f4 vs = VLD(si + i);
            f4 vr0 = VLD(g_twid9 + i);
            f4 vr1 = VLD(g_twid9 + 9 + i);
            f4 vw0 = VLD(window + i);
            f4 vw1 = VLD(window + 9 + i);
            f4 vsum = VADD(VMUL(vc, vr1), VMUL(vs, vr0));
            VSTORE(overlap + i, VSUB(VMUL(vc, vr0), VMUL(vs, vr1)));
            VSTORE(grbuf + i, VSUB(VMUL(vovl, vw0), VMUL(vsum, vw1)));
            vsum = VADD(VMUL(vovl, vw1), VMUL(vsum, vw0));
            VSTORE(grbuf + 14 - i, VREV(vsum));
        }
#endif
        for (; i < 9; i++)
        {
            float ovl  = overlap[i];
            float sum  = co[i]*g_twid9[9 + i] + si[i]*g_twid9[0 + i];
            overlap[i] = co[i]*g_twid9[0 + i] - si[i]*g_twid9[9 + i];
            grbuf[i]      = ovl*window[0 + i] - sum*window[9 + i];
            grbuf[17 - i] = ovl*window[9 + i] + sum*window[0 + i];
        }
    }
}
/*
 * 3-point transform used by the short-block IMDCT.
 *
 * Writes three outputs to dst:
 *   dst[1] = x0 + x2
 *   dst[0] = (x0 - x2/2) + x1*0.86602540
 *   dst[2] = (x0 - x2/2) - x1*0.86602540
 */
static void L3_idct3(float x0, float x1, float x2, float *dst)
{
    float m1 = x1*0.86602540f;
    float a1 = x0 - x2*0.5f;
    dst[1] = x0 + x2;
    dst[0] = a1 + m1;
    dst[2] = a1 - m1;
}
/*
 * One short-block inverse MDCT with windowing and overlap-add.
 *
 * x:       input; elements 0, 3, 6, 9, 12 and 15 are read (stride 3).
 * dst:     receives six output samples.
 * overlap: three values read for the overlap-add and then replaced.
 */
static void L3_imdct12(float *x, float *dst, float *overlap)
{
    static const float g_twid3[6] = { 0.79335334f,0.92387953f,0.99144486f, 0.60876143f,0.38268343f,0.13052619f };
    float co[3], si[3];
    int i;

    L3_idct3(-x[0], x[6] + x[3], x[12] + x[9], co);
    L3_idct3(x[15], x[12] - x[9], x[6] - x[3], si);
    si[1] = -si[1];

    for (i = 0; i < 3; i++)
    {
        float ovl  = overlap[i];
        float sum  = co[i]*g_twid3[3 + i] + si[i]*g_twid3[0 + i];
        overlap[i] = co[i]*g_twid3[0 + i] - si[i]*g_twid3[3 + i];
        /* The same table doubles as the window, indexed in reverse. */
        dst[i]     = ovl*g_twid3[2 - i] - sum*g_twid3[5 - i];
        dst[5 - i] = ovl*g_twid3[5 - i] + sum*g_twid3[2 - i];
    }
}
/*
 * Short-block inverse MDCT for `nbands` bands of 18 samples each.
 *
 * For each band the first six outputs are the stored overlap values; three
 * 12-point transforms (inputs interleaved at offsets 0, 1 and 2) produce the
 * remaining outputs and refill the band's nine overlap values.
 */
static void L3_imdct_short(float *grbuf, float *overlap, int nbands)
{
    for (;nbands > 0; nbands--, overlap += 9, grbuf += 18)
    {
        float tmp[18];
        /* Work from a copy: grbuf is overwritten while the inputs are still needed. */
        memcpy(tmp, grbuf, sizeof(tmp));
        memcpy(grbuf, overlap, 6*sizeof(float));
        L3_imdct12(tmp, grbuf + 6, overlap + 6);
        L3_imdct12(tmp + 1, grbuf + 12, overlap + 6);
        L3_imdct12(tmp + 2, overlap, overlap + 6);
    }
}
/*
 * Negate every odd-indexed sample in every odd-numbered band.
 *
 * grbuf holds 32 bands of 18 samples; bands 1, 3, ..., 31 are touched and,
 * within them, samples 1, 3, ..., 17.
 */
static void L3_change_sign(float *grbuf)
{
    int b, i;
    for (b = 0, grbuf += 18; b < 32; b += 2, grbuf += 36)
        for (i = 1; i < 18; i += 2)
            grbuf[i] = -grbuf[i];
}
1133 static void L3_imdct_gr(float *grbuf, float *overlap, unsigned block_type, unsigned n_long_bands)
1135 static const float g_mdct_window[2][18] = {
1136 { 0.99904822f,0.99144486f,0.97629601f,0.95371695f,0.92387953f,0.88701083f,0.84339145f,0.79335334f,0.73727734f,0.04361938f,0.13052619f,0.21643961f,0.30070580f,0.38268343f,0.46174861f,0.53729961f,0.60876143f,0.67559021f },
1137 { 1,1,1,1,1,1,0.99144486f,0.92387953f,0.79335334f,0,0,0,0,0,0,0.13052619f,0.38268343f,0.60876143f }
1139 if (n_long_bands)
1141 L3_imdct36(grbuf, overlap, g_mdct_window[0], n_long_bands);
1142 grbuf += 18*n_long_bands;
1143 overlap += 9*n_long_bands;
1145 if (block_type == SHORT_BLOCK_TYPE)
1146 L3_imdct_short(grbuf, overlap, 32 - n_long_bands);
1147 else
1148 L3_imdct36(grbuf, overlap, g_mdct_window[block_type == STOP_BLOCK_TYPE], 32 - n_long_bands);
1151 static void L3_save_reservoir(mp3dec_t *h, mp3dec_scratch_t *s)
1153 int pos = (s->bs.pos + 7)/8u;
1154 int remains = s->bs.limit/8u - pos;
1155 if (remains > MAX_BITRESERVOIR_BYTES)
1157 pos += remains - MAX_BITRESERVOIR_BYTES;
1158 remains = MAX_BITRESERVOIR_BYTES;
1160 if (remains > 0)
1162 memmove(h->reserv_buf, s->maindata + pos, remains);
1164 h->reserv = remains;
1167 static int L3_restore_reservoir(mp3dec_t *h, bs_t *bs, mp3dec_scratch_t *s, int main_data_begin)
1169 int frame_bytes = (bs->limit - bs->pos)/8;
1170 int bytes_have = MINIMP3_MIN(h->reserv, main_data_begin);
1171 memcpy(s->maindata, h->reserv_buf + MINIMP3_MAX(0, h->reserv - main_data_begin), MINIMP3_MIN(h->reserv, main_data_begin));
1172 memcpy(s->maindata + bytes_have, bs->buf + bs->pos/8, frame_bytes);
1173 bs_init(&s->bs, s->maindata, bytes_have + frame_bytes);
1174 return h->reserv >= main_data_begin;
1177 static void L3_decode(mp3dec_t *h, mp3dec_scratch_t *s, L3_gr_info_t *gr_info, int nch)
1179 int ch;
1181 for (ch = 0; ch < nch; ch++)
1183 int layer3gr_limit = s->bs.pos + gr_info[ch].part_23_length;
1184 L3_decode_scalefactors(h->header, s->ist_pos[ch], &s->bs, gr_info + ch, s->scf, ch);
1185 L3_huffman(s->grbuf[ch], &s->bs, gr_info + ch, s->scf, layer3gr_limit);
1188 if (HDR_TEST_I_STEREO(h->header))
1190 L3_intensity_stereo(s->grbuf[0], s->ist_pos[1], gr_info, h->header);
1191 } else if (HDR_IS_MS_STEREO(h->header))
1193 L3_midside_stereo(s->grbuf[0], 576);
1196 for (ch = 0; ch < nch; ch++, gr_info++)
1198 int aa_bands = 31;
1199 int n_long_bands = (gr_info->mixed_block_flag ? 2 : 0) << (int)(HDR_GET_MY_SAMPLE_RATE(h->header) == 2);
1201 if (gr_info->n_short_sfb)
1203 aa_bands = n_long_bands - 1;
1204 L3_reorder(s->grbuf[ch] + n_long_bands*18, s->syn[0], gr_info->sfbtab + gr_info->n_long_sfb);
1207 L3_antialias(s->grbuf[ch], aa_bands);
1208 L3_imdct_gr(s->grbuf[ch], h->mdct_overlap[ch], gr_info->block_type, n_long_bands);
1209 L3_change_sign(s->grbuf[ch]);
/*
 * In-place 32-point transform across the subbands, applied to `n` columns.
 *
 * grbuf: samples laid out with a stride of 18 between subbands; column k
 *        uses grbuf[k + 18*band] for band = 0..31.
 * n:     number of columns (time slots) to transform.
 *
 * The SIMD path processes four columns per iteration and, when fewer than
 * four remain, stores only the low two lanes. The scalar path processes one
 * column at a time.
 */
static void mp3d_DCT_II(float *grbuf, int n)
{
    static const float g_sec[24] = {
        10.19000816f,0.50060302f,0.50241929f,3.40760851f,0.50547093f,0.52249861f,2.05778098f,0.51544732f,0.56694406f,1.48416460f,0.53104258f,0.64682180f,1.16943991f,0.55310392f,0.78815460f,0.97256821f,0.58293498f,1.06067765f,0.83934963f,0.62250412f,1.72244716f,0.74453628f,0.67480832f,5.10114861f
    };
    int i, k = 0;
#if HAVE_SIMD
    if (have_simd()) for (; k < n; k += 4)
    {
        f4 t[4][8], *x;
        float *y = grbuf + k;

        /* Stage 1: fold 32 inputs into four groups of eight. */
        for (x = t[0], i = 0; i < 8; i++, x++)
        {
            f4 x0 = VLD(&y[i*18]);
            f4 x1 = VLD(&y[(15 - i)*18]);
            f4 x2 = VLD(&y[(16 + i)*18]);
            f4 x3 = VLD(&y[(31 - i)*18]);
            f4 t0 = VADD(x0, x3);
            f4 t1 = VADD(x1, x2);
            f4 t2 = VMUL_S(VSUB(x1, x2), g_sec[3*i + 0]);
            f4 t3 = VMUL_S(VSUB(x0, x3), g_sec[3*i + 1]);
            x[0] = VADD(t0, t1);
            x[8] = VMUL_S(VSUB(t0, t1), g_sec[3*i + 2]);
            x[16] = VADD(t3, t2);
            x[24] = VMUL_S(VSUB(t3, t2), g_sec[3*i + 2]);
        }
        /* Stage 2: 8-point transform on each group. */
        for (x = t[0], i = 0; i < 4; i++, x += 8)
        {
            f4 x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7], xt;
            xt = VSUB(x0, x7); x0 = VADD(x0, x7);
            x7 = VSUB(x1, x6); x1 = VADD(x1, x6);
            x6 = VSUB(x2, x5); x2 = VADD(x2, x5);
            x5 = VSUB(x3, x4); x3 = VADD(x3, x4);
            x4 = VSUB(x0, x3); x0 = VADD(x0, x3);
            x3 = VSUB(x1, x2); x1 = VADD(x1, x2);
            x[0] = VADD(x0, x1);
            x[4] = VMUL_S(VSUB(x0, x1), 0.70710677f);
            x5 = VADD(x5, x6);
            x6 = VMUL_S(VADD(x6, x7), 0.70710677f);
            x7 = VADD(x7, xt);
            x3 = VMUL_S(VADD(x3, x4), 0.70710677f);
            x5 = VSUB(x5, VMUL_S(x7, 0.198912367f)); /* rotate by PI/8 */
            x7 = VADD(x7, VMUL_S(x5, 0.382683432f));
            x5 = VSUB(x5, VMUL_S(x7, 0.198912367f));
            x0 = VSUB(xt, x6); xt = VADD(xt, x6);
            x[1] = VMUL_S(VADD(xt, x7), 0.50979561f);
            x[2] = VMUL_S(VADD(x4, x3), 0.54119611f);
            x[3] = VMUL_S(VSUB(x0, x5), 0.60134488f);
            x[5] = VMUL_S(VADD(x0, x5), 0.89997619f);
            x[6] = VMUL_S(VSUB(x4, x3), 1.30656302f);
            x[7] = VMUL_S(VSUB(xt, x7), 2.56291556f);
        }

        if (k > n - 3)
        {
            /* Fewer than four columns left: store only the low two lanes. */
#if HAVE_SSE
#define VSAVE2(i, v) _mm_storel_pi((__m64 *)(void*)&y[i*18], v)
#else
#define VSAVE2(i, v) vst1_f32((float32_t *)&y[i*18], vget_low_f32(v))
#endif
            for (i = 0; i < 7; i++, y += 4*18)
            {
                f4 s = VADD(t[3][i], t[3][i + 1]);
                VSAVE2(0, t[0][i]);
                VSAVE2(1, VADD(t[2][i], s));
                VSAVE2(2, VADD(t[1][i], t[1][i + 1]));
                VSAVE2(3, VADD(t[2][1 + i], s));
            }
            VSAVE2(0, t[0][7]);
            VSAVE2(1, VADD(t[2][7], t[3][7]));
            VSAVE2(2, t[1][7]);
            VSAVE2(3, t[3][7]);
        } else
        {
#define VSAVE4(i, v) VSTORE(&y[i*18], v)
            for (i = 0; i < 7; i++, y += 4*18)
            {
                f4 s = VADD(t[3][i], t[3][i + 1]);
                VSAVE4(0, t[0][i]);
                VSAVE4(1, VADD(t[2][i], s));
                VSAVE4(2, VADD(t[1][i], t[1][i + 1]));
                VSAVE4(3, VADD(t[2][1 + i], s));
            }
            VSAVE4(0, t[0][7]);
            VSAVE4(1, VADD(t[2][7], t[3][7]));
            VSAVE4(2, t[1][7]);
            VSAVE4(3, t[3][7]);
        }
    } else
#endif
#ifdef MINIMP3_ONLY_SIMD
    {}
#else
    for (; k < n; k++)
    {
        float t[4][8], *x, *y = grbuf + k;

        /* Stage 1: fold 32 inputs into four groups of eight. */
        for (x = t[0], i = 0; i < 8; i++, x++)
        {
            float x0 = y[i*18];
            float x1 = y[(15 - i)*18];
            float x2 = y[(16 + i)*18];
            float x3 = y[(31 - i)*18];
            float t0 = x0 + x3;
            float t1 = x1 + x2;
            float t2 = (x1 - x2)*g_sec[3*i + 0];
            float t3 = (x0 - x3)*g_sec[3*i + 1];
            x[0] = t0 + t1;
            x[8] = (t0 - t1)*g_sec[3*i + 2];
            x[16] = t3 + t2;
            x[24] = (t3 - t2)*g_sec[3*i + 2];
        }
        /* Stage 2: 8-point transform on each group. */
        for (x = t[0], i = 0; i < 4; i++, x += 8)
        {
            float x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7], xt;
            xt = x0 - x7; x0 += x7;
            x7 = x1 - x6; x1 += x6;
            x6 = x2 - x5; x2 += x5;
            x5 = x3 - x4; x3 += x4;
            x4 = x0 - x3; x0 += x3;
            x3 = x1 - x2; x1 += x2;
            x[0] = x0 + x1;
            x[4] = (x0 - x1)*0.70710677f;
            x5 = x5 + x6;
            x6 = (x6 + x7)*0.70710677f;
            x7 = x7 + xt;
            x3 = (x3 + x4)*0.70710677f;
            x5 -= x7*0.198912367f; /* rotate by PI/8 */
            x7 += x5*0.382683432f;
            x5 -= x7*0.198912367f;
            x0 = xt - x6; xt += x6;
            x[1] = (xt + x7)*0.50979561f;
            x[2] = (x4 + x3)*0.54119611f;
            x[3] = (x0 - x5)*0.60134488f;
            x[5] = (x0 + x5)*0.89997619f;
            x[6] = (x4 - x3)*1.30656302f;
            x[7] = (xt - x7)*2.56291556f;
        }
        /* Stage 3: combine the groups and write four subbands per step. */
        for (i = 0; i < 7; i++, y += 4*18)
        {
            y[0*18] = t[0][i];
            y[1*18] = t[2][i] + t[3][i] + t[3][i + 1];
            y[2*18] = t[1][i] + t[1][i + 1];
            y[3*18] = t[2][i + 1] + t[3][i] + t[3][i + 1];
        }
        y[0*18] = t[0][7];
        y[1*18] = t[2][7] + t[3][7];
        y[2*18] = t[1][7];
        y[3*18] = t[3][7];
    }
#endif
}
/* Scale a synthesis-filter output value down by 32768. */
static float mp3d_scale_pcm(float sample)
{
    return sample / 32768.0f;
}
/*
 * Compute two output samples directly from the synthesis history.
 *
 * pcm: output; pcm[0] and pcm[16*nch] are written.
 * nch: channel count, used only as the output stride.
 * z:   history values, read at multiples of 64 (and again after advancing
 *      by 2 for the second sample).
 */
static void mp3d_synth_pair(float *pcm, int nch, const float *z)
{
    float a;
    a  = (z[14*64] - z[    0]) * 29;
    a += (z[ 1*64] + z[13*64]) * 213;
    a += (z[12*64] - z[ 2*64]) * 459;
    a += (z[ 3*64] + z[11*64]) * 2037;
    a += (z[10*64] - z[ 4*64]) * 5153;
    a += (z[ 5*64] + z[ 9*64]) * 6574;
    a += (z[ 8*64] - z[ 6*64]) * 37489;
    a +=  z[ 7*64]             * 75038;
    pcm[0] = mp3d_scale_pcm(a);

    z += 2;
    a  = z[14*64] * 104;
    a += z[12*64] * 1567;
    a += z[10*64] * 9727;
    a += z[ 8*64] * 64019;
    a += z[ 6*64] * -9975;
    a += z[ 4*64] * -45;
    a += z[ 2*64] * 146;
    a += z[ 0*64] * -5;
    pcm[16*nch] = mp3d_scale_pcm(a);
}
/*
 * Synthesis filter for two consecutive time slots of one or two channels.
 *
 * xl:   first channel's subband samples (stride 18 between subbands); the
 *       second channel is taken 576*(nch - 1) floats further on, so for
 *       nch == 1 both "channels" alias the same data.
 * dstl: interleaved PCM output; 64*nch floats are written.
 * nch:  number of channels (output stride).
 * lins: history buffer; this call writes the 64 floats starting at
 *       lins + 15*64 and reads back through the preceding 15*64.
 *
 * Four samples per channel are produced by mp3d_synth_pair; the remaining
 * ones come from the windowed loop over i = 14..0.
 */
static void mp3d_synth(float *xl, float *dstl, int nch, float *lins)
{
    int i;
    float *xr = xl + 576*(nch - 1);
    float *dstr = dstl + (nch - 1);

    static const float g_win[] = {
        -1,26,-31,208,218,401,-519,2063,2000,4788,-5517,7134,5959,35640,-39336,74992,
        -1,24,-35,202,222,347,-581,2080,1952,4425,-5879,7640,5288,33791,-41176,74856,
        -1,21,-38,196,225,294,-645,2087,1893,4063,-6237,8092,4561,31947,-43006,74630,
        -1,19,-41,190,227,244,-711,2085,1822,3705,-6589,8492,3776,30112,-44821,74313,
        -1,17,-45,183,228,197,-779,2075,1739,3351,-6935,8840,2935,28289,-46617,73908,
        -1,16,-49,176,228,153,-848,2057,1644,3004,-7271,9139,2037,26482,-48390,73415,
        -2,14,-53,169,227,111,-919,2032,1535,2663,-7597,9389,1082,24694,-50137,72835,
        -2,13,-58,161,224,72,-991,2001,1414,2330,-7910,9592,70,22929,-51853,72169,
        -2,11,-63,154,221,36,-1064,1962,1280,2006,-8209,9750,-998,21189,-53534,71420,
        -2,10,-68,147,215,2,-1137,1919,1131,1692,-8491,9863,-2122,19478,-55178,70590,
        -3,9,-73,139,208,-29,-1210,1870,970,1388,-8755,9935,-3300,17799,-56778,69679,
        -3,8,-79,132,200,-57,-1283,1817,794,1095,-8998,9966,-4533,16155,-58333,68692,
        -4,7,-85,125,189,-83,-1356,1759,605,814,-9219,9959,-5818,14548,-59838,67629,
        -4,7,-91,117,177,-106,-1428,1698,402,545,-9416,9916,-7154,12980,-61289,66494,
        -5,6,-97,111,163,-127,-1498,1634,185,288,-9585,9838,-8540,11455,-62684,65290
    };
    float *zlin = lins + 15*64;
    const float *w = g_win;

    zlin[4*15]     = xl[18*16];
    zlin[4*15 + 1] = xr[18*16];
    zlin[4*15 + 2] = xl[0];
    zlin[4*15 + 3] = xr[0];

    zlin[4*31]     = xl[1 + 18*16];
    zlin[4*31 + 1] = xr[1 + 18*16];
    zlin[4*31 + 2] = xl[1];
    zlin[4*31 + 3] = xr[1];

    mp3d_synth_pair(dstr, nch, lins + 4*15 + 1);
    mp3d_synth_pair(dstr + 32*nch, nch, lins + 4*15 + 64 + 1);
    mp3d_synth_pair(dstl, nch, lins + 4*15);
    mp3d_synth_pair(dstl + 32*nch, nch, lins + 4*15 + 64);

#if HAVE_SIMD
    if (have_simd()) for (i = 14; i >= 0; i--)
    {
#define VLOAD(k) f4 w0 = VSET(*w++); f4 w1 = VSET(*w++); f4 vz = VLD(&zlin[4*i - 64*k]); f4 vy = VLD(&zlin[4*i - 64*(15 - k)]);
#define V0(k) { VLOAD(k) b =         VADD(VMUL(vz, w1), VMUL(vy, w0)) ; a =         VSUB(VMUL(vz, w0), VMUL(vy, w1));  }
#define V1(k) { VLOAD(k) b = VADD(b, VADD(VMUL(vz, w1), VMUL(vy, w0))); a = VADD(a, VSUB(VMUL(vz, w0), VMUL(vy, w1))); }
#define V2(k) { VLOAD(k) b = VADD(b, VADD(VMUL(vz, w1), VMUL(vy, w0))); a = VADD(a, VSUB(VMUL(vy, w1), VMUL(vz, w0))); }
        f4 a, b;
        zlin[4*i]     = xl[18*(31 - i)];
        zlin[4*i + 1] = xr[18*(31 - i)];
        zlin[4*i + 2] = xl[1 + 18*(31 - i)];
        zlin[4*i + 3] = xr[1 + 18*(31 - i)];
        zlin[4*i + 64] = xl[1 + 18*(1 + i)];
        zlin[4*i + 64 + 1] = xr[1 + 18*(1 + i)];
        zlin[4*i - 64 + 2] = xl[18*(1 + i)];
        zlin[4*i - 64 + 3] = xr[18*(1 + i)];

        V0(0) V2(1) V1(2) V2(3) V1(4) V2(5) V1(6) V2(7)

        {
            /* Same 1/32768 scaling as mp3d_scale_pcm, applied to all four lanes. */
            static const f4 g_scale = { 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f };
            a = VMUL(a, g_scale);
            b = VMUL(b, g_scale);

#if HAVE_SSE
            _mm_store_ss(dstr + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1)));
            _mm_store_ss(dstr + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(1, 1, 1, 1)));
            _mm_store_ss(dstl + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0)));
            _mm_store_ss(dstl + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(0, 0, 0, 0)));
            _mm_store_ss(dstr + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 3, 3, 3)));
            _mm_store_ss(dstr + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 3, 3, 3)));
            _mm_store_ss(dstl + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2)));
            _mm_store_ss(dstl + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 2, 2, 2)));
#else
            vst1q_lane_f32(dstr + (15 - i)*nch, a, 1);
            vst1q_lane_f32(dstr + (17 + i)*nch, b, 1);
            vst1q_lane_f32(dstl + (15 - i)*nch, a, 0);
            vst1q_lane_f32(dstl + (17 + i)*nch, b, 0);
            vst1q_lane_f32(dstr + (47 - i)*nch, a, 3);
            vst1q_lane_f32(dstr + (49 + i)*nch, b, 3);
            vst1q_lane_f32(dstl + (47 - i)*nch, a, 2);
            vst1q_lane_f32(dstl + (49 + i)*nch, b, 2);
#endif
        }
    } else
#endif
#ifdef MINIMP3_ONLY_SIMD
    {}
#else
    for (i = 14; i >= 0; i--)
    {
#define LOAD(k) float w0 = *w++; float w1 = *w++; float *vz = &zlin[4*i - k*64]; float *vy = &zlin[4*i - (15 - k)*64];
#define S0(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j]  = vz[j]*w1 + vy[j]*w0, a[j]  = vz[j]*w0 - vy[j]*w1; }
#define S1(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j]*w1 + vy[j]*w0, a[j] += vz[j]*w0 - vy[j]*w1; }
#define S2(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j]*w1 + vy[j]*w0, a[j] += vy[j]*w1 - vz[j]*w0; }
        float a[4], b[4];

        zlin[4*i]     = xl[18*(31 - i)];
        zlin[4*i + 1] = xr[18*(31 - i)];
        zlin[4*i + 2] = xl[1 + 18*(31 - i)];
        zlin[4*i + 3] = xr[1 + 18*(31 - i)];
        zlin[4*(i + 16)]   = xl[1 + 18*(1 + i)];
        zlin[4*(i + 16) + 1] = xr[1 + 18*(1 + i)];
        zlin[4*(i - 16) + 2] = xl[18*(1 + i)];
        zlin[4*(i - 16) + 3] = xr[18*(1 + i)];

        S0(0) S2(1) S1(2) S2(3) S1(4) S2(5) S1(6) S2(7)

        dstr[(15 - i)*nch] = mp3d_scale_pcm(a[1]);
        dstr[(17 + i)*nch] = mp3d_scale_pcm(b[1]);
        dstl[(15 - i)*nch] = mp3d_scale_pcm(a[0]);
        dstl[(17 + i)*nch] = mp3d_scale_pcm(b[0]);
        dstr[(47 - i)*nch] = mp3d_scale_pcm(a[3]);
        dstr[(49 + i)*nch] = mp3d_scale_pcm(b[3]);
        dstl[(47 - i)*nch] = mp3d_scale_pcm(a[2]);
        dstl[(49 + i)*nch] = mp3d_scale_pcm(b[2]);
    }
#endif
}
/*
 * Turn one granule of subband samples into PCM.
 *
 * qmf_state: 15*64 floats of filter history carried between calls.
 * grbuf:     subband samples, 576 floats per channel; transformed in place.
 * nbands:    number of time slots to synthesise (processed two at a time).
 * nch:       number of channels.
 * pcm:       interleaved output, 32*nch floats per time slot.
 * lins:      work buffer; starts as a copy of qmf_state and grows by 64
 *            floats per time slot.
 */
static void mp3d_synth_granule(float *qmf_state, float *grbuf, int nbands, int nch, float *pcm, float *lins)
{
    int i;
    for (i = 0; i < nch; i++)
    {
        mp3d_DCT_II(grbuf + 576*i, nbands);
    }

    memcpy(lins, qmf_state, sizeof(float)*15*64);

    for (i = 0; i < nbands; i += 2)
    {
        mp3d_synth(grbuf + i, pcm + 32*nch*i, nch, lins + i*64);
    }
#ifndef MINIMP3_NONSTANDARD_BUT_LOGICAL
    if (nch == 1)
    {
        /* Mono: only every other history value is written back. */
        for (i = 0; i < 15*64; i += 2)
        {
            qmf_state[i] = lins[nbands*64 + i];
        }
    } else
#endif
    {
        memcpy(qmf_state, lins + nbands*64, sizeof(float)*15*64);
    }
}
1547 static int mp3d_match_frame(const uint8_t *hdr, int mp3_bytes, int frame_bytes)
1549 int i, nmatch;
1550 for (i = 0, nmatch = 0; nmatch < MAX_FRAME_SYNC_MATCHES; nmatch++)
1552 i += hdr_frame_bytes(hdr + i, frame_bytes) + hdr_padding(hdr + i);
1553 if (i + HDR_SIZE > mp3_bytes)
1554 return nmatch > 0;
1555 if (!hdr_compare(hdr, hdr + i))
1556 return 0;
1558 return 1;
1561 static int mp3d_find_frame(const uint8_t *mp3, int mp3_bytes, int *free_format_bytes, int *ptr_frame_bytes)
1563 int i, k;
1564 for (i = 0; i < mp3_bytes - HDR_SIZE; i++, mp3++)
1566 if (hdr_valid(mp3))
1568 int frame_bytes = hdr_frame_bytes(mp3, *free_format_bytes);
1569 int frame_and_padding = frame_bytes + hdr_padding(mp3);
1571 for (k = HDR_SIZE; !frame_bytes && k < MAX_FREE_FORMAT_FRAME_SIZE && i + 2*k < mp3_bytes - HDR_SIZE; k++)
1573 if (hdr_compare(mp3, mp3 + k))
1575 int fb = k - hdr_padding(mp3);
1576 int nextfb = fb + hdr_padding(mp3 + k);
1577 if (i + k + nextfb + HDR_SIZE > mp3_bytes || !hdr_compare(mp3, mp3 + k + nextfb))
1578 continue;
1579 frame_and_padding = k;
1580 frame_bytes = fb;
1581 *free_format_bytes = fb;
1584 if ((frame_bytes && i + frame_and_padding <= mp3_bytes &&
1585 mp3d_match_frame(mp3, mp3_bytes - i, frame_bytes)) ||
1586 (!i && frame_and_padding == mp3_bytes))
1588 *ptr_frame_bytes = frame_and_padding;
1589 return i;
1591 *free_format_bytes = 0;
1594 *ptr_frame_bytes = 0;
1595 return i;
1598 void mp3dec_init(mp3dec_t *dec)
1600 dec->header[0] = 0;
1603 int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, float *pcm, mp3dec_frame_info_t *info)
1605 int i = 0, igr, frame_size = 0, success = 1;
1606 const uint8_t *hdr;
1607 bs_t bs_frame[1];
1608 mp3dec_scratch_t scratch;
1610 if (mp3_bytes > 4 && dec->header[0] == 0xff && hdr_compare(dec->header, mp3))
1612 frame_size = hdr_frame_bytes(mp3, dec->free_format_bytes) + hdr_padding(mp3);
1613 if (frame_size != mp3_bytes && (frame_size + HDR_SIZE > mp3_bytes || !hdr_compare(mp3, mp3 + frame_size)))
1615 frame_size = 0;
1618 if (!frame_size)
1620 memset(dec, 0, sizeof(mp3dec_t));
1621 i = mp3d_find_frame(mp3, mp3_bytes, &dec->free_format_bytes, &frame_size);
1622 if (!frame_size || i + frame_size > mp3_bytes)
1624 info->frame_bytes = i;
1625 return 0;
1629 hdr = mp3 + i;
1630 memcpy(dec->header, hdr, HDR_SIZE);
1631 info->frame_bytes = i + frame_size;
1632 info->channels = HDR_IS_MONO(hdr) ? 1 : 2;
1633 info->hz = hdr_sample_rate_hz(hdr);
1634 info->layer = 4 - HDR_GET_LAYER(hdr);
1635 info->bitrate_kbps = hdr_bitrate_kbps(hdr);
1637 if (!pcm)
1639 return hdr_frame_samples(hdr);
1642 bs_init(bs_frame, hdr + HDR_SIZE, frame_size - HDR_SIZE);
1643 if (HDR_IS_CRC(hdr))
1645 get_bits(bs_frame, 16);
1648 if (info->layer == 3)
1650 int main_data_begin = L3_read_side_info(bs_frame, scratch.gr_info, hdr);
1651 if (main_data_begin < 0 || bs_frame->pos > bs_frame->limit)
1653 mp3dec_init(dec);
1654 return 0;
1656 success = L3_restore_reservoir(dec, bs_frame, &scratch, main_data_begin);
1657 if (success)
1659 for (igr = 0; igr < (HDR_TEST_MPEG1(hdr) ? 2 : 1); igr++, pcm += 576*info->channels)
1661 memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
1662 L3_decode(dec, &scratch, scratch.gr_info + igr*info->channels, info->channels);
1663 mp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 18, info->channels, pcm, scratch.syn[0]);
1666 L3_save_reservoir(dec, &scratch);
1667 } else
1669 #ifdef MINIMP3_ONLY_MP3
1670 return 0;
1671 #else
1672 L12_scale_info sci[1];
1673 L12_read_scale_info(hdr, bs_frame, sci);
1675 memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
1676 for (i = 0, igr = 0; igr < 3; igr++)
1678 if (12 == (i += L12_dequantize_granule(scratch.grbuf[0] + i, bs_frame, sci, info->layer | 1)))
1680 i = 0;
1681 L12_apply_scf_384(sci, sci->scf + igr, scratch.grbuf[0]);
1682 mp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 12, info->channels, pcm, scratch.syn[0]);
1683 memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
1684 pcm += 384*info->channels;
1686 if (bs_frame->pos > bs_frame->limit)
1688 mp3dec_init(dec);
1689 return 0;
1692 #endif
1694 return success*hdr_frame_samples(dec->header);
1697 int mp3dec_decode_frame_s16(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, short *pcm, mp3dec_frame_info_t *info)
1699 if(pcm)
1701 float temps[MINIMP3_MAX_SAMPLES_PER_FRAME];
1702 int samples_got;
1704 samples_got = mp3dec_decode_frame(dec, mp3, mp3_bytes, temps, info);
1705 if(samples_got > 0)
1707 int todo = samples_got * info->channels;
1708 int i;
1710 for(i = 0;i < todo;i++)
1712 float sample = temps[i]*32768.0f;
1713 short s = 0;
1714 if(sample <= -32767.5f)
1715 s = -32768;
1716 else if(sample >= 32766.5f)
1717 s = 32767;
1718 else
1720 s = (short)(sample + 0.5f);
1721 s += s>>15;
1724 *(pcm++) = s;
1728 return samples_got;
1731 return mp3dec_decode_frame(dec, mp3, mp3_bytes, NULL, info);
1734 #endif /*MINIMP3_IMPLEMENTATION*/
1735 #endif /*MINIMP3_H*/