/*
 * Copyright (C) 2008 Konstantin Shishkov
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/**
 * @file libavcodec/aacenc.c
 */
/***********************************
 *              TODO:
 * psy model selection with some option
 * add sane pulse detection
 * add temporal noise shaping
 ***********************************/
#include <stdint.h>
#include <string.h>

#include "mpeg4audio.h"
/**
 * Band widths, in coefficients, partitioning a 1024-coefficient long window.
 * Selected through swb_size_1024[] for sample rate indices 0 and 1.
 * The 41 entries sum to 1024.
 */
static const uint8_t swb_size_1024_96[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44,
    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
};
/**
 * Band widths, in coefficients, partitioning a 1024-coefficient long window.
 * Selected through swb_size_1024[] for sample rate index 2.
 * The 47 entries sum to 1024.
 */
static const uint8_t swb_size_1024_64[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8,
    12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36,
    40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40
};
/**
 * Band widths, in coefficients, partitioning a 1024-coefficient long window.
 * Selected through swb_size_1024[] for sample rate indices 3 and 4.
 * The 49 entries sum to 1024.
 */
static const uint8_t swb_size_1024_48[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    /* final band: the entries above cover 928 coefficients, this one takes
     * the remaining 96 so the table spans the whole 1024-coefficient window
     * like every other long-window table */
    96
};
/**
 * Band widths, in coefficients, partitioning a 1024-coefficient long window.
 * Selected through swb_size_1024[] for sample rate index 5.
 * The 51 entries sum to 1024.
 */
static const uint8_t swb_size_1024_32[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
};
/**
 * Band widths, in coefficients, partitioning a 1024-coefficient long window.
 * Selected through swb_size_1024[] for sample rate indices 6 and 7.
 * The 47 entries sum to 1024.
 */
static const uint8_t swb_size_1024_24[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64
};
/**
 * Band widths, in coefficients, partitioning a 1024-coefficient long window.
 * Selected through swb_size_1024[] for sample rate indices 8 to 10.
 * The 43 entries sum to 1024.
 */
static const uint8_t swb_size_1024_16[] = {
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28,
    32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64
};
/**
 * Band widths, in coefficients, partitioning a 1024-coefficient long window.
 * Selected through swb_size_1024[] for sample rate index 11.
 * The 40 entries sum to 1024.
 */
static const uint8_t swb_size_1024_8[] = {
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28,
    32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
};
86 static const uint8_t * const swb_size_1024
[] = {
87 swb_size_1024_96
, swb_size_1024_96
, swb_size_1024_64
,
88 swb_size_1024_48
, swb_size_1024_48
, swb_size_1024_32
,
89 swb_size_1024_24
, swb_size_1024_24
, swb_size_1024_16
,
90 swb_size_1024_16
, swb_size_1024_16
, swb_size_1024_8
/**
 * Band widths, in coefficients, partitioning a 128-coefficient short window.
 * Selected through swb_size_128[] for sample rate indices 0 to 2.
 * The 12 entries sum to 128.
 */
static const uint8_t swb_size_128_96[] = {
    4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
};
/**
 * Band widths, in coefficients, partitioning a 128-coefficient short window.
 * Selected through swb_size_128[] for sample rate indices 3 to 5.
 * The 14 entries sum to 128.
 */
static const uint8_t swb_size_128_48[] = {
    4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16
};
/**
 * Band widths, in coefficients, partitioning a 128-coefficient short window.
 * Selected through swb_size_128[] for sample rate indices 6 and 7.
 * The 15 entries sum to 128.
 */
static const uint8_t swb_size_128_24[] = {
    4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20
};
/**
 * Band widths, in coefficients, partitioning a 128-coefficient short window.
 * Selected through swb_size_128[] for sample rate indices 8 to 10.
 * The 15 entries sum to 128.
 */
static const uint8_t swb_size_128_16[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20
};
/**
 * Band widths, in coefficients, partitioning a 128-coefficient short window.
 * Selected through swb_size_128[] for sample rate index 11.
 * The 15 entries sum to 128.
 */
static const uint8_t swb_size_128_8[] = {
    4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20
};
113 static const uint8_t * const swb_size_128
[] = {
114 /* the last entry on the following row is swb_size_128_64 but is a
115 duplicate of swb_size_128_96 */
116 swb_size_128_96
, swb_size_128_96
, swb_size_128_96
,
117 swb_size_128_48
, swb_size_128_48
, swb_size_128_48
,
118 swb_size_128_24
, swb_size_128_24
, swb_size_128_16
,
119 swb_size_128_16
, swb_size_128_16
, swb_size_128_8
/**
 * bits needed to code codebook run value for long windows
 *
 * Indexed by run length 0..63: 5 bits up to 30, 10 bits up to 62, 15 for 63.
 */
static const uint8_t run_value_bits_long[64] = {
     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15
};
/**
 * bits needed to code codebook run value for short windows
 *
 * Indexed by run length 0..15: 3 bits up to 6, 6 bits up to 14, 9 for 15.
 */
static const uint8_t run_value_bits_short[16] = {
    3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
};
135 static const uint8_t* const run_value_bits
[2] = {
136 run_value_bits_long
, run_value_bits_short
139 /** default channel configurations */
140 static const uint8_t aac_chan_configs
[6][5] = {
141 {1, TYPE_SCE
}, // 1 channel - single channel element
142 {1, TYPE_CPE
}, // 2 channels - channel pair
143 {2, TYPE_SCE
, TYPE_CPE
}, // 3 channels - center + stereo
144 {3, TYPE_SCE
, TYPE_CPE
, TYPE_SCE
}, // 4 channels - front center + stereo + back center
145 {3, TYPE_SCE
, TYPE_CPE
, TYPE_CPE
}, // 5 channels - front center + stereo + back stereo
146 {4, TYPE_SCE
, TYPE_CPE
, TYPE_CPE
, TYPE_LFE
}, // 6 channels - front center + stereo + back stereo + LFE
/**
 * structure used in optimal codebook search
 */
typedef struct BandCodingPath {
    int prev_idx; ///< pointer to the previous path point
    int codebook; ///< codebook for coding band run
    int bits;     ///< number of bits needed to code given number of bands
} BandCodingPath;
159 * AAC encoder context
163 MDCTContext mdct1024
; ///< long (1024 samples) frame transform context
164 MDCTContext mdct128
; ///< short (128 samples) frame transform context
166 DECLARE_ALIGNED_16(FFTSample
, output
[2048]); ///< temporary buffer for MDCT input coefficients
167 int16_t* samples
; ///< saved preprocessed input
169 int samplerate_index
; ///< MPEG-4 samplerate index
171 ChannelElement
*cpe
; ///< channel elements
172 AACPsyContext psy
; ///< psychoacoustic model context
177 * Make AAC audio config object.
178 * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
180 static void put_audio_specific_config(AVCodecContext
*avctx
)
183 AACEncContext
*s
= avctx
->priv_data
;
185 init_put_bits(&pb
, avctx
->extradata
, avctx
->extradata_size
*8);
186 put_bits(&pb
, 5, 2); //object type - AAC-LC
187 put_bits(&pb
, 4, s
->samplerate_index
); //sample rate index
188 put_bits(&pb
, 4, avctx
->channels
);
190 put_bits(&pb
, 1, 0); //frame length - 1024 samples
191 put_bits(&pb
, 1, 0); //does not depend on core coder
192 put_bits(&pb
, 1, 0); //is not extension
196 static av_cold
int aac_encode_init(AVCodecContext
*avctx
)
198 AACEncContext
*s
= avctx
->priv_data
;
201 avctx
->frame_size
= 1024;
203 for(i
= 0; i
< 16; i
++)
204 if(avctx
->sample_rate
== ff_mpeg4audio_sample_rates
[i
])
207 av_log(avctx
, AV_LOG_ERROR
, "Unsupported sample rate %d\n", avctx
->sample_rate
);
210 if(avctx
->channels
> 6){
211 av_log(avctx
, AV_LOG_ERROR
, "Unsupported number of channels: %d\n", avctx
->channels
);
214 s
->samplerate_index
= i
;
216 dsputil_init(&s
->dsp
, avctx
);
217 ff_mdct_init(&s
->mdct1024
, 11, 0, 1.0);
218 ff_mdct_init(&s
->mdct128
, 8, 0, 1.0);
220 ff_kbd_window_init(ff_aac_kbd_long_1024
, 4.0, 1024);
221 ff_kbd_window_init(ff_aac_kbd_short_128
, 6.0, 128);
222 ff_sine_window_init(ff_sine_1024
, 1024);
223 ff_sine_window_init(ff_sine_128
, 128);
225 s
->samples
= av_malloc(2 * 1024 * avctx
->channels
* sizeof(s
->samples
[0]));
226 s
->cpe
= av_mallocz(sizeof(ChannelElement
) * aac_chan_configs
[avctx
->channels
-1][0]);
227 if(ff_aac_psy_init(&s
->psy
, avctx
, AAC_PSY_3GPP
,
228 aac_chan_configs
[avctx
->channels
-1][0], 0,
229 swb_size_1024
[i
], ff_aac_num_swb_1024
[i
], swb_size_128
[i
], ff_aac_num_swb_128
[i
]) < 0){
230 av_log(avctx
, AV_LOG_ERROR
, "Cannot initialize selected model.\n");
233 avctx
->extradata
= av_malloc(2);
234 avctx
->extradata_size
= 2;
235 put_audio_specific_config(avctx
);
240 * Encode ics_info element.
241 * @see Table 4.6 (syntax of ics_info)
243 static void put_ics_info(AACEncContext
*s
, IndividualChannelStream
*info
)
247 put_bits(&s
->pb
, 1, 0); // ics_reserved bit
248 put_bits(&s
->pb
, 2, info
->window_sequence
[0]);
249 put_bits(&s
->pb
, 1, info
->use_kb_window
[0]);
250 if(info
->window_sequence
[0] != EIGHT_SHORT_SEQUENCE
){
251 put_bits(&s
->pb
, 6, info
->max_sfb
);
252 put_bits(&s
->pb
, 1, 0); // no prediction
254 put_bits(&s
->pb
, 4, info
->max_sfb
);
255 for(i
= 1; i
< info
->num_windows
; i
++)
256 put_bits(&s
->pb
, 1, info
->group_len
[i
]);
261 * Calculate the number of bits needed to code all coefficient signs in current band.
263 static int calculate_band_sign_bits(AACEncContext
*s
, SingleChannelElement
*sce
,
264 int group_len
, int start
, int size
)
268 for(w
= 0; w
< group_len
; w
++){
269 for(i
= 0; i
< size
; i
++){
270 if(sce
->icoefs
[start
+ i
])
281 static void encode_pulses(AACEncContext
*s
, Pulse
*pulse
)
285 put_bits(&s
->pb
, 1, !!pulse
->num_pulse
);
286 if(!pulse
->num_pulse
) return;
288 put_bits(&s
->pb
, 2, pulse
->num_pulse
- 1);
289 put_bits(&s
->pb
, 6, pulse
->start
);
290 for(i
= 0; i
< pulse
->num_pulse
; i
++){
291 put_bits(&s
->pb
, 5, pulse
->pos
[i
]);
292 put_bits(&s
->pb
, 4, pulse
->amp
[i
]);
297 * Encode spectral coefficients processed by psychoacoustic model.
299 static void encode_spectral_coeffs(AACEncContext
*s
, SingleChannelElement
*sce
)
301 int start
, i
, w
, w2
, wg
;
304 for(wg
= 0; wg
< sce
->ics
.num_window_groups
; wg
++){
306 for(i
= 0; i
< sce
->ics
.max_sfb
; i
++){
307 if(sce
->zeroes
[w
*16 + i
]){
308 start
+= sce
->ics
.swb_sizes
[i
];
311 for(w2
= w
; w2
< w
+ sce
->ics
.group_len
[wg
]; w2
++){
312 encode_band_coeffs(s
, sce
, start
+ w2
*128,
313 sce
->ics
.swb_sizes
[i
],
314 sce
->band_type
[w
*16 + i
]);
316 start
+= sce
->ics
.swb_sizes
[i
];
318 w
+= sce
->ics
.group_len
[wg
];
323 * Write some auxiliary information about the created AAC file.
325 static void put_bitstream_info(AVCodecContext
*avctx
, AACEncContext
*s
, const char *name
)
327 int i
, namelen
, padbits
;
329 namelen
= strlen(name
) + 2;
330 put_bits(&s
->pb
, 3, TYPE_FIL
);
331 put_bits(&s
->pb
, 4, FFMIN(namelen
, 15));
333 put_bits(&s
->pb
, 8, namelen
- 16);
334 put_bits(&s
->pb
, 4, 0); //extension type - filler
335 padbits
= 8 - (put_bits_count(&s
->pb
) & 7);
336 align_put_bits(&s
->pb
);
337 for(i
= 0; i
< namelen
- 2; i
++)
338 put_bits(&s
->pb
, 8, name
[i
]);
339 put_bits(&s
->pb
, 12 - padbits
, 0);
342 static av_cold
int aac_encode_end(AVCodecContext
*avctx
)
344 AACEncContext
*s
= avctx
->priv_data
;
346 ff_mdct_end(&s
->mdct1024
);
347 ff_mdct_end(&s
->mdct128
);
348 ff_aac_psy_end(&s
->psy
);
349 av_freep(&s
->samples
);
354 AVCodec aac_encoder
= {
358 sizeof(AACEncContext
),
362 .capabilities
= CODEC_CAP_SMALL_LAST_FRAME
| CODEC_CAP_DELAY
,
363 .sample_fmts
= (enum SampleFormat
[]){SAMPLE_FMT_S16
,SAMPLE_FMT_NONE
},
364 .long_name
= NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),