2 * WMA compatible encoder
3 * Copyright (c) 2007 Michael Niedermayer
5 * This file is part of Libav.
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
30 static int encode_init(AVCodecContext
* avctx
){
31 WMACodecContext
*s
= avctx
->priv_data
;
32 int i
, flags1
, flags2
, block_align
;
37 if(avctx
->channels
> MAX_CHANNELS
) {
38 av_log(avctx
, AV_LOG_ERROR
, "too many channels: got %i, need %i or fewer",
39 avctx
->channels
, MAX_CHANNELS
);
40 return AVERROR(EINVAL
);
43 if (avctx
->sample_rate
> 48000) {
44 av_log(avctx
, AV_LOG_ERROR
, "sample rate is too high: %d > 48kHz",
46 return AVERROR(EINVAL
);
49 if(avctx
->bit_rate
< 24*1000) {
50 av_log(avctx
, AV_LOG_ERROR
, "bitrate too low: got %i, need 24000 or higher\n",
52 return AVERROR(EINVAL
);
55 #if FF_API_OLD_ENCODE_AUDIO
56 if (!(avctx
->coded_frame
= avcodec_alloc_frame()))
57 return AVERROR(ENOMEM
);
60 /* extract flag infos */
63 if (avctx
->codec
->id
== AV_CODEC_ID_WMAV1
) {
64 extradata
= av_malloc(4);
65 avctx
->extradata_size
= 4;
66 AV_WL16(extradata
, flags1
);
67 AV_WL16(extradata
+2, flags2
);
68 } else if (avctx
->codec
->id
== AV_CODEC_ID_WMAV2
) {
69 extradata
= av_mallocz(10);
70 avctx
->extradata_size
= 10;
71 AV_WL32(extradata
, flags1
);
72 AV_WL16(extradata
+4, flags2
);
75 avctx
->extradata
= extradata
;
76 s
->use_exp_vlc
= flags2
& 0x0001;
77 s
->use_bit_reservoir
= flags2
& 0x0002;
78 s
->use_variable_block_len
= flags2
& 0x0004;
79 if (avctx
->channels
== 2)
82 ff_wma_init(avctx
, flags2
);
85 for(i
= 0; i
< s
->nb_block_sizes
; i
++)
86 ff_mdct_init(&s
->mdct_ctx
[i
], s
->frame_len_bits
- i
+ 1, 0, 1.0);
88 block_align
= avctx
->bit_rate
* (int64_t)s
->frame_len
/
89 (avctx
->sample_rate
* 8);
90 block_align
= FFMIN(block_align
, MAX_CODED_SUPERFRAME_SIZE
);
91 avctx
->block_align
= block_align
;
92 avctx
->bit_rate
= avctx
->block_align
* 8LL * avctx
->sample_rate
/
94 avctx
->frame_size
= avctx
->delay
= s
->frame_len
;
100 static void apply_window_and_mdct(AVCodecContext
* avctx
, const AVFrame
*frame
)
102 WMACodecContext
*s
= avctx
->priv_data
;
103 float **audio
= (float **)frame
->extended_data
;
104 int len
= frame
->nb_samples
;
105 int window_index
= s
->frame_len_bits
- s
->block_len_bits
;
106 FFTContext
*mdct
= &s
->mdct_ctx
[window_index
];
108 const float * win
= s
->windows
[window_index
];
109 int window_len
= 1 << s
->block_len_bits
;
110 float n
= 2.0 * 32768.0 / window_len
;
112 for (ch
= 0; ch
< avctx
->channels
; ch
++) {
113 memcpy(s
->output
, s
->frame_out
[ch
], window_len
* sizeof(*s
->output
));
114 s
->fdsp
.vector_fmul_scalar(s
->frame_out
[ch
], audio
[ch
], n
, len
);
115 s
->fdsp
.vector_fmul_reverse(&s
->output
[window_len
], s
->frame_out
[ch
], win
, len
);
116 s
->fdsp
.vector_fmul(s
->frame_out
[ch
], s
->frame_out
[ch
], win
, len
);
117 mdct
->mdct_calc(mdct
, s
->coefs
[ch
], s
->output
);
121 //FIXME use for decoding too
122 static void init_exp(WMACodecContext
*s
, int ch
, const int *exp_param
){
125 float v
, *q
, max_scale
, *q_end
;
127 ptr
= s
->exponent_bands
[s
->frame_len_bits
- s
->block_len_bits
];
128 q
= s
->exponents
[ch
];
129 q_end
= q
+ s
->block_len
;
132 /* XXX: use a table */
133 v
= pow(10, *exp_param
++ * (1.0 / 16.0));
134 max_scale
= FFMAX(max_scale
, v
);
140 s
->max_exponent
[ch
] = max_scale
;
143 static void encode_exp_vlc(WMACodecContext
*s
, int ch
, const int *exp_param
){
148 ptr
= s
->exponent_bands
[s
->frame_len_bits
- s
->block_len_bits
];
149 q
= s
->exponents
[ch
];
150 q_end
= q
+ s
->block_len
;
151 if (s
->version
== 1) {
152 last_exp
= *exp_param
++;
153 assert(last_exp
-10 >= 0 && last_exp
-10 < 32);
154 put_bits(&s
->pb
, 5, last_exp
- 10);
159 int exp
= *exp_param
++;
160 int code
= exp
- last_exp
+ 60;
161 assert(code
>= 0 && code
< 120);
162 put_bits(&s
->pb
, ff_aac_scalefactor_bits
[code
], ff_aac_scalefactor_code
[code
]);
163 /* XXX: use a table */
169 static int encode_block(WMACodecContext
*s
, float (*src_coefs
)[BLOCK_MAX_SIZE
], int total_gain
){
170 int v
, bsize
, ch
, coef_nb_bits
, parse_exponents
;
172 int nb_coefs
[MAX_CHANNELS
];
173 static const int fixed_exp
[25]={20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20};
175 //FIXME remove duplication relative to decoder
176 if (s
->use_variable_block_len
) {
177 assert(0); //FIXME not implemented
179 /* fixed block len */
180 s
->next_block_len_bits
= s
->frame_len_bits
;
181 s
->prev_block_len_bits
= s
->frame_len_bits
;
182 s
->block_len_bits
= s
->frame_len_bits
;
185 s
->block_len
= 1 << s
->block_len_bits
;
186 // assert((s->block_pos + s->block_len) <= s->frame_len);
187 bsize
= s
->frame_len_bits
- s
->block_len_bits
;
190 v
= s
->coefs_end
[bsize
] - s
->coefs_start
;
191 for (ch
= 0; ch
< s
->avctx
->channels
; ch
++)
194 int n4
= s
->block_len
/ 2;
195 mdct_norm
= 1.0 / (float)n4
;
196 if (s
->version
== 1) {
197 mdct_norm
*= sqrt(n4
);
201 if (s
->avctx
->channels
== 2) {
202 put_bits(&s
->pb
, 1, !!s
->ms_stereo
);
205 for (ch
= 0; ch
< s
->avctx
->channels
; ch
++) {
206 s
->channel_coded
[ch
] = 1; //FIXME only set channel_coded when needed, instead of always
207 if (s
->channel_coded
[ch
]) {
208 init_exp(s
, ch
, fixed_exp
);
212 for (ch
= 0; ch
< s
->avctx
->channels
; ch
++) {
213 if (s
->channel_coded
[ch
]) {
215 float *coefs
, *exponents
, mult
;
218 coefs1
= s
->coefs1
[ch
];
219 exponents
= s
->exponents
[ch
];
220 mult
= pow(10, total_gain
* 0.05) / s
->max_exponent
[ch
];
222 coefs
= src_coefs
[ch
];
223 if (s
->use_noise_coding
&& 0) {
224 assert(0); //FIXME not implemented
226 coefs
+= s
->coefs_start
;
228 for(i
= 0;i
< n
; i
++){
229 double t
= *coefs
++ / (exponents
[i
] * mult
);
230 if(t
<-32768 || t
>32767)
233 coefs1
[i
] = lrint(t
);
240 for (ch
= 0; ch
< s
->avctx
->channels
; ch
++) {
241 int a
= s
->channel_coded
[ch
];
242 put_bits(&s
->pb
, 1, a
);
249 for(v
= total_gain
-1; v
>=127; v
-= 127)
250 put_bits(&s
->pb
, 7, 127);
251 put_bits(&s
->pb
, 7, v
);
253 coef_nb_bits
= ff_wma_total_gain_to_bits(total_gain
);
255 if (s
->use_noise_coding
) {
256 for (ch
= 0; ch
< s
->avctx
->channels
; ch
++) {
257 if (s
->channel_coded
[ch
]) {
259 n
= s
->exponent_high_sizes
[bsize
];
261 put_bits(&s
->pb
, 1, s
->high_band_coded
[ch
][i
]= 0);
263 nb_coefs
[ch
] -= s
->exponent_high_bands
[bsize
][i
];
270 if (s
->block_len_bits
!= s
->frame_len_bits
) {
271 put_bits(&s
->pb
, 1, parse_exponents
);
274 if (parse_exponents
) {
275 for (ch
= 0; ch
< s
->avctx
->channels
; ch
++) {
276 if (s
->channel_coded
[ch
]) {
277 if (s
->use_exp_vlc
) {
278 encode_exp_vlc(s
, ch
, fixed_exp
);
280 assert(0); //FIXME not implemented
281 // encode_exp_lsp(s, ch);
286 assert(0); //FIXME not implemented
289 for (ch
= 0; ch
< s
->avctx
->channels
; ch
++) {
290 if (s
->channel_coded
[ch
]) {
293 tindex
= (ch
== 1 && s
->ms_stereo
);
294 ptr
= &s
->coefs1
[ch
][0];
295 eptr
= ptr
+ nb_coefs
[ch
];
298 for(;ptr
< eptr
; ptr
++){
301 int abs_level
= FFABS(level
);
303 if(abs_level
<= s
->coef_vlcs
[tindex
]->max_level
){
304 if(run
< s
->coef_vlcs
[tindex
]->levels
[abs_level
-1])
305 code
= run
+ s
->int_table
[tindex
][abs_level
-1];
308 assert(code
< s
->coef_vlcs
[tindex
]->n
);
309 put_bits(&s
->pb
, s
->coef_vlcs
[tindex
]->huffbits
[code
], s
->coef_vlcs
[tindex
]->huffcodes
[code
]);
312 if(1<<coef_nb_bits
<= abs_level
)
315 put_bits(&s
->pb
, coef_nb_bits
, abs_level
);
316 put_bits(&s
->pb
, s
->frame_len_bits
, run
);
318 put_bits(&s
->pb
, 1, level
< 0); //FIXME the sign is fliped somewhere
325 put_bits(&s
->pb
, s
->coef_vlcs
[tindex
]->huffbits
[1], s
->coef_vlcs
[tindex
]->huffcodes
[1]);
327 if (s
->version
== 1 && s
->avctx
->channels
>= 2) {
328 avpriv_align_put_bits(&s
->pb
);
334 static int encode_frame(WMACodecContext
*s
, float (*src_coefs
)[BLOCK_MAX_SIZE
], uint8_t *buf
, int buf_size
, int total_gain
){
335 init_put_bits(&s
->pb
, buf
, buf_size
);
337 if (s
->use_bit_reservoir
) {
338 assert(0);//FIXME not implemented
340 if(encode_block(s
, src_coefs
, total_gain
) < 0)
344 avpriv_align_put_bits(&s
->pb
);
346 return put_bits_count(&s
->pb
) / 8 - s
->avctx
->block_align
;
349 static int encode_superframe(AVCodecContext
*avctx
, AVPacket
*avpkt
,
350 const AVFrame
*frame
, int *got_packet_ptr
)
352 WMACodecContext
*s
= avctx
->priv_data
;
353 int i
, total_gain
, ret
;
355 s
->block_len_bits
= s
->frame_len_bits
; //required by non variable block len
356 s
->block_len
= 1 << s
->block_len_bits
;
358 apply_window_and_mdct(avctx
, frame
);
364 for(i
= 0; i
< s
->block_len
; i
++) {
365 a
= s
->coefs
[0][i
]*0.5;
366 b
= s
->coefs
[1][i
]*0.5;
367 s
->coefs
[0][i
] = a
+ b
;
368 s
->coefs
[1][i
] = a
- b
;
372 if ((ret
= ff_alloc_packet(avpkt
, 2 * MAX_CODED_SUPERFRAME_SIZE
))) {
373 av_log(avctx
, AV_LOG_ERROR
, "Error getting output packet\n");
380 int error
= encode_frame(s
, s
->coefs
, avpkt
->data
, avpkt
->size
,
387 best
= encode_frame(s
, s
->coefs
, avpkt
->data
, avpkt
->size
, total_gain
);
389 int scoreL
= encode_frame(s
, s
->coefs
, avpkt
->data
, avpkt
->size
, total_gain
- i
);
390 int scoreR
= encode_frame(s
, s
->coefs
, avpkt
->data
, avpkt
->size
, total_gain
+ i
);
391 av_log(NULL
, AV_LOG_ERROR
, "%d %d %d (%d)\n", scoreL
, best
, scoreR
, total_gain
);
392 if(scoreL
< FFMIN(best
, scoreR
)){
395 }else if(scoreR
< best
){
402 if ((i
= encode_frame(s
, s
->coefs
, avpkt
->data
, avpkt
->size
, total_gain
)) >= 0) {
403 av_log(avctx
, AV_LOG_ERROR
, "required frame size too large. please "
404 "use a higher bit rate.\n");
405 return AVERROR(EINVAL
);
407 assert((put_bits_count(&s
->pb
) & 7) == 0);
409 put_bits(&s
->pb
, 8, 'N');
411 flush_put_bits(&s
->pb
);
413 if (frame
->pts
!= AV_NOPTS_VALUE
)
414 avpkt
->pts
= frame
->pts
- ff_samples_to_time_base(avctx
, avctx
->delay
);
416 avpkt
->size
= avctx
->block_align
;
421 AVCodec ff_wmav1_encoder
= {
423 .type
= AVMEDIA_TYPE_AUDIO
,
424 .id
= AV_CODEC_ID_WMAV1
,
425 .priv_data_size
= sizeof(WMACodecContext
),
427 .encode2
= encode_superframe
,
429 .sample_fmts
= (const enum AVSampleFormat
[]){ AV_SAMPLE_FMT_FLTP
,
430 AV_SAMPLE_FMT_NONE
},
431 .long_name
= NULL_IF_CONFIG_SMALL("Windows Media Audio 1"),
434 AVCodec ff_wmav2_encoder
= {
436 .type
= AVMEDIA_TYPE_AUDIO
,
437 .id
= AV_CODEC_ID_WMAV2
,
438 .priv_data_size
= sizeof(WMACodecContext
),
440 .encode2
= encode_superframe
,
442 .sample_fmts
= (const enum AVSampleFormat
[]){ AV_SAMPLE_FMT_FLTP
,
443 AV_SAMPLE_FMT_NONE
},
444 .long_name
= NULL_IF_CONFIG_SMALL("Windows Media Audio 2"),