3 * Copyright (c) 2008 Jaikrishnan Menon <realityman@gmx.net>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29 #define DEFAULT_FRAME_SIZE 4096
30 #define DEFAULT_SAMPLE_SIZE 16
31 #define MAX_CHANNELS 8
32 #define ALAC_EXTRADATA_SIZE 36
33 #define ALAC_FRAME_HEADER_SIZE 55
34 #define ALAC_FRAME_FOOTER_SIZE 3
36 #define ALAC_ESCAPE_CODE 0x1FF
37 #define ALAC_MAX_LPC_ORDER 30
38 #define DEFAULT_MAX_PRED_ORDER 6
39 #define DEFAULT_MIN_PRED_ORDER 4
40 #define ALAC_MAX_LPC_PRECISION 9
41 #define ALAC_MAX_LPC_SHIFT 9
43 #define ALAC_CHMODE_LEFT_RIGHT 0
44 #define ALAC_CHMODE_LEFT_SIDE 1
45 #define ALAC_CHMODE_RIGHT_SIDE 2
46 #define ALAC_CHMODE_MID_SIDE 3
48 typedef struct RiceContext
{
55 typedef struct LPCContext
{
57 int lpc_coeff
[ALAC_MAX_LPC_ORDER
+1];
61 typedef struct AlacEncodeContext
{
62 int compression_level
;
63 int min_prediction_order
;
64 int max_prediction_order
;
65 int max_coded_frame_size
;
66 int write_sample_size
;
67 int32_t sample_buf
[MAX_CHANNELS
][DEFAULT_FRAME_SIZE
];
68 int32_t predictor_buf
[DEFAULT_FRAME_SIZE
];
69 int interlacing_shift
;
70 int interlacing_leftweight
;
73 LPCContext lpc
[MAX_CHANNELS
];
75 AVCodecContext
*avctx
;
79 static void init_sample_buffers(AlacEncodeContext
*s
, int16_t *input_samples
)
83 for(ch
=0;ch
<s
->avctx
->channels
;ch
++) {
84 int16_t *sptr
= input_samples
+ ch
;
85 for(i
=0;i
<s
->avctx
->frame_size
;i
++) {
86 s
->sample_buf
[ch
][i
] = *sptr
;
87 sptr
+= s
->avctx
->channels
;
92 static void encode_scalar(AlacEncodeContext
*s
, int x
, int k
, int write_sample_size
)
96 k
= FFMIN(k
, s
->rc
.k_modifier
);
102 // write escape code and sample value directly
103 put_bits(&s
->pbctx
, 9, ALAC_ESCAPE_CODE
);
104 put_bits(&s
->pbctx
, write_sample_size
, x
);
107 put_bits(&s
->pbctx
, q
, (1<<q
) - 1);
108 put_bits(&s
->pbctx
, 1, 0);
112 put_bits(&s
->pbctx
, k
, r
+1);
114 put_bits(&s
->pbctx
, k
-1, 0);
119 static void write_frame_header(AlacEncodeContext
*s
, int is_verbatim
)
121 put_bits(&s
->pbctx
, 3, s
->avctx
->channels
-1); // No. of channels -1
122 put_bits(&s
->pbctx
, 16, 0); // Seems to be zero
123 put_bits(&s
->pbctx
, 1, 1); // Sample count is in the header
124 put_bits(&s
->pbctx
, 2, 0); // FIXME: Wasted bytes field
125 put_bits(&s
->pbctx
, 1, is_verbatim
); // Audio block is verbatim
126 put_bits32(&s
->pbctx
, s
->avctx
->frame_size
); // No. of samples in the frame
129 static void calc_predictor_params(AlacEncodeContext
*s
, int ch
)
131 int32_t coefs
[MAX_LPC_ORDER
][MAX_LPC_ORDER
];
132 int shift
[MAX_LPC_ORDER
];
135 if (s
->avctx
->compression_level
> 1) {
136 opt_order
= ff_lpc_calc_coefs(&s
->dspctx
, s
->sample_buf
[ch
],
137 s
->avctx
->frame_size
,
138 s
->min_prediction_order
,
139 s
->max_prediction_order
,
140 ALAC_MAX_LPC_PRECISION
, coefs
, shift
, 1,
141 ORDER_METHOD_EST
, ALAC_MAX_LPC_SHIFT
, 1);
143 s
->lpc
[ch
].lpc_order
= opt_order
;
144 s
->lpc
[ch
].lpc_quant
= shift
[opt_order
-1];
145 memcpy(s
->lpc
[ch
].lpc_coeff
, coefs
[opt_order
-1], opt_order
*sizeof(int));
147 s
->lpc
[ch
].lpc_order
= 6;
148 s
->lpc
[ch
].lpc_quant
= 6;
149 s
->lpc
[ch
].lpc_coeff
[0] = 160;
150 s
->lpc
[ch
].lpc_coeff
[1] = -190;
151 s
->lpc
[ch
].lpc_coeff
[2] = 170;
152 s
->lpc
[ch
].lpc_coeff
[3] = -130;
153 s
->lpc
[ch
].lpc_coeff
[4] = 80;
154 s
->lpc
[ch
].lpc_coeff
[5] = -25;
158 static int estimate_stereo_mode(int32_t *left_ch
, int32_t *right_ch
, int n
)
165 /* calculate sum of 2nd order residual for each channel */
166 sum
[0] = sum
[1] = sum
[2] = sum
[3] = 0;
168 lt
= left_ch
[i
] - 2*left_ch
[i
-1] + left_ch
[i
-2];
169 rt
= right_ch
[i
] - 2*right_ch
[i
-1] + right_ch
[i
-2];
170 sum
[2] += FFABS((lt
+ rt
) >> 1);
171 sum
[3] += FFABS(lt
- rt
);
176 /* calculate score for each mode */
177 score
[0] = sum
[0] + sum
[1];
178 score
[1] = sum
[0] + sum
[3];
179 score
[2] = sum
[1] + sum
[3];
180 score
[3] = sum
[2] + sum
[3];
182 /* return mode with lowest score */
185 if(score
[i
] < score
[best
]) {
192 static void alac_stereo_decorrelation(AlacEncodeContext
*s
)
194 int32_t *left
= s
->sample_buf
[0], *right
= s
->sample_buf
[1];
195 int i
, mode
, n
= s
->avctx
->frame_size
;
198 mode
= estimate_stereo_mode(left
, right
, n
);
202 case ALAC_CHMODE_LEFT_RIGHT
:
203 s
->interlacing_leftweight
= 0;
204 s
->interlacing_shift
= 0;
207 case ALAC_CHMODE_LEFT_SIDE
:
209 right
[i
] = left
[i
] - right
[i
];
211 s
->interlacing_leftweight
= 1;
212 s
->interlacing_shift
= 0;
215 case ALAC_CHMODE_RIGHT_SIDE
:
218 right
[i
] = left
[i
] - right
[i
];
219 left
[i
] = tmp
+ (right
[i
] >> 31);
221 s
->interlacing_leftweight
= 1;
222 s
->interlacing_shift
= 31;
228 left
[i
] = (tmp
+ right
[i
]) >> 1;
229 right
[i
] = tmp
- right
[i
];
231 s
->interlacing_leftweight
= 1;
232 s
->interlacing_shift
= 1;
237 static void alac_linear_predictor(AlacEncodeContext
*s
, int ch
)
240 LPCContext lpc
= s
->lpc
[ch
];
242 if(lpc
.lpc_order
== 31) {
243 s
->predictor_buf
[0] = s
->sample_buf
[ch
][0];
245 for(i
=1; i
<s
->avctx
->frame_size
; i
++)
246 s
->predictor_buf
[i
] = s
->sample_buf
[ch
][i
] - s
->sample_buf
[ch
][i
-1];
251 // generalised linear predictor
253 if(lpc
.lpc_order
> 0) {
254 int32_t *samples
= s
->sample_buf
[ch
];
255 int32_t *residual
= s
->predictor_buf
;
257 // generate warm-up samples
258 residual
[0] = samples
[0];
259 for(i
=1;i
<=lpc
.lpc_order
;i
++)
260 residual
[i
] = samples
[i
] - samples
[i
-1];
262 // perform lpc on remaining samples
263 for(i
= lpc
.lpc_order
+ 1; i
< s
->avctx
->frame_size
; i
++) {
264 int sum
= 1 << (lpc
.lpc_quant
- 1), res_val
, j
;
266 for (j
= 0; j
< lpc
.lpc_order
; j
++) {
267 sum
+= (samples
[lpc
.lpc_order
-j
] - samples
[0]) *
271 sum
>>= lpc
.lpc_quant
;
273 residual
[i
] = sign_extend(samples
[lpc
.lpc_order
+1] - sum
,
274 s
->write_sample_size
);
275 res_val
= residual
[i
];
278 int index
= lpc
.lpc_order
- 1;
279 int neg
= (res_val
< 0);
281 while(index
>= 0 && (neg
? (res_val
< 0):(res_val
> 0))) {
282 int val
= samples
[0] - samples
[lpc
.lpc_order
- index
];
283 int sign
= (val
? FFSIGN(val
) : 0);
288 lpc
.lpc_coeff
[index
] -= sign
;
290 res_val
-= ((val
>> lpc
.lpc_quant
) *
291 (lpc
.lpc_order
- index
));
300 static void alac_entropy_coder(AlacEncodeContext
*s
)
302 unsigned int history
= s
->rc
.initial_history
;
303 int sign_modifier
= 0, i
, k
;
304 int32_t *samples
= s
->predictor_buf
;
306 for(i
=0;i
< s
->avctx
->frame_size
;) {
309 k
= av_log2((history
>> 9) + 3);
317 encode_scalar(s
, x
- sign_modifier
, k
, s
->write_sample_size
);
319 history
+= x
* s
->rc
.history_mult
320 - ((history
* s
->rc
.history_mult
) >> 9);
326 if((history
< 128) && (i
< s
->avctx
->frame_size
)) {
327 unsigned int block_size
= 0;
329 k
= 7 - av_log2(history
) + ((history
+ 16) >> 6);
331 while((*samples
== 0) && (i
< s
->avctx
->frame_size
)) {
336 encode_scalar(s
, block_size
, k
, 16);
338 sign_modifier
= (block_size
<= 0xFFFF);
346 static void write_compressed_frame(AlacEncodeContext
*s
)
350 if(s
->avctx
->channels
== 2)
351 alac_stereo_decorrelation(s
);
352 put_bits(&s
->pbctx
, 8, s
->interlacing_shift
);
353 put_bits(&s
->pbctx
, 8, s
->interlacing_leftweight
);
355 for(i
=0;i
<s
->avctx
->channels
;i
++) {
357 calc_predictor_params(s
, i
);
359 put_bits(&s
->pbctx
, 4, 0); // prediction type : currently only type 0 has been RE'd
360 put_bits(&s
->pbctx
, 4, s
->lpc
[i
].lpc_quant
);
362 put_bits(&s
->pbctx
, 3, s
->rc
.rice_modifier
);
363 put_bits(&s
->pbctx
, 5, s
->lpc
[i
].lpc_order
);
364 // predictor coeff. table
365 for(j
=0;j
<s
->lpc
[i
].lpc_order
;j
++) {
366 put_sbits(&s
->pbctx
, 16, s
->lpc
[i
].lpc_coeff
[j
]);
370 // apply lpc and entropy coding to audio samples
372 for(i
=0;i
<s
->avctx
->channels
;i
++) {
373 alac_linear_predictor(s
, i
);
374 alac_entropy_coder(s
);
378 static av_cold
int alac_encode_init(AVCodecContext
*avctx
)
380 AlacEncodeContext
*s
= avctx
->priv_data
;
381 uint8_t *alac_extradata
= av_mallocz(ALAC_EXTRADATA_SIZE
+1);
383 avctx
->frame_size
= DEFAULT_FRAME_SIZE
;
384 avctx
->bits_per_coded_sample
= DEFAULT_SAMPLE_SIZE
;
386 if(avctx
->sample_fmt
!= SAMPLE_FMT_S16
) {
387 av_log(avctx
, AV_LOG_ERROR
, "only pcm_s16 input samples are supported\n");
391 // Set default compression level
392 if(avctx
->compression_level
== FF_COMPRESSION_DEFAULT
)
393 s
->compression_level
= 2;
395 s
->compression_level
= av_clip(avctx
->compression_level
, 0, 2);
397 // Initialize default Rice parameters
398 s
->rc
.history_mult
= 40;
399 s
->rc
.initial_history
= 10;
400 s
->rc
.k_modifier
= 14;
401 s
->rc
.rice_modifier
= 4;
403 s
->max_coded_frame_size
= 8 + (avctx
->frame_size
*avctx
->channels
*avctx
->bits_per_coded_sample
>>3);
405 s
->write_sample_size
= avctx
->bits_per_coded_sample
+ avctx
->channels
- 1; // FIXME: consider wasted_bytes
407 AV_WB32(alac_extradata
, ALAC_EXTRADATA_SIZE
);
408 AV_WB32(alac_extradata
+4, MKBETAG('a','l','a','c'));
409 AV_WB32(alac_extradata
+12, avctx
->frame_size
);
410 AV_WB8 (alac_extradata
+17, avctx
->bits_per_coded_sample
);
411 AV_WB8 (alac_extradata
+21, avctx
->channels
);
412 AV_WB32(alac_extradata
+24, s
->max_coded_frame_size
);
413 AV_WB32(alac_extradata
+28, avctx
->sample_rate
*avctx
->channels
*avctx
->bits_per_coded_sample
); // average bitrate
414 AV_WB32(alac_extradata
+32, avctx
->sample_rate
);
416 // Set relevant extradata fields
417 if(s
->compression_level
> 0) {
418 AV_WB8(alac_extradata
+18, s
->rc
.history_mult
);
419 AV_WB8(alac_extradata
+19, s
->rc
.initial_history
);
420 AV_WB8(alac_extradata
+20, s
->rc
.k_modifier
);
423 s
->min_prediction_order
= DEFAULT_MIN_PRED_ORDER
;
424 if(avctx
->min_prediction_order
>= 0) {
425 if(avctx
->min_prediction_order
< MIN_LPC_ORDER
||
426 avctx
->min_prediction_order
> ALAC_MAX_LPC_ORDER
) {
427 av_log(avctx
, AV_LOG_ERROR
, "invalid min prediction order: %d\n", avctx
->min_prediction_order
);
431 s
->min_prediction_order
= avctx
->min_prediction_order
;
434 s
->max_prediction_order
= DEFAULT_MAX_PRED_ORDER
;
435 if(avctx
->max_prediction_order
>= 0) {
436 if(avctx
->max_prediction_order
< MIN_LPC_ORDER
||
437 avctx
->max_prediction_order
> ALAC_MAX_LPC_ORDER
) {
438 av_log(avctx
, AV_LOG_ERROR
, "invalid max prediction order: %d\n", avctx
->max_prediction_order
);
442 s
->max_prediction_order
= avctx
->max_prediction_order
;
445 if(s
->max_prediction_order
< s
->min_prediction_order
) {
446 av_log(avctx
, AV_LOG_ERROR
, "invalid prediction orders: min=%d max=%d\n",
447 s
->min_prediction_order
, s
->max_prediction_order
);
451 avctx
->extradata
= alac_extradata
;
452 avctx
->extradata_size
= ALAC_EXTRADATA_SIZE
;
454 avctx
->coded_frame
= avcodec_alloc_frame();
455 avctx
->coded_frame
->key_frame
= 1;
458 dsputil_init(&s
->dspctx
, avctx
);
463 static int alac_encode_frame(AVCodecContext
*avctx
, uint8_t *frame
,
464 int buf_size
, void *data
)
466 AlacEncodeContext
*s
= avctx
->priv_data
;
467 PutBitContext
*pb
= &s
->pbctx
;
468 int i
, out_bytes
, verbatim_flag
= 0;
470 if(avctx
->frame_size
> DEFAULT_FRAME_SIZE
) {
471 av_log(avctx
, AV_LOG_ERROR
, "input frame size exceeded\n");
475 if(buf_size
< 2*s
->max_coded_frame_size
) {
476 av_log(avctx
, AV_LOG_ERROR
, "buffer size is too small\n");
481 init_put_bits(pb
, frame
, buf_size
);
483 if((s
->compression_level
== 0) || verbatim_flag
) {
485 int16_t *samples
= data
;
486 write_frame_header(s
, 1);
487 for(i
=0; i
<avctx
->frame_size
*avctx
->channels
; i
++) {
488 put_sbits(pb
, 16, *samples
++);
491 init_sample_buffers(s
, data
);
492 write_frame_header(s
, 0);
493 write_compressed_frame(s
);
498 out_bytes
= put_bits_count(pb
) >> 3;
500 if(out_bytes
> s
->max_coded_frame_size
) {
501 /* frame too large. use verbatim mode */
502 if(verbatim_flag
|| (s
->compression_level
== 0)) {
503 /* still too large. must be an error. */
504 av_log(avctx
, AV_LOG_ERROR
, "error encoding frame\n");
514 static av_cold
int alac_encode_close(AVCodecContext
*avctx
)
516 av_freep(&avctx
->extradata
);
517 avctx
->extradata_size
= 0;
518 av_freep(&avctx
->coded_frame
);
522 AVCodec alac_encoder
= {
526 sizeof(AlacEncodeContext
),
530 .capabilities
= CODEC_CAP_SMALL_LAST_FRAME
,
531 .long_name
= NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),