/*
 * Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/**
 * @file
 * Mixes audio from multiple sources into a single output. The channel layout,
 * sample rate, and sample format will be the same for all inputs and the
 * output.
 */
31 #include "libavutil/audio_fifo.h"
32 #include "libavutil/avassert.h"
33 #include "libavutil/avstring.h"
34 #include "libavutil/channel_layout.h"
35 #include "libavutil/common.h"
36 #include "libavutil/float_dsp.h"
37 #include "libavutil/mathematics.h"
38 #include "libavutil/opt.h"
39 #include "libavutil/samplefmt.h"
/* Per-input state values stored in MixContext.input_state[]. */
#define INPUT_OFF      0    /**< input has reached EOF */
#define INPUT_ON       1    /**< input is active */
#define INPUT_INACTIVE 2    /**< input is on, but is currently inactive */

/* Values accepted by the "duration" option. */
#define DURATION_LONGEST  0
#define DURATION_SHORTEST 1
#define DURATION_FIRST    2
/**
 * Size and timestamp of one queued frame; nodes form a singly linked list.
 */
typedef struct FrameInfo {
    int nb_samples;             /**< samples of this frame not yet consumed */
    int64_t pts;                /**< timestamp of the first remaining sample */
    struct FrameInfo *next;     /**< following node, or NULL for the last one */
} FrameInfo;
/**
 * Linked list used to store timestamps and frame sizes of all frames in the
 * FIFO for the first input.
 *
 * This is needed to keep timestamps synchronized for the case where multiple
 * input frames are pushed to the filter for processing before a frame is
 * requested by the output link.
 */
typedef struct FrameList {
    int nb_frames;              /**< number of nodes currently in the list */
    int nb_samples;             /**< sum of nb_samples over all nodes */
    struct FrameInfo *list;     /**< oldest node (head), NULL when empty */
    struct FrameInfo *end;      /**< newest node (tail), NULL when empty */
} FrameList;
76 static void frame_list_clear(FrameList
*frame_list
)
79 while (frame_list
->list
) {
80 FrameInfo
*info
= frame_list
->list
;
81 frame_list
->list
= info
->next
;
84 frame_list
->nb_frames
= 0;
85 frame_list
->nb_samples
= 0;
86 frame_list
->end
= NULL
;
90 static int frame_list_next_frame_size(FrameList
*frame_list
)
92 if (!frame_list
->list
)
94 return frame_list
->list
->nb_samples
;
97 static int64_t frame_list_next_pts(FrameList
*frame_list
)
99 if (!frame_list
->list
)
100 return AV_NOPTS_VALUE
;
101 return frame_list
->list
->pts
;
104 static void frame_list_remove_samples(FrameList
*frame_list
, int nb_samples
)
106 if (nb_samples
>= frame_list
->nb_samples
) {
107 frame_list_clear(frame_list
);
109 int samples
= nb_samples
;
110 while (samples
> 0) {
111 FrameInfo
*info
= frame_list
->list
;
112 av_assert0(info
!= NULL
);
113 if (info
->nb_samples
<= samples
) {
114 samples
-= info
->nb_samples
;
115 frame_list
->list
= info
->next
;
116 if (!frame_list
->list
)
117 frame_list
->end
= NULL
;
118 frame_list
->nb_frames
--;
119 frame_list
->nb_samples
-= info
->nb_samples
;
122 info
->nb_samples
-= samples
;
123 info
->pts
+= samples
;
124 frame_list
->nb_samples
-= samples
;
131 static int frame_list_add_frame(FrameList
*frame_list
, int nb_samples
, int64_t pts
)
133 FrameInfo
*info
= av_malloc(sizeof(*info
));
135 return AVERROR(ENOMEM
);
136 info
->nb_samples
= nb_samples
;
140 if (!frame_list
->list
) {
141 frame_list
->list
= info
;
142 frame_list
->end
= info
;
144 av_assert0(frame_list
->end
!= NULL
);
145 frame_list
->end
->next
= info
;
146 frame_list
->end
= info
;
148 frame_list
->nb_frames
++;
149 frame_list
->nb_samples
+= nb_samples
;
155 typedef struct MixContext
{
156 const AVClass
*class; /**< class for AVOptions */
157 AVFloatDSPContext fdsp
;
159 int nb_inputs
; /**< number of inputs */
160 int active_inputs
; /**< number of input currently active */
161 int duration_mode
; /**< mode for determining duration */
162 float dropout_transition
; /**< transition time when an input drops out */
164 int nb_channels
; /**< number of channels */
165 int sample_rate
; /**< sample rate */
167 AVAudioFifo
**fifos
; /**< audio fifo for each input */
168 uint8_t *input_state
; /**< current state of each input */
169 float *input_scale
; /**< mixing scale factor for each input */
170 float scale_norm
; /**< normalization factor for all inputs */
171 int64_t next_pts
; /**< calculated pts for next output frame */
172 FrameList
*frame_list
; /**< list of frame info for the first input */
175 #define OFFSET(x) offsetof(MixContext, x)
176 #define A AV_OPT_FLAG_AUDIO_PARAM
177 static const AVOption options
[] = {
178 { "inputs", "Number of inputs.",
179 OFFSET(nb_inputs
), AV_OPT_TYPE_INT
, { .i64
= 2 }, 1, 32, A
},
180 { "duration", "How to determine the end-of-stream.",
181 OFFSET(duration_mode
), AV_OPT_TYPE_INT
, { .i64
= DURATION_LONGEST
}, 0, 2, A
, "duration" },
182 { "longest", "Duration of longest input.", 0, AV_OPT_TYPE_CONST
, { .i64
= DURATION_LONGEST
}, INT_MIN
, INT_MAX
, A
, "duration" },
183 { "shortest", "Duration of shortest input.", 0, AV_OPT_TYPE_CONST
, { .i64
= DURATION_SHORTEST
}, INT_MIN
, INT_MAX
, A
, "duration" },
184 { "first", "Duration of first input.", 0, AV_OPT_TYPE_CONST
, { .i64
= DURATION_FIRST
}, INT_MIN
, INT_MAX
, A
, "duration" },
185 { "dropout_transition", "Transition time, in seconds, for volume "
186 "renormalization when an input stream ends.",
187 OFFSET(dropout_transition
), AV_OPT_TYPE_FLOAT
, { .dbl
= 2.0 }, 0, INT_MAX
, A
},
191 static const AVClass amix_class
= {
192 .class_name
= "amix filter",
193 .item_name
= av_default_item_name
,
195 .version
= LIBAVUTIL_VERSION_INT
,
200 * Update the scaling factors to apply to each input during mixing.
202 * This balances the full volume range between active inputs and handles
203 * volume transitions when EOF is encountered on an input but mixing continues
204 * with the remaining inputs.
206 static void calculate_scales(MixContext
*s
, int nb_samples
)
210 if (s
->scale_norm
> s
->active_inputs
) {
211 s
->scale_norm
-= nb_samples
/ (s
->dropout_transition
* s
->sample_rate
);
212 s
->scale_norm
= FFMAX(s
->scale_norm
, s
->active_inputs
);
215 for (i
= 0; i
< s
->nb_inputs
; i
++) {
216 if (s
->input_state
[i
] == INPUT_ON
)
217 s
->input_scale
[i
] = 1.0f
/ s
->scale_norm
;
219 s
->input_scale
[i
] = 0.0f
;
223 static int config_output(AVFilterLink
*outlink
)
225 AVFilterContext
*ctx
= outlink
->src
;
226 MixContext
*s
= ctx
->priv
;
230 s
->planar
= av_sample_fmt_is_planar(outlink
->format
);
231 s
->sample_rate
= outlink
->sample_rate
;
232 outlink
->time_base
= (AVRational
){ 1, outlink
->sample_rate
};
233 s
->next_pts
= AV_NOPTS_VALUE
;
235 s
->frame_list
= av_mallocz(sizeof(*s
->frame_list
));
237 return AVERROR(ENOMEM
);
239 s
->fifos
= av_mallocz(s
->nb_inputs
* sizeof(*s
->fifos
));
241 return AVERROR(ENOMEM
);
243 s
->nb_channels
= av_get_channel_layout_nb_channels(outlink
->channel_layout
);
244 for (i
= 0; i
< s
->nb_inputs
; i
++) {
245 s
->fifos
[i
] = av_audio_fifo_alloc(outlink
->format
, s
->nb_channels
, 1024);
247 return AVERROR(ENOMEM
);
250 s
->input_state
= av_malloc(s
->nb_inputs
);
252 return AVERROR(ENOMEM
);
253 memset(s
->input_state
, INPUT_ON
, s
->nb_inputs
);
254 s
->active_inputs
= s
->nb_inputs
;
256 s
->input_scale
= av_mallocz(s
->nb_inputs
* sizeof(*s
->input_scale
));
258 return AVERROR(ENOMEM
);
259 s
->scale_norm
= s
->active_inputs
;
260 calculate_scales(s
, 0);
262 av_get_channel_layout_string(buf
, sizeof(buf
), -1, outlink
->channel_layout
);
264 av_log(ctx
, AV_LOG_VERBOSE
,
265 "inputs:%d fmt:%s srate:%d cl:%s\n", s
->nb_inputs
,
266 av_get_sample_fmt_name(outlink
->format
), outlink
->sample_rate
, buf
);
272 * Read samples from the input FIFOs, mix, and write to the output link.
274 static int output_frame(AVFilterLink
*outlink
, int nb_samples
)
276 AVFilterContext
*ctx
= outlink
->src
;
277 MixContext
*s
= ctx
->priv
;
278 AVFilterBufferRef
*out_buf
, *in_buf
;
281 calculate_scales(s
, nb_samples
);
283 out_buf
= ff_get_audio_buffer(outlink
, AV_PERM_WRITE
, nb_samples
);
285 return AVERROR(ENOMEM
);
287 in_buf
= ff_get_audio_buffer(outlink
, AV_PERM_WRITE
, nb_samples
);
289 avfilter_unref_buffer(out_buf
);
290 return AVERROR(ENOMEM
);
293 for (i
= 0; i
< s
->nb_inputs
; i
++) {
294 if (s
->input_state
[i
] == INPUT_ON
) {
295 int planes
, plane_size
, p
;
297 av_audio_fifo_read(s
->fifos
[i
], (void **)in_buf
->extended_data
,
300 planes
= s
->planar
? s
->nb_channels
: 1;
301 plane_size
= nb_samples
* (s
->planar
? 1 : s
->nb_channels
);
302 plane_size
= FFALIGN(plane_size
, 16);
304 for (p
= 0; p
< planes
; p
++) {
305 s
->fdsp
.vector_fmac_scalar((float *)out_buf
->extended_data
[p
],
306 (float *) in_buf
->extended_data
[p
],
307 s
->input_scale
[i
], plane_size
);
311 avfilter_unref_buffer(in_buf
);
313 out_buf
->pts
= s
->next_pts
;
314 if (s
->next_pts
!= AV_NOPTS_VALUE
)
315 s
->next_pts
+= nb_samples
;
317 return ff_filter_frame(outlink
, out_buf
);
321 * Returns the smallest number of samples available in the input FIFOs other
322 * than that of the first input.
324 static int get_available_samples(MixContext
*s
)
327 int available_samples
= INT_MAX
;
329 av_assert0(s
->nb_inputs
> 1);
331 for (i
= 1; i
< s
->nb_inputs
; i
++) {
333 if (s
->input_state
[i
] == INPUT_OFF
)
335 nb_samples
= av_audio_fifo_size(s
->fifos
[i
]);
336 available_samples
= FFMIN(available_samples
, nb_samples
);
338 if (available_samples
== INT_MAX
)
340 return available_samples
;
344 * Requests a frame, if needed, from each input link other than the first.
346 static int request_samples(AVFilterContext
*ctx
, int min_samples
)
348 MixContext
*s
= ctx
->priv
;
351 av_assert0(s
->nb_inputs
> 1);
353 for (i
= 1; i
< s
->nb_inputs
; i
++) {
355 if (s
->input_state
[i
] == INPUT_OFF
)
357 while (!ret
&& av_audio_fifo_size(s
->fifos
[i
]) < min_samples
)
358 ret
= ff_request_frame(ctx
->inputs
[i
]);
359 if (ret
== AVERROR_EOF
) {
360 if (av_audio_fifo_size(s
->fifos
[i
]) == 0) {
361 s
->input_state
[i
] = INPUT_OFF
;
371 * Calculates the number of active inputs and determines EOF based on the
374 * @return 0 if mixing should continue, or AVERROR_EOF if mixing should stop.
376 static int calc_active_inputs(MixContext
*s
)
379 int active_inputs
= 0;
380 for (i
= 0; i
< s
->nb_inputs
; i
++)
381 active_inputs
+= !!(s
->input_state
[i
] != INPUT_OFF
);
382 s
->active_inputs
= active_inputs
;
384 if (!active_inputs
||
385 (s
->duration_mode
== DURATION_FIRST
&& s
->input_state
[0] == INPUT_OFF
) ||
386 (s
->duration_mode
== DURATION_SHORTEST
&& active_inputs
!= s
->nb_inputs
))
391 static int request_frame(AVFilterLink
*outlink
)
393 AVFilterContext
*ctx
= outlink
->src
;
394 MixContext
*s
= ctx
->priv
;
396 int wanted_samples
, available_samples
;
398 ret
= calc_active_inputs(s
);
402 if (s
->input_state
[0] == INPUT_OFF
) {
403 ret
= request_samples(ctx
, 1);
407 ret
= calc_active_inputs(s
);
411 available_samples
= get_available_samples(s
);
412 if (!available_samples
)
413 return AVERROR(EAGAIN
);
415 return output_frame(outlink
, available_samples
);
418 if (s
->frame_list
->nb_frames
== 0) {
419 ret
= ff_request_frame(ctx
->inputs
[0]);
420 if (ret
== AVERROR_EOF
) {
421 s
->input_state
[0] = INPUT_OFF
;
422 if (s
->nb_inputs
== 1)
425 return AVERROR(EAGAIN
);
429 av_assert0(s
->frame_list
->nb_frames
> 0);
431 wanted_samples
= frame_list_next_frame_size(s
->frame_list
);
433 if (s
->active_inputs
> 1) {
434 ret
= request_samples(ctx
, wanted_samples
);
438 ret
= calc_active_inputs(s
);
443 if (s
->active_inputs
> 1) {
444 available_samples
= get_available_samples(s
);
445 if (!available_samples
)
446 return AVERROR(EAGAIN
);
447 available_samples
= FFMIN(available_samples
, wanted_samples
);
449 available_samples
= wanted_samples
;
452 s
->next_pts
= frame_list_next_pts(s
->frame_list
);
453 frame_list_remove_samples(s
->frame_list
, available_samples
);
455 return output_frame(outlink
, available_samples
);
458 static int filter_frame(AVFilterLink
*inlink
, AVFilterBufferRef
*buf
)
460 AVFilterContext
*ctx
= inlink
->dst
;
461 MixContext
*s
= ctx
->priv
;
462 AVFilterLink
*outlink
= ctx
->outputs
[0];
465 for (i
= 0; i
< ctx
->nb_inputs
; i
++)
466 if (ctx
->inputs
[i
] == inlink
)
468 if (i
>= ctx
->nb_inputs
) {
469 av_log(ctx
, AV_LOG_ERROR
, "unknown input link\n");
470 ret
= AVERROR(EINVAL
);
475 int64_t pts
= av_rescale_q(buf
->pts
, inlink
->time_base
,
477 ret
= frame_list_add_frame(s
->frame_list
, buf
->audio
->nb_samples
, pts
);
482 ret
= av_audio_fifo_write(s
->fifos
[i
], (void **)buf
->extended_data
,
483 buf
->audio
->nb_samples
);
486 avfilter_unref_buffer(buf
);
491 static int init(AVFilterContext
*ctx
, const char *args
)
493 MixContext
*s
= ctx
->priv
;
496 s
->class = &amix_class
;
497 av_opt_set_defaults(s
);
499 if ((ret
= av_set_options_string(s
, args
, "=", ":")) < 0) {
500 av_log(ctx
, AV_LOG_ERROR
, "Error parsing options string '%s'.\n", args
);
505 for (i
= 0; i
< s
->nb_inputs
; i
++) {
507 AVFilterPad pad
= { 0 };
509 snprintf(name
, sizeof(name
), "input%d", i
);
510 pad
.type
= AVMEDIA_TYPE_AUDIO
;
511 pad
.name
= av_strdup(name
);
512 pad
.filter_frame
= filter_frame
;
514 ff_insert_inpad(ctx
, i
, &pad
);
517 avpriv_float_dsp_init(&s
->fdsp
, 0);
522 static void uninit(AVFilterContext
*ctx
)
525 MixContext
*s
= ctx
->priv
;
528 for (i
= 0; i
< s
->nb_inputs
; i
++)
529 av_audio_fifo_free(s
->fifos
[i
]);
532 frame_list_clear(s
->frame_list
);
533 av_freep(&s
->frame_list
);
534 av_freep(&s
->input_state
);
535 av_freep(&s
->input_scale
);
537 for (i
= 0; i
< ctx
->nb_inputs
; i
++)
538 av_freep(&ctx
->input_pads
[i
].name
);
541 static int query_formats(AVFilterContext
*ctx
)
543 AVFilterFormats
*formats
= NULL
;
544 ff_add_format(&formats
, AV_SAMPLE_FMT_FLT
);
545 ff_add_format(&formats
, AV_SAMPLE_FMT_FLTP
);
546 ff_set_common_formats(ctx
, formats
);
547 ff_set_common_channel_layouts(ctx
, ff_all_channel_layouts());
548 ff_set_common_samplerates(ctx
, ff_all_samplerates());
552 static const AVFilterPad avfilter_af_amix_outputs
[] = {
555 .type
= AVMEDIA_TYPE_AUDIO
,
556 .config_props
= config_output
,
557 .request_frame
= request_frame
562 AVFilter avfilter_af_amix
= {
564 .description
= NULL_IF_CONFIG_SMALL("Audio mixing."),
565 .priv_size
= sizeof(MixContext
),
569 .query_formats
= query_formats
,
572 .outputs
= avfilter_af_amix_outputs
,