1 /*****************************************************************************
2 * scaletempo.c: Scale audio tempo while maintaining pitch
3 *****************************************************************************
4 * Copyright © 2008 the VideoLAN team
7 * Authors: Rov Juvano <rovjuvano@users.sourceforge.net>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22 *****************************************************************************/
24 /*****************************************************************************
26 *****************************************************************************/
31 #include <vlc_common.h>
32 #include <vlc_plugin.h>
34 #include <vlc_filter.h>
36 #include <string.h> /* for memset */
37 #include <limits.h> /* for INT_MIN */
39 /*****************************************************************************
41 *****************************************************************************/
42 static int Open( vlc_object_t
* );
43 static void Close( vlc_object_t
* );
44 static block_t
*DoWork( filter_t
*, block_t
* );
47 set_description( N_("Audio tempo scaler synched with rate") )
48 set_shortname( N_("Scaletempo") )
49 set_capability( "audio filter", 0 )
50 set_category( CAT_AUDIO
)
51 set_subcategory( SUBCAT_AUDIO_AFILTER
)
53 add_integer_with_range( "scaletempo-stride", 30, 1, 2000, NULL
,
54 N_("Stride Length"), N_("Length in milliseconds to output each stride"), true )
55 add_float_with_range( "scaletempo-overlap", .20, 0.0, 1.0, NULL
,
56 N_("Overlap Length"), N_("Percentage of stride to overlap"), true )
57 add_integer_with_range( "scaletempo-search", 14, 0, 200, NULL
,
58 N_("Search Length"), N_("Length in milliseconds to search for best overlap position"), true )
60 set_callbacks( Open
, Close
)
64 * Scaletempo works by producing audio in constant sized chunks (a "stride") but
65 * consuming chunks proportional to the playback rate.
67 * Scaletempo then smooths the output by blending the end of one stride with
68 * the next ("overlap").
70 * Scaletempo smooths the overlap further by searching within the input buffer
71 * for the best overlap position. Scaletempo uses a statistical cross correlation
72 * (roughly a dot-product). Scaletempo consumes most of its CPU cycles here.
75 * sample: a single audio sample for one channel
76 * frame: a single set of samples, one for each channel
77 * VLC uses these terms differently
81 /* Filter static config */
85 double percent_overlap
;
88 unsigned samples_per_frame
; /* AKA number of channels */
89 unsigned bytes_per_sample
;
90 unsigned bytes_per_frame
;
93 double frames_stride_scaled
;
94 double frames_stride_error
;
95 unsigned bytes_stride
;
96 double bytes_stride_scaled
;
97 unsigned bytes_queue_max
;
98 unsigned bytes_queued
;
99 unsigned bytes_to_slide
;
102 unsigned samples_overlap
;
103 unsigned samples_standing
;
104 unsigned bytes_overlap
;
105 unsigned bytes_standing
;
108 void (*output_overlap
)( filter_t
*p_filter
, void *p_out_buf
, unsigned bytes_off
);
110 unsigned frames_search
;
113 unsigned(*best_overlap_offset
)( filter_t
*p_filter
);
116 /*****************************************************************************
117 * best_overlap_offset: calculate best offset for overlap
118 *****************************************************************************/
119 static unsigned best_overlap_offset_float( filter_t
*p_filter
)
121 filter_sys_t
*p
= p_filter
->p_sys
;
122 float *pw
, *po
, *ppc
, *search_start
;
123 float best_corr
= INT_MIN
;
124 unsigned best_off
= 0;
127 pw
= p
->table_window
;
129 po
+= p
->samples_per_frame
;
130 ppc
= p
->buf_pre_corr
;
131 for( i
= p
->samples_per_frame
; i
< p
->samples_overlap
; i
++ ) {
132 *ppc
++ = *pw
++ * *po
++;
135 search_start
= (float *)p
->buf_queue
+ p
->samples_per_frame
;
136 for( off
= 0; off
< p
->frames_search
; off
++ ) {
138 float *ps
= search_start
;
139 ppc
= p
->buf_pre_corr
;
140 for( i
= p
->samples_per_frame
; i
< p
->samples_overlap
; i
++ ) {
141 corr
+= *ppc
++ * *ps
++;
143 if( corr
> best_corr
) {
147 search_start
+= p
->samples_per_frame
;
150 return best_off
* p
->bytes_per_frame
;
153 /*****************************************************************************
154 * output_overlap: blend end of previous stride with beginning of current stride
155 *****************************************************************************/
156 static void output_overlap_float( filter_t
*p_filter
,
160 filter_sys_t
*p
= p_filter
->p_sys
;
161 float *pout
= buf_out
;
162 float *pb
= p
->table_blend
;
163 float *po
= p
->buf_overlap
;
164 float *pin
= (float *)( p
->buf_queue
+ bytes_off
);
166 for( i
= 0; i
< p
->samples_overlap
; i
++ ) {
167 *pout
++ = *po
- *pb
++ * ( *po
- *pin
++ ); po
++;
171 /*****************************************************************************
172 * fill_queue: fill p_sys->buf_queue as much as possible, skipping samples as needed
173 *****************************************************************************/
174 static size_t fill_queue( filter_t
*p_filter
,
179 filter_sys_t
*p
= p_filter
->p_sys
;
180 unsigned bytes_in
= i_buffer
- offset
;
181 size_t offset_unchanged
= offset
;
183 if( p
->bytes_to_slide
> 0 ) {
184 if( p
->bytes_to_slide
< p
->bytes_queued
) {
185 unsigned bytes_in_move
= p
->bytes_queued
- p
->bytes_to_slide
;
186 memmove( p
->buf_queue
,
187 p
->buf_queue
+ p
->bytes_to_slide
,
189 p
->bytes_to_slide
= 0;
190 p
->bytes_queued
= bytes_in_move
;
192 unsigned bytes_in_skip
;
193 p
->bytes_to_slide
-= p
->bytes_queued
;
194 bytes_in_skip
= __MIN( p
->bytes_to_slide
, bytes_in
);
196 p
->bytes_to_slide
-= bytes_in_skip
;
197 offset
+= bytes_in_skip
;
198 bytes_in
-= bytes_in_skip
;
203 unsigned bytes_in_copy
= __MIN( p
->bytes_queue_max
- p
->bytes_queued
, bytes_in
);
204 memcpy( p
->buf_queue
+ p
->bytes_queued
,
207 p
->bytes_queued
+= bytes_in_copy
;
208 offset
+= bytes_in_copy
;
211 return offset
- offset_unchanged
;
214 /*****************************************************************************
215 * transform_buffer: main filter loop
216 *****************************************************************************/
217 static size_t transform_buffer( filter_t
*p_filter
,
222 filter_sys_t
*p
= p_filter
->p_sys
;
224 size_t offset_in
= fill_queue( p_filter
, p_buffer
, i_buffer
, 0 );
225 unsigned bytes_out
= 0;
226 while( p
->bytes_queued
>= p
->bytes_queue_max
) {
227 unsigned bytes_off
= 0;
230 if( p
->output_overlap
) {
231 if( p
->best_overlap_offset
) {
232 bytes_off
= p
->best_overlap_offset( p_filter
);
234 p
->output_overlap( p_filter
, pout
, bytes_off
);
236 memcpy( pout
+ p
->bytes_overlap
,
237 p
->buf_queue
+ bytes_off
+ p
->bytes_overlap
,
239 pout
+= p
->bytes_stride
;
240 bytes_out
+= p
->bytes_stride
;
243 memcpy( p
->buf_overlap
,
244 p
->buf_queue
+ bytes_off
+ p
->bytes_stride
,
246 double frames_to_slide
= p
->frames_stride_scaled
+ p
->frames_stride_error
;
247 unsigned frames_to_stride_whole
= (int)frames_to_slide
;
248 p
->bytes_to_slide
= frames_to_stride_whole
* p
->bytes_per_frame
;
249 p
->frames_stride_error
= frames_to_slide
- frames_to_stride_whole
;
251 offset_in
+= fill_queue( p_filter
, p_buffer
, i_buffer
, offset_in
);
257 /*****************************************************************************
258 * calculate_output_buffer_size
259 *****************************************************************************/
260 static size_t calculate_output_buffer_size( filter_t
*p_filter
,
263 filter_sys_t
*p
= p_filter
->p_sys
;
264 size_t bytes_out
= 0;
265 int bytes_to_out
= bytes_in
+ p
->bytes_queued
- p
->bytes_to_slide
;
266 if( bytes_to_out
>= (int)p
->bytes_queue_max
) {
267 /* while (total_buffered - stride_length * n >= queue_max) n++ */
268 bytes_out
= p
->bytes_stride
* ( (unsigned)(
269 ( bytes_to_out
- p
->bytes_queue_max
+ /* rounding protection */ p
->bytes_per_frame
)
270 / p
->bytes_stride_scaled
) + 1 );
275 /*****************************************************************************
276 * reinit_buffers: reinitializes buffers in p_filter->p_sys
277 *****************************************************************************/
278 static int reinit_buffers( filter_t
*p_filter
)
280 filter_sys_t
*p
= p_filter
->p_sys
;
283 unsigned frames_stride
= p
->ms_stride
* p
->sample_rate
/ 1000.0;
284 p
->bytes_stride
= frames_stride
* p
->bytes_per_frame
;
287 unsigned frames_overlap
= frames_stride
* p
->percent_overlap
;
288 if( frames_overlap
< 1 )
289 { /* if no overlap */
290 p
->bytes_overlap
= 0;
291 p
->bytes_standing
= p
->bytes_stride
;
292 p
->samples_standing
= p
->bytes_standing
/ p
->bytes_per_sample
;
293 p
->output_overlap
= NULL
;
297 unsigned prev_overlap
= p
->bytes_overlap
;
298 p
->bytes_overlap
= frames_overlap
* p
->bytes_per_frame
;
299 p
->samples_overlap
= frames_overlap
* p
->samples_per_frame
;
300 p
->bytes_standing
= p
->bytes_stride
- p
->bytes_overlap
;
301 p
->samples_standing
= p
->bytes_standing
/ p
->bytes_per_sample
;
302 p
->buf_overlap
= malloc( p
->bytes_overlap
);
303 p
->table_blend
= malloc( p
->samples_overlap
* 4 ); /* sizeof (int32|float) */
304 if( !p
->buf_overlap
|| !p
->table_blend
)
306 if( p
->bytes_overlap
> prev_overlap
)
307 memset( (uint8_t *)p
->buf_overlap
+ prev_overlap
, 0, p
->bytes_overlap
- prev_overlap
);
309 float *pb
= p
->table_blend
;
310 float t
= (float)frames_overlap
;
311 for( i
= 0; i
<frames_overlap
; i
++ )
314 for( j
= 0; j
< p
->samples_per_frame
; j
++ )
317 p
->output_overlap
= output_overlap_float
;
321 p
->frames_search
= ( frames_overlap
<= 1 ) ? 0 : p
->ms_search
* p
->sample_rate
/ 1000.0;
322 if( p
->frames_search
< 1 )
324 p
->best_overlap_offset
= NULL
;
328 unsigned bytes_pre_corr
= ( p
->samples_overlap
- p
->samples_per_frame
) * 4; /* sizeof (int32|float) */
329 p
->buf_pre_corr
= malloc( bytes_pre_corr
);
330 p
->table_window
= malloc( bytes_pre_corr
);
331 if( ! p
->buf_pre_corr
|| ! p
->table_window
)
333 float *pw
= p
->table_window
;
334 for( i
= 1; i
<frames_overlap
; i
++ )
336 float v
= i
* ( frames_overlap
- i
);
337 for( j
= 0; j
< p
->samples_per_frame
; j
++ )
340 p
->best_overlap_offset
= best_overlap_offset_float
;
343 unsigned new_size
= ( p
->frames_search
+ frames_stride
+ frames_overlap
) * p
->bytes_per_frame
;
344 if( p
->bytes_queued
> new_size
)
346 if( p
->bytes_to_slide
> p
->bytes_queued
)
348 p
->bytes_to_slide
-= p
->bytes_queued
;
353 unsigned new_queued
= __MIN( p
->bytes_queued
- p
->bytes_to_slide
, new_size
);
354 memmove( p
->buf_queue
,
355 p
->buf_queue
+ p
->bytes_queued
- new_queued
,
357 p
->bytes_to_slide
= 0;
358 p
->bytes_queued
= new_queued
;
361 p
->bytes_queue_max
= new_size
;
362 p
->buf_queue
= malloc( p
->bytes_queue_max
);
366 p
->bytes_stride_scaled
= p
->bytes_stride
* p
->scale
;
367 p
->frames_stride_scaled
= p
->bytes_stride_scaled
/ p
->bytes_per_frame
;
369 msg_Dbg( VLC_OBJECT(p_filter
),
370 "%.3f scale, %.3f stride_in, %i stride_out, %i standing, %i overlap, %i search, %i queue, %s mode",
372 p
->frames_stride_scaled
,
373 (int)( p
->bytes_stride
/ p
->bytes_per_frame
),
374 (int)( p
->bytes_standing
/ p
->bytes_per_frame
),
375 (int)( p
->bytes_overlap
/ p
->bytes_per_frame
),
377 (int)( p
->bytes_queue_max
/ p
->bytes_per_frame
),
383 /*****************************************************************************
384 * Open: initialize as "audio filter"
385 *****************************************************************************/
386 static int Open( vlc_object_t
*p_this
)
388 filter_t
*p_filter
= (filter_t
*)p_this
;
392 if( p_filter
->fmt_in
.audio
.i_format
!= VLC_CODEC_FL32
||
393 p_filter
->fmt_out
.audio
.i_format
!= VLC_CODEC_FL32
)
396 p_filter
->fmt_in
.audio
.i_format
= p_filter
->fmt_out
.audio
.i_format
= VLC_CODEC_FL32
;
397 msg_Warn( p_filter
, "bad input or output format" );
399 if( ! AOUT_FMTS_SIMILAR( &p_filter
->fmt_in
.audio
, &p_filter
->fmt_out
.audio
) )
402 memcpy( &p_filter
->fmt_out
.audio
, &p_filter
->fmt_in
.audio
, sizeof(audio_sample_format_t
) );
403 msg_Warn( p_filter
, "input and output formats are not similar" );
409 /* Allocate structure */
410 p_sys
= p_filter
->p_sys
= malloc( sizeof(*p_sys
) );
414 p_filter
->pf_audio_filter
= DoWork
;
417 p_sys
->sample_rate
= p_filter
->fmt_in
.audio
.i_rate
;
418 p_sys
->samples_per_frame
= aout_FormatNbChannels( &p_filter
->fmt_in
.audio
);
419 p_sys
->bytes_per_sample
= 4;
420 p_sys
->bytes_per_frame
= p_sys
->samples_per_frame
* p_sys
->bytes_per_sample
;
422 msg_Dbg( p_this
, "format: %5i rate, %i nch, %i bps, %s",
424 p_sys
->samples_per_frame
,
425 p_sys
->bytes_per_sample
,
428 p_sys
->ms_stride
= var_InheritInteger( p_this
, "scaletempo-stride" );
429 p_sys
->percent_overlap
= var_InheritFloat( p_this
, "scaletempo-overlap" );
430 p_sys
->ms_search
= var_InheritInteger( p_this
, "scaletempo-search" );
432 msg_Dbg( p_this
, "params: %i stride, %.3f overlap, %i search",
433 p_sys
->ms_stride
, p_sys
->percent_overlap
, p_sys
->ms_search
);
435 p_sys
->buf_queue
= NULL
;
436 p_sys
->buf_overlap
= NULL
;
437 p_sys
->table_blend
= NULL
;
438 p_sys
->buf_pre_corr
= NULL
;
439 p_sys
->table_window
= NULL
;
440 p_sys
->bytes_overlap
= 0;
441 p_sys
->bytes_queued
= 0;
442 p_sys
->bytes_to_slide
= 0;
443 p_sys
->frames_stride_error
= 0;
445 if( reinit_buffers( p_filter
) != VLC_SUCCESS
)
453 static void Close( vlc_object_t
*p_this
)
455 filter_t
*p_filter
= (filter_t
*)p_this
;
456 filter_sys_t
*p_sys
= p_filter
->p_sys
;
457 free( p_sys
->buf_queue
);
458 free( p_sys
->buf_overlap
);
459 free( p_sys
->table_blend
);
460 free( p_sys
->buf_pre_corr
);
461 free( p_sys
->table_window
);
465 /*****************************************************************************
466 * DoWork: filter wrapper for transform_buffer
467 *****************************************************************************/
468 static block_t
*DoWork( filter_t
* p_filter
, block_t
* p_in_buf
)
470 filter_sys_t
*p
= p_filter
->p_sys
;
472 if( p_filter
->fmt_in
.audio
.i_rate
== p
->sample_rate
)
475 double scale
= p_filter
->fmt_in
.audio
.i_rate
/ (double)p
->sample_rate
;
476 if( scale
!= p
->scale
) {
478 p
->bytes_stride_scaled
= p
->bytes_stride
* p
->scale
;
479 p
->frames_stride_scaled
= p
->bytes_stride_scaled
/ p
->bytes_per_frame
;
480 p
->bytes_to_slide
= 0;
481 msg_Dbg( p_filter
, "%.3f scale, %.3f stride_in, %i stride_out",
483 p
->frames_stride_scaled
,
484 (int)( p
->bytes_stride
/ p
->bytes_per_frame
) );
487 size_t i_outsize
= calculate_output_buffer_size ( p_filter
, p_in_buf
->i_buffer
);
488 block_t
*p_out_buf
= filter_NewAudioBuffer( p_filter
, i_outsize
);
489 if( p_out_buf
== NULL
)
492 size_t bytes_out
= transform_buffer( p_filter
,
493 p_in_buf
->p_buffer
, p_in_buf
->i_buffer
,
494 p_out_buf
->p_buffer
);
496 p_out_buf
->i_buffer
= bytes_out
;
497 p_out_buf
->i_nb_samples
= bytes_out
/ p
->bytes_per_frame
;
498 p_out_buf
->i_dts
= p_in_buf
->i_dts
;
499 p_out_buf
->i_pts
= p_in_buf
->i_pts
;
500 p_out_buf
->i_length
= p_in_buf
->i_length
;
502 block_Release( p_in_buf
);