1 /*****************************************************************************
2 * scaletempo.c: Scale audio tempo while maintaining pitch
3 *****************************************************************************
4 * Copyright © 2008 VLC authors and VideoLAN
7 * Authors: Rov Juvano <rovjuvano@users.sourceforge.net>
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU Lesser General Public License as published by
11 * the Free Software Foundation; either version 2.1 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public License
20 * along with this program; if not, write to the Free Software Foundation,
21 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22 *****************************************************************************/
24 /*****************************************************************************
26 *****************************************************************************/
31 #include <vlc_common.h>
32 #include <vlc_plugin.h>
34 #include <vlc_filter.h>
35 #include <vlc_modules.h>
36 #include <vlc_atomic.h>
38 #include <string.h> /* for memset */
39 #include <limits.h> /* for INT_MIN */
41 /*****************************************************************************
43 *****************************************************************************/
44 static int Open( vlc_object_t
* );
45 static void Close( vlc_object_t
* );
46 static block_t
*DoWork( filter_t
*, block_t
* );
49 static int OpenPitch( vlc_object_t
* );
50 static void ClosePitch( vlc_object_t
* );
51 static block_t
*DoPitchWork( filter_t
*, block_t
* );
52 # define MODULE_DESC N_("Pitch Shifter")
53 # define MODULES_SHORTNAME N_("Audio pitch changer")
55 # define MODULE_DESC N_("Audio tempo scaler synched with rate")
56 # define MODULES_SHORTNAME N_("Scaletempo")
60 set_description( MODULE_DESC
)
61 set_shortname( MODULES_SHORTNAME
)
62 set_capability( "audio filter", 0 )
63 set_category( CAT_AUDIO
)
64 set_subcategory( SUBCAT_AUDIO_AFILTER
)
66 add_integer_with_range( "scaletempo-stride", 30, 1, 2000,
67 N_("Stride Length"), N_("Length in milliseconds to output each stride"), true )
68 add_float_with_range( "scaletempo-overlap", .20, 0.0, 1.0,
69 N_("Overlap Length"), N_("Percentage of stride to overlap"), true )
70 add_integer_with_range( "scaletempo-search", 14, 0, 200,
71 N_("Search Length"), N_("Length in milliseconds to search for best overlap position"), true )
73 add_float_with_range( "pitch-shift", 0, -12, 12,
74 N_("Pitch Shift"), N_("Pitch shift in semitones"), false )
75 set_callbacks( OpenPitch
, ClosePitch
)
77 set_callbacks( Open
, Close
)
83 * Scaletempo works by producing audio in constant sized chunks (a "stride") but
84 * consuming chunks proportional to the playback rate.
86 * Scaletempo then smooths the output by blending the end of one stride with
87 * the next ("overlap").
89 * Scaletempo smooths the overlap further by searching within the input buffer
90 * for the best overlap position. Scaletempo uses a statistical cross correlation
91 * (roughly a dot-product). Scaletempo consumes most of its CPU cycles here.
94 * sample: a single audio sample for one channel
95 * frame: a single set of samples, one for each channel
96 * VLC uses these terms differently
100 /* Filter static config */
104 double percent_overlap
;
107 unsigned samples_per_frame
; /* AKA number of channels */
108 unsigned bytes_per_sample
;
109 unsigned bytes_per_frame
;
110 unsigned sample_rate
;
112 double frames_stride_scaled
;
113 double frames_stride_error
;
114 unsigned bytes_stride
;
115 double bytes_stride_scaled
;
116 unsigned bytes_queue_max
;
117 unsigned bytes_queued
;
118 unsigned bytes_to_slide
;
121 unsigned samples_overlap
;
122 unsigned samples_standing
;
123 unsigned bytes_overlap
;
124 unsigned bytes_standing
;
127 void (*output_overlap
)( filter_t
*p_filter
, void *p_out_buf
, unsigned bytes_off
);
129 unsigned frames_search
;
132 unsigned(*best_overlap_offset
)( filter_t
*p_filter
);
135 filter_t
* resampler
;
136 vlc_atomic_float rate_shift
;
140 /*****************************************************************************
141 * best_overlap_offset: calculate best offset for overlap
142 *****************************************************************************/
143 static unsigned best_overlap_offset_float( filter_t
*p_filter
)
145 filter_sys_t
*p
= p_filter
->p_sys
;
146 float *pw
, *po
, *ppc
, *search_start
;
147 float best_corr
= INT_MIN
;
148 unsigned best_off
= 0;
151 pw
= p
->table_window
;
153 po
+= p
->samples_per_frame
;
154 ppc
= p
->buf_pre_corr
;
155 for( i
= p
->samples_per_frame
; i
< p
->samples_overlap
; i
++ ) {
156 *ppc
++ = *pw
++ * *po
++;
159 search_start
= (float *)p
->buf_queue
+ p
->samples_per_frame
;
160 for( off
= 0; off
< p
->frames_search
; off
++ ) {
162 float *ps
= search_start
;
163 ppc
= p
->buf_pre_corr
;
164 for( i
= p
->samples_per_frame
; i
< p
->samples_overlap
; i
++ ) {
165 corr
+= *ppc
++ * *ps
++;
167 if( corr
> best_corr
) {
171 search_start
+= p
->samples_per_frame
;
174 return best_off
* p
->bytes_per_frame
;
177 /*****************************************************************************
178 * output_overlap: blend end of previous stride with beginning of current stride
179 *****************************************************************************/
180 static void output_overlap_float( filter_t
*p_filter
,
184 filter_sys_t
*p
= p_filter
->p_sys
;
185 float *pout
= buf_out
;
186 float *pb
= p
->table_blend
;
187 float *po
= p
->buf_overlap
;
188 float *pin
= (float *)( p
->buf_queue
+ bytes_off
);
190 for( i
= 0; i
< p
->samples_overlap
; i
++ ) {
191 *pout
++ = *po
- *pb
++ * ( *po
- *pin
++ ); po
++;
195 /*****************************************************************************
196 * fill_queue: fill p_sys->buf_queue as much possible, skipping samples as needed
197 *****************************************************************************/
198 static size_t fill_queue( filter_t
*p_filter
,
203 filter_sys_t
*p
= p_filter
->p_sys
;
204 unsigned bytes_in
= i_buffer
- offset
;
205 size_t offset_unchanged
= offset
;
207 if( p
->bytes_to_slide
> 0 ) {
208 if( p
->bytes_to_slide
< p
->bytes_queued
) {
209 unsigned bytes_in_move
= p
->bytes_queued
- p
->bytes_to_slide
;
210 memmove( p
->buf_queue
,
211 p
->buf_queue
+ p
->bytes_to_slide
,
213 p
->bytes_to_slide
= 0;
214 p
->bytes_queued
= bytes_in_move
;
216 unsigned bytes_in_skip
;
217 p
->bytes_to_slide
-= p
->bytes_queued
;
218 bytes_in_skip
= __MIN( p
->bytes_to_slide
, bytes_in
);
220 p
->bytes_to_slide
-= bytes_in_skip
;
221 offset
+= bytes_in_skip
;
222 bytes_in
-= bytes_in_skip
;
227 unsigned bytes_in_copy
= __MIN( p
->bytes_queue_max
- p
->bytes_queued
, bytes_in
);
228 memcpy( p
->buf_queue
+ p
->bytes_queued
,
231 p
->bytes_queued
+= bytes_in_copy
;
232 offset
+= bytes_in_copy
;
235 return offset
- offset_unchanged
;
238 /*****************************************************************************
239 * transform_buffer: main filter loop
240 *****************************************************************************/
241 static size_t transform_buffer( filter_t
*p_filter
,
246 filter_sys_t
*p
= p_filter
->p_sys
;
248 size_t offset_in
= fill_queue( p_filter
, p_buffer
, i_buffer
, 0 );
249 unsigned bytes_out
= 0;
250 while( p
->bytes_queued
>= p
->bytes_queue_max
) {
251 unsigned bytes_off
= 0;
254 if( p
->output_overlap
) {
255 if( p
->best_overlap_offset
) {
256 bytes_off
= p
->best_overlap_offset( p_filter
);
258 p
->output_overlap( p_filter
, pout
, bytes_off
);
260 memcpy( pout
+ p
->bytes_overlap
,
261 p
->buf_queue
+ bytes_off
+ p
->bytes_overlap
,
263 pout
+= p
->bytes_stride
;
264 bytes_out
+= p
->bytes_stride
;
267 memcpy( p
->buf_overlap
,
268 p
->buf_queue
+ bytes_off
+ p
->bytes_stride
,
270 double frames_to_slide
= p
->frames_stride_scaled
+ p
->frames_stride_error
;
271 unsigned frames_to_stride_whole
= (int)frames_to_slide
;
272 p
->bytes_to_slide
= frames_to_stride_whole
* p
->bytes_per_frame
;
273 p
->frames_stride_error
= frames_to_slide
- frames_to_stride_whole
;
275 offset_in
+= fill_queue( p_filter
, p_buffer
, i_buffer
, offset_in
);
281 /*****************************************************************************
282 * calculate_output_buffer_size
283 *****************************************************************************/
284 static size_t calculate_output_buffer_size( filter_t
*p_filter
,
287 filter_sys_t
*p
= p_filter
->p_sys
;
288 size_t bytes_out
= 0;
289 int bytes_to_out
= bytes_in
+ p
->bytes_queued
- p
->bytes_to_slide
;
290 if( bytes_to_out
>= (int)p
->bytes_queue_max
) {
291 /* while (total_buffered - stride_length * n >= queue_max) n++ */
292 bytes_out
= p
->bytes_stride
* ( (unsigned)(
293 ( bytes_to_out
- p
->bytes_queue_max
+ /* rounding protection */ p
->bytes_per_frame
)
294 / p
->bytes_stride_scaled
) + 1 );
299 /*****************************************************************************
300 * reinit_buffers: reinitializes buffers in p_filter->p_sys
301 *****************************************************************************/
302 static int reinit_buffers( filter_t
*p_filter
)
304 filter_sys_t
*p
= p_filter
->p_sys
;
307 unsigned frames_stride
= p
->ms_stride
* p
->sample_rate
/ 1000.0;
308 p
->bytes_stride
= frames_stride
* p
->bytes_per_frame
;
311 unsigned frames_overlap
= frames_stride
* p
->percent_overlap
;
312 if( frames_overlap
< 1 )
313 { /* if no overlap */
314 p
->bytes_overlap
= 0;
315 p
->bytes_standing
= p
->bytes_stride
;
316 p
->samples_standing
= p
->bytes_standing
/ p
->bytes_per_sample
;
317 p
->output_overlap
= NULL
;
321 unsigned prev_overlap
= p
->bytes_overlap
;
322 p
->bytes_overlap
= frames_overlap
* p
->bytes_per_frame
;
323 p
->samples_overlap
= frames_overlap
* p
->samples_per_frame
;
324 p
->bytes_standing
= p
->bytes_stride
- p
->bytes_overlap
;
325 p
->samples_standing
= p
->bytes_standing
/ p
->bytes_per_sample
;
326 p
->buf_overlap
= vlc_alloc( 1, p
->bytes_overlap
);
327 p
->table_blend
= vlc_alloc( 4, p
->samples_overlap
); /* sizeof (int32|float) */
328 if( !p
->buf_overlap
|| !p
->table_blend
)
330 if( p
->bytes_overlap
> prev_overlap
)
331 memset( (uint8_t *)p
->buf_overlap
+ prev_overlap
, 0, p
->bytes_overlap
- prev_overlap
);
333 float *pb
= p
->table_blend
;
334 float t
= (float)frames_overlap
;
335 for( i
= 0; i
<frames_overlap
; i
++ )
338 for( j
= 0; j
< p
->samples_per_frame
; j
++ )
341 p
->output_overlap
= output_overlap_float
;
345 p
->frames_search
= ( frames_overlap
<= 1 ) ? 0 : p
->ms_search
* p
->sample_rate
/ 1000.0;
346 if( p
->frames_search
< 1 )
348 p
->best_overlap_offset
= NULL
;
352 unsigned bytes_pre_corr
= ( p
->samples_overlap
- p
->samples_per_frame
) * 4; /* sizeof (int32|float) */
353 p
->buf_pre_corr
= malloc( bytes_pre_corr
);
354 p
->table_window
= malloc( bytes_pre_corr
);
355 if( ! p
->buf_pre_corr
|| ! p
->table_window
)
357 float *pw
= p
->table_window
;
358 for( i
= 1; i
<frames_overlap
; i
++ )
360 float v
= i
* ( frames_overlap
- i
);
361 for( j
= 0; j
< p
->samples_per_frame
; j
++ )
364 p
->best_overlap_offset
= best_overlap_offset_float
;
367 unsigned new_size
= ( p
->frames_search
+ frames_stride
+ frames_overlap
) * p
->bytes_per_frame
;
368 if( p
->bytes_queued
> new_size
)
370 if( p
->bytes_to_slide
> p
->bytes_queued
)
372 p
->bytes_to_slide
-= p
->bytes_queued
;
377 unsigned new_queued
= __MIN( p
->bytes_queued
- p
->bytes_to_slide
, new_size
);
378 memmove( p
->buf_queue
,
379 p
->buf_queue
+ p
->bytes_queued
- new_queued
,
381 p
->bytes_to_slide
= 0;
382 p
->bytes_queued
= new_queued
;
385 p
->bytes_queue_max
= new_size
;
386 p
->buf_queue
= malloc( p
->bytes_queue_max
);
390 p
->bytes_stride_scaled
= p
->bytes_stride
* p
->scale
;
391 p
->frames_stride_scaled
= p
->bytes_stride_scaled
/ p
->bytes_per_frame
;
393 msg_Dbg( VLC_OBJECT(p_filter
),
394 "%.3f scale, %.3f stride_in, %i stride_out, %i standing, %i overlap, %i search, %i queue, %s mode",
396 p
->frames_stride_scaled
,
397 (int)( p
->bytes_stride
/ p
->bytes_per_frame
),
398 (int)( p
->bytes_standing
/ p
->bytes_per_frame
),
399 (int)( p
->bytes_overlap
/ p
->bytes_per_frame
),
401 (int)( p
->bytes_queue_max
/ p
->bytes_per_frame
),
407 /*****************************************************************************
408 * Open: initialize as "audio filter"
409 *****************************************************************************/
410 static int Open( vlc_object_t
*p_this
)
412 filter_t
*p_filter
= (filter_t
*)p_this
;
414 /* Allocate structure */
415 filter_sys_t
*p_sys
= p_filter
->p_sys
= malloc( sizeof(*p_sys
) );
420 p_sys
->sample_rate
= p_filter
->fmt_in
.audio
.i_rate
;
421 p_sys
->samples_per_frame
= aout_FormatNbChannels( &p_filter
->fmt_in
.audio
);
422 p_sys
->bytes_per_sample
= 4;
423 p_sys
->bytes_per_frame
= p_sys
->samples_per_frame
* p_sys
->bytes_per_sample
;
425 msg_Dbg( p_this
, "format: %5i rate, %i nch, %i bps, %s",
427 p_sys
->samples_per_frame
,
428 p_sys
->bytes_per_sample
,
431 p_sys
->ms_stride
= var_InheritInteger( p_this
, "scaletempo-stride" );
432 p_sys
->percent_overlap
= var_InheritFloat( p_this
, "scaletempo-overlap" );
433 p_sys
->ms_search
= var_InheritInteger( p_this
, "scaletempo-search" );
435 msg_Dbg( p_this
, "params: %i stride, %.3f overlap, %i search",
436 p_sys
->ms_stride
, p_sys
->percent_overlap
, p_sys
->ms_search
);
438 p_sys
->buf_queue
= NULL
;
439 p_sys
->buf_overlap
= NULL
;
440 p_sys
->table_blend
= NULL
;
441 p_sys
->buf_pre_corr
= NULL
;
442 p_sys
->table_window
= NULL
;
443 p_sys
->bytes_overlap
= 0;
444 p_sys
->bytes_queued
= 0;
445 p_sys
->bytes_to_slide
= 0;
446 p_sys
->frames_stride_error
= 0;
448 if( reinit_buffers( p_filter
) != VLC_SUCCESS
)
454 p_filter
->fmt_in
.audio
.i_format
= VLC_CODEC_FL32
;
455 aout_FormatPrepare(&p_filter
->fmt_in
.audio
);
456 p_filter
->fmt_out
.audio
= p_filter
->fmt_in
.audio
;
457 p_filter
->pf_audio_filter
= DoWork
;
463 static inline void PitchSetRateShift( filter_sys_t
*p_sys
, float pitch_shift
)
465 vlc_atomic_store_float( &p_sys
->rate_shift
,
466 p_sys
->sample_rate
/ powf(2, pitch_shift
/ 12) );
469 static int PitchCallback( vlc_object_t
*p_this
, char const *psz_var
,
470 vlc_value_t oldval
, vlc_value_t newval
, void *p_data
)
472 VLC_UNUSED( p_this
);
473 VLC_UNUSED( oldval
);
474 VLC_UNUSED( psz_var
);
476 PitchSetRateShift( p_data
, newval
.f_float
);
481 static filter_t
*ResamplerCreate(filter_t
*p_filter
)
483 filter_t
*p_resampler
= vlc_object_create( p_filter
, sizeof (filter_t
) );
484 if( unlikely( p_resampler
== NULL
) )
487 p_resampler
->owner
.sys
= NULL
;
488 p_resampler
->p_cfg
= NULL
;
489 p_resampler
->fmt_in
= p_filter
->fmt_in
;
490 p_resampler
->fmt_out
= p_filter
->fmt_in
;
491 p_resampler
->fmt_out
.audio
.i_rate
=
492 vlc_atomic_load_float( &p_filter
->p_sys
->rate_shift
);
493 aout_FormatPrepare( &p_resampler
->fmt_out
.audio
);
494 p_resampler
->p_module
= module_need( p_resampler
, "audio resampler", NULL
,
497 if( p_resampler
->p_module
== NULL
)
499 msg_Err( p_filter
, "Could not load resampler" );
500 vlc_object_release( p_resampler
);
506 static int OpenPitch( vlc_object_t
*p_this
)
508 int err
= Open( p_this
);
512 filter_t
*p_filter
= (filter_t
*)p_this
;
513 vlc_object_t
*p_aout
= p_filter
->obj
.parent
;
514 filter_sys_t
*p_sys
= p_filter
->p_sys
;
516 float pitch_shift
= var_CreateGetFloat( p_aout
, "pitch-shift" );
517 var_AddCallback( p_aout
, "pitch-shift", PitchCallback
, p_sys
);
518 PitchSetRateShift( p_sys
, pitch_shift
);
520 p_sys
->resampler
= ResamplerCreate(p_filter
);
521 if( !p_sys
->resampler
)
524 p_filter
->pf_audio_filter
= DoPitchWork
;
530 static void Close( vlc_object_t
*p_this
)
532 filter_t
*p_filter
= (filter_t
*)p_this
;
533 filter_sys_t
*p_sys
= p_filter
->p_sys
;
534 free( p_sys
->buf_queue
);
535 free( p_sys
->buf_overlap
);
536 free( p_sys
->table_blend
);
537 free( p_sys
->buf_pre_corr
);
538 free( p_sys
->table_window
);
543 static void ClosePitch( vlc_object_t
*p_this
)
545 filter_t
*p_filter
= (filter_t
*)p_this
;
546 filter_sys_t
*p_sys
= p_filter
->p_sys
;
547 vlc_object_t
*p_aout
= p_filter
->obj
.parent
;
548 var_DelCallback( p_aout
, "pitch-shift", PitchCallback
, p_sys
);
549 var_Destroy( p_aout
, "pitch-shift" );
550 module_unneed( p_sys
->resampler
, p_sys
->resampler
->p_module
);
551 vlc_object_release( p_sys
->resampler
);
556 /*****************************************************************************
557 * DoWork: filter wrapper for transform_buffer
558 *****************************************************************************/
559 static block_t
*DoWork( filter_t
* p_filter
, block_t
* p_in_buf
)
561 filter_sys_t
*p
= p_filter
->p_sys
;
563 if( p_filter
->fmt_in
.audio
.i_rate
== p
->sample_rate
)
566 double scale
= p_filter
->fmt_in
.audio
.i_rate
/ (double)p
->sample_rate
;
567 if( scale
!= p
->scale
) {
569 p
->bytes_stride_scaled
= p
->bytes_stride
* p
->scale
;
570 p
->frames_stride_scaled
= p
->bytes_stride_scaled
/ p
->bytes_per_frame
;
571 p
->bytes_to_slide
= 0;
572 msg_Dbg( p_filter
, "%.3f scale, %.3f stride_in, %i stride_out",
574 p
->frames_stride_scaled
,
575 (int)( p
->bytes_stride
/ p
->bytes_per_frame
) );
578 size_t i_outsize
= calculate_output_buffer_size ( p_filter
, p_in_buf
->i_buffer
);
579 block_t
*p_out_buf
= block_Alloc( i_outsize
);
580 if( p_out_buf
== NULL
)
583 size_t bytes_out
= transform_buffer( p_filter
,
584 p_in_buf
->p_buffer
, p_in_buf
->i_buffer
,
585 p_out_buf
->p_buffer
);
587 p_out_buf
->i_buffer
= bytes_out
;
588 p_out_buf
->i_nb_samples
= bytes_out
/ p
->bytes_per_frame
;
589 p_out_buf
->i_dts
= p_in_buf
->i_dts
;
590 p_out_buf
->i_pts
= p_in_buf
->i_pts
;
591 p_out_buf
->i_length
= p_in_buf
->i_length
;
593 block_Release( p_in_buf
);
598 static block_t
*DoPitchWork( filter_t
* p_filter
, block_t
* p_in_buf
)
600 filter_sys_t
*p
= p_filter
->p_sys
;
602 float rate_shift
= vlc_atomic_load_float( &p
->rate_shift
);
604 /* Set matching rates for resampler's output and scaletempo's input */
605 p
->resampler
->fmt_out
.audio
.i_rate
= rate_shift
;
606 p_filter
->fmt_in
.audio
.i_rate
= rate_shift
;
608 /* Change rate, thus changing pitch */
609 p_in_buf
= p
->resampler
->pf_audio_filter( p
->resampler
, p_in_buf
);
611 /* Change tempo while preserving shifted pitch */
612 return DoWork( p_filter
, p_in_buf
);