2 * scaletempo audio filter
4 * scale tempo while maintaining pitch
5 * (WSOLA technique with cross correlation)
6 * inspired by SoundTouch library by Olli Parviainen
9 * - produce 'stride' output samples per loop
10 * - consume stride*scale input samples per loop
12 * to produce smoother transitions between strides, blend next overlap
13 * samples from last stride with correlated samples of current input
15 * Copyright (c) 2007 Robert Juliano
17 * This file is part of MPlayer.
19 * MPlayer is free software; you can redistribute it and/or modify
20 * it under the terms of the GNU General Public License as published by
21 * the Free Software Foundation; either version 2 of the License, or
22 * (at your option) any later version.
24 * MPlayer is distributed in the hope that it will be useful,
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 * GNU General Public License for more details.
29 * You should have received a copy of the GNU General Public License along
30 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
31 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
40 #include "libavutil/common.h"
41 #include "subopt-helper.h"
43 // Data for specific instances of this filter
44 typedef struct af_scaletempo_s
49 float frames_stride_scaled
;
50 float frames_stride_error
;
53 float bytes_stride_scaled
;
65 void (*output_overlap
)(struct af_scaletempo_s
* s
, void* out_buf
, int bytes_off
);
71 int (*best_overlap_offset
)(struct af_scaletempo_s
* s
);
75 float percent_overlap
;
81 static int fill_queue(struct af_instance_s
* af
, af_data_t
* data
, int offset
)
83 af_scaletempo_t
* s
= af
->setup
;
84 int bytes_in
= data
->len
- offset
;
85 int offset_unchanged
= offset
;
87 if (s
->bytes_to_slide
> 0) {
88 if (s
->bytes_to_slide
< s
->bytes_queued
) {
89 int bytes_move
= s
->bytes_queued
- s
->bytes_to_slide
;
91 s
->buf_queue
+ s
->bytes_to_slide
,
93 s
->bytes_to_slide
= 0;
94 s
->bytes_queued
= bytes_move
;
97 s
->bytes_to_slide
-= s
->bytes_queued
;
98 bytes_skip
= FFMIN(s
->bytes_to_slide
, bytes_in
);
100 s
->bytes_to_slide
-= bytes_skip
;
101 offset
+= bytes_skip
;
102 bytes_in
-= bytes_skip
;
107 int bytes_copy
= FFMIN(s
->bytes_queue
- s
->bytes_queued
, bytes_in
);
108 assert(bytes_copy
>= 0);
109 memcpy(s
->buf_queue
+ s
->bytes_queued
,
110 (int8_t*)data
->audio
+ offset
,
112 s
->bytes_queued
+= bytes_copy
;
113 offset
+= bytes_copy
;
116 return offset
- offset_unchanged
;
119 #define UNROLL_PADDING (4*4)
121 static int best_overlap_offset_float(af_scaletempo_t
* s
)
123 float *pw
, *po
, *ppc
, *search_start
;
124 float best_corr
= INT_MIN
;
128 pw
= s
->table_window
;
130 po
+= s
->num_channels
;
131 ppc
= s
->buf_pre_corr
;
132 for (i
=s
->num_channels
; i
<s
->samples_overlap
; i
++) {
133 *ppc
++ = *pw
++ * *po
++;
136 search_start
= (float*)s
->buf_queue
+ s
->num_channels
;
137 for (off
=0; off
<s
->frames_search
; off
++) {
139 float* ps
= search_start
;
140 ppc
= s
->buf_pre_corr
;
141 for (i
=s
->num_channels
; i
<s
->samples_overlap
; i
++) {
142 corr
+= *ppc
++ * *ps
++;
144 if (corr
> best_corr
) {
148 search_start
+= s
->num_channels
;
151 return best_off
* 4 * s
->num_channels
;
154 static int best_overlap_offset_s16(af_scaletempo_t
* s
)
157 int16_t *po
, *search_start
;
158 int64_t best_corr
= INT64_MIN
;
163 pw
= s
->table_window
;
165 po
+= s
->num_channels
;
166 ppc
= s
->buf_pre_corr
;
167 for (i
=s
->num_channels
; i
<s
->samples_overlap
; i
++) {
168 *ppc
++ = ( *pw
++ * *po
++ ) >> 15;
171 search_start
= (int16_t*)s
->buf_queue
+ s
->num_channels
;
172 for (off
=0; off
<s
->frames_search
; off
++) {
174 int16_t* ps
= search_start
;
175 ppc
= s
->buf_pre_corr
;
176 ppc
+= s
->samples_overlap
- s
->num_channels
;
177 ps
+= s
->samples_overlap
- s
->num_channels
;
178 i
= -(s
->samples_overlap
- s
->num_channels
);
180 corr
+= ppc
[i
+0] * ps
[i
+0];
181 corr
+= ppc
[i
+1] * ps
[i
+1];
182 corr
+= ppc
[i
+2] * ps
[i
+2];
183 corr
+= ppc
[i
+3] * ps
[i
+3];
186 if (corr
> best_corr
) {
190 search_start
+= s
->num_channels
;
193 return best_off
* 2 * s
->num_channels
;
196 static void output_overlap_float(af_scaletempo_t
* s
, void* buf_out
,
199 float* pout
= buf_out
;
200 float* pb
= s
->table_blend
;
201 float* po
= s
->buf_overlap
;
202 float* pin
= (float*)(s
->buf_queue
+ bytes_off
);
204 for (i
=0; i
<s
->samples_overlap
; i
++) {
205 *pout
++ = *po
- *pb
++ * ( *po
- *pin
++ ); po
++;
208 static void output_overlap_s16(af_scaletempo_t
* s
, void* buf_out
,
211 int16_t* pout
= buf_out
;
212 int32_t* pb
= s
->table_blend
;
213 int16_t* po
= s
->buf_overlap
;
214 int16_t* pin
= (int16_t*)(s
->buf_queue
+ bytes_off
);
216 for (i
=0; i
<s
->samples_overlap
; i
++) {
217 *pout
++ = *po
- ( ( *pb
++ * ( *po
- *pin
++ ) ) >> 16 ); po
++;
221 // Filter data through filter
222 static af_data_t
* play(struct af_instance_s
* af
, af_data_t
* data
)
224 af_scaletempo_t
* s
= af
->setup
;
229 if (s
->scale
== 1.0) {
234 // RESIZE_LOCAL_BUFFER - can't use macro
235 max_bytes_out
= ((int)(data
->len
/ s
->bytes_stride_scaled
) + 1) * s
->bytes_stride
;
236 if (max_bytes_out
> af
->data
->len
) {
237 mp_msg(MSGT_AFILTER
, MSGL_V
, "[libaf] Reallocating memory in module %s, "
238 "old len = %i, new len = %i\n",af
->info
->name
,af
->data
->len
,max_bytes_out
);
239 af
->data
->audio
= realloc(af
->data
->audio
, max_bytes_out
);
240 if (!af
->data
->audio
) {
241 mp_msg(MSGT_AFILTER
, MSGL_FATAL
, "[libaf] Could not allocate memory\n");
244 af
->data
->len
= max_bytes_out
;
247 offset_in
= fill_queue(af
, data
, 0);
248 pout
= af
->data
->audio
;
249 while (s
->bytes_queued
>= s
->bytes_queue
) {
255 if (s
->output_overlap
) {
256 if (s
->best_overlap_offset
)
257 bytes_off
= s
->best_overlap_offset(s
);
258 s
->output_overlap(s
, pout
, bytes_off
);
260 memcpy(pout
+ s
->bytes_overlap
,
261 s
->buf_queue
+ bytes_off
+ s
->bytes_overlap
,
263 pout
+= s
->bytes_stride
;
266 memcpy(s
->buf_overlap
,
267 s
->buf_queue
+ bytes_off
+ s
->bytes_stride
,
269 tf
= s
->frames_stride_scaled
+ s
->frames_stride_error
;
271 s
->frames_stride_error
= tf
- ti
;
272 s
->bytes_to_slide
= ti
* s
->bytes_per_frame
;
274 offset_in
+= fill_queue(af
, data
, offset_in
);
277 // This filter can have a negative delay when scale > 1:
278 // output corresponding to some length of input can be decided and written
279 // after receiving only a part of that input.
280 af
->delay
= s
->bytes_queued
- s
->bytes_to_slide
;
282 data
->audio
= af
->data
->audio
;
283 data
->len
= pout
- (int8_t *)af
->data
->audio
;
287 // Initialization and runtime control
288 static int control(struct af_instance_s
* af
, int cmd
, void* arg
)
290 af_scaletempo_t
* s
= af
->setup
;
292 case AF_CONTROL_REINIT
:{
293 af_data_t
* data
= (af_data_t
*)arg
;
294 float srate
= data
->rate
/ 1000;
298 int frames_stride
, frames_overlap
;
301 mp_msg(MSGT_AFILTER
, MSGL_V
,
302 "[scaletempo] %.3f speed * %.3f scale_nominal = %.3f\n",
303 s
->speed
, s
->scale_nominal
, s
->scale
);
305 if (s
->scale
== 1.0) {
306 if (s
->speed_tempo
&& s
->speed_pitch
)
308 memcpy(af
->data
, data
, sizeof(af_data_t
));
309 return af_test_output(af
, data
);
312 af
->data
->rate
= data
->rate
;
313 af
->data
->nch
= data
->nch
;
314 if ( data
->format
== AF_FORMAT_S16_LE
315 || data
->format
== AF_FORMAT_S16_BE
) {
317 af
->data
->format
= AF_FORMAT_S16_NE
;
318 af
->data
->bps
= bps
= 2;
320 af
->data
->format
= AF_FORMAT_FLOAT_NE
;
321 af
->data
->bps
= bps
= 4;
324 frames_stride
= srate
* s
->ms_stride
;
325 s
->bytes_stride
= frames_stride
* bps
* nch
;
326 s
->bytes_stride_scaled
= s
->scale
* s
->bytes_stride
;
327 s
->frames_stride_scaled
= s
->scale
* frames_stride
;
328 s
->frames_stride_error
= 0;
329 af
->mul
= (double)s
->bytes_stride
/ s
->bytes_stride_scaled
;
331 frames_overlap
= frames_stride
* s
->percent_overlap
;
332 if (frames_overlap
<= 0) {
333 s
->bytes_standing
= s
->bytes_stride
;
334 s
->samples_standing
= s
->bytes_standing
/ bps
;
335 s
->output_overlap
= NULL
;
336 s
->bytes_overlap
= 0;
338 s
->samples_overlap
= frames_overlap
* nch
;
339 s
->bytes_overlap
= frames_overlap
* nch
* bps
;
340 s
->bytes_standing
= s
->bytes_stride
- s
->bytes_overlap
;
341 s
->samples_standing
= s
->bytes_standing
/ bps
;
342 s
->buf_overlap
= realloc(s
->buf_overlap
, s
->bytes_overlap
);
343 s
->table_blend
= realloc(s
->table_blend
, s
->bytes_overlap
* 4);
344 if(!s
->buf_overlap
|| !s
->table_blend
) {
345 mp_msg(MSGT_AFILTER
, MSGL_FATAL
, "[scaletempo] Out of memory\n");
348 memset(s
->buf_overlap
, 0, s
->bytes_overlap
);
350 int32_t* pb
= s
->table_blend
;
352 for (i
=0; i
<frames_overlap
; i
++) {
353 int32_t v
= blend
/ frames_overlap
;
354 for (j
=0; j
<nch
; j
++) {
357 blend
+= 65536; // 2^16
359 s
->output_overlap
= output_overlap_s16
;
361 float* pb
= s
->table_blend
;
362 for (i
=0; i
<frames_overlap
; i
++) {
363 float v
= i
/ (float)frames_overlap
;
364 for (j
=0; j
<nch
; j
++) {
368 s
->output_overlap
= output_overlap_float
;
372 s
->frames_search
= (frames_overlap
> 1) ? srate
* s
->ms_search
: 0;
373 if (s
->frames_search
<= 0) {
374 s
->best_overlap_offset
= NULL
;
377 int64_t t
= frames_overlap
;
378 int32_t n
= 8589934588LL / (t
* t
); // 4 * (2^31 - 1) / t^2
380 s
->buf_pre_corr
= realloc(s
->buf_pre_corr
, s
->bytes_overlap
* 2 + UNROLL_PADDING
);
381 s
->table_window
= realloc(s
->table_window
, s
->bytes_overlap
* 2 - nch
* bps
* 2);
382 if(!s
->buf_pre_corr
|| !s
->table_window
) {
383 mp_msg(MSGT_AFILTER
, MSGL_FATAL
, "[scaletempo] Out of memory\n");
386 memset((char *)s
->buf_pre_corr
+ s
->bytes_overlap
* 2, 0, UNROLL_PADDING
);
387 pw
= s
->table_window
;
388 for (i
=1; i
<frames_overlap
; i
++) {
389 int32_t v
= ( i
* (t
- i
) * n
) >> 15;
390 for (j
=0; j
<nch
; j
++) {
394 s
->best_overlap_offset
= best_overlap_offset_s16
;
397 s
->buf_pre_corr
= realloc(s
->buf_pre_corr
, s
->bytes_overlap
);
398 s
->table_window
= realloc(s
->table_window
, s
->bytes_overlap
- nch
* bps
);
399 if(!s
->buf_pre_corr
|| !s
->table_window
) {
400 mp_msg(MSGT_AFILTER
, MSGL_FATAL
, "[scaletempo] Out of memory\n");
403 pw
= s
->table_window
;
404 for (i
=1; i
<frames_overlap
; i
++) {
405 float v
= i
* (frames_overlap
- i
);
406 for (j
=0; j
<nch
; j
++) {
410 s
->best_overlap_offset
= best_overlap_offset_float
;
414 s
->bytes_per_frame
= bps
* nch
;
415 s
->num_channels
= nch
;
418 = (s
->frames_search
+ frames_stride
+ frames_overlap
) * bps
* nch
;
419 s
->buf_queue
= realloc(s
->buf_queue
, s
->bytes_queue
+ UNROLL_PADDING
);
421 mp_msg(MSGT_AFILTER
, MSGL_FATAL
, "[scaletempo] Out of memory\n");
426 s
->bytes_to_slide
= 0;
428 mp_msg (MSGT_AFILTER
, MSGL_DBG2
, "[scaletempo] "
429 "%.2f stride_in, %i stride_out, %i standing, "
430 "%i overlap, %i search, %i queue, %s mode\n",
431 s
->frames_stride_scaled
,
432 (int)(s
->bytes_stride
/ nch
/ bps
),
433 (int)(s
->bytes_standing
/ nch
/ bps
),
434 (int)(s
->bytes_overlap
/ nch
/ bps
),
436 (int)(s
->bytes_queue
/ nch
/ bps
),
437 (use_int
?"s16":"float"));
439 return af_test_output(af
, (af_data_t
*)arg
);
441 case AF_CONTROL_PLAYBACK_SPEED
| AF_CONTROL_SET
:{
442 if (s
->speed_tempo
) {
443 if (s
->speed_pitch
) {
446 s
->speed
= *(float*)arg
;
447 s
->scale
= s
->speed
* s
->scale_nominal
;
449 if (s
->speed_pitch
) {
450 s
->speed
= 1 / *(float*)arg
;
451 s
->scale
= s
->speed
* s
->scale_nominal
;
457 case AF_CONTROL_SCALETEMPO_AMOUNT
| AF_CONTROL_SET
:{
458 s
->scale
= *(float*)arg
;
459 s
->scale
= s
->speed
* s
->scale_nominal
;
462 case AF_CONTROL_SCALETEMPO_AMOUNT
| AF_CONTROL_GET
:
463 *(float*)arg
= s
->scale
;
465 case AF_CONTROL_COMMAND_LINE
:{
468 {"scale", OPT_ARG_FLOAT
, &s
->scale_nominal
, NULL
},
469 {"stride", OPT_ARG_FLOAT
, &s
->ms_stride
, NULL
},
470 {"overlap", OPT_ARG_FLOAT
, &s
->percent_overlap
, NULL
},
471 {"search", OPT_ARG_FLOAT
, &s
->ms_search
, NULL
},
472 {"speed", OPT_ARG_STR
, &speed
, NULL
},
475 if (subopt_parse(arg
, subopts
) != 0) {
478 if (s
->scale_nominal
<= 0) {
479 mp_msg(MSGT_AFILTER
, MSGL_ERR
, "[scaletempo] %s: %s: scale > 0\n",
480 mp_gtext("error parsing command line"),
481 mp_gtext("value out of range"));
484 if (s
->ms_stride
<= 0) {
485 mp_msg(MSGT_AFILTER
, MSGL_ERR
, "[scaletempo] %s: %s: stride > 0\n",
486 mp_gtext("error parsing command line"),
487 mp_gtext("value out of range"));
490 if (s
->percent_overlap
< 0 || s
->percent_overlap
> 1) {
491 mp_msg(MSGT_AFILTER
, MSGL_ERR
,
492 "[scaletempo] %s: %s: 0 <= overlap <= 1\n",
493 mp_gtext("error parsing command line"),
494 mp_gtext("value out of range"));
497 if (s
->ms_search
< 0) {
498 mp_msg(MSGT_AFILTER
, MSGL_ERR
, "[scaletempo] %s: %s: search >= 0\n",
499 mp_gtext("error parsing command line"),
500 mp_gtext("value out of range"));
504 if (strcmp(speed
.str
, "pitch") == 0) {
507 } else if (strcmp(speed
.str
, "tempo") == 0) {
510 } else if (strcmp(speed
.str
, "none") == 0) {
513 } else if (strcmp(speed
.str
, "both") == 0) {
517 mp_msg(MSGT_AFILTER
, MSGL_ERR
,
518 "[scaletempo] %s: %s: speed=[pitch|tempo|none|both]\n",
519 mp_gtext("error parsing command line"),
520 mp_gtext("value out of range"));
524 s
->scale
= s
->speed
* s
->scale_nominal
;
525 mp_msg(MSGT_AFILTER
, MSGL_DBG2
, "[scaletempo] %6.3f scale, %6.2f stride, %6.2f overlap, %6.2f search, speed = %s\n", s
->scale_nominal
, s
->ms_stride
, s
->percent_overlap
, s
->ms_search
, (s
->speed_tempo
?(s
->speed_pitch
?"tempo and speed":"tempo"):(s
->speed_pitch
?"pitch":"none")));
533 static void uninit(struct af_instance_s
* af
)
535 af_scaletempo_t
* s
= af
->setup
;
536 free(af
->data
->audio
);
539 free(s
->buf_overlap
);
540 free(s
->buf_pre_corr
);
541 free(s
->table_blend
);
542 free(s
->table_window
);
546 // Allocate memory and set function pointers
547 static int af_open(af_instance_t
* af
){
550 af
->control
= control
;
554 af
->data
= calloc(1,sizeof(af_data_t
));
555 af
->setup
= calloc(1,sizeof(af_scaletempo_t
));
556 if(af
->data
== NULL
|| af
->setup
== NULL
)
560 s
->scale
= s
->speed
= s
->scale_nominal
= 1.0;
564 s
->percent_overlap
= .20;
570 // Description of this filter
571 af_info_t af_info_scaletempo
= {
572 "Scale audio tempo while maintaining pitch",