1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
6 Centre for Digital Music, Queen Mary, University of London.
7 This file copyright 2008-2009 Matthew Davies and QMUL.
9 This program is free software; you can redistribute it and/or
10 modify it under the terms of the GNU General Public License as
11 published by the Free Software Foundation; either version 2 of the
12 License, or (at your option) any later version. See the file
13 COPYING included with this distribution for more information.
18 #include "maths/MathAliases.h"
19 #include "maths/MathUtilities.h"
20 #include "maths/KLDivergence.h"
21 #include "dsp/transforms/FFT.h"
26 DownBeat::DownBeat(float originalSampleRate
,
27 size_t decimationFactor
,
30 m_rate(originalSampleRate
),
31 m_factor(decimationFactor
),
32 m_increment(dfIncrement
),
42 // beat frame size is next power of two up from 1.3 seconds at the
43 // downsampled rate (happens to produce 4096 for 44100 or 48000 at
44 // 16x decimation, which is our expected normal situation)
45 m_beatframesize
= MathUtilities::nextPowerOfTwo
46 (int((m_rate
/ decimationFactor
) * 1.3));
47 // std::cerr << "rate = " << m_rate << ", bfs = " << m_beatframesize << std::endl;
48 m_beatframe
= new double[m_beatframesize
];
49 m_fftRealOut
= new double[m_beatframesize
];
50 m_fftImagOut
= new double[m_beatframesize
];
51 m_fft
= new FFTReal(m_beatframesize
);
58 if (m_buffer
) free(m_buffer
);
61 delete[] m_fftRealOut
;
62 delete[] m_fftImagOut
;
67 DownBeat::setBeatsPerBar(int bpb
)
73 DownBeat::makeDecimators()
75 // std::cerr << "m_factor = " << m_factor << std::endl;
76 if (m_factor
< 2) return;
77 size_t highest
= Decimator::getHighestSupportedFactor();
78 if (m_factor
<= highest
) {
79 m_decimator1
= new Decimator(m_increment
, m_factor
);
80 // std::cerr << "DownBeat: decimator 1 factor " << m_factor << ", size " << m_increment << std::endl;
83 m_decimator1
= new Decimator(m_increment
, highest
);
84 // std::cerr << "DownBeat: decimator 1 factor " << highest << ", size " << m_increment << std::endl;
85 m_decimator2
= new Decimator(m_increment
/ highest
, m_factor
/ highest
);
86 // std::cerr << "DownBeat: decimator 2 factor " << m_factor / highest << ", size " << m_increment / highest << std::endl;
87 m_decbuf
= new float[m_increment
/ highest
];
91 DownBeat::pushAudioBlock(const float *audio
)
93 if (m_buffill
+ (m_increment
/ m_factor
) > m_bufsiz
) {
94 if (m_bufsiz
== 0) m_bufsiz
= m_increment
* 16;
95 else m_bufsiz
= m_bufsiz
* 2;
97 m_buffer
= (float *)malloc(m_bufsiz
* sizeof(float));
99 // std::cerr << "DownBeat::pushAudioBlock: realloc m_buffer to " << m_bufsiz << std::endl;
100 m_buffer
= (float *)realloc(m_buffer
, m_bufsiz
* sizeof(float));
103 if (!m_decimator1
&& m_factor
> 1) makeDecimators();
104 // float rmsin = 0, rmsout = 0;
105 // for (int i = 0; i < m_increment; ++i) {
106 // rmsin += audio[i] * audio[i];
109 m_decimator1
->process(audio
, m_decbuf
);
110 m_decimator2
->process(m_decbuf
, m_buffer
+ m_buffill
);
111 } else if (m_decimator1
) {
112 m_decimator1
->process(audio
, m_buffer
+ m_buffill
);
114 // just copy across (m_factor is presumably 1)
115 for (size_t i
= 0; i
< m_increment
; ++i
) {
116 (m_buffer
+ m_buffill
)[i
] = audio
[i
];
119 // for (int i = 0; i < m_increment / m_factor; ++i) {
120 // rmsout += m_buffer[m_buffill + i] * m_buffer[m_buffill + i];
122 // std::cerr << "pushAudioBlock: rms in " << sqrt(rmsin) << ", out " << sqrt(rmsout) << std::endl;
123 m_buffill
+= m_increment
/ m_factor
;
127 DownBeat::getBufferedAudio(size_t &length
) const
134 DownBeat::resetAudioBuffer()
136 if (m_buffer
) free(m_buffer
);
143 DownBeat::findDownBeats(const float *audio
,
145 const d_vec_t
&beats
,
148 // FIND DOWNBEATS BY PARTITIONING THE INPUT AUDIO FILE INTO BEAT SEGMENTS
149 // WHERE THE AUDIO FRAMES ARE DOWNSAMPLED BY A FACTOR OF 16 (fs ~= 2700Hz)
150 // THEN TAKING THE JENSEN-SHANNON DIVERGENCE BETWEEN BEAT SYNCHRONOUS SPECTRAL FRAMES
152 // IMPLEMENTATION (MOSTLY) FOLLOWS:
153 // DAVIES AND PLUMBLEY "A SPECTRAL DIFFERENCE APPROACH TO EXTRACTING DOWNBEATS IN MUSICAL AUDIO"
154 // EUSIPCO 2006, FLORENCE, ITALY
156 d_vec_t
newspec(m_beatframesize
/ 2); // magnitude spectrum of current beat
157 d_vec_t
oldspec(m_beatframesize
/ 2); // magnitude spectrum of previous beat
161 if (audioLength
== 0) return;
163 for (size_t i
= 0; i
+ 1 < beats
.size(); ++i
) {
165 // Copy the extents of the current beat from downsampled array
166 // into beat frame buffer
168 size_t beatstart
= (beats
[i
] * m_increment
) / m_factor
;
169 size_t beatend
= (beats
[i
+1] * m_increment
) / m_factor
;
170 if (beatend
>= audioLength
) beatend
= audioLength
- 1;
171 if (beatend
< beatstart
) beatend
= beatstart
;
172 size_t beatlen
= beatend
- beatstart
;
174 // Also apply a Hanning window to the beat frame buffer, sized
175 // to the beat extents rather than the frame size. (Because
176 // the size varies, it's easier to do this by hand than use
177 // our Window abstraction.)
179 // std::cerr << "beatlen = " << beatlen << std::endl;
182 for (size_t j
= 0; j
< beatlen
&& j
< m_beatframesize
; ++j
) {
183 double mul
= 0.5 * (1.0 - cos(TWO_PI
* (double(j
) / double(beatlen
))));
184 m_beatframe
[j
] = audio
[beatstart
+ j
] * mul
;
185 // rms += m_beatframe[j] * m_beatframe[j];
188 // std::cerr << "beat " << i << ": audio rms " << rms << std::endl;
190 for (size_t j
= beatlen
; j
< m_beatframesize
; ++j
) {
191 m_beatframe
[j
] = 0.0;
194 // Now FFT beat frame
196 m_fft
->process(false, m_beatframe
, m_fftRealOut
, m_fftImagOut
);
198 // Calculate magnitudes
200 for (size_t j
= 0; j
< m_beatframesize
/2; ++j
) {
201 newspec
[j
] = sqrt(m_fftRealOut
[j
] * m_fftRealOut
[j
] +
202 m_fftImagOut
[j
] * m_fftImagOut
[j
]);
205 // Preserve peaks by applying adaptive threshold
207 MathUtilities::adaptiveThreshold(newspec
);
209 // Calculate JS divergence between new and old spectral frames
211 if (i
> 0) { // otherwise we have no previous frame
212 m_beatsd
.push_back(measureSpecDiff(oldspec
, newspec
));
213 // std::cerr << "specdiff: " << m_beatsd[m_beatsd.size()-1] << std::endl;
216 // Copy newspec across to old
218 for (size_t j
= 0; j
< m_beatframesize
/2; ++j
) {
219 oldspec
[j
] = newspec
[j
];
223 // We now have all spectral difference measures in specdiff
226 if (timesig
== 0) timesig
= 4;
228 d_vec_t
dbcand(timesig
); // downbeat candidates
230 for (int beat
= 0; beat
< timesig
; ++beat
) {
234 // look for beat transition which leads to greatest spectral change
235 for (int beat
= 0; beat
< timesig
; ++beat
) {
237 for (int example
= beat
-1; example
< (int)m_beatsd
.size(); example
+= timesig
) {
238 if (example
< 0) continue;
239 dbcand
[beat
] += (m_beatsd
[example
]) / timesig
;
242 if (count
> 0) dbcand
[beat
] /= count
;
243 // std::cerr << "dbcand[" << beat << "] = " << dbcand[beat] << std::endl;
246 // first downbeat is beat at index of maximum value of dbcand
247 int dbind
= MathUtilities::getMax(dbcand
);
249 // remaining downbeats are at timesig intervals from the first
250 for (int i
= dbind
; i
< (int)beats
.size(); i
+= timesig
) {
251 downbeats
.push_back(i
);
256 DownBeat::measureSpecDiff(d_vec_t oldspec
, d_vec_t newspec
)
258 // JENSEN-SHANNON DIVERGENCE BETWEEN SPECTRAL FRAMES
260 unsigned int SPECSIZE
= 512; // ONLY LOOK AT FIRST 512 SAMPLES OF SPECTRUM.
261 if (SPECSIZE
> oldspec
.size()/4) {
262 SPECSIZE
= oldspec
.size()/4;
270 for (unsigned int i
= 0;i
< SPECSIZE
;i
++)
279 for (unsigned int i
= 0;i
< SPECSIZE
;i
++)
281 newspec
[i
] /= (sumnew
);
282 oldspec
[i
] /= (sumold
);
284 // IF ANY SPECTRAL VALUES ARE 0 (SHOULDN'T BE ANY!) SET THEM TO 1
295 // JENSEN-SHANNON CALCULATION
296 sd1
= 0.5*oldspec
[i
] + 0.5*newspec
[i
];
297 SD
= SD
+ (-sd1
*log(sd1
)) + (0.5*(oldspec
[i
]*log(oldspec
[i
]))) + (0.5*(newspec
[i
]*log(newspec
[i
])));
304 DownBeat::getBeatSD(vector
<double> &beatsd
) const
306 for (int i
= 0; i
< (int)m_beatsd
.size(); ++i
) beatsd
.push_back(m_beatsd
[i
]);