libs/vamp-plugins/OnsetDetect.cpp

   1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
   2
   3 /*
   4     QM Vamp Plugin Set
   5
   6     Centre for Digital Music, Queen Mary, University of London.
   7
   8     This program is free software; you can redistribute it and/or
   9     modify it under the terms of the GNU General Public License as
  10     published by the Free Software Foundation; either version 2 of the
  11     License, or (at your option) any later version.  See the file
  12     COPYING included with this distribution for more information.
  13 */
  14
  15 #include "OnsetDetect.h"
  16
  17 #include "dsp/onsets/DetectionFunction.h"
  18 #include "dsp/onsets/PeakPicking.h"
  19 #include "dsp/tempotracking/TempoTrack.h"
  20
  21 using std::string;
  22 using std::vector;
  23 using std::cerr;
  24 using std::endl;
  25
  26 float OnsetDetector::m_preferredStepSecs = 0.01161;
  27
  28 class OnsetDetectorData
  29 {
  30 public:
  31     OnsetDetectorData(const DFConfig &config) : dfConfig(config) {
  32         df = new DetectionFunction(config);
  33     }
  34     ~OnsetDetectorData() {
  35         delete df;
  36     }
  37     void reset() {
  38         delete df;
  39         df = new DetectionFunction(dfConfig);
  40         dfOutput.clear();
  41         origin = Vamp::RealTime::zeroTime;
  42     }
  43
  44     DFConfig dfConfig;
  45     DetectionFunction *df;
  46     vector<double> dfOutput;
  47     Vamp::RealTime origin;
  48 };
  49
  50
  51 OnsetDetector::OnsetDetector(float inputSampleRate) :
  52     Vamp::Plugin(inputSampleRate),
  53     m_d(0),
  54     m_dfType(DF_COMPLEXSD),
  55     m_sensitivity(50),
  56     m_whiten(false)
  57 {
  58 }
  59
  60 OnsetDetector::~OnsetDetector()
  61 {
  62     delete m_d;
  63 }
  64
  65 string
  66 OnsetDetector::getIdentifier() const
  67 {
  68     return "qm-onsetdetector";
  69 }
  70
  71 string
  72 OnsetDetector::getName() const
  73 {
  74     return "Note Onset Detector";
  75 }
  76
  77 string
  78 OnsetDetector::getDescription() const
  79 {
  80     return "Estimate individual note onset positions";
  81 }
  82
  83 string
  84 OnsetDetector::getMaker() const
  85 {
  86     return "Queen Mary, University of London";
  87 }
  88
  89 int
  90 OnsetDetector::getPluginVersion() const
  91 {
  92     return 3;
  93 }
  94
  95 string
  96 OnsetDetector::getCopyright() const
  97 {
  98     return "Plugin by Christian Landone, Chris Duxbury and Juan Pablo Bello.  Copyright (c) 2006-2009 QMUL - All Rights Reserved";
  99 }
 100
 101 OnsetDetector::ParameterList
 102 OnsetDetector::getParameterDescriptors() const
 103 {
 104     ParameterList list;
 105
 106     ParameterDescriptor desc;
 107     desc.identifier = "dftype";
 108     desc.name = "Onset Detection Function Type";
 109     desc.description = "Method used to calculate the onset detection function";
 110     desc.minValue = 0;
 111     desc.maxValue = 4;
 112     desc.defaultValue = 3;
 113     desc.isQuantized = true;
 114     desc.quantizeStep = 1;
 115     desc.valueNames.push_back("High-Frequency Content");
 116     desc.valueNames.push_back("Spectral Difference");
 117     desc.valueNames.push_back("Phase Deviation");
 118     desc.valueNames.push_back("Complex Domain");
 119     desc.valueNames.push_back("Broadband Energy Rise");
 120     list.push_back(desc);
 121
 122     desc.identifier = "sensitivity";
 123     desc.name = "Onset Detector Sensitivity";
 124     desc.description = "Sensitivity of peak-picker for onset detection";
 125     desc.minValue = 0;
 126     desc.maxValue = 100;
 127     desc.defaultValue = 50;
 128     desc.isQuantized = true;
 129     desc.quantizeStep = 1;
 130     desc.unit = "%";
 131     desc.valueNames.clear();
 132     list.push_back(desc);
 133
 134     desc.identifier = "whiten";
 135     desc.name = "Adaptive Whitening";
 136     desc.description = "Normalize frequency bin magnitudes relative to recent peak levels";
 137     desc.minValue = 0;
 138     desc.maxValue = 1;
 139     desc.defaultValue = 0;
 140     desc.isQuantized = true;
 141     desc.quantizeStep = 1;
 142     desc.unit = "";
 143     list.push_back(desc);
 144
 145     return list;
 146 }
 147
 148 float
 149 OnsetDetector::getParameter(std::string name) const
 150 {
 151     if (name == "dftype") {
 152         switch (m_dfType) {
 153         case DF_HFC: return 0;
 154         case DF_SPECDIFF: return 1;
 155         case DF_PHASEDEV: return 2;
 156         default: case DF_COMPLEXSD: return 3;
 157         case DF_BROADBAND: return 4;
 158         }
 159     } else if (name == "sensitivity") {
 160         return m_sensitivity;
 161     } else if (name == "whiten") {
 162         return m_whiten ? 1.0 : 0.0;
 163     }
 164     return 0.0;
 165 }
 166
 167 void
 168 OnsetDetector::setParameter(std::string name, float value)
 169 {
 170     if (name == "dftype") {
 171         int dfType = m_dfType;
 172         switch (lrintf(value)) {
 173         case 0: dfType = DF_HFC; break;
 174         case 1: dfType = DF_SPECDIFF; break;
 175         case 2: dfType = DF_PHASEDEV; break;
 176         default: case 3: dfType = DF_COMPLEXSD; break;
 177         case 4: dfType = DF_BROADBAND; break;
 178         }
 179         if (dfType == m_dfType) return;
 180         m_dfType = dfType;
 181         m_program = "";
 182     } else if (name == "sensitivity") {
 183         if (m_sensitivity == value) return;
 184         m_sensitivity = value;
 185         m_program = "";
 186     } else if (name == "whiten") {
 187         if (m_whiten == (value > 0.5)) return;
 188         m_whiten = (value > 0.5);
 189         m_program = "";
 190     }
 191 }
 192
 193 OnsetDetector::ProgramList
 194 OnsetDetector::getPrograms() const
 195 {
 196     ProgramList programs;
 197     programs.push_back("");
 198     programs.push_back("General purpose");
 199     programs.push_back("Soft onsets");
 200     programs.push_back("Percussive onsets");
 201     return programs;
 202 }
 203
 204 std::string
 205 OnsetDetector::getCurrentProgram() const
 206 {
 207     if (m_program == "") return "";
 208     else return m_program;
 209 }
 210
 211 void
 212 OnsetDetector::selectProgram(std::string program)
 213 {
 214     if (program == "General purpose") {
 215         setParameter("dftype", 3); // complex
 216         setParameter("sensitivity", 50);
 217         setParameter("whiten", 0);
 218     } else if (program == "Soft onsets") {
 219         setParameter("dftype", 3); // complex
 220         setParameter("sensitivity", 40);
 221         setParameter("whiten", 1);
 222     } else if (program == "Percussive onsets") {
 223         setParameter("dftype", 4); // broadband energy rise
 224         setParameter("sensitivity", 40);
 225         setParameter("whiten", 0);
 226     } else {
 227         return;
 228     }
 229     m_program = program;
 230 }
 231
 232 bool
 233 OnsetDetector::initialise(size_t channels, size_t stepSize, size_t blockSize)
 234 {
 235     if (m_d) {
 236         delete m_d;
 237         m_d = 0;
 238     }
 239
 240     if (channels < getMinChannelCount() ||
 241         channels > getMaxChannelCount()) {
 242         std::cerr << "OnsetDetector::initialise: Unsupported channel count: "
 243                   << channels << std::endl;
 244         return false;
 245     }
 246
 247     if (stepSize != getPreferredStepSize()) {
 248         std::cerr << "WARNING: OnsetDetector::initialise: Possibly sub-optimal step size for this sample rate: "
 249                   << stepSize << " (wanted " << (getPreferredStepSize()) << ")" << std::endl;
 250     }
 251
 252     if (blockSize != getPreferredBlockSize()) {
 253         std::cerr << "WARNING: OnsetDetector::initialise: Possibly sub-optimal block size for this sample rate: "
 254                   << blockSize << " (wanted " << (getPreferredBlockSize()) << ")" << std::endl;
 255     }
 256
 257     DFConfig dfConfig;
 258     dfConfig.DFType = m_dfType;
 259     dfConfig.stepSize = stepSize;
 260     dfConfig.frameLength = blockSize;
 261     dfConfig.dbRise = 6.0 - m_sensitivity / 16.6667;
 262     dfConfig.adaptiveWhitening = m_whiten;
 263     dfConfig.whiteningRelaxCoeff = -1;
 264     dfConfig.whiteningFloor = -1;
 265
 266     m_d = new OnsetDetectorData(dfConfig);
 267     return true;
 268 }
 269
 270 void
 271 OnsetDetector::reset()
 272 {
 273     if (m_d) m_d->reset();
 274 }
 275
 276 size_t
 277 OnsetDetector::getPreferredStepSize() const
 278 {
 279     size_t step = size_t(m_inputSampleRate * m_preferredStepSecs + 0.0001);
 280     if (step < 1) step = 1;
 281 //    std::cerr << "OnsetDetector::getPreferredStepSize: input sample rate is " << m_inputSampleRate << ", step size is " << step << std::endl;
 282     return step;
 283 }
 284
 285 size_t
 286 OnsetDetector::getPreferredBlockSize() const
 287 {
 288     return getPreferredStepSize() * 2;
 289 }
 290
 291 OnsetDetector::OutputList
 292 OnsetDetector::getOutputDescriptors() const
 293 {
 294     OutputList list;
 295
 296     float stepSecs = m_preferredStepSecs;
 297 //    if (m_d) stepSecs = m_d->dfConfig.stepSecs;
 298
 299     OutputDescriptor onsets;
 300     onsets.identifier = "onsets";
 301     onsets.name = "Note Onsets";
 302     onsets.description = "Perceived note onset positions";
 303     onsets.unit = "";
 304     onsets.hasFixedBinCount = true;
 305     onsets.binCount = 0;
 306     onsets.sampleType = OutputDescriptor::VariableSampleRate;
 307     onsets.sampleRate = 1.0 / stepSecs;
 308
 309     OutputDescriptor df;
 310     df.identifier = "detection_fn";
 311     df.name = "Onset Detection Function";
 312     df.description = "Probability function of note onset likelihood";
 313     df.unit = "";
 314     df.hasFixedBinCount = true;
 315     df.binCount = 1;
 316     df.hasKnownExtents = false;
 317     df.isQuantized = false;
 318     df.sampleType = OutputDescriptor::OneSamplePerStep;
 319
 320     OutputDescriptor sdf;
 321     sdf.identifier = "smoothed_df";
 322     sdf.name = "Smoothed Detection Function";
 323     sdf.description = "Smoothed probability function used for peak-picking";
 324     sdf.unit = "";
 325     sdf.hasFixedBinCount = true;
 326     sdf.binCount = 1;
 327     sdf.hasKnownExtents = false;
 328     sdf.isQuantized = false;
 329
 330     sdf.sampleType = OutputDescriptor::VariableSampleRate;
 331
 332 //!!! SV doesn't seem to handle these correctly in getRemainingFeatures
 333 //    sdf.sampleType = OutputDescriptor::FixedSampleRate;
 334     sdf.sampleRate = 1.0 / stepSecs;
 335
 336     list.push_back(onsets);
 337     list.push_back(df);
 338     list.push_back(sdf);
 339
 340     return list;
 341 }
 342
 343 OnsetDetector::FeatureSet
 344 OnsetDetector::process(const float *const *inputBuffers,
 345                        Vamp::RealTime timestamp)
 346 {
 347     if (!m_d) {
 348         cerr << "ERROR: OnsetDetector::process: "
 349              << "OnsetDetector has not been initialised"
 350              << endl;
 351         return FeatureSet();
 352     }
 353
 354     size_t len = m_d->dfConfig.frameLength / 2;
 355
 356 //    float mean = 0.f;
 357 //    for (size_t i = 0; i < len; ++i) {
 358 ////        std::cerr << inputBuffers[0][i] << " ";
 359 //        mean += inputBuffers[0][i];
 360 //    }
 361 ////    std::cerr << std::endl;
 362 //    mean /= len;
 363
 364 //    std::cerr << "OnsetDetector::process(" << timestamp << "): "
 365 //              << "dftype " << m_dfType << ", sens " << m_sensitivity
 366 //              << ", len " << len << ", mean " << mean << std::endl;
 367
 368     double *magnitudes = new double[len];
 369     double *phases = new double[len];
 370
 371     // We only support a single input channel
 372
 373     for (size_t i = 0; i < len; ++i) {
 374
 375         magnitudes[i] = sqrt(inputBuffers[0][i*2  ] * inputBuffers[0][i*2  ] +
 376                              inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1]);
 377
 378         phases[i] = atan2(-inputBuffers[0][i*2+1], inputBuffers[0][i*2]);
 379     }
 380
 381     double output = m_d->df->process(magnitudes, phases);
 382
 383     delete[] magnitudes;
 384     delete[] phases;
 385
 386     if (m_d->dfOutput.empty()) m_d->origin = timestamp;
 387
 388     m_d->dfOutput.push_back(output);
 389
 390     FeatureSet returnFeatures;
 391
 392     Feature feature;
 393     feature.hasTimestamp = false;
 394     feature.values.push_back(output);
 395
 396 //    std::cerr << "df: " << output << std::endl;
 397
 398     returnFeatures[1].push_back(feature); // detection function is output 1
 399     return returnFeatures;
 400 }
 401
 402 OnsetDetector::FeatureSet
 403 OnsetDetector::getRemainingFeatures()
 404 {
 405     if (!m_d) {
 406         cerr << "ERROR: OnsetDetector::getRemainingFeatures: "
 407              << "OnsetDetector has not been initialised"
 408              << endl;
 409         return FeatureSet();
 410     }
 411
 412     if (m_dfType == DF_BROADBAND) {
 413         for (size_t i = 0; i < m_d->dfOutput.size(); ++i) {
 414             if (m_d->dfOutput[i] < ((110 - m_sensitivity) *
 415                                     m_d->dfConfig.frameLength) / 200) {
 416                 m_d->dfOutput[i] = 0;
 417             }
 418         }
 419     }
 420
 421     double aCoeffs[] = { 1.0000, -0.5949, 0.2348 };
 422     double bCoeffs[] = { 0.1600,  0.3200, 0.1600 };
 423
 424     FeatureSet returnFeatures;
 425
 426     PPickParams ppParams;
 427     ppParams.length = m_d->dfOutput.size();
 428     // tau and cutoff appear to be unused in PeakPicking, but I've
 429     // inserted some moderately plausible values rather than leave
 430     // them unset.  The QuadThresh values come from trial and error.
 431     // The rest of these are copied from ttParams in the BeatTracker
 432     // code: I don't claim to know whether they're good or not --cc
 433     ppParams.tau = m_d->dfConfig.stepSize / m_inputSampleRate;
 434     ppParams.alpha = 9;
 435     ppParams.cutoff = m_inputSampleRate/4;
 436     ppParams.LPOrd = 2;
 437     ppParams.LPACoeffs = aCoeffs;
 438     ppParams.LPBCoeffs = bCoeffs;
 439     ppParams.WinT.post = 8;
 440     ppParams.WinT.pre = 7;
 441     ppParams.QuadThresh.a = (100 - m_sensitivity) / 1000.0;
 442     ppParams.QuadThresh.b = 0;
 443     ppParams.QuadThresh.c = (100 - m_sensitivity) / 1500.0;
 444
 445     PeakPicking peakPicker(ppParams);
 446
 447     double *ppSrc = new double[ppParams.length];
 448     for (unsigned int i = 0; i < ppParams.length; ++i) {
 449         ppSrc[i] = m_d->dfOutput[i];
 450     }
 451
 452     vector<int> onsets;
 453     peakPicker.process(ppSrc, ppParams.length, onsets);
 454
 455     for (size_t i = 0; i < onsets.size(); ++i) {
 456
 457         size_t index = onsets[i];
 458
 459         if (m_dfType != DF_BROADBAND) {
 460             double prevDiff = 0.0;
 461             while (index > 1) {
 462                 double diff = ppSrc[index] - ppSrc[index-1];
 463                 if (diff < prevDiff * 0.9) break;
 464                 prevDiff = diff;
 465                 --index;
 466             }
 467         }
 468
 469         size_t frame = index * m_d->dfConfig.stepSize;
 470
 471         Feature feature;
 472         feature.hasTimestamp = true;
 473         feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime
 474             (frame, lrintf(m_inputSampleRate));
 475
 476         returnFeatures[0].push_back(feature); // onsets are output 0
 477     }
 478
 479     for (unsigned int i = 0; i < ppParams.length; ++i) {
 480
 481         Feature feature;
 482 //        feature.hasTimestamp = false;
 483         feature.hasTimestamp = true;
 484         size_t frame = i * m_d->dfConfig.stepSize;
 485         feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime
 486             (frame, lrintf(m_inputSampleRate));
 487
 488         feature.values.push_back(ppSrc[i]);
 489         returnFeatures[2].push_back(feature); // smoothed df is output 2
 490     }
 491
 492     return returnFeatures;
 493 }
 494