modules/audio_filter/channel_mixer/spatialaudio.cpp

   1 /*****************************************************************************
   2  * spatialaudio.cpp : Ambisonics audio renderer and binauralizer filter
   3  *****************************************************************************
   4  * Copyright © 2017 VLC authors and VideoLAN
   5  *
   6  * Authors: Adrien Maglo <magsoft@videolan.org>
   7  *
   8  * This program is free software; you can redistribute it and/or modify it
   9  * under the terms of the GNU Lesser General Public License as published by
  10  * the Free Software Foundation; either version 2.1 of the License, or
  11  * (at your option) any later version.
  12  *
  13  * This program is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16  * GNU Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public License
  19  * along with this program; if not, write to the Free Software Foundation,
  20  * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  21  *****************************************************************************/
  22
  23 /*****************************************************************************
  24  * Preamble
  25  *****************************************************************************/
  26
  27 #ifdef HAVE_CONFIG_H
  28 # include "config.h"
  29 #endif
  30
  31 #include <assert.h>
  32
  33 #include <vlc_common.h>
  34 #include <vlc_plugin.h>
  35 #include <vlc_aout.h>
  36 #include <vlc_filter.h>
  37 #include <vlc_viewpoint.h>
  38
  39 #include <new>
  40 #include <vector>
  41 #include <sstream>
  42
  43 #include <spatialaudio/Ambisonics.h>
  44 #include <spatialaudio/SpeakersBinauralizer.h>
  45
  46 #define CFG_PREFIX "spatialaudio-"
  47
  48 #define DEFAULT_HRTF_PATH "hrtfs" DIR_SEP "dodeca_and_7channel_FHK_HRTF.sofa"
  49
  50 #define HRTF_FILE_TEXT N_("HRTF file for the binauralization")
  51 #define HRTF_FILE_LONGTEXT N_("Custom HRTF (Head-related transfer function) file" \
  52                               "in the SOFA format.")
  53
  54 #define HEADPHONES_TEXT N_("Headphones mode (binaural)")
  55 #define HEADPHONES_LONGTEXT N_("If the output is stereo, render ambisonics " \
  56                                "with the binaural decoder.")
  57
/* Module entry points. Close is registered for both the main module and
 * the binauralizer submodule. */
static int OpenBinauralizer(vlc_object_t *p_this);
static int Open( vlc_object_t * );
static void Close( vlc_object_t * );

/* Module descriptor: the main module registers the "audio renderer"
 * capability (opened by Open), the submodule registers the "audio filter"
 * capability (opened by OpenBinauralizer). */
vlc_module_begin()
    set_shortname("Spatialaudio")
    set_description(N_("Ambisonics renderer and binauralizer"))
    set_capability("audio renderer", 1)
    set_category(CAT_AUDIO)
    set_subcategory(SUBCAT_AUDIO_AFILTER)
    set_callbacks(Open, Close)
    /* Read back in Open() as CFG_PREFIX "headphones". */
    add_bool(CFG_PREFIX "headphones", false,
             HEADPHONES_TEXT, HEADPHONES_LONGTEXT, true)
    /* Read back in getHRTFPath(); note this option carries no CFG_PREFIX. */
    add_loadfile("hrtf-file", NULL,
                 HRTF_FILE_TEXT, HRTF_FILE_LONGTEXT, true)
    add_shortcut("ambisonics")

    add_submodule()
    set_shortname(N_("Binauralizer"))
    set_capability("audio filter", 0)
    set_callbacks(OpenBinauralizer, Close)
    add_shortcut("binauralizer")
vlc_module_end()

/* Number of samples per channel handled in one processing pass; also the
 * length of every per-channel buffer in filter_sys_t::inBuf / outBuf. */
#define AMB_BLOCK_TIME_LEN 1024
  83
  84 struct filter_sys_t
  85 {
  86     filter_sys_t()
  87         : speakers(NULL)
  88         , i_inputPTS(0)
  89         , inBuf(NULL)
  90         , outBuf(NULL)
  91     {}
  92     ~filter_sys_t()
  93     {
  94         delete[] speakers;
  95         if (inBuf != NULL)
  96             for (unsigned i = 0; i < i_inputNb; ++i)
  97                 free(inBuf[i]);
  98         free(inBuf);
  99
 100         if (outBuf != NULL)
 101             for (unsigned i = 0; i < i_outputNb; ++i)
 102                 free(outBuf[i]);
 103         free(outBuf);
 104     }
 105
 106     enum
 107     {
 108         AMBISONICS_DECODER, // Ambisonics decoding module
 109         AMBISONICS_BINAURAL_DECODER, // Ambisonics decoding module using binaural
 110         BINAURALIZER // Binauralizer module
 111     } mode;
 112
 113     CAmbisonicBinauralizer binauralDecoder;
 114     SpeakersBinauralizer binauralizer;
 115     CAmbisonicDecoder speakerDecoder;
 116     CAmbisonicProcessor processor;
 117     CAmbisonicZoomer zoomer;
 118
 119     CAmbisonicSpeaker *speakers;
 120
 121     std::vector<float> inputSamples;
 122     mtime_t i_inputPTS;
 123     unsigned i_rate;
 124     unsigned i_order;
 125
 126     float** inBuf;
 127     float** outBuf;
 128     unsigned i_inputNb;
 129     unsigned i_outputNb;
 130
 131     /* View point. */
 132     float f_teta;
 133     float f_phi;
 134     float f_roll;
 135     float f_zoom;
 136 };
 137
 138 static std::string getHRTFPath(filter_t *p_filter)
 139 {
 140     std::string HRTFPath;
 141
 142     char *userHRTFPath = var_InheritString(p_filter, "hrtf-file");
 143
 144     if (userHRTFPath != NULL)
 145     {
 146         HRTFPath = std::string(userHRTFPath);
 147         free(userHRTFPath);
 148     }
 149     else
 150     {
 151         char *dataDir = config_GetDataDir();
 152         if (dataDir != NULL)
 153         {
 154             std::stringstream ss;
 155             ss << std::string(dataDir) << DIR_SEP << DEFAULT_HRTF_PATH;
 156             HRTFPath = ss.str();
 157             free(dataDir);
 158         }
 159     }
 160
 161     return HRTFPath;
 162 }
 163
 164 static block_t *Mix( filter_t *p_filter, block_t *p_buf )
 165 {
 166     filter_sys_t *p_sys = p_filter->p_sys;
 167
 168     const size_t i_prevSize = p_sys->inputSamples.size();
 169     p_sys->inputSamples.resize(i_prevSize + p_buf->i_nb_samples * p_sys->i_inputNb);
 170     memcpy((char*)(p_sys->inputSamples.data() + i_prevSize), (char*)p_buf->p_buffer, p_buf->i_buffer);
 171
 172     const size_t i_inputBlockSize = sizeof(float) * p_sys->i_inputNb * AMB_BLOCK_TIME_LEN;
 173     const size_t i_outputBlockSize = sizeof(float) * p_sys->i_outputNb * AMB_BLOCK_TIME_LEN;
 174     const size_t i_nbBlocks = p_sys->inputSamples.size() * sizeof(float) / i_inputBlockSize;
 175
 176     block_t *p_out_buf = block_Alloc(i_outputBlockSize * i_nbBlocks);
 177     if (unlikely(p_out_buf == NULL))
 178     {
 179         block_Release(p_buf);
 180         return NULL;
 181     }
 182
 183     p_out_buf->i_nb_samples = i_nbBlocks * AMB_BLOCK_TIME_LEN;
 184     if (p_sys->i_inputPTS == 0)
 185         p_out_buf->i_pts = p_buf->i_pts;
 186     else
 187         p_out_buf->i_pts = p_sys->i_inputPTS;
 188     p_out_buf->i_dts = p_out_buf->i_pts;
 189     p_out_buf->i_length = p_out_buf->i_nb_samples * INT64_C(1000000) / p_sys->i_rate;
 190
 191     float *p_dest = (float *)p_out_buf->p_buffer;
 192     const float *p_src = (float *)p_sys->inputSamples.data();
 193
 194     for (unsigned b = 0; b < i_nbBlocks; ++b)
 195     {
 196         for (unsigned i = 0; i < p_sys->i_inputNb; ++i)
 197         {
 198             for (unsigned j = 0; j < AMB_BLOCK_TIME_LEN; ++j)
 199             {
 200                 float val = p_src[(b * AMB_BLOCK_TIME_LEN + j) * p_sys->i_inputNb + i];
 201                 p_sys->inBuf[i][j] = val;
 202             }
 203         }
 204
 205         // Compute
 206         switch (p_sys->mode)
 207         {
 208             case filter_sys_t::BINAURALIZER:
 209                 p_sys->binauralizer.Process(p_sys->inBuf, p_sys->outBuf);
 210                 break;
 211             case filter_sys_t::AMBISONICS_DECODER:
 212             case filter_sys_t::AMBISONICS_BINAURAL_DECODER:
 213             {
 214                 CBFormat inData;
 215                 inData.Configure(p_sys->i_order, true, AMB_BLOCK_TIME_LEN);
 216
 217                 for (unsigned i = 0; i < p_sys->i_inputNb; ++i)
 218                     inData.InsertStream(p_sys->inBuf[i], i, AMB_BLOCK_TIME_LEN);
 219
 220                 Orientation ori(p_sys->f_teta, p_sys->f_phi, p_sys->f_roll);
 221                 p_sys->processor.SetOrientation(ori);
 222                 p_sys->processor.Refresh();
 223                 p_sys->processor.Process(&inData, inData.GetSampleCount());
 224
 225                 p_sys->zoomer.SetZoom(p_sys->f_zoom);
 226                 p_sys->zoomer.Refresh();
 227                 p_sys->zoomer.Process(&inData, inData.GetSampleCount());
 228
 229                 if (p_sys->mode == filter_sys_t::AMBISONICS_DECODER)
 230                     p_sys->speakerDecoder.Process(&inData, inData.GetSampleCount(), p_sys->outBuf);
 231                 else
 232                     p_sys->binauralDecoder.Process(&inData, p_sys->outBuf);
 233                 break;
 234             }
 235             default:
 236                 vlc_assert_unreachable();
 237         }
 238
 239         // Interleave the results.
 240         for (unsigned i = 0; i < p_sys->i_outputNb; ++i)
 241             for (unsigned j = 0; j < AMB_BLOCK_TIME_LEN; ++j)
 242                 p_dest[(b * AMB_BLOCK_TIME_LEN + j) * p_sys->i_outputNb + i] = p_sys->outBuf[i][j];
 243     }
 244
 245     p_sys->inputSamples.erase(p_sys->inputSamples.begin(),
 246                               p_sys->inputSamples.begin() + i_inputBlockSize * i_nbBlocks / sizeof(float));
 247
 248     assert(p_sys->inputSamples.size() < i_inputBlockSize);
 249
 250     p_sys->i_inputPTS = p_out_buf->i_pts + p_out_buf->i_length;
 251
 252     block_Release(p_buf);
 253     return p_out_buf;
 254 }
 255
 256 static void Flush( filter_t *p_filter )
 257 {
 258     filter_sys_t *p_sys = p_filter->p_sys;
 259     p_sys->inputSamples.clear();
 260     p_sys->i_inputPTS = 0;
 261 }
 262
 263 static void ChangeViewpoint( filter_t *p_filter, const vlc_viewpoint_t *p_vp)
 264 {
 265     filter_sys_t *p_sys = (filter_sys_t *)p_filter->p_sys;
 266
 267 #define RAD(d) ((float) ((d) * M_PI / 180.f))
 268     p_sys->f_teta = -RAD(p_vp->yaw);
 269     p_sys->f_phi = RAD(p_vp->pitch);
 270     p_sys->f_roll = RAD(p_vp->roll);
 271
 272     if (p_vp->fov >= FIELD_OF_VIEW_DEGREES_DEFAULT)
 273         p_sys->f_zoom = 0.f; // no unzoom as it does not really make sense.
 274     else
 275         p_sys->f_zoom = (FIELD_OF_VIEW_DEGREES_DEFAULT - p_vp->fov) / (FIELD_OF_VIEW_DEGREES_DEFAULT - FIELD_OF_VIEW_DEGREES_MIN);
 276 #undef RAD
 277 }
 278
 279 static int allocateBuffers(filter_sys_t *p_sys)
 280 {
 281     p_sys->inBuf = (float**)calloc(p_sys->i_inputNb, sizeof(float*));
 282     if (p_sys->inBuf == NULL)
 283         return VLC_ENOMEM;
 284
 285     for (unsigned i = 0; i < p_sys->i_inputNb; ++i)
 286     {
 287         p_sys->inBuf[i] = (float *)vlc_alloc(AMB_BLOCK_TIME_LEN, sizeof(float));
 288         if (p_sys->inBuf[i] == NULL)
 289             return VLC_ENOMEM;
 290     }
 291
 292     p_sys->outBuf = (float**)calloc(p_sys->i_outputNb, sizeof(float*));
 293     if (p_sys->outBuf == NULL)
 294         return VLC_ENOMEM;
 295
 296     for (unsigned i = 0; i < p_sys->i_outputNb; ++i)
 297     {
 298         p_sys->outBuf[i] = (float *)vlc_alloc(AMB_BLOCK_TIME_LEN, sizeof(float));
 299         if (p_sys->outBuf[i] == NULL)
 300             return VLC_ENOMEM;
 301     }
 302
 303     return VLC_SUCCESS;
 304 }
 305
 306 static int OpenBinauralizer(vlc_object_t *p_this)
 307 {
 308     filter_t *p_filter = (filter_t *)p_this;
 309     audio_format_t *infmt = &p_filter->fmt_in.audio;
 310     audio_format_t *outfmt = &p_filter->fmt_out.audio;
 311
 312     filter_sys_t *p_sys;
 313     p_sys = p_filter->p_sys = (filter_sys_t*)new(std::nothrow)filter_sys_t();
 314     if (p_sys == NULL)
 315         return VLC_ENOMEM;
 316
 317     p_sys->mode = filter_sys_t::BINAURALIZER;
 318     p_sys->i_rate = p_filter->fmt_in.audio.i_rate;
 319     p_sys->i_inputNb = p_filter->fmt_in.audio.i_channels;
 320     p_sys->i_outputNb = 2;
 321
 322     if (allocateBuffers(p_sys) != VLC_SUCCESS)
 323     {
 324         delete p_sys;
 325         return VLC_ENOMEM;
 326     }
 327
 328     unsigned s = 0;
 329     p_sys->speakers = new(std::nothrow)CAmbisonicSpeaker[infmt->i_channels]();
 330     if (!p_sys->speakers)
 331     {
 332         delete p_sys;
 333         return VLC_ENOMEM;
 334     }
 335
 336     p_sys->speakers[s++].SetPosition({DegreesToRadians(30), 0.f, 1.f});
 337     p_sys->speakers[s++].SetPosition({DegreesToRadians(-30), 0.f, 1.f});
 338
 339     if ((infmt->i_physical_channels & AOUT_CHANS_MIDDLE) == AOUT_CHANS_MIDDLE)
 340     {
 341         /* Middle */
 342         p_sys->speakers[s++].SetPosition({DegreesToRadians(110), 0.f, 1.f});
 343         p_sys->speakers[s++].SetPosition({DegreesToRadians(-110), 0.f, 1.f});
 344     }
 345
 346     if ((infmt->i_physical_channels & AOUT_CHANS_REAR) == AOUT_CHANS_REAR)
 347     {
 348         /* Rear */
 349         p_sys->speakers[s++].SetPosition({DegreesToRadians(145), 0.f, 1.f});
 350         p_sys->speakers[s++].SetPosition({DegreesToRadians(-145), 0.f, 1.f});
 351     }
 352
 353     if ((infmt->i_physical_channels & AOUT_CHAN_CENTER) == AOUT_CHAN_CENTER)
 354         p_sys->speakers[s++].SetPosition({DegreesToRadians(0), 0.f, 1.f});
 355
 356     if ((infmt->i_physical_channels & AOUT_CHAN_LFE) == AOUT_CHAN_LFE)
 357         p_sys->speakers[s++].SetPosition({DegreesToRadians(0), 0.f, 0.5f});
 358
 359     std::string HRTFPath = getHRTFPath(p_filter);
 360     msg_Dbg(p_filter, "Using the HRTF file: %s", HRTFPath.c_str());
 361
 362     unsigned i_tailLength = 0;
 363     if (!p_sys->binauralizer.Configure(p_sys->i_rate, AMB_BLOCK_TIME_LEN,
 364                                        p_sys->speakers, infmt->i_channels, i_tailLength,
 365                                        HRTFPath))
 366     {
 367         msg_Err(p_filter, "Error creating the binauralizer.");
 368         delete p_sys;
 369         return VLC_EGENERIC;
 370     }
 371     p_sys->binauralizer.Reset();
 372
 373     outfmt->i_format = infmt->i_format = VLC_CODEC_FL32;
 374     outfmt->i_physical_channels = AOUT_CHANS_STEREO;
 375     aout_FormatPrepare(infmt);
 376     aout_FormatPrepare(outfmt);
 377
 378     p_filter->pf_audio_filter = Mix;
 379     p_filter->pf_flush = Flush;
 380     p_filter->pf_change_viewpoint = ChangeViewpoint;
 381
 382     return VLC_SUCCESS;
 383 }
 384
 385 static int Open(vlc_object_t *p_this)
 386 {
 387     filter_t *p_filter = (filter_t *)p_this;
 388     audio_format_t *infmt = &p_filter->fmt_in.audio;
 389     audio_format_t *outfmt = &p_filter->fmt_out.audio;
 390
 391     assert(infmt->channel_type != outfmt->channel_type);
 392
 393     if (infmt->channel_type != AUDIO_CHANNEL_TYPE_AMBISONICS)
 394         return VLC_EGENERIC;
 395
 396     if (infmt->i_format != VLC_CODEC_FL32 || outfmt->i_format != VLC_CODEC_FL32)
 397         return VLC_EGENERIC;
 398
 399     filter_sys_t *p_sys;
 400     p_sys = p_filter->p_sys = (filter_sys_t*)new(std::nothrow)filter_sys_t();
 401     if (p_sys == NULL)
 402         return VLC_ENOMEM;
 403
 404     p_sys->f_teta = 0.f;
 405     p_sys->f_phi = 0.f;
 406     p_sys->f_roll = 0.f;
 407     p_sys->f_zoom = 0.f;
 408     p_sys->i_rate = p_filter->fmt_in.audio.i_rate;
 409     p_sys->i_inputNb = p_filter->fmt_in.audio.i_channels;
 410     p_sys->i_outputNb = p_filter->fmt_out.audio.i_channels;
 411
 412     if (allocateBuffers(p_sys) != VLC_SUCCESS)
 413     {
 414         delete p_sys;
 415         return VLC_ENOMEM;
 416     }
 417
 418     p_sys->i_order = sqrt(infmt->i_channels) - 1;
 419
 420     if (p_sys->i_order < 1)
 421     {
 422         msg_Err(p_filter, "Invalid number of Ambisonics channels");
 423         delete p_sys;
 424         return VLC_EGENERIC;
 425     }
 426
 427     msg_Dbg(p_filter, "Order: %d %d", p_sys->i_order, infmt->i_channels);
 428
 429     static const char *const options[] = { "headphones", NULL };
 430     config_ChainParse(p_filter, CFG_PREFIX, options, p_filter->p_cfg);
 431
 432     unsigned i_tailLength = 0;
 433     if (p_filter->fmt_out.audio.i_channels == 2
 434      && var_InheritBool(p_filter, CFG_PREFIX "headphones"))
 435     {
 436         p_sys->mode = filter_sys_t::AMBISONICS_BINAURAL_DECODER;
 437
 438         std::string HRTFPath = getHRTFPath(p_filter);
 439         msg_Dbg(p_filter, "Using the HRTF file: %s", HRTFPath.c_str());
 440
 441         if (!p_sys->binauralDecoder.Configure(p_sys->i_order, true,
 442                 p_sys->i_rate, AMB_BLOCK_TIME_LEN, i_tailLength,
 443                 HRTFPath))
 444         {
 445             msg_Err(p_filter, "Error creating the binaural decoder.");
 446             delete p_sys;
 447             return VLC_EGENERIC;
 448         }
 449         p_sys->binauralDecoder.Reset();
 450     }
 451     else
 452     {
 453         p_sys->mode = filter_sys_t::AMBISONICS_DECODER;
 454
 455         unsigned i_nbChannels = aout_FormatNbChannels(&p_filter->fmt_out.audio);
 456         if (i_nbChannels == 1
 457          || !p_sys->speakerDecoder.Configure(p_sys->i_order, true,
 458                                              kAmblib_CustomSpeakerSetUp,
 459                                              i_nbChannels))
 460         {
 461             msg_Err(p_filter, "Error creating the Ambisonics decoder.");
 462             delete p_sys;
 463             return VLC_EGENERIC;
 464         }
 465
 466         /* Speaker setup, inspired from:
 467          * https://www.dolby.com/us/en/guide/surround-sound-speaker-setup/7-1-setup.html
 468          * The position must follow the order of pi_vlc_chan_order_wg4 */
 469         unsigned s = 0;
 470
 471         p_sys->speakerDecoder.SetPosition(s++, {DegreesToRadians(30), 0.f, 1.f});
 472         p_sys->speakerDecoder.SetPosition(s++, {DegreesToRadians(-30), 0.f, 1.f});
 473
 474         if ((outfmt->i_physical_channels & AOUT_CHANS_MIDDLE) == AOUT_CHANS_MIDDLE)
 475         {
 476             p_sys->speakerDecoder.SetPosition(s++, {DegreesToRadians(110), 0.f, 1.f});
 477             p_sys->speakerDecoder.SetPosition(s++, {DegreesToRadians(-110), 0.f, 1.f});
 478         }
 479
 480         if ((outfmt->i_physical_channels & AOUT_CHANS_REAR) == AOUT_CHANS_REAR)
 481         {
 482             p_sys->speakerDecoder.SetPosition(s++, {DegreesToRadians(145), 0.f, 1.f});
 483             p_sys->speakerDecoder.SetPosition(s++, {DegreesToRadians(-145), 0.f, 1.f});
 484         }
 485
 486         if ((outfmt->i_physical_channels & AOUT_CHAN_CENTER) == AOUT_CHAN_CENTER)
 487             p_sys->speakerDecoder.SetPosition(s++, {DegreesToRadians(0), 0.f, 1.f});
 488
 489         if ((outfmt->i_physical_channels & AOUT_CHAN_LFE) == AOUT_CHAN_LFE)
 490             p_sys->speakerDecoder.SetPosition(s++, {DegreesToRadians(0), 0.f, 0.5f});
 491
 492         /* Check we have setup the right number of speaker. */
 493         assert(s == i_nbChannels);
 494
 495         p_sys->speakerDecoder.Refresh();
 496     }
 497
 498     if (!p_sys->processor.Configure(p_sys->i_order, true, AMB_BLOCK_TIME_LEN, 0))
 499     {
 500         msg_Err(p_filter, "Error creating the ambisonic processor.");
 501         delete p_sys;
 502         return VLC_EGENERIC;
 503     }
 504
 505     if (!p_sys->zoomer.Configure(p_sys->i_order, true, 0))
 506     {
 507         msg_Err(p_filter, "Error creating the ambisonic zoomer.");
 508         delete p_sys;
 509         return VLC_EGENERIC;
 510     }
 511
 512     p_filter->pf_audio_filter = Mix;
 513     p_filter->pf_flush = Flush;
 514     p_filter->pf_change_viewpoint = ChangeViewpoint;
 515
 516     return VLC_SUCCESS;
 517 }
 518
 519 static void Close(vlc_object_t *p_this)
 520 {
 521     filter_t *p_filter = (filter_t *)p_this;
 522
 523     delete p_filter->p_sys;
 524 }