alc/alu.cpp

   1 /**
   2  * OpenAL cross platform audio library
   3  * Copyright (C) 1999-2007 by authors.
   4  * This library is free software; you can redistribute it and/or
   5  *  modify it under the terms of the GNU Library General Public
   6  *  License as published by the Free Software Foundation; either
   7  *  version 2 of the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful,
  10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  *  Library General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Library General Public
  15  *  License along with this library; if not, write to the
  16  *  Free Software Foundation, Inc.,
  17  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  18  * Or go to http://www.gnu.org/copyleft/lgpl.html
  19  */
  20
  21 #include "config.h"
  22
  23 #include "alu.h"
  24
  25 #include <algorithm>
  26 #include <array>
  27 #include <atomic>
  28 #include <cassert>
  29 #include <chrono>
  30 #include <climits>
  31 #include <cstdarg>
  32 #include <cstdint>
  33 #include <cstdio>
  34 #include <cstdlib>
  35 #include <functional>
  36 #include <iterator>
  37 #include <limits>
  38 #include <memory>
  39 #include <new>
  40 #include <optional>
  41 #include <utility>
  42
  43 #include "almalloc.h"
  44 #include "alnumbers.h"
  45 #include "alnumeric.h"
  46 #include "alspan.h"
  47 #include "alstring.h"
  48 #include "atomic.h"
  49 #include "core/ambidefs.h"
  50 #include "core/async_event.h"
  51 #include "core/bformatdec.h"
  52 #include "core/bs2b.h"
  53 #include "core/bsinc_defs.h"
  54 #include "core/bsinc_tables.h"
  55 #include "core/bufferline.h"
  56 #include "core/buffer_storage.h"
  57 #include "core/context.h"
  58 #include "core/cpu_caps.h"
  59 #include "core/cubic_tables.h"
  60 #include "core/devformat.h"
  61 #include "core/device.h"
  62 #include "core/effects/base.h"
  63 #include "core/effectslot.h"
  64 #include "core/filters/biquad.h"
  65 #include "core/filters/nfc.h"
  66 #include "core/fpu_ctrl.h"
  67 #include "core/hrtf.h"
  68 #include "core/mastering.h"
  69 #include "core/mixer.h"
  70 #include "core/mixer/defs.h"
  71 #include "core/mixer/hrtfdefs.h"
  72 #include "core/resampler_limits.h"
  73 #include "core/uhjfilter.h"
  74 #include "core/voice.h"
  75 #include "core/voice_change.h"
  76 #include "intrusive_ptr.h"
  77 #include "opthelpers.h"
  78 #include "ringbuffer.h"
  79 #include "strutils.h"
  80 #include "vecmat.h"
  81 #include "vector.h"
  82
  83 struct CTag;
  84 #ifdef HAVE_SSE
  85 struct SSETag;
  86 #endif
  87 #ifdef HAVE_SSE2
  88 struct SSE2Tag;
  89 #endif
  90 #ifdef HAVE_SSE4_1
  91 struct SSE4Tag;
  92 #endif
  93 #ifdef HAVE_NEON
  94 struct NEONTag;
  95 #endif
  96 struct PointTag;
  97 struct LerpTag;
  98 struct CubicTag;
  99 struct BSincTag;
 100 struct FastBSincTag;
 101
 102
 103 static_assert(!(MaxResamplerPadding&1), "MaxResamplerPadding is not a multiple of two");
 104
 105
 106 namespace {
 107
 108 using uint = unsigned int;
 109 using namespace std::chrono;
 110 using namespace std::string_view_literals;
 111
 112 float InitConeScale()
 113 {
 114     float ret{1.0f};
 115     if(auto optval = al::getenv("__ALSOFT_HALF_ANGLE_CONES"))
 116     {
 117         if(al::case_compare(*optval, "true"sv) == 0
 118             || strtol(optval->c_str(), nullptr, 0) == 1)
 119             ret *= 0.5f;
 120     }
 121     return ret;
 122 }
 123 /* Cone scalar */
 124 const float ConeScale{InitConeScale()};
 125
 126 /* Localized scalars for mono sources (initialized in aluInit, after
 127  * configuration is loaded).
 128  */
 129 float XScale{1.0f};
 130 float YScale{1.0f};
 131 float ZScale{1.0f};
 132
 133 /* Source distance scale for NFC filters. */
 134 float NfcScale{1.0f};
 135
 136
 137 using HrtfDirectMixerFunc = void(*)(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut,
 138     const al::span<const FloatBufferLine> InSamples, const al::span<float2> AccumSamples,
 139     const al::span<float,BufferLineSize> TempBuf, const al::span<HrtfChannelState> ChanState,
 140     const size_t IrSize, const size_t SamplesToDo);
 141
 142 HrtfDirectMixerFunc MixDirectHrtf{MixDirectHrtf_<CTag>};
 143
 144 inline HrtfDirectMixerFunc SelectHrtfMixer()
 145 {
 146 #ifdef HAVE_NEON
 147     if((CPUCapFlags&CPU_CAP_NEON))
 148         return MixDirectHrtf_<NEONTag>;
 149 #endif
 150 #ifdef HAVE_SSE
 151     if((CPUCapFlags&CPU_CAP_SSE))
 152         return MixDirectHrtf_<SSETag>;
 153 #endif
 154
 155     return MixDirectHrtf_<CTag>;
 156 }
 157
 158
 159 inline void BsincPrepare(const uint increment, BsincState *state, const BSincTable *table)
 160 {
 161     size_t si{BSincScaleCount - 1};
 162     float sf{0.0f};
 163
 164     if(increment > MixerFracOne)
 165     {
 166         sf = MixerFracOne/static_cast<float>(increment) - table->scaleBase;
 167         sf = std::max(0.0f, BSincScaleCount*sf*table->scaleRange - 1.0f);
 168         si = float2uint(sf);
 169         /* The interpolation factor is fit to this diagonally-symmetric curve
 170          * to reduce the transition ripple caused by interpolating different
 171          * scales of the sinc function.
 172          */
 173         sf = 1.0f - std::cos(std::asin(sf - static_cast<float>(si)));
 174     }
 175
 176     state->sf = sf;
 177     state->m = table->m[si];
 178     state->l = (state->m/2) - 1;
 179     state->filter = table->Tab.subspan(table->filterOffset[si]);
 180 }
 181
 182 inline ResamplerFunc SelectResampler(Resampler resampler, uint increment)
 183 {
 184     switch(resampler)
 185     {
 186     case Resampler::Point:
 187         return Resample_<PointTag,CTag>;
 188     case Resampler::Linear:
 189 #ifdef HAVE_NEON
 190         if((CPUCapFlags&CPU_CAP_NEON))
 191             return Resample_<LerpTag,NEONTag>;
 192 #endif
 193 #ifdef HAVE_SSE4_1
 194         if((CPUCapFlags&CPU_CAP_SSE4_1))
 195             return Resample_<LerpTag,SSE4Tag>;
 196 #endif
 197 #ifdef HAVE_SSE2
 198         if((CPUCapFlags&CPU_CAP_SSE2))
 199             return Resample_<LerpTag,SSE2Tag>;
 200 #endif
 201         return Resample_<LerpTag,CTag>;
 202     case Resampler::Spline:
 203     case Resampler::Gaussian:
 204 #ifdef HAVE_NEON
 205         if((CPUCapFlags&CPU_CAP_NEON))
 206             return Resample_<CubicTag,NEONTag>;
 207 #endif
 208 #ifdef HAVE_SSE4_1
 209         if((CPUCapFlags&CPU_CAP_SSE4_1))
 210             return Resample_<CubicTag,SSE4Tag>;
 211 #endif
 212 #ifdef HAVE_SSE2
 213         if((CPUCapFlags&CPU_CAP_SSE2))
 214             return Resample_<CubicTag,SSE2Tag>;
 215 #endif
 216 #ifdef HAVE_SSE
 217         if((CPUCapFlags&CPU_CAP_SSE))
 218             return Resample_<CubicTag,SSETag>;
 219 #endif
 220         return Resample_<CubicTag,CTag>;
 221     case Resampler::BSinc12:
 222     case Resampler::BSinc24:
 223         if(increment > MixerFracOne)
 224         {
 225 #ifdef HAVE_NEON
 226             if((CPUCapFlags&CPU_CAP_NEON))
 227                 return Resample_<BSincTag,NEONTag>;
 228 #endif
 229 #ifdef HAVE_SSE
 230             if((CPUCapFlags&CPU_CAP_SSE))
 231                 return Resample_<BSincTag,SSETag>;
 232 #endif
 233             return Resample_<BSincTag,CTag>;
 234         }
 235         /* fall-through */
 236     case Resampler::FastBSinc12:
 237     case Resampler::FastBSinc24:
 238 #ifdef HAVE_NEON
 239         if((CPUCapFlags&CPU_CAP_NEON))
 240             return Resample_<FastBSincTag,NEONTag>;
 241 #endif
 242 #ifdef HAVE_SSE
 243         if((CPUCapFlags&CPU_CAP_SSE))
 244             return Resample_<FastBSincTag,SSETag>;
 245 #endif
 246         return Resample_<FastBSincTag,CTag>;
 247     }
 248
 249     return Resample_<PointTag,CTag>;
 250 }
 251
 252 } // namespace
 253
 254 void aluInit(CompatFlagBitset flags, const float nfcscale)
 255 {
 256     MixDirectHrtf = SelectHrtfMixer();
 257     XScale = flags.test(CompatFlags::ReverseX) ? -1.0f : 1.0f;
 258     YScale = flags.test(CompatFlags::ReverseY) ? -1.0f : 1.0f;
 259     ZScale = flags.test(CompatFlags::ReverseZ) ? -1.0f : 1.0f;
 260
 261     NfcScale = std::clamp(nfcscale, 0.0001f, 10000.0f);
 262 }
 263
 264
 265 ResamplerFunc PrepareResampler(Resampler resampler, uint increment, InterpState *state)
 266 {
 267     switch(resampler)
 268     {
 269     case Resampler::Point:
 270     case Resampler::Linear:
 271         break;
 272     case Resampler::Spline:
 273         state->emplace<CubicState>(al::span{gSplineFilter.mTable});
 274         break;
 275     case Resampler::Gaussian:
 276         state->emplace<CubicState>(al::span{gGaussianFilter.mTable});
 277         break;
 278     case Resampler::FastBSinc12:
 279     case Resampler::BSinc12:
 280         BsincPrepare(increment, &state->emplace<BsincState>(), &gBSinc12);
 281         break;
 282     case Resampler::FastBSinc24:
 283     case Resampler::BSinc24:
 284         BsincPrepare(increment, &state->emplace<BsincState>(), &gBSinc24);
 285         break;
 286     }
 287     return SelectResampler(resampler, increment);
 288 }
 289
 290
 291 void DeviceBase::ProcessHrtf(const size_t SamplesToDo)
 292 {
 293     /* HRTF is stereo output only. */
 294     const size_t lidx{RealOut.ChannelIndex[FrontLeft]};
 295     const size_t ridx{RealOut.ChannelIndex[FrontRight]};
 296
 297     MixDirectHrtf(RealOut.Buffer[lidx], RealOut.Buffer[ridx], Dry.Buffer, HrtfAccumData,
 298         mHrtfState->mTemp, mHrtfState->mChannels, mHrtfState->mIrSize, SamplesToDo);
 299 }
 300
 301 void DeviceBase::ProcessAmbiDec(const size_t SamplesToDo)
 302 {
 303     AmbiDecoder->process(RealOut.Buffer, Dry.Buffer, SamplesToDo);
 304 }
 305
 306 void DeviceBase::ProcessAmbiDecStablized(const size_t SamplesToDo)
 307 {
 308     /* Decode with front image stablization. */
 309     const size_t lidx{RealOut.ChannelIndex[FrontLeft]};
 310     const size_t ridx{RealOut.ChannelIndex[FrontRight]};
 311     const size_t cidx{RealOut.ChannelIndex[FrontCenter]};
 312
 313     AmbiDecoder->processStablize(RealOut.Buffer, Dry.Buffer, lidx, ridx, cidx, SamplesToDo);
 314 }
 315
 316 void DeviceBase::ProcessUhj(const size_t SamplesToDo)
 317 {
 318     /* UHJ is stereo output only. */
 319     const size_t lidx{RealOut.ChannelIndex[FrontLeft]};
 320     const size_t ridx{RealOut.ChannelIndex[FrontRight]};
 321
 322     /* Encode to stereo-compatible 2-channel UHJ output. */
 323     mUhjEncoder->encode(RealOut.Buffer[lidx].data(), RealOut.Buffer[ridx].data(),
 324         {{Dry.Buffer[0].data(), Dry.Buffer[1].data(), Dry.Buffer[2].data()}}, SamplesToDo);
 325 }
 326
 327 void DeviceBase::ProcessBs2b(const size_t SamplesToDo)
 328 {
 329     /* First, decode the ambisonic mix to the "real" output. */
 330     AmbiDecoder->process(RealOut.Buffer, Dry.Buffer, SamplesToDo);
 331
 332     /* BS2B is stereo output only. */
 333     const size_t lidx{RealOut.ChannelIndex[FrontLeft]};
 334     const size_t ridx{RealOut.ChannelIndex[FrontRight]};
 335
 336     /* Now apply the BS2B binaural/crossfeed filter. */
 337     Bs2b->cross_feed(RealOut.Buffer[lidx].data(), RealOut.Buffer[ridx].data(), SamplesToDo);
 338 }
 339
 340
 341 namespace {
 342
 343 /* This RNG method was created based on the math found in opusdec. It's quick,
 344  * and starting with a seed value of 22222, is suitable for generating
 345  * whitenoise.
 346  */
 347 inline uint dither_rng(uint *seed) noexcept
 348 {
 349     *seed = (*seed * 96314165) + 907633515;
 350     return *seed;
 351 }
 352
 353
 354 /* Ambisonic upsampler function. It's effectively a matrix multiply. It takes
 355  * an 'upsampler' and 'rotator' as the input matrices, and creates a matrix
 356  * that behaves as if the B-Format input was first decoded to a speaker array
 357  * at its input order, encoded back into the higher order mix, then finally
 358  * rotated.
 359  */
 360 void UpsampleBFormatTransform(
 361     const al::span<std::array<float,MaxAmbiChannels>,MaxAmbiChannels> output,
 362     const al::span<const std::array<float,MaxAmbiChannels>> upsampler,
 363     const al::span<const std::array<float,MaxAmbiChannels>,MaxAmbiChannels> rotator,
 364     size_t ambi_order)
 365 {
 366     const size_t num_chans{AmbiChannelsFromOrder(ambi_order)};
 367     for(size_t i{0};i < upsampler.size();++i)
 368         output[i].fill(0.0f);
 369     for(size_t i{0};i < upsampler.size();++i)
 370     {
 371         for(size_t k{0};k < num_chans;++k)
 372         {
 373             const float a{upsampler[i][k]};
 374             /* Write the full number of channels. The compiler will have an
 375              * easier time optimizing if it has a fixed length.
 376              */
 377             std::transform(rotator[k].cbegin(), rotator[k].cend(), output[i].cbegin(),
 378                 output[i].begin(), [a](float rot, float dst) noexcept { return rot*a + dst; });
 379         }
 380     }
 381 }
 382
 383
 384 constexpr auto GetAmbiScales(AmbiScaling scaletype) noexcept
 385 {
 386     switch(scaletype)
 387     {
 388     case AmbiScaling::FuMa: return al::span{AmbiScale::FromFuMa};
 389     case AmbiScaling::SN3D: return al::span{AmbiScale::FromSN3D};
 390     case AmbiScaling::UHJ: return al::span{AmbiScale::FromUHJ};
 391     case AmbiScaling::N3D: break;
 392     }
 393     return al::span{AmbiScale::FromN3D};
 394 }
 395
 396 constexpr auto GetAmbiLayout(AmbiLayout layouttype) noexcept
 397 {
 398     if(layouttype == AmbiLayout::FuMa) return al::span{AmbiIndex::FromFuMa};
 399     return al::span{AmbiIndex::FromACN};
 400 }
 401
 402 constexpr auto GetAmbi2DLayout(AmbiLayout layouttype) noexcept
 403 {
 404     if(layouttype == AmbiLayout::FuMa) return al::span{AmbiIndex::FromFuMa2D};
 405     return al::span{AmbiIndex::FromACN2D};
 406 }
 407
 408
 409 bool CalcContextParams(ContextBase *ctx)
 410 {
 411     ContextProps *props{ctx->mParams.ContextUpdate.exchange(nullptr, std::memory_order_acq_rel)};
 412     if(!props) return false;
 413
 414     const alu::Vector pos{props->Position[0], props->Position[1], props->Position[2], 1.0f};
 415     ctx->mParams.Position = pos;
 416
 417     /* AT then UP */
 418     alu::Vector N{props->OrientAt[0], props->OrientAt[1], props->OrientAt[2], 0.0f};
 419     N.normalize();
 420     alu::Vector V{props->OrientUp[0], props->OrientUp[1], props->OrientUp[2], 0.0f};
 421     V.normalize();
 422     /* Build and normalize right-vector */
 423     alu::Vector U{N.cross_product(V)};
 424     U.normalize();
 425
 426     const alu::Matrix rot{
 427         U[0], V[0], -N[0], 0.0,
 428         U[1], V[1], -N[1], 0.0,
 429         U[2], V[2], -N[2], 0.0,
 430          0.0,  0.0,   0.0, 1.0};
 431     const alu::Vector vel{props->Velocity[0], props->Velocity[1], props->Velocity[2], 0.0};
 432
 433     ctx->mParams.Matrix = rot;
 434     ctx->mParams.Velocity = rot * vel;
 435
 436     ctx->mParams.Gain = props->Gain * ctx->mGainBoost;
 437     ctx->mParams.MetersPerUnit = props->MetersPerUnit;
 438     ctx->mParams.AirAbsorptionGainHF = props->AirAbsorptionGainHF;
 439
 440     ctx->mParams.DopplerFactor = props->DopplerFactor;
 441     ctx->mParams.SpeedOfSound = props->SpeedOfSound * props->DopplerVelocity;
 442
 443     ctx->mParams.SourceDistanceModel = props->SourceDistanceModel;
 444     ctx->mParams.mDistanceModel = props->mDistanceModel;
 445
 446     AtomicReplaceHead(ctx->mFreeContextProps, props);
 447     return true;
 448 }
 449
 450 bool CalcEffectSlotParams(EffectSlot *slot, EffectSlot **sorted_slots, ContextBase *context)
 451 {
 452     EffectSlotProps *props{slot->Update.exchange(nullptr, std::memory_order_acq_rel)};
 453     if(!props) return false;
 454
 455     /* If the effect slot target changed, clear the first sorted entry to force
 456      * a re-sort.
 457      */
 458     if(slot->Target != props->Target)
 459         *sorted_slots = nullptr;
 460     slot->Gain = props->Gain;
 461     slot->AuxSendAuto = props->AuxSendAuto;
 462     slot->Target = props->Target;
 463     slot->EffectType = props->Type;
 464     slot->mEffectProps = props->Props;
 465     if(auto *reverbprops = std::get_if<ReverbProps>(&props->Props))
 466     {
 467         slot->RoomRolloff = reverbprops->RoomRolloffFactor;
 468         slot->DecayTime = reverbprops->DecayTime;
 469         slot->DecayLFRatio = reverbprops->DecayLFRatio;
 470         slot->DecayHFRatio = reverbprops->DecayHFRatio;
 471         slot->DecayHFLimit = reverbprops->DecayHFLimit;
 472         slot->AirAbsorptionGainHF = reverbprops->AirAbsorptionGainHF;
 473     }
 474     else
 475     {
 476         slot->RoomRolloff = 0.0f;
 477         slot->DecayTime = 0.0f;
 478         slot->DecayLFRatio = 0.0f;
 479         slot->DecayHFRatio = 0.0f;
 480         slot->DecayHFLimit = false;
 481         slot->AirAbsorptionGainHF = 1.0f;
 482     }
 483
 484     EffectState *state{props->State.release()};
 485     EffectState *oldstate{slot->mEffectState.release()};
 486     slot->mEffectState.reset(state);
 487
 488     /* Only release the old state if it won't get deleted, since we can't be
 489      * deleting/freeing anything in the mixer.
 490      */
 491     if(!oldstate->releaseIfNoDelete())
 492     {
 493         /* Otherwise, if it would be deleted send it off with a release event. */
 494         RingBuffer *ring{context->mAsyncEvents.get()};
 495         auto evt_vec = ring->getWriteVector();
 496         if(evt_vec.first.len > 0) LIKELY
 497         {
 498             auto &evt = InitAsyncEvent<AsyncEffectReleaseEvent>(evt_vec.first.buf);
 499             evt.mEffectState = oldstate;
 500             ring->writeAdvance(1);
 501         }
 502         else
 503         {
 504             /* If writing the event failed, the queue was probably full. Store
 505              * the old state in the property object where it can eventually be
 506              * cleaned up sometime later (not ideal, but better than blocking
 507              * or leaking).
 508              */
 509             props->State.reset(oldstate);
 510         }
 511     }
 512
 513     AtomicReplaceHead(context->mFreeEffectSlotProps, props);
 514
 515     const auto output = [slot,context]() -> EffectTarget
 516     {
 517         if(EffectSlot *target{slot->Target})
 518             return EffectTarget{&target->Wet, nullptr};
 519         DeviceBase *device{context->mDevice};
 520         return EffectTarget{&device->Dry, &device->RealOut};
 521     }();
 522     state->update(context, slot, &slot->mEffectProps, output);
 523     return true;
 524 }
 525
 526
 527 /* Scales the azimuth of the given vector by 3 if it's in front. Effectively
 528  * scales +/-30 degrees to +/-90 degrees, leaving > +90 and < -90 alone.
 529  */
 530 inline std::array<float,3> ScaleAzimuthFront3(std::array<float,3> pos)
 531 {
 532     if(pos[2] < 0.0f)
 533     {
 534         /* Normalize the length of the x,z components for a 2D vector of the
 535          * azimuth angle. Negate Z since {0,0,-1} is angle 0.
 536          */
 537         const float len2d{std::sqrt(pos[0]*pos[0] + pos[2]*pos[2])};
 538         float x{pos[0] / len2d};
 539         float z{-pos[2] / len2d};
 540
 541         /* Z > cos(pi/6) = -30 < azimuth < 30 degrees. */
 542         if(z > 0.866025403785f)
 543         {
 544             /* Triple the angle represented by x,z. */
 545             x = x*3.0f - x*x*x*4.0f;
 546             z = z*z*z*4.0f - z*3.0f;
 547
 548             /* Scale the vector back to fit in 3D. */
 549             pos[0] = x * len2d;
 550             pos[2] = -z * len2d;
 551         }
 552         else
 553         {
 554             /* If azimuth >= 30 degrees, clamp to 90 degrees. */
 555             pos[0] = std::copysign(len2d, pos[0]);
 556             pos[2] = 0.0f;
 557         }
 558     }
 559     return pos;
 560 }
 561
 562 /* Scales the azimuth of the given vector by 1.5 (3/2) if it's in front. */
 563 inline std::array<float,3> ScaleAzimuthFront3_2(std::array<float,3> pos)
 564 {
 565     if(pos[2] < 0.0f)
 566     {
 567         const float len2d{std::sqrt(pos[0]*pos[0] + pos[2]*pos[2])};
 568         float x{pos[0] / len2d};
 569         float z{-pos[2] / len2d};
 570
 571         /* Z > cos(pi/3) = -60 < azimuth < 60 degrees. */
 572         if(z > 0.5f)
 573         {
 574             /* Halve the angle represented by x,z. */
 575             x = std::copysign(std::sqrt((1.0f - z) * 0.5f), x);
 576             z = std::sqrt((1.0f + z) * 0.5f);
 577
 578             /* Triple the angle represented by x,z. */
 579             x = x*3.0f - x*x*x*4.0f;
 580             z = z*z*z*4.0f - z*3.0f;
 581
 582             /* Scale the vector back to fit in 3D. */
 583             pos[0] = x * len2d;
 584             pos[2] = -z * len2d;
 585         }
 586         else
 587         {
 588             /* If azimuth >= 60 degrees, clamp to 90 degrees. */
 589             pos[0] = std::copysign(len2d, pos[0]);
 590             pos[2] = 0.0f;
 591         }
 592     }
 593     return pos;
 594 }
 595
 596
 597 /* Begin ambisonic rotation helpers.
 598  *
 599  * Rotating first-order B-Format just needs a straight-forward X/Y/Z rotation
 600  * matrix. Higher orders, however, are more complicated. The method implemented
 601  * here is a recursive algorithm (the rotation for first-order is used to help
 602  * generate the second-order rotation, which helps generate the third-order
 603  * rotation, etc).
 604  *
 605  * Adapted from
 606  * <https://github.com/polarch/Spherical-Harmonic-Transform/blob/master/getSHrotMtx.m>,
 607  * provided under the BSD 3-Clause license.
 608  *
 609  * Copyright (c) 2015, Archontis Politis
 610  * Copyright (c) 2019, Christopher Robinson
 611  *
 612  * The u, v, and w coefficients used for generating higher-order rotations are
 613  * precomputed since they're constant. The second-order coefficients are
 614  * followed by the third-order coefficients, etc.
 615  */
 616 constexpr size_t CalcRotatorSize(size_t l) noexcept
 617 {
 618     if(l >= 2)
 619         return (l*2 + 1)*(l*2 + 1) + CalcRotatorSize(l-1);
 620     return 0;
 621 }
 622
 623 struct RotatorCoeffs {
 624     struct CoeffValues {
 625         float u, v, w;
 626     };
 627     std::array<CoeffValues,CalcRotatorSize(MaxAmbiOrder)> mCoeffs{};
 628
 629     RotatorCoeffs()
 630     {
 631         auto coeffs = mCoeffs.begin();
 632
 633         for(int l=2;l <= MaxAmbiOrder;++l)
 634         {
 635             for(int n{-l};n <= l;++n)
 636             {
 637                 for(int m{-l};m <= l;++m)
 638                 {
 639                     /* compute u,v,w terms of Eq.8.1 (Table I)
 640                      *
 641                      * const bool d{m == 0}; // the delta function d_m0
 642                      * const double denom{(std::abs(n) == l) ?
 643                      *     (2*l) * (2*l - 1) : (l*l - n*n)};
 644                      *
 645                      * const int abs_m{std::abs(m)};
 646                      * coeffs->u = std::sqrt((l*l - m*m) / denom);
 647                      * coeffs->v = std::sqrt((l+abs_m-1) * (l+abs_m) / denom) *
 648                      *     (1.0+d) * (1.0 - 2.0*d) * 0.5;
 649                      * coeffs->w = std::sqrt((l-abs_m-1) * (l-abs_m) / denom) *
 650                      *     (1.0-d) * -0.5;
 651                      */
 652
 653                     const double denom{static_cast<double>((std::abs(n) == l) ?
 654                           (2*l) * (2*l - 1) : (l*l - n*n))};
 655
 656                     if(m == 0)
 657                     {
 658                         coeffs->u = static_cast<float>(std::sqrt(l * l / denom));
 659                         coeffs->v = static_cast<float>(std::sqrt((l-1) * l / denom) * -1.0);
 660                         coeffs->w = 0.0f;
 661                     }
 662                     else
 663                     {
 664                         const int abs_m{std::abs(m)};
 665                         coeffs->u = static_cast<float>(std::sqrt((l*l - m*m) / denom));
 666                         coeffs->v = static_cast<float>(std::sqrt((l+abs_m-1) * (l+abs_m) / denom) *
 667                             0.5);
 668                         coeffs->w = static_cast<float>(std::sqrt((l-abs_m-1) * (l-abs_m) / denom) *
 669                             -0.5);
 670                     }
 671                     ++coeffs;
 672                 }
 673             }
 674         }
 675     }
 676 };
 677 const RotatorCoeffs RotatorCoeffArray{};
 678
 679 /**
 680  * Given the matrix, pre-filled with the (zeroth- and) first-order rotation
 681  * coefficients, this fills in the coefficients for the higher orders up to and
 682  * including the given order. The matrix is in ACN layout.
 683  */
 684 void AmbiRotator(AmbiRotateMatrix &matrix, const int order)
 685 {
 686     /* Don't do anything for < 2nd order. */
 687     if(order < 2) return;
 688
 689     auto P = [](const int i, const int l, const int a, const int n, const size_t last_band,
 690         const AmbiRotateMatrix &R)
 691     {
 692         const float ri1{ R[ 1+2][static_cast<size_t>(i+2_z)]};
 693         const float rim1{R[-1+2][static_cast<size_t>(i+2_z)]};
 694         const float ri0{ R[ 0+2][static_cast<size_t>(i+2_z)]};
 695
 696         const size_t y{last_band + static_cast<size_t>(a+l-1)};
 697         if(n == -l)
 698             return ri1*R[last_band][y] + rim1*R[last_band + static_cast<size_t>(l-1_z)*2][y];
 699         if(n == l)
 700             return ri1*R[last_band + static_cast<size_t>(l-1_z)*2][y] - rim1*R[last_band][y];
 701         return ri0*R[last_band + static_cast<size_t>(l-1_z+n)][y];
 702     };
 703
 704     auto U = [P](const int l, const int m, const int n, const size_t last_band,
 705         const AmbiRotateMatrix &R)
 706     {
 707         return P(0, l, m, n, last_band, R);
 708     };
 709     auto V = [P](const int l, const int m, const int n, const size_t last_band,
 710         const AmbiRotateMatrix &R)
 711     {
 712         using namespace al::numbers;
 713         if(m > 0)
 714         {
 715             const bool d{m == 1};
 716             const float p0{P( 1, l,  m-1, n, last_band, R)};
 717             const float p1{P(-1, l, -m+1, n, last_band, R)};
 718             return d ? p0*sqrt2_v<float> : (p0 - p1);
 719         }
 720         const bool d{m == -1};
 721         const float p0{P( 1, l,  m+1, n, last_band, R)};
 722         const float p1{P(-1, l, -m-1, n, last_band, R)};
 723         return d ? p1*sqrt2_v<float> : (p0 + p1);
 724     };
 725     auto W = [P](const int l, const int m, const int n, const size_t last_band,
 726         const AmbiRotateMatrix &R)
 727     {
 728         assert(m != 0);
 729         if(m > 0)
 730         {
 731             const float p0{P( 1, l,  m+1, n, last_band, R)};
 732             const float p1{P(-1, l, -m-1, n, last_band, R)};
 733             return p0 + p1;
 734         }
 735         const float p0{P( 1, l,  m-1, n, last_band, R)};
 736         const float p1{P(-1, l, -m+1, n, last_band, R)};
 737         return p0 - p1;
 738     };
 739
 740     // compute rotation matrix of each subsequent band recursively
 741     auto coeffs = RotatorCoeffArray.mCoeffs.cbegin();
 742     size_t band_idx{4}, last_band{1};
 743     for(int l{2};l <= order;++l)
 744     {
 745         size_t y{band_idx};
 746         for(int n{-l};n <= l;++n,++y)
 747         {
 748             size_t x{band_idx};
 749             for(int m{-l};m <= l;++m,++x)
 750             {
 751                 float r{0.0f};
 752
 753                 // computes Eq.8.1
 754                 if(const float u{coeffs->u}; u != 0.0f)
 755                     r += u * U(l, m, n, last_band, matrix);
 756                 if(const float v{coeffs->v}; v != 0.0f)
 757                     r += v * V(l, m, n, last_band, matrix);
 758                 if(const float w{coeffs->w}; w != 0.0f)
 759                     r += w * W(l, m, n, last_band, matrix);
 760
 761                 matrix[y][x] = r;
 762                 ++coeffs;
 763             }
 764         }
 765         last_band = band_idx;
 766         band_idx += static_cast<uint>(l)*2_uz + 1;
 767     }
 768 }
 769 /* End ambisonic rotation helpers. */
 770
 771
 772 constexpr float sin30{0.5f};
 773 constexpr float cos30{0.866025403785f};
 774 constexpr float sin45{al::numbers::sqrt2_v<float>*0.5f};
 775 constexpr float cos45{al::numbers::sqrt2_v<float>*0.5f};
 776 constexpr float sin110{ 0.939692620786f};
 777 constexpr float cos110{-0.342020143326f};
 778
 779 struct ChanPosMap {
 780     Channel channel;
 781     std::array<float,3> pos;
 782 };
 783
 784
 785 struct GainTriplet { float Base, HF, LF; };
 786
 787 void CalcPanningAndFilters(Voice *voice, const float xpos, const float ypos, const float zpos,
 788     const float Distance, const float Spread, const GainTriplet &DryGain,
 789     const al::span<const GainTriplet,MaxSendCount> WetGain,
 790     const al::span<EffectSlot*,MaxSendCount> SendSlots, const VoiceProps *props,
 791     const ContextParams &Context, DeviceBase *Device)
 792 {
 793     static constexpr std::array MonoMap{
 794         ChanPosMap{FrontCenter, std::array{0.0f, 0.0f, -1.0f}}
 795     };
 796     static constexpr std::array RearMap{
 797         ChanPosMap{BackLeft,  std::array{-sin30, 0.0f, cos30}},
 798         ChanPosMap{BackRight, std::array{ sin30, 0.0f, cos30}},
 799     };
 800     static constexpr std::array QuadMap{
 801         ChanPosMap{FrontLeft,  std::array{-sin45, 0.0f, -cos45}},
 802         ChanPosMap{FrontRight, std::array{ sin45, 0.0f, -cos45}},
 803         ChanPosMap{BackLeft,   std::array{-sin45, 0.0f,  cos45}},
 804         ChanPosMap{BackRight,  std::array{ sin45, 0.0f,  cos45}},
 805     };
 806     static constexpr std::array X51Map{
 807         ChanPosMap{FrontLeft,   std::array{-sin30, 0.0f, -cos30}},
 808         ChanPosMap{FrontRight,  std::array{ sin30, 0.0f, -cos30}},
 809         ChanPosMap{FrontCenter, std::array{  0.0f, 0.0f, -1.0f}},
 810         ChanPosMap{LFE, {}},
 811         ChanPosMap{SideLeft,    std::array{-sin110, 0.0f, -cos110}},
 812         ChanPosMap{SideRight,   std::array{ sin110, 0.0f, -cos110}},
 813     };
 814     static constexpr std::array X61Map{
 815         ChanPosMap{FrontLeft,   std::array{-sin30, 0.0f, -cos30}},
 816         ChanPosMap{FrontRight,  std::array{ sin30, 0.0f, -cos30}},
 817         ChanPosMap{FrontCenter, std::array{  0.0f, 0.0f, -1.0f}},
 818         ChanPosMap{LFE, {}},
 819         ChanPosMap{BackCenter,  std::array{ 0.0f, 0.0f, 1.0f}},
 820         ChanPosMap{SideLeft,    std::array{-1.0f, 0.0f, 0.0f}},
 821         ChanPosMap{SideRight,   std::array{ 1.0f, 0.0f, 0.0f}},
 822     };
 823     static constexpr std::array X71Map{
 824         ChanPosMap{FrontLeft,   std::array{-sin30, 0.0f, -cos30}},
 825         ChanPosMap{FrontRight,  std::array{ sin30, 0.0f, -cos30}},
 826         ChanPosMap{FrontCenter, std::array{  0.0f, 0.0f, -1.0f}},
 827         ChanPosMap{LFE, {}},
 828         ChanPosMap{BackLeft,    std::array{-sin30, 0.0f, cos30}},
 829         ChanPosMap{BackRight,   std::array{ sin30, 0.0f, cos30}},
 830         ChanPosMap{SideLeft,    std::array{ -1.0f, 0.0f, 0.0f}},
 831         ChanPosMap{SideRight,   std::array{  1.0f, 0.0f, 0.0f}},
 832     };
 833
 834     std::array StereoMap{
 835         ChanPosMap{FrontLeft,   std::array{-sin30, 0.0f, -cos30}},
 836         ChanPosMap{FrontRight,  std::array{ sin30, 0.0f, -cos30}},
 837     };
 838
 839     const auto Frequency = static_cast<float>(Device->Frequency);
 840     const uint NumSends{Device->NumAuxSends};
 841
 842     const size_t num_channels{voice->mChans.size()};
 843     ASSUME(num_channels > 0);
 844
 845     for(auto &chandata : voice->mChans)
 846     {
 847         chandata.mDryParams.Hrtf.Target = HrtfFilter{};
 848         chandata.mDryParams.Gains.Target.fill(0.0f);
 849         std::for_each(chandata.mWetParams.begin(), chandata.mWetParams.begin()+NumSends,
 850             [](SendParams &params) -> void { params.Gains.Target.fill(0.0f); });
 851     }
 852
 853     const auto getChans = [props,&StereoMap](FmtChannels chanfmt) noexcept
 854         -> std::pair<DirectMode,al::span<const ChanPosMap>>
 855     {
 856         switch(chanfmt)
 857         {
 858         case FmtMono:
 859             /* Mono buffers are never played direct. */
 860             return {DirectMode::Off, al::span{MonoMap}};
 861
 862         case FmtStereo:
 863         case FmtMonoDup:
 864             if(props->DirectChannels == DirectMode::Off)
 865             {
 866                 for(size_t i{0};i < 2;++i)
 867                 {
 868                     /* StereoPan is counter-clockwise in radians. */
 869                     const float a{props->StereoPan[i]};
 870                     StereoMap[i].pos[0] = -std::sin(a);
 871                     StereoMap[i].pos[2] = -std::cos(a);
 872                 }
 873             }
 874             return {props->DirectChannels, al::span{StereoMap}};
 875
 876         case FmtRear: return {props->DirectChannels, al::span{RearMap}};
 877         case FmtQuad: return {props->DirectChannels, al::span{QuadMap}};
 878         case FmtX51: return {props->DirectChannels, al::span{X51Map}};
 879         case FmtX61: return {props->DirectChannels, al::span{X61Map}};
 880         case FmtX71: return {props->DirectChannels, al::span{X71Map}};
 881
 882         case FmtBFormat2D:
 883         case FmtBFormat3D:
 884         case FmtUHJ2:
 885         case FmtUHJ3:
 886         case FmtUHJ4:
 887         case FmtSuperStereo:
 888             return {DirectMode::Off, {}};
 889         }
 890         return {props->DirectChannels, {}};
 891     };
 892     const auto [DirectChannels,chans] = getChans(voice->mFmtChannels);
 893
 894     voice->mFlags.reset(VoiceHasHrtf).reset(VoiceHasNfc);
 895     if(auto *decoder{voice->mDecoder.get()})
 896         decoder->mWidthControl = std::min(props->EnhWidth, 0.7f);
 897
 898     const float lgain{std::min(1.0f-props->Panning, 1.0f)};
 899     const float rgain{std::min(1.0f+props->Panning, 1.0f)};
 900     const float mingain{std::min(lgain, rgain)};
 901     auto SelectChannelGain = [lgain,rgain,mingain](const Channel chan) noexcept
 902     {
 903         switch(chan)
 904         {
 905         case FrontLeft: return lgain;
 906         case FrontRight: return rgain;
 907         case FrontCenter: break;
 908         case LFE: break;
 909         case BackLeft: return lgain;
 910         case BackRight: return rgain;
 911         case BackCenter: break;
 912         case SideLeft: return lgain;
 913         case SideRight: return rgain;
 914         case TopCenter: break;
 915         case TopFrontLeft: return lgain;
 916         case TopFrontCenter: break;
 917         case TopFrontRight: return rgain;
 918         case TopBackLeft: return lgain;
 919         case TopBackCenter: break;
 920         case TopBackRight: return rgain;
 921         case Aux0: case Aux1: case Aux2: case Aux3: case Aux4: case Aux5: case Aux6: case Aux7:
 922         case Aux8: case Aux9: case Aux10: case Aux11: case Aux12: case Aux13: case Aux14:
 923         case Aux15: case MaxChannels: break;
 924         }
 925         return mingain;
 926     };
 927
 928     if(IsAmbisonic(voice->mFmtChannels))
 929     {
 930         /* Special handling for B-Format and UHJ sources. */
 931
 932         if(Device->AvgSpeakerDist > 0.0f && voice->mFmtChannels != FmtUHJ2
 933             && voice->mFmtChannels != FmtSuperStereo)
 934         {
 935             if(!(Distance > std::numeric_limits<float>::epsilon()))
 936             {
 937                 /* NOTE: The NFCtrlFilters were created with a w0 of 0, which
 938                  * is what we want for FOA input. The first channel may have
 939                  * been previously re-adjusted if panned, so reset it.
 940                  */
 941                 voice->mChans[0].mDryParams.NFCtrlFilter.adjust(0.0f);
 942             }
 943             else
 944             {
 945                 /* Clamp the distance for really close sources, to prevent
 946                  * excessive bass.
 947                  */
 948                 const float mdist{std::max(Distance*NfcScale, Device->AvgSpeakerDist/4.0f)};
 949                 const float w0{SpeedOfSoundMetersPerSec / (mdist * Frequency)};
 950
 951                 /* Only need to adjust the first channel of a B-Format source. */
 952                 voice->mChans[0].mDryParams.NFCtrlFilter.adjust(w0);
 953             }
 954
 955             voice->mFlags.set(VoiceHasNfc);
 956         }
 957
 958         /* Panning a B-Format sound toward some direction is easy. Just pan the
 959          * first (W) channel as a normal mono sound. The angular spread is used
 960          * as a directional scalar to blend between full coverage and full
 961          * panning.
 962          */
 963         const float coverage{!(Distance > std::numeric_limits<float>::epsilon()) ? 1.0f :
 964             (al::numbers::inv_pi_v<float>/2.0f * Spread)};
 965
 966         auto calc_coeffs = [xpos,ypos,zpos](RenderMode mode)
 967         {
 968             if(mode != RenderMode::Pairwise)
 969                 return CalcDirectionCoeffs(std::array{xpos, ypos, zpos}, 0.0f);
 970             const auto pos = ScaleAzimuthFront3_2(std::array{xpos, ypos, zpos});
 971             return CalcDirectionCoeffs(pos, 0.0f);
 972         };
 973         const auto scales = GetAmbiScales(voice->mAmbiScaling);
 974         auto coeffs = calc_coeffs(Device->mRenderMode);
 975
 976         if(!(coverage > 0.0f))
 977         {
 978             ComputePanGains(&Device->Dry, coeffs, DryGain.Base*scales[0],
 979                 voice->mChans[0].mDryParams.Gains.Target);
 980             for(uint i{0};i < NumSends;i++)
 981             {
 982                 if(const EffectSlot *Slot{SendSlots[i]})
 983                     ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base*scales[0],
 984                         voice->mChans[0].mWetParams[i].Gains.Target);
 985             }
 986         }
 987         else
 988         {
 989             /* Local B-Format sources have their XYZ channels rotated according
 990              * to the orientation.
 991              */
 992             /* AT then UP */
 993             alu::Vector N{props->OrientAt[0], props->OrientAt[1], props->OrientAt[2], 0.0f};
 994             N.normalize();
 995             alu::Vector V{props->OrientUp[0], props->OrientUp[1], props->OrientUp[2], 0.0f};
 996             V.normalize();
 997             if(!props->HeadRelative)
 998             {
 999                 N = Context.Matrix * N;
1000                 V = Context.Matrix * V;
1001             }
1002             /* Build and normalize right-vector */
1003             alu::Vector U{N.cross_product(V)};
1004             U.normalize();
1005
1006             /* Build a rotation matrix. Manually fill the zeroth- and first-
1007              * order elements, then construct the rotation for the higher
1008              * orders.
1009              */
1010             AmbiRotateMatrix &shrot = Device->mAmbiRotateMatrix;
1011             shrot.fill(AmbiRotateMatrix::value_type{});
1012
1013             shrot[0][0] = 1.0f;
1014             shrot[1][1] =  U[0]; shrot[1][2] = -U[1]; shrot[1][3] =  U[2];
1015             shrot[2][1] = -V[0]; shrot[2][2] =  V[1]; shrot[2][3] = -V[2];
1016             shrot[3][1] = -N[0]; shrot[3][2] =  N[1]; shrot[3][3] = -N[2];
1017             AmbiRotator(shrot, static_cast<int>(Device->mAmbiOrder));
1018
1019             /* If the device is higher order than the voice, "upsample" the
1020              * matrix.
1021              *
1022              * NOTE: Starting with second-order, a 2D upsample needs to be
1023              * applied with a 2D source and 3D output, even when they're the
1024              * same order. This is because higher orders have a height offset
1025              * on various channels (i.e. when elevation=0, those height-related
1026              * channels should be non-0).
1027              */
1028             AmbiRotateMatrix &mixmatrix = Device->mAmbiRotateMatrix2;
1029             if(Device->mAmbiOrder > voice->mAmbiOrder
1030                 || (Device->mAmbiOrder >= 2 && !Device->m2DMixing
1031                     && Is2DAmbisonic(voice->mFmtChannels)))
1032             {
1033                 if(voice->mAmbiOrder == 1)
1034                 {
1035                     const auto upsampler = Is2DAmbisonic(voice->mFmtChannels) ?
1036                         al::span{AmbiScale::FirstOrder2DUp} : al::span{AmbiScale::FirstOrderUp};
1037                     UpsampleBFormatTransform(mixmatrix, upsampler, shrot, Device->mAmbiOrder);
1038                 }
1039                 else if(voice->mAmbiOrder == 2)
1040                 {
1041                     const auto upsampler = Is2DAmbisonic(voice->mFmtChannels) ?
1042                         al::span{AmbiScale::SecondOrder2DUp} : al::span{AmbiScale::SecondOrderUp};
1043                     UpsampleBFormatTransform(mixmatrix, upsampler, shrot, Device->mAmbiOrder);
1044                 }
1045                 else if(voice->mAmbiOrder == 3)
1046                 {
1047                     const auto upsampler = Is2DAmbisonic(voice->mFmtChannels) ?
1048                         al::span{AmbiScale::ThirdOrder2DUp} : al::span{AmbiScale::ThirdOrderUp};
1049                     UpsampleBFormatTransform(mixmatrix, upsampler, shrot, Device->mAmbiOrder);
1050                 }
1051                 else if(voice->mAmbiOrder == 4)
1052                 {
1053                     const auto upsampler = al::span{AmbiScale::FourthOrder2DUp};
1054                     UpsampleBFormatTransform(mixmatrix, upsampler, shrot, Device->mAmbiOrder);
1055                 }
1056                 else
1057                     al::unreachable();
1058             }
1059             else
1060                 mixmatrix = shrot;
1061
1062             /* Convert the rotation matrix for input ordering and scaling, and
1063              * whether input is 2D or 3D.
1064              */
1065             const auto index_map = Is2DAmbisonic(voice->mFmtChannels) ?
1066                 GetAmbi2DLayout(voice->mAmbiLayout).subspan(0) :
1067                 GetAmbiLayout(voice->mAmbiLayout).subspan(0);
1068
1069             /* Scale the panned W signal inversely to coverage (full coverage
1070              * means no panned signal), and according to the channel scaling.
1071              */
1072             std::for_each(coeffs.begin(), coeffs.end(),
1073                 [scale=(1.0f-coverage)*scales[0]](float &coeff) noexcept { coeff *= scale; });
1074
1075             for(size_t c{0};c < num_channels;c++)
1076             {
1077                 const size_t acn{index_map[c]};
1078                 const float scale{scales[acn] * coverage};
1079
1080                 /* For channel 0, combine the B-Format signal (scaled according
1081                  * to the coverage amount) with the directional pan. For all
1082                  * other channels, use just the (scaled) B-Format signal.
1083                  */
1084                 std::transform(mixmatrix[acn].cbegin(), mixmatrix[acn].cend(), coeffs.begin(),
1085                     coeffs.begin(), [scale](const float in, const float coeff) noexcept
1086                     { return in*scale + coeff; });
1087
1088                 ComputePanGains(&Device->Dry, coeffs, DryGain.Base,
1089                     voice->mChans[c].mDryParams.Gains.Target);
1090
1091                 for(uint i{0};i < NumSends;i++)
1092                 {
1093                     if(const EffectSlot *Slot{SendSlots[i]})
1094                         ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base,
1095                             voice->mChans[c].mWetParams[i].Gains.Target);
1096                 }
1097
1098                 coeffs = std::array<float,MaxAmbiChannels>{};
1099             }
1100         }
1101     }
1102     else if(DirectChannels != DirectMode::Off && !Device->RealOut.RemixMap.empty())
1103     {
1104         /* Direct source channels always play local. Skip the virtual channels
1105          * and write inputs to the matching real outputs.
1106          */
1107         voice->mDirect.Buffer = Device->RealOut.Buffer;
1108
1109         for(size_t c{0};c < num_channels;c++)
1110         {
1111             const float pangain{SelectChannelGain(chans[c].channel)};
1112             if(uint idx{Device->channelIdxByName(chans[c].channel)}; idx != InvalidChannelIndex)
1113                 voice->mChans[c].mDryParams.Gains.Target[idx] = DryGain.Base * pangain;
1114             else if(DirectChannels == DirectMode::RemixMismatch)
1115             {
1116                 auto match_channel = [channel=chans[c].channel](const InputRemixMap &map) noexcept
1117                 { return channel == map.channel; };
1118                 auto remap = std::find_if(Device->RealOut.RemixMap.cbegin(),
1119                     Device->RealOut.RemixMap.cend(), match_channel);
1120                 if(remap != Device->RealOut.RemixMap.cend())
1121                 {
1122                     for(const auto &target : remap->targets)
1123                     {
1124                         idx = Device->channelIdxByName(target.channel);
1125                         if(idx != InvalidChannelIndex)
1126                             voice->mChans[c].mDryParams.Gains.Target[idx] = DryGain.Base * pangain
1127                                 * target.mix;
1128                     }
1129                 }
1130             }
1131         }
1132
1133         /* Auxiliary sends still use normal channel panning since they mix to
1134          * B-Format, which can't channel-match.
1135          */
1136         for(size_t c{0};c < num_channels;c++)
1137         {
1138             /* Skip LFE */
1139             if(chans[c].channel == LFE)
1140                 continue;
1141
1142             const float pangain{SelectChannelGain(chans[c].channel)};
1143             const auto coeffs = CalcDirectionCoeffs(chans[c].pos, 0.0f);
1144
1145             for(uint i{0};i < NumSends;i++)
1146             {
1147                 if(const EffectSlot *Slot{SendSlots[i]})
1148                     ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base * pangain,
1149                         voice->mChans[c].mWetParams[i].Gains.Target);
1150             }
1151         }
1152     }
1153     else if(Device->mRenderMode == RenderMode::Hrtf)
1154     {
1155         /* Full HRTF rendering. Skip the virtual channels and render to the
1156          * real outputs.
1157          */
1158         voice->mDirect.Buffer = Device->RealOut.Buffer;
1159
1160         if(Distance > std::numeric_limits<float>::epsilon())
1161         {
1162             if(voice->mFmtChannels == FmtMono)
1163             {
1164                 const float src_ev{std::asin(std::clamp(ypos, -1.0f, 1.0f))};
1165                 const float src_az{std::atan2(xpos, -zpos)};
1166
1167                 Device->mHrtf->getCoeffs(src_ev, src_az, Distance*NfcScale, Spread,
1168                     voice->mChans[0].mDryParams.Hrtf.Target.Coeffs,
1169                     voice->mChans[0].mDryParams.Hrtf.Target.Delay);
1170                 voice->mChans[0].mDryParams.Hrtf.Target.Gain = DryGain.Base;
1171
1172                 const auto coeffs = CalcDirectionCoeffs(std::array{xpos, ypos, zpos}, Spread);
1173                 for(uint i{0};i < NumSends;i++)
1174                 {
1175                     if(const EffectSlot *Slot{SendSlots[i]})
1176                         ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base,
1177                             voice->mChans[0].mWetParams[i].Gains.Target);
1178                 }
1179             }
1180             else for(size_t c{0};c < num_channels;c++)
1181             {
1182                 using namespace al::numbers;
1183
1184                 /* Skip LFE */
1185                 if(chans[c].channel == LFE) continue;
1186                 const float pangain{SelectChannelGain(chans[c].channel)};
1187
1188                 /* Warp the channel position toward the source position as the
1189                  * source spread decreases. With no spread, all channels are at
1190                  * the source position, at full spread (pi*2), each channel is
1191                  * left unchanged.
1192                  */
1193                 const float a{1.0f - (inv_pi_v<float>/2.0f)*Spread};
1194                 std::array pos{
1195                     lerpf(chans[c].pos[0], xpos, a),
1196                     lerpf(chans[c].pos[1], ypos, a),
1197                     lerpf(chans[c].pos[2], zpos, a)};
1198                 const float len{std::sqrt(pos[0]*pos[0] + pos[1]*pos[1] + pos[2]*pos[2])};
1199                 if(len < 1.0f)
1200                 {
1201                     pos[0] /= len;
1202                     pos[1] /= len;
1203                     pos[2] /= len;
1204                 }
1205
1206                 const float ev{std::asin(std::clamp(pos[1], -1.0f, 1.0f))};
1207                 const float az{std::atan2(pos[0], -pos[2])};
1208
1209                 Device->mHrtf->getCoeffs(ev, az, Distance*NfcScale, 0.0f,
1210                     voice->mChans[c].mDryParams.Hrtf.Target.Coeffs,
1211                     voice->mChans[c].mDryParams.Hrtf.Target.Delay);
1212                 voice->mChans[c].mDryParams.Hrtf.Target.Gain = DryGain.Base * pangain;
1213
1214                 const auto coeffs = CalcDirectionCoeffs(pos, 0.0f);
1215                 for(uint i{0};i < NumSends;i++)
1216                 {
1217                     if(const EffectSlot *Slot{SendSlots[i]})
1218                         ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base * pangain,
1219                             voice->mChans[c].mWetParams[i].Gains.Target);
1220                 }
1221             }
1222         }
1223         else
1224         {
1225             /* With no distance, spread is only meaningful for mono sources
1226              * where it can be 0 or full (non-mono sources are always full
1227              * spread here).
1228              */
1229             const float spread{Spread * float(voice->mFmtChannels == FmtMono)};
1230
1231             /* Local sources on HRTF play with each channel panned to its
1232              * relative location around the listener, providing "virtual
1233              * speaker" responses.
1234              */
1235             for(size_t c{0};c < num_channels;c++)
1236             {
1237                 /* Skip LFE */
1238                 if(chans[c].channel == LFE)
1239                     continue;
1240                 const float pangain{SelectChannelGain(chans[c].channel)};
1241
1242                 /* Get the HRIR coefficients and delays for this channel
1243                  * position.
1244                  */
1245                 const float ev{std::asin(chans[c].pos[1])};
1246                 const float az{std::atan2(chans[c].pos[0], -chans[c].pos[2])};
1247
1248                 Device->mHrtf->getCoeffs(ev, az, std::numeric_limits<float>::infinity(), spread,
1249                     voice->mChans[c].mDryParams.Hrtf.Target.Coeffs,
1250                     voice->mChans[c].mDryParams.Hrtf.Target.Delay);
1251                 voice->mChans[c].mDryParams.Hrtf.Target.Gain = DryGain.Base * pangain;
1252
1253                 /* Normal panning for auxiliary sends. */
1254                 const auto coeffs = CalcDirectionCoeffs(chans[c].pos, spread);
1255
1256                 for(uint i{0};i < NumSends;i++)
1257                 {
1258                     if(const EffectSlot *Slot{SendSlots[i]})
1259                         ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base * pangain,
1260                             voice->mChans[c].mWetParams[i].Gains.Target);
1261                 }
1262             }
1263         }
1264
1265         voice->mFlags.set(VoiceHasHrtf);
1266     }
1267     else
1268     {
1269         /* Non-HRTF rendering. Use normal panning to the output. */
1270
1271         if(Distance > std::numeric_limits<float>::epsilon())
1272         {
1273             /* Calculate NFC filter coefficient if needed. */
1274             if(Device->AvgSpeakerDist > 0.0f)
1275             {
1276                 /* Clamp the distance for really close sources, to prevent
1277                  * excessive bass.
1278                  */
1279                 const float mdist{std::max(Distance*NfcScale, Device->AvgSpeakerDist/4.0f)};
1280                 const float w0{SpeedOfSoundMetersPerSec / (mdist * Frequency)};
1281
1282                 /* Adjust NFC filters. */
1283                 for(size_t c{0};c < num_channels;c++)
1284                     voice->mChans[c].mDryParams.NFCtrlFilter.adjust(w0);
1285
1286                 voice->mFlags.set(VoiceHasNfc);
1287             }
1288
1289             if(voice->mFmtChannels == FmtMono)
1290             {
1291                 auto calc_coeffs = [xpos,ypos,zpos,Spread](RenderMode mode)
1292                 {
1293                     if(mode != RenderMode::Pairwise)
1294                         return CalcDirectionCoeffs(std::array{xpos, ypos, zpos}, Spread);
1295                     const auto pos = ScaleAzimuthFront3_2(std::array{xpos, ypos, zpos});
1296                     return CalcDirectionCoeffs(pos, Spread);
1297                 };
1298                 const auto coeffs = calc_coeffs(Device->mRenderMode);
1299
1300                 ComputePanGains(&Device->Dry, coeffs, DryGain.Base,
1301                     voice->mChans[0].mDryParams.Gains.Target);
1302                 for(uint i{0};i < NumSends;i++)
1303                 {
1304                     if(const EffectSlot *Slot{SendSlots[i]})
1305                         ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base,
1306                             voice->mChans[0].mWetParams[i].Gains.Target);
1307                 }
1308             }
1309             else
1310             {
1311                 using namespace al::numbers;
1312
1313                 for(size_t c{0};c < num_channels;c++)
1314                 {
1315                     const float pangain{SelectChannelGain(chans[c].channel)};
1316
1317                     /* Special-case LFE */
1318                     if(chans[c].channel == LFE)
1319                     {
1320                         if(Device->Dry.Buffer.data() == Device->RealOut.Buffer.data())
1321                         {
1322                             const uint idx{Device->channelIdxByName(chans[c].channel)};
1323                             if(idx != InvalidChannelIndex)
1324                                 voice->mChans[c].mDryParams.Gains.Target[idx] = DryGain.Base
1325                                     * pangain;
1326                         }
1327                         continue;
1328                     }
1329
1330                     /* Warp the channel position toward the source position as
1331                      * the spread decreases. With no spread, all channels are
1332                      * at the source position, at full spread (pi*2), each
1333                      * channel position is left unchanged.
1334                      */
1335                     const float a{1.0f - (inv_pi_v<float>/2.0f)*Spread};
1336                     std::array pos{
1337                         lerpf(chans[c].pos[0], xpos, a),
1338                         lerpf(chans[c].pos[1], ypos, a),
1339                         lerpf(chans[c].pos[2], zpos, a)};
1340                     const float len{std::sqrt(pos[0]*pos[0] + pos[1]*pos[1] + pos[2]*pos[2])};
1341                     if(len < 1.0f)
1342                     {
1343                         pos[0] /= len;
1344                         pos[1] /= len;
1345                         pos[2] /= len;
1346                     }
1347
1348                     if(Device->mRenderMode == RenderMode::Pairwise)
1349                         pos = ScaleAzimuthFront3(pos);
1350                     const auto coeffs = CalcDirectionCoeffs(pos, 0.0f);
1351
1352                     ComputePanGains(&Device->Dry, coeffs, DryGain.Base * pangain,
1353                         voice->mChans[c].mDryParams.Gains.Target);
1354                     for(uint i{0};i < NumSends;i++)
1355                     {
1356                         if(const EffectSlot *Slot{SendSlots[i]})
1357                             ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base * pangain,
1358                                 voice->mChans[c].mWetParams[i].Gains.Target);
1359                     }
1360                 }
1361             }
1362         }
1363         else
1364         {
1365             if(Device->AvgSpeakerDist > 0.0f)
1366             {
1367                 /* If the source distance is 0, simulate a plane-wave by using
1368                  * infinite distance, which results in a w0 of 0.
1369                  */
1370                 static constexpr float w0{0.0f};
1371                 for(size_t c{0};c < num_channels;c++)
1372                     voice->mChans[c].mDryParams.NFCtrlFilter.adjust(w0);
1373
1374                 voice->mFlags.set(VoiceHasNfc);
1375             }
1376
1377             /* With no distance, spread is only meaningful for mono sources
1378              * where it can be 0 or full (non-mono sources are always full
1379              * spread here).
1380              */
1381             const float spread{Spread * float(voice->mFmtChannels == FmtMono)};
1382             for(size_t c{0};c < num_channels;c++)
1383             {
1384                 const float pangain{SelectChannelGain(chans[c].channel)};
1385
1386                 /* Special-case LFE */
1387                 if(chans[c].channel == LFE)
1388                 {
1389                     if(Device->Dry.Buffer.data() == Device->RealOut.Buffer.data())
1390                     {
1391                         const uint idx{Device->channelIdxByName(chans[c].channel)};
1392                         if(idx != InvalidChannelIndex)
1393                             voice->mChans[c].mDryParams.Gains.Target[idx] = DryGain.Base * pangain;
1394                     }
1395                     continue;
1396                 }
1397
1398                 const auto coeffs = CalcDirectionCoeffs((Device->mRenderMode==RenderMode::Pairwise)
1399                     ? ScaleAzimuthFront3(chans[c].pos) : chans[c].pos, spread);
1400
1401                 ComputePanGains(&Device->Dry, coeffs, DryGain.Base * pangain,
1402                     voice->mChans[c].mDryParams.Gains.Target);
1403                 for(uint i{0};i < NumSends;i++)
1404                 {
1405                     if(const EffectSlot *Slot{SendSlots[i]})
1406                         ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base * pangain,
1407                             voice->mChans[c].mWetParams[i].Gains.Target);
1408                 }
1409             }
1410         }
1411     }
1412
1413     {
1414         const float hfNorm{props->Direct.HFReference / Frequency};
1415         const float lfNorm{props->Direct.LFReference / Frequency};
1416
1417         voice->mDirect.FilterType = AF_None;
1418         if(DryGain.HF != 1.0f) voice->mDirect.FilterType |= AF_LowPass;
1419         if(DryGain.LF != 1.0f) voice->mDirect.FilterType |= AF_HighPass;
1420
1421         auto &lowpass = voice->mChans[0].mDryParams.LowPass;
1422         auto &highpass = voice->mChans[0].mDryParams.HighPass;
1423         lowpass.setParamsFromSlope(BiquadType::HighShelf, hfNorm, DryGain.HF, 1.0f);
1424         highpass.setParamsFromSlope(BiquadType::LowShelf, lfNorm, DryGain.LF, 1.0f);
1425         for(size_t c{1};c < num_channels;c++)
1426         {
1427             voice->mChans[c].mDryParams.LowPass.copyParamsFrom(lowpass);
1428             voice->mChans[c].mDryParams.HighPass.copyParamsFrom(highpass);
1429         }
1430     }
1431     for(uint i{0};i < NumSends;i++)
1432     {
1433         const float hfNorm{props->Send[i].HFReference / Frequency};
1434         const float lfNorm{props->Send[i].LFReference / Frequency};
1435
1436         voice->mSend[i].FilterType = AF_None;
1437         if(WetGain[i].HF != 1.0f) voice->mSend[i].FilterType |= AF_LowPass;
1438         if(WetGain[i].LF != 1.0f) voice->mSend[i].FilterType |= AF_HighPass;
1439
1440         auto &lowpass = voice->mChans[0].mWetParams[i].LowPass;
1441         auto &highpass = voice->mChans[0].mWetParams[i].HighPass;
1442         lowpass.setParamsFromSlope(BiquadType::HighShelf, hfNorm, WetGain[i].HF, 1.0f);
1443         highpass.setParamsFromSlope(BiquadType::LowShelf, lfNorm, WetGain[i].LF, 1.0f);
1444         for(size_t c{1};c < num_channels;c++)
1445         {
1446             voice->mChans[c].mWetParams[i].LowPass.copyParamsFrom(lowpass);
1447             voice->mChans[c].mWetParams[i].HighPass.copyParamsFrom(highpass);
1448         }
1449     }
1450 }
1451
1452 void CalcNonAttnSourceParams(Voice *voice, const VoiceProps *props, const ContextBase *context)
1453 {
1454     DeviceBase *Device{context->mDevice};
1455     std::array<EffectSlot*,MaxSendCount> SendSlots{};
1456
1457     voice->mDirect.Buffer = Device->Dry.Buffer;
1458     for(uint i{0};i < Device->NumAuxSends;i++)
1459     {
1460         SendSlots[i] = props->Send[i].Slot;
1461         if(!SendSlots[i] || SendSlots[i]->EffectType == EffectSlotType::None)
1462         {
1463             SendSlots[i] = nullptr;
1464             voice->mSend[i].Buffer = {};
1465         }
1466         else
1467             voice->mSend[i].Buffer = SendSlots[i]->Wet.Buffer;
1468     }
1469
1470     /* Calculate the stepping value */
1471     const auto Pitch = static_cast<float>(voice->mFrequency) /
1472         static_cast<float>(Device->Frequency) * props->Pitch;
1473     if(Pitch > float{MaxPitch})
1474         voice->mStep = MaxPitch<<MixerFracBits;
1475     else
1476         voice->mStep = std::max(fastf2u(Pitch * MixerFracOne), 1u);
1477     voice->mResampler = PrepareResampler(props->mResampler, voice->mStep, &voice->mResampleState);
1478
1479     /* Calculate gains */
1480     GainTriplet DryGain{};
1481     DryGain.Base = std::min(std::clamp(props->Gain, props->MinGain, props->MaxGain) *
1482         props->Direct.Gain * context->mParams.Gain, GainMixMax);
1483     DryGain.HF = props->Direct.GainHF;
1484     DryGain.LF = props->Direct.GainLF;
1485
1486     std::array<GainTriplet,MaxSendCount> WetGain{};
1487     for(uint i{0};i < Device->NumAuxSends;i++)
1488     {
1489         WetGain[i].Base = std::min(std::clamp(props->Gain, props->MinGain, props->MaxGain) *
1490             props->Send[i].Gain * context->mParams.Gain, GainMixMax);
1491         WetGain[i].HF = props->Send[i].GainHF;
1492         WetGain[i].LF = props->Send[i].GainLF;
1493     }
1494
1495     CalcPanningAndFilters(voice, 0.0f, 0.0f, -1.0f, 0.0f, 0.0f, DryGain, WetGain, SendSlots, props,
1496         context->mParams, Device);
1497 }
1498
1499 void CalcAttnSourceParams(Voice *voice, const VoiceProps *props, const ContextBase *context)
1500 {
1501     DeviceBase *Device{context->mDevice};
1502     const uint NumSends{Device->NumAuxSends};
1503
1504     /* Set mixing buffers and get send parameters. */
1505     voice->mDirect.Buffer = Device->Dry.Buffer;
1506     std::array<EffectSlot*,MaxSendCount> SendSlots{};
1507     std::array<float,MaxSendCount> RoomRolloff{};
1508     std::bitset<MaxSendCount> UseDryAttnForRoom{0};
1509     for(uint i{0};i < NumSends;i++)
1510     {
1511         SendSlots[i] = props->Send[i].Slot;
1512         if(!SendSlots[i] || SendSlots[i]->EffectType == EffectSlotType::None)
1513             SendSlots[i] = nullptr;
1514         else if(SendSlots[i]->AuxSendAuto)
1515         {
1516             /* NOTE: Contrary to the EFX docs, the effect's room rolloff factor
1517              * applies to the selected distance model along with the source's
1518              * room rolloff factor, not necessarily the inverse distance model.
1519              *
1520              * Generic Software also applies these rolloff factors regardless
1521              * of any setting. It doesn't seem to use the effect slot's send
1522              * auto for anything, though as far as I understand, it's supposed
1523              * to control whether the send gets the same gain/gainhf as the
1524              * direct path (excluding the filter).
1525              */
1526             RoomRolloff[i] = props->RoomRolloffFactor + SendSlots[i]->RoomRolloff;
1527         }
1528         else
1529             UseDryAttnForRoom.set(i);
1530
1531         if(!SendSlots[i])
1532             voice->mSend[i].Buffer = {};
1533         else
1534             voice->mSend[i].Buffer = SendSlots[i]->Wet.Buffer;
1535     }
1536
1537     /* Transform source to listener space (convert to head relative) */
1538     alu::Vector Position{props->Position[0], props->Position[1], props->Position[2], 1.0f};
1539     alu::Vector Velocity{props->Velocity[0], props->Velocity[1], props->Velocity[2], 0.0f};
1540     alu::Vector Direction{props->Direction[0], props->Direction[1], props->Direction[2], 0.0f};
1541     if(!props->HeadRelative)
1542     {
1543         /* Transform source vectors */
1544         Position = context->mParams.Matrix * (Position - context->mParams.Position);
1545         Velocity = context->mParams.Matrix * Velocity;
1546         Direction = context->mParams.Matrix * Direction;
1547     }
1548     else
1549     {
1550         /* Offset the source velocity to be relative of the listener velocity */
1551         Velocity += context->mParams.Velocity;
1552     }
1553
1554     const bool directional{Direction.normalize() > 0.0f};
1555     alu::Vector ToSource{Position[0], Position[1], Position[2], 0.0f};
1556     const float Distance{ToSource.normalize()};
1557
1558     /* Calculate distance attenuation */
1559     float ClampedDist{Distance};
1560     float DryGainBase{props->Gain};
1561     std::array<float,MaxSendCount> WetGainBase{};
1562     WetGainBase.fill(props->Gain);
1563
1564     float DryAttnBase{1.0f};
1565     switch(context->mParams.SourceDistanceModel ? props->mDistanceModel
1566         : context->mParams.mDistanceModel)
1567     {
1568     case DistanceModel::InverseClamped:
1569         if(props->MaxDistance < props->RefDistance) break;
1570         ClampedDist = std::clamp(ClampedDist, props->RefDistance, props->MaxDistance);
1571         /*fall-through*/
1572     case DistanceModel::Inverse:
1573         if(props->RefDistance > 0.0f)
1574         {
1575             float dist{lerpf(props->RefDistance, ClampedDist, props->RolloffFactor)};
1576             if(dist > 0.0f)
1577             {
1578                 DryAttnBase = props->RefDistance / dist;
1579                 DryGainBase *= DryAttnBase;
1580             }
1581
1582             for(size_t i{0};i < NumSends;++i)
1583             {
1584                 dist = lerpf(props->RefDistance, ClampedDist, RoomRolloff[i]);
1585                 if(dist > 0.0f) WetGainBase[i] *= props->RefDistance / dist;
1586             }
1587         }
1588         break;
1589
1590     case DistanceModel::LinearClamped:
1591         if(props->MaxDistance < props->RefDistance) break;
1592         ClampedDist = std::clamp(ClampedDist, props->RefDistance, props->MaxDistance);
1593         /*fall-through*/
1594     case DistanceModel::Linear:
1595         if(props->MaxDistance != props->RefDistance)
1596         {
1597             float attn{(ClampedDist-props->RefDistance) /
1598                 (props->MaxDistance-props->RefDistance) * props->RolloffFactor};
1599             DryAttnBase = std::max(1.0f - attn, 0.0f);
1600             DryGainBase *= DryAttnBase;
1601
1602             for(size_t i{0};i < NumSends;++i)
1603             {
1604                 attn = (ClampedDist-props->RefDistance) /
1605                     (props->MaxDistance-props->RefDistance) * RoomRolloff[i];
1606                 WetGainBase[i] *= std::max(1.0f - attn, 0.0f);
1607             }
1608         }
1609         break;
1610
1611     case DistanceModel::ExponentClamped:
1612         if(props->MaxDistance < props->RefDistance) break;
1613         ClampedDist = std::clamp(ClampedDist, props->RefDistance, props->MaxDistance);
1614         /*fall-through*/
1615     case DistanceModel::Exponent:
1616         if(ClampedDist > 0.0f && props->RefDistance > 0.0f)
1617         {
1618             const float dist_ratio{ClampedDist/props->RefDistance};
1619             DryAttnBase = std::pow(dist_ratio, -props->RolloffFactor);
1620             DryGainBase *= DryAttnBase;
1621             for(size_t i{0};i < NumSends;++i)
1622                 WetGainBase[i] *= std::pow(dist_ratio, -RoomRolloff[i]);
1623         }
1624         break;
1625
1626     case DistanceModel::Disable:
1627         break;
1628     }
1629
1630     /* Calculate directional soundcones */
1631     float ConeHF{1.0f}, WetCone{1.0f}, WetConeHF{1.0f};
1632     if(directional && props->InnerAngle < 360.0f)
1633     {
1634         static constexpr float Rad2Deg{static_cast<float>(180.0 / al::numbers::pi)};
1635         const float Angle{Rad2Deg*2.0f * std::acos(-Direction.dot_product(ToSource)) * ConeScale};
1636
1637         float ConeGain{1.0f};
1638         if(Angle >= props->OuterAngle)
1639         {
1640             ConeGain = props->OuterGain;
1641             if(props->DryGainHFAuto)
1642                 ConeHF = props->OuterGainHF;
1643         }
1644         else if(Angle >= props->InnerAngle)
1645         {
1646             const float scale{(Angle-props->InnerAngle) / (props->OuterAngle-props->InnerAngle)};
1647             ConeGain = lerpf(1.0f, props->OuterGain, scale);
1648             if(props->DryGainHFAuto)
1649                 ConeHF = lerpf(1.0f, props->OuterGainHF, scale);
1650         }
1651
1652         DryGainBase *= ConeGain;
1653         if(props->WetGainAuto)
1654             WetCone = ConeGain;
1655         if(props->WetGainHFAuto)
1656             WetConeHF = ConeHF;
1657     }
1658
1659     /* Apply gain and frequency filters */
1660     GainTriplet DryGain{};
1661     DryGainBase = std::clamp(DryGainBase, props->MinGain, props->MaxGain) * context->mParams.Gain;
1662     DryGain.Base = std::min(DryGainBase * props->Direct.Gain, GainMixMax);
1663     DryGain.HF = ConeHF * props->Direct.GainHF;
1664     DryGain.LF = props->Direct.GainLF;
1665
1666     std::array<GainTriplet,MaxSendCount> WetGain{};
1667     for(uint i{0};i < NumSends;i++)
1668     {
1669         WetGainBase[i] = std::clamp(WetGainBase[i]*WetCone, props->MinGain, props->MaxGain) *
1670             context->mParams.Gain;
1671         /* If this effect slot's Auxiliary Send Auto is off, then use the dry
1672          * path distance and cone attenuation, otherwise use the wet (room)
1673          * path distance and cone attenuation. The send filter is used instead
1674          * of the direct filter, regardless.
1675          */
1676         const bool use_room{!UseDryAttnForRoom.test(i)};
1677         const float gain{use_room ? WetGainBase[i] : DryGainBase};
1678         WetGain[i].Base = std::min(gain * props->Send[i].Gain, GainMixMax);
1679         WetGain[i].HF = (use_room ? WetConeHF : ConeHF) * props->Send[i].GainHF;
1680         WetGain[i].LF = props->Send[i].GainLF;
1681     }
1682
1683     /* Distance-based air absorption and initial send decay. */
1684     if(Distance > props->RefDistance) LIKELY
1685     {
1686         const float distance_base{(Distance-props->RefDistance) * props->RolloffFactor};
1687         const float distance_meters{distance_base * context->mParams.MetersPerUnit};
1688         const float dryabsorb{distance_meters * props->AirAbsorptionFactor};
1689         if(dryabsorb > std::numeric_limits<float>::epsilon())
1690             DryGain.HF *= std::pow(context->mParams.AirAbsorptionGainHF, dryabsorb);
1691
1692         /* If the source's Auxiliary Send Filter Gain Auto is off, no extra
1693          * adjustment is applied to the send gains.
1694          */
1695         for(uint i{props->WetGainAuto ? 0u : NumSends};i < NumSends;++i)
1696         {
1697             if(!SendSlots[i] || !(SendSlots[i]->DecayTime > 0.0f))
1698                 continue;
1699
1700             if(distance_meters > std::numeric_limits<float>::epsilon())
1701                 WetGain[i].HF *= std::pow(SendSlots[i]->AirAbsorptionGainHF, distance_meters);
1702
1703             /* If this effect slot's Auxiliary Send Auto is off, don't apply
1704              * the automatic initial reverb decay.
1705              *
1706              * NOTE: Generic Software applies the initial decay regardless of
1707              * this setting. It doesn't seem to use it for anything, only the
1708              * source's send filter gain auto flag affects this.
1709              */
1710             if(!SendSlots[i]->AuxSendAuto)
1711                 continue;
1712
1713             const float DecayDistance{SendSlots[i]->DecayTime * SpeedOfSoundMetersPerSec};
1714
1715             /* Apply a decay-time transformation to the wet path, based on the
1716              * source distance. The initial decay of the reverb effect is
1717              * calculated and applied to the wet path.
1718              *
1719              * FIXME: This is very likely not correct. It more likely should
1720              * work by calculating a rolloff dynamically based on the reverb
1721              * parameters (and source distance?) and add it to the room rolloff
1722              * with the reverb and source rolloff parameters.
1723              */
1724             const float baseAttn{DryAttnBase};
1725             const float fact{distance_base / DecayDistance};
1726             const float gain{std::pow(ReverbDecayGain, fact)*(1.0f-baseAttn) + baseAttn};
1727             WetGain[i].Base *= gain;
1728         }
1729     }
1730
1731
1732     /* Initial source pitch */
1733     float Pitch{props->Pitch};
1734
1735     /* Calculate velocity-based doppler effect */
1736     float DopplerFactor{props->DopplerFactor * context->mParams.DopplerFactor};
1737     if(DopplerFactor > 0.0f)
1738     {
1739         const alu::Vector &lvelocity = context->mParams.Velocity;
1740         float vss{Velocity.dot_product(ToSource) * -DopplerFactor};
1741         float vls{lvelocity.dot_product(ToSource) * -DopplerFactor};
1742
1743         const float SpeedOfSound{context->mParams.SpeedOfSound};
1744         if(!(vls < SpeedOfSound))
1745         {
1746             /* Listener moving away from the source at the speed of sound.
1747              * Sound waves can't catch it.
1748              */
1749             Pitch = 0.0f;
1750         }
1751         else if(!(vss < SpeedOfSound))
1752         {
1753             /* Source moving toward the listener at the speed of sound. Sound
1754              * waves bunch up to extreme frequencies.
1755              */
1756             Pitch = std::numeric_limits<float>::infinity();
1757         }
1758         else
1759         {
1760             /* Source and listener movement is nominal. Calculate the proper
1761              * doppler shift.
1762              */
1763             Pitch *= (SpeedOfSound-vls) / (SpeedOfSound-vss);
1764         }
1765     }
1766
1767     /* Adjust pitch based on the buffer and output frequencies, and calculate
1768      * fixed-point stepping value.
1769      */
1770     Pitch *= static_cast<float>(voice->mFrequency) / static_cast<float>(Device->Frequency);
1771     if(Pitch > float{MaxPitch})
1772         voice->mStep = MaxPitch<<MixerFracBits;
1773     else
1774         voice->mStep = std::max(fastf2u(Pitch * MixerFracOne), 1u);
1775     voice->mResampler = PrepareResampler(props->mResampler, voice->mStep, &voice->mResampleState);
1776
1777     float spread{0.0f};
1778     if(props->Radius > Distance)
1779         spread = al::numbers::pi_v<float>*2.0f - Distance/props->Radius*al::numbers::pi_v<float>;
1780     else if(Distance > 0.0f)
1781         spread = std::asin(props->Radius/Distance) * 2.0f;
1782
1783     CalcPanningAndFilters(voice, ToSource[0]*XScale, ToSource[1]*YScale, ToSource[2]*ZScale,
1784         Distance, spread, DryGain, WetGain, SendSlots, props, context->mParams, Device);
1785 }
1786
1787 void CalcSourceParams(Voice *voice, ContextBase *context, bool force)
1788 {
1789     VoicePropsItem *props{voice->mUpdate.exchange(nullptr, std::memory_order_acq_rel)};
1790     if(!props && !force) return;
1791
1792     if(props)
1793     {
1794         voice->mProps = static_cast<VoiceProps&>(*props);
1795
1796         AtomicReplaceHead(context->mFreeVoiceProps, props);
1797     }
1798
1799     if((voice->mProps.DirectChannels != DirectMode::Off && voice->mFmtChannels != FmtMono
1800             && !IsAmbisonic(voice->mFmtChannels))
1801         || voice->mProps.mSpatializeMode == SpatializeMode::Off
1802         || (voice->mProps.mSpatializeMode==SpatializeMode::Auto && voice->mFmtChannels != FmtMono))
1803         CalcNonAttnSourceParams(voice, &voice->mProps, context);
1804     else
1805         CalcAttnSourceParams(voice, &voice->mProps, context);
1806 }
1807
1808
1809 void SendSourceStateEvent(ContextBase *context, uint id, VChangeState state)
1810 {
1811     RingBuffer *ring{context->mAsyncEvents.get()};
1812     auto evt_vec = ring->getWriteVector();
1813     if(evt_vec.first.len < 1) return;
1814
1815     auto &evt = InitAsyncEvent<AsyncSourceStateEvent>(evt_vec.first.buf);
1816     evt.mId = id;
1817     switch(state)
1818     {
1819     case VChangeState::Reset:
1820         evt.mState = AsyncSrcState::Reset;
1821         break;
1822     case VChangeState::Stop:
1823         evt.mState = AsyncSrcState::Stop;
1824         break;
1825     case VChangeState::Play:
1826         evt.mState = AsyncSrcState::Play;
1827         break;
1828     case VChangeState::Pause:
1829         evt.mState = AsyncSrcState::Pause;
1830         break;
1831     /* Shouldn't happen. */
1832     case VChangeState::Restart:
1833         al::unreachable();
1834     }
1835
1836     ring->writeAdvance(1);
1837 }
1838
1839 void ProcessVoiceChanges(ContextBase *ctx)
1840 {
1841     VoiceChange *cur{ctx->mCurrentVoiceChange.load(std::memory_order_acquire)};
1842     VoiceChange *next{cur->mNext.load(std::memory_order_acquire)};
1843     if(!next) return;
1844
1845     const auto enabledevt = ctx->mEnabledEvts.load(std::memory_order_acquire);
1846     do {
1847         cur = next;
1848
1849         bool sendevt{false};
1850         if(cur->mState == VChangeState::Reset || cur->mState == VChangeState::Stop)
1851         {
1852             if(Voice *voice{cur->mVoice})
1853             {
1854                 voice->mCurrentBuffer.store(nullptr, std::memory_order_relaxed);
1855                 voice->mLoopBuffer.store(nullptr, std::memory_order_relaxed);
1856                 /* A source ID indicates the voice was playing or paused, which
1857                  * gets a reset/stop event.
1858                  */
1859                 sendevt = voice->mSourceID.exchange(0u, std::memory_order_relaxed) != 0u;
1860                 Voice::State oldvstate{Voice::Playing};
1861                 voice->mPlayState.compare_exchange_strong(oldvstate, Voice::Stopping,
1862                     std::memory_order_relaxed, std::memory_order_acquire);
1863                 voice->mPendingChange.store(false, std::memory_order_release);
1864             }
1865             /* Reset state change events are always sent, even if the voice is
1866              * already stopped or even if there is no voice.
1867              */
1868             sendevt |= (cur->mState == VChangeState::Reset);
1869         }
1870         else if(cur->mState == VChangeState::Pause)
1871         {
1872             Voice *voice{cur->mVoice};
1873             Voice::State oldvstate{Voice::Playing};
1874             sendevt = voice->mPlayState.compare_exchange_strong(oldvstate, Voice::Stopping,
1875                 std::memory_order_release, std::memory_order_acquire);
1876         }
1877         else if(cur->mState == VChangeState::Play)
1878         {
1879             /* NOTE: When playing a voice, sending a source state change event
1880              * depends if there's an old voice to stop and if that stop is
1881              * successful. If there is no old voice, a playing event is always
1882              * sent. If there is an old voice, an event is sent only if the
1883              * voice is already stopped.
1884              */
1885             if(Voice *oldvoice{cur->mOldVoice})
1886             {
1887                 oldvoice->mCurrentBuffer.store(nullptr, std::memory_order_relaxed);
1888                 oldvoice->mLoopBuffer.store(nullptr, std::memory_order_relaxed);
1889                 oldvoice->mSourceID.store(0u, std::memory_order_relaxed);
1890                 Voice::State oldvstate{Voice::Playing};
1891                 sendevt = !oldvoice->mPlayState.compare_exchange_strong(oldvstate, Voice::Stopping,
1892                     std::memory_order_relaxed, std::memory_order_acquire);
1893                 oldvoice->mPendingChange.store(false, std::memory_order_release);
1894             }
1895             else
1896                 sendevt = true;
1897
1898             Voice *voice{cur->mVoice};
1899             voice->mPlayState.store(Voice::Playing, std::memory_order_release);
1900         }
1901         else if(cur->mState == VChangeState::Restart)
1902         {
1903             /* Restarting a voice never sends a source change event. */
1904             Voice *oldvoice{cur->mOldVoice};
1905             oldvoice->mCurrentBuffer.store(nullptr, std::memory_order_relaxed);
1906             oldvoice->mLoopBuffer.store(nullptr, std::memory_order_relaxed);
1907             /* If there's no sourceID, the old voice finished so don't start
1908              * the new one at its new offset.
1909              */
1910             if(oldvoice->mSourceID.exchange(0u, std::memory_order_relaxed) != 0u)
1911             {
1912                 /* Otherwise, set the voice to stopping if it's not already (it
1913                  * might already be, if paused), and play the new voice as
1914                  * appropriate.
1915                  */
1916                 Voice::State oldvstate{Voice::Playing};
1917                 oldvoice->mPlayState.compare_exchange_strong(oldvstate, Voice::Stopping,
1918                     std::memory_order_relaxed, std::memory_order_acquire);
1919
1920                 Voice *voice{cur->mVoice};
1921                 voice->mPlayState.store((oldvstate == Voice::Playing) ? Voice::Playing
1922                     : Voice::Stopped, std::memory_order_release);
1923             }
1924             oldvoice->mPendingChange.store(false, std::memory_order_release);
1925         }
1926         if(sendevt && enabledevt.test(al::to_underlying(AsyncEnableBits::SourceState)))
1927             SendSourceStateEvent(ctx, cur->mSourceID, cur->mState);
1928
1929         next = cur->mNext.load(std::memory_order_acquire);
1930     } while(next);
1931     ctx->mCurrentVoiceChange.store(cur, std::memory_order_release);
1932 }
1933
1934 void ProcessParamUpdates(ContextBase *ctx, const al::span<EffectSlot*> slots,
1935     const al::span<EffectSlot*> sorted_slots, const al::span<Voice*> voices)
1936 {
1937     ProcessVoiceChanges(ctx);
1938
1939     IncrementRef(ctx->mUpdateCount);
1940     if(!ctx->mHoldUpdates.load(std::memory_order_acquire)) LIKELY
1941     {
1942         bool force{CalcContextParams(ctx)};
1943         auto sorted_slot_base = al::to_address(sorted_slots.begin());
1944         for(EffectSlot *slot : slots)
1945             force |= CalcEffectSlotParams(slot, sorted_slot_base, ctx);
1946
1947         for(Voice *voice : voices)
1948         {
1949             /* Only update voices that have a source. */
1950             if(voice->mSourceID.load(std::memory_order_relaxed) != 0)
1951                 CalcSourceParams(voice, ctx, force);
1952         }
1953     }
1954     IncrementRef(ctx->mUpdateCount);
1955 }
1956
1957 void ProcessContexts(DeviceBase *device, const uint SamplesToDo)
1958 {
1959     ASSUME(SamplesToDo > 0);
1960
1961     const nanoseconds curtime{device->mClockBase.load(std::memory_order_relaxed) +
1962         nanoseconds{seconds{device->mSamplesDone.load(std::memory_order_relaxed)}}/
1963         device->Frequency};
1964
1965     for(ContextBase *ctx : *device->mContexts.load(std::memory_order_acquire))
1966     {
1967         const auto auxslotspan = al::span{*ctx->mActiveAuxSlots.load(std::memory_order_acquire)};
1968         const auto auxslots = auxslotspan.first(auxslotspan.size()>>1);
1969         const auto sorted_slots = auxslotspan.last(auxslotspan.size()>>1);
1970         const al::span<Voice*> voices{ctx->getVoicesSpanAcquired()};
1971
1972         /* Process pending property updates for objects on the context. */
1973         ProcessParamUpdates(ctx, auxslots, sorted_slots, voices);
1974
1975         /* Clear auxiliary effect slot mixing buffers. */
1976         for(EffectSlot *slot : auxslots)
1977         {
1978             for(auto &buffer : slot->Wet.Buffer)
1979                 buffer.fill(0.0f);
1980         }
1981
1982         /* Process voices that have a playing source. */
1983         for(Voice *voice : voices)
1984         {
1985             const Voice::State vstate{voice->mPlayState.load(std::memory_order_acquire)};
1986             if(vstate != Voice::Stopped && vstate != Voice::Pending)
1987                 voice->mix(vstate, ctx, curtime, SamplesToDo);
1988         }
1989
1990         /* Process effects. */
1991         if(!auxslots.empty())
1992         {
1993             /* Sort the slots into extra storage, so that effect slots come
1994              * before their effect slot target (or their targets' target). Skip
1995              * sorting if it has already been done.
1996              */
1997             if(!sorted_slots[0])
1998             {
1999                 /* First, copy the slots to the sorted list, then partition the
2000                  * sorted list so that all slots without a target slot go to
2001                  * the end.
2002                  */
2003                 std::copy(auxslots.begin(), auxslots.end(), sorted_slots.begin());
2004                 auto split_point = std::partition(sorted_slots.begin(), sorted_slots.end(),
2005                     [](const EffectSlot *slot) noexcept -> bool
2006                     { return slot->Target != nullptr; });
2007                 /* There must be at least one slot without a slot target. */
2008                 assert(split_point != sorted_slots.end());
2009
2010                 /* Simple case: no more than 1 slot has a target slot. Either
2011                  * all slots go right to the output, or the remaining one must
2012                  * target an already-partitioned slot.
2013                  */
2014                 if(split_point - sorted_slots.begin() > 1)
2015                 {
2016                     /* At least two slots target other slots. Starting from the
2017                      * back of the sorted list, continue partitioning the front
2018                      * of the list given each target until all targets are
2019                      * accounted for. This ensures all slots without a target
2020                      * go last, all slots directly targeting those last slots
2021                      * go second-to-last, all slots directly targeting those
2022                      * second-last slots go third-to-last, etc.
2023                      */
2024                     auto next_target = sorted_slots.end();
2025                     do {
2026                         /* This shouldn't happen, but if there's unsorted slots
2027                          * left that don't target any sorted slots, they can't
2028                          * contribute to the output, so leave them.
2029                          */
2030                         if(next_target == split_point) UNLIKELY
2031                             break;
2032
2033                         --next_target;
2034                         split_point = std::partition(sorted_slots.begin(), split_point,
2035                             [next_target](const EffectSlot *slot) noexcept -> bool
2036                             { return slot->Target != *next_target; });
2037                     } while(split_point - sorted_slots.begin() > 1);
2038                 }
2039             }
2040
2041             for(const EffectSlot *slot : sorted_slots)
2042             {
2043                 EffectState *state{slot->mEffectState.get()};
2044                 state->process(SamplesToDo, slot->Wet.Buffer, state->mOutTarget);
2045             }
2046         }
2047
2048         /* Signal the event handler if there are any events to read. */
2049         RingBuffer *ring{ctx->mAsyncEvents.get()};
2050         if(ring->readSpace() > 0)
2051             ctx->mEventSem.post();
2052     }
2053 }
2054
2055
2056 void ApplyDistanceComp(const al::span<FloatBufferLine> Samples, const size_t SamplesToDo,
2057     const al::span<const DistanceComp::ChanData,MaxOutputChannels> chandata)
2058 {
2059     ASSUME(SamplesToDo > 0);
2060
2061     auto distcomp = chandata.begin();
2062     for(auto &chanbuffer : Samples)
2063     {
2064         const float gain{distcomp->Gain};
2065         auto distbuf = al::span{al::assume_aligned<16>(distcomp->Buffer.data()),
2066             distcomp->Buffer.size()};
2067         ++distcomp;
2068
2069         const size_t base{distbuf.size()};
2070         if(base < 1) continue;
2071
2072         const auto inout = al::span{al::assume_aligned<16>(chanbuffer.data()), SamplesToDo};
2073         if(SamplesToDo >= base) LIKELY
2074         {
2075             auto delay_end = std::rotate(inout.begin(), inout.end()-ptrdiff_t(base), inout.end());
2076             std::swap_ranges(inout.begin(), delay_end, distbuf.begin());
2077         }
2078         else
2079         {
2080             auto delay_start = std::swap_ranges(inout.begin(), inout.end(), distbuf.begin());
2081             std::rotate(distbuf.begin(), delay_start, distbuf.begin()+ptrdiff_t(base));
2082         }
2083         std::transform(inout.begin(), inout.end(), inout.begin(),
2084             [gain](float s) { return s*gain; });
2085     }
2086 }
2087
2088 void ApplyDither(const al::span<FloatBufferLine> Samples, uint *dither_seed,
2089     const float quant_scale, const size_t SamplesToDo)
2090 {
2091     static constexpr double invRNGRange{1.0 / std::numeric_limits<uint>::max()};
2092     ASSUME(SamplesToDo > 0);
2093
2094     /* Dithering. Generate whitenoise (uniform distribution of random values
2095      * between -1 and +1) and add it to the sample values, after scaling up to
2096      * the desired quantization depth and before rounding.
2097      */
2098     const float invscale{1.0f / quant_scale};
2099     uint seed{*dither_seed};
2100     auto dither_sample = [&seed,invscale,quant_scale](const float sample) noexcept -> float
2101     {
2102         float val{sample * quant_scale};
2103         uint rng0{dither_rng(&seed)};
2104         uint rng1{dither_rng(&seed)};
2105         val += static_cast<float>(rng0*invRNGRange - rng1*invRNGRange);
2106         return fast_roundf(val) * invscale;
2107     };
2108     for(FloatBufferLine &inout : Samples)
2109         std::transform(inout.begin(), inout.begin()+SamplesToDo, inout.begin(), dither_sample);
2110     *dither_seed = seed;
2111 }
2112
2113
2114 /* Base template left undefined. Should be marked =delete, but Clang 3.8.1
2115  * chokes on that given the inline specializations.
2116  */
2117 template<typename T>
2118 inline T SampleConv(float) noexcept;
2119
2120 template<> inline float SampleConv(float val) noexcept
2121 { return val; }
2122 template<> inline int32_t SampleConv(float val) noexcept
2123 {
2124     /* Floats have a 23-bit mantissa, plus an implied 1 bit and a sign bit.
2125      * This means a normalized float has at most 25 bits of signed precision.
2126      * When scaling and clamping for a signed 32-bit integer, these following
2127      * values are the best a float can give.
2128      */
2129     return fastf2i(std::clamp(val*2147483648.0f, -2147483648.0f, 2147483520.0f));
2130 }
2131 template<> inline int16_t SampleConv(float val) noexcept
2132 { return static_cast<int16_t>(fastf2i(std::clamp(val*32768.0f, -32768.0f, 32767.0f))); }
2133 template<> inline int8_t SampleConv(float val) noexcept
2134 { return static_cast<int8_t>(fastf2i(std::clamp(val*128.0f, -128.0f, 127.0f))); }
2135
2136 /* Define unsigned output variations. */
2137 template<> inline uint32_t SampleConv(float val) noexcept
2138 { return static_cast<uint32_t>(SampleConv<int32_t>(val)) + 2147483648u; }
2139 template<> inline uint16_t SampleConv(float val) noexcept
2140 { return static_cast<uint16_t>(SampleConv<int16_t>(val) + 32768); }
2141 template<> inline uint8_t SampleConv(float val) noexcept
2142 { return static_cast<uint8_t>(SampleConv<int8_t>(val) + 128); }
2143
2144 template<typename T>
2145 void Write(const al::span<const FloatBufferLine> InBuffer, void *OutBuffer, const size_t Offset,
2146     const size_t SamplesToDo, const size_t FrameStep)
2147 {
2148     ASSUME(FrameStep > 0);
2149     ASSUME(SamplesToDo > 0);
2150
2151     const auto output = al::span{static_cast<T*>(OutBuffer), (Offset+SamplesToDo)*FrameStep}
2152         .subspan(Offset*FrameStep);
2153     size_t c{0};
2154     for(const FloatBufferLine &inbuf : InBuffer)
2155     {
2156         auto out = output.begin();
2157         auto conv_sample = [FrameStep,c,&out](const float s) noexcept
2158         {
2159             out[c] = SampleConv<T>(s);
2160             out += ptrdiff_t(FrameStep);
2161         };
2162         std::for_each_n(inbuf.cbegin(), SamplesToDo, conv_sample);
2163         ++c;
2164     }
2165     if(const size_t extra{FrameStep - c})
2166     {
2167         const auto silence = SampleConv<T>(0.0f);
2168         for(size_t i{0};i < SamplesToDo;++i)
2169             std::fill_n(&output[i*FrameStep + c], extra, silence);
2170     }
2171 }
2172
2173 } // namespace
2174
2175 uint DeviceBase::renderSamples(const uint numSamples)
2176 {
2177     const uint samplesToDo{std::min(numSamples, uint{BufferLineSize})};
2178
2179     /* Clear main mixing buffers. */
2180     for(FloatBufferLine &buffer : MixBuffer)
2181         buffer.fill(0.0f);
2182
2183     {
2184         const auto mixLock = getWriteMixLock();
2185
2186         /* Process and mix each context's sources and effects. */
2187         ProcessContexts(this, samplesToDo);
2188
2189         /* Every second's worth of samples is converted and added to clock base
2190          * so that large sample counts don't overflow during conversion. This
2191          * also guarantees a stable conversion.
2192          */
2193         auto samplesDone = mSamplesDone.load(std::memory_order_relaxed) + samplesToDo;
2194         auto clockBase = mClockBase.load(std::memory_order_relaxed) +
2195             std::chrono::seconds{samplesDone/Frequency};
2196         mSamplesDone.store(samplesDone%Frequency, std::memory_order_relaxed);
2197         mClockBase.store(clockBase, std::memory_order_relaxed);
2198     }
2199
2200     /* Apply any needed post-process for finalizing the Dry mix to the RealOut
2201      * (Ambisonic decode, UHJ encode, etc).
2202      */
2203     postProcess(samplesToDo);
2204
2205     /* Apply compression, limiting sample amplitude if needed or desired. */
2206     if(Limiter) Limiter->process(samplesToDo, RealOut.Buffer.data());
2207
2208     /* Apply delays and attenuation for mismatched speaker distances. */
2209     if(ChannelDelays)
2210         ApplyDistanceComp(RealOut.Buffer, samplesToDo, ChannelDelays->mChannels);
2211
2212     /* Apply dithering. The compressor should have left enough headroom for the
2213      * dither noise to not saturate.
2214      */
2215     if(DitherDepth > 0.0f)
2216         ApplyDither(RealOut.Buffer, &DitherSeed, DitherDepth, samplesToDo);
2217
2218     return samplesToDo;
2219 }
2220
2221 void DeviceBase::renderSamples(const al::span<float*> outBuffers, const uint numSamples)
2222 {
2223     FPUCtl mixer_mode{};
2224     uint total{0};
2225     while(const uint todo{numSamples - total})
2226     {
2227         const uint samplesToDo{renderSamples(todo)};
2228
2229         auto srcbuf = RealOut.Buffer.cbegin();
2230         for(auto *dstbuf : outBuffers)
2231         {
2232             const auto dst = al::span{dstbuf, numSamples}.subspan(total);
2233             std::copy_n(srcbuf->cbegin(), samplesToDo, dst.begin());
2234             ++srcbuf;
2235         }
2236
2237         total += samplesToDo;
2238     }
2239 }
2240
2241 void DeviceBase::renderSamples(void *outBuffer, const uint numSamples, const size_t frameStep)
2242 {
2243     FPUCtl mixer_mode{};
2244     uint total{0};
2245     while(const uint todo{numSamples - total})
2246     {
2247         const uint samplesToDo{renderSamples(todo)};
2248
2249         if(outBuffer) LIKELY
2250         {
2251             /* Finally, interleave and convert samples, writing to the device's
2252              * output buffer.
2253              */
2254             switch(FmtType)
2255             {
2256 #define HANDLE_WRITE(T) case T:                                               \
2257     Write<DevFmtType_t<T>>(RealOut.Buffer, outBuffer, total, samplesToDo, frameStep); break;
2258             HANDLE_WRITE(DevFmtByte)
2259             HANDLE_WRITE(DevFmtUByte)
2260             HANDLE_WRITE(DevFmtShort)
2261             HANDLE_WRITE(DevFmtUShort)
2262             HANDLE_WRITE(DevFmtInt)
2263             HANDLE_WRITE(DevFmtUInt)
2264             HANDLE_WRITE(DevFmtFloat)
2265 #undef HANDLE_WRITE
2266             }
2267         }
2268
2269         total += samplesToDo;
2270     }
2271 }
2272
2273 void DeviceBase::handleDisconnect(const char *msg, ...)
2274 {
2275     const auto mixLock = getWriteMixLock();
2276
2277     if(Connected.exchange(false, std::memory_order_acq_rel))
2278     {
2279         AsyncEvent evt{std::in_place_type<AsyncDisconnectEvent>};
2280         auto &disconnect = std::get<AsyncDisconnectEvent>(evt);
2281
2282         /* NOLINTBEGIN(*-array-to-pointer-decay) */
2283         va_list args;
2284         va_start(args, msg);
2285         int msglen{vsnprintf(disconnect.msg.data(), disconnect.msg.size(), msg, args)};
2286         va_end(args);
2287         /* NOLINTEND(*-array-to-pointer-decay) */
2288
2289         if(msglen < 0 || static_cast<size_t>(msglen) >= disconnect.msg.size())
2290             disconnect.msg[sizeof(disconnect.msg)-1] = 0;
2291
2292         for(ContextBase *ctx : *mContexts.load())
2293         {
2294             RingBuffer *ring{ctx->mAsyncEvents.get()};
2295             auto evt_data = ring->getWriteVector().first;
2296             if(evt_data.len > 0)
2297             {
2298                 al::construct_at(reinterpret_cast<AsyncEvent*>(evt_data.buf), evt);
2299                 ring->writeAdvance(1);
2300                 ctx->mEventSem.post();
2301             }
2302
2303             if(!ctx->mStopVoicesOnDisconnect.load())
2304             {
2305                 ProcessVoiceChanges(ctx);
2306                 continue;
2307             }
2308
2309             auto voicelist = ctx->getVoicesSpanAcquired();
2310             auto stop_voice = [](Voice *voice) -> void
2311             {
2312                 voice->mCurrentBuffer.store(nullptr, std::memory_order_relaxed);
2313                 voice->mLoopBuffer.store(nullptr, std::memory_order_relaxed);
2314                 voice->mSourceID.store(0u, std::memory_order_relaxed);
2315                 voice->mPlayState.store(Voice::Stopped, std::memory_order_release);
2316             };
2317             std::for_each(voicelist.begin(), voicelist.end(), stop_voice);
2318         }
2319     }
2320 }