29 #include "inprogext.h"
31 #include "polymorphism.h"
38 template<typename T
, size_t N
>
39 constexpr inline size_t countof(const T(&)[N
]) noexcept
41 #define COUNTOF countof
52 #define HAS_BUILTIN __has_builtin
54 #define HAS_BUILTIN(x) (0)
58 /* LIKELY optimizes the case where the condition is true. The condition is not
59 * required to be true, but it can result in more optimal code for the true
60 * path at the expense of a less optimal false path.
62 #define LIKELY(x) __builtin_expect(!!(x), !0)
63 /* The opposite of LIKELY, optimizing the case where the condition is false. */
64 #define UNLIKELY(x) __builtin_expect(!!(x), 0)
65 /* Unlike LIKELY, ASSUME requires the condition to be true or else it invokes
66 * undefined behavior. It's essentially an assert without actually checking the
67 * condition at run-time, allowing for stronger optimizations than LIKELY.
69 #if HAS_BUILTIN(__builtin_assume)
70 #define ASSUME __builtin_assume
72 #define ASSUME(x) do { if(!(x)) __builtin_unreachable(); } while(0)
77 #define LIKELY(x) (!!(x))
78 #define UNLIKELY(x) (!!(x))
80 #define ASSUME __assume
82 #define ASSUME(x) ((void)0)
87 #define UINT64_MAX U64(18446744073709551615)
91 #if defined(__cplusplus)
93 #elif defined(__GNUC__)
94 #define UNUSED(x) UNUSED_##x __attribute__((unused))
95 #elif defined(__LCLINT__)
96 #define UNUSED(x) /*@unused@*/ x
102 /* Calculates the size of a struct with N elements of a flexible array member.
103 * GCC and Clang allow offsetof(Type, fam[N]) for this, but MSVC seems to have
104 * trouble, so a bit more verbose workaround is needed.
106 #define FAM_SIZE(T, M, N) (offsetof(T, M) + sizeof(((T*)NULL)->M[0])*(N))
109 typedef ALint64SOFT ALint64
;
110 typedef ALuint64SOFT ALuint64
;
113 #if defined(_MSC_VER)
114 #define U64(x) ((ALuint64)(x##ui64))
115 #elif SIZEOF_LONG == 8
116 #define U64(x) ((ALuint64)(x##ul))
117 #elif SIZEOF_LONG_LONG == 8
118 #define U64(x) ((ALuint64)(x##ull))
123 #if defined(_MSC_VER)
124 #define I64(x) ((ALint64)(x##i64))
125 #elif SIZEOF_LONG == 8
126 #define I64(x) ((ALint64)(x##l))
127 #elif SIZEOF_LONG_LONG == 8
128 #define I64(x) ((ALint64)(x##ll))
132 /* Define a CTZ64 macro (count trailing zeros, for 64-bit integers). The result
133 * is *UNDEFINED* if the value is 0.
138 #define CTZ64 __builtin_ctzl
140 #define CTZ64 __builtin_ctzll
143 #elif defined(HAVE_BITSCANFORWARD64_INTRINSIC)
145 inline int msvc64_ctz64(ALuint64 v
)
147 unsigned long idx
= 64;
148 _BitScanForward64(&idx
, v
);
151 #define CTZ64 msvc64_ctz64
153 #elif defined(HAVE_BITSCANFORWARD_INTRINSIC)
155 inline int msvc_ctz64(ALuint64 v
)
157 unsigned long idx
= 64;
158 if(!_BitScanForward(&idx
, v
&0xffffffff))
160 if(_BitScanForward(&idx
, v
>>32))
165 #define CTZ64 msvc_ctz64
169 /* There be black magics here. The popcnt64 method is derived from
170 * https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
171 * while the ctz-utilizing-popcnt algorithm is shown here
172 * http://www.hackersdelight.org/hdcodetxt/ntz.c.txt
173 * as the ntz2 variant. These likely aren't the most efficient methods, but
174 * they're good enough if the GCC or MSVC intrinsics aren't available.
176 inline int fallback_popcnt64(ALuint64 v
)
178 v
= v
- ((v
>> 1) & U64(0x5555555555555555));
179 v
= (v
& U64(0x3333333333333333)) + ((v
>> 2) & U64(0x3333333333333333));
180 v
= (v
+ (v
>> 4)) & U64(0x0f0f0f0f0f0f0f0f);
181 return (int)((v
* U64(0x0101010101010101)) >> 56);
184 inline int fallback_ctz64(ALuint64 value
)
186 return fallback_popcnt64(~value
& (value
- 1));
188 #define CTZ64 fallback_ctz64
191 #if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__)
192 #define IS_LITTLE_ENDIAN (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
196 ALubyte b
[sizeof(ALuint
)];
197 } EndianTest
= { 1 };
198 #define IS_LITTLE_ENDIAN (EndianTest.b[0] == 1)
204 struct DirectHrtfState
;
205 struct FrontStablizer
;
214 struct AmbiUpsampler
;
218 #define DEFAULT_OUTPUT_RATE (44100)
219 #define MIN_OUTPUT_RATE (8000)
222 /* Find the next power-of-2 for non-power-of-2 numbers. */
223 inline ALuint
NextPowerOf2(ALuint value
) noexcept
/** Round up a value to the next multiple of r.
 *
 * Returns the smallest multiple of r that is greater than or equal to value;
 * a value that is already a multiple of r is returned unchanged. r must be
 * non-zero, since the remainder is computed with value%r.
 */
inline size_t RoundUp(size_t value, size_t r) noexcept
{
    const size_t remainder = value % r;
    /* Only pad when the value isn't already aligned. Adding the distance to
     * the next multiple (rather than r-1 up front) avoids an intermediate
     * overflow for values near the top of the size_t range.
     */
    return remainder ? value + (r - remainder) : value;
}
244 /* Fast float-to-int conversion. No particular rounding mode is assumed; the
245 * IEEE-754 default is round-to-nearest with ties-to-even, though an app could
246 * change it on its own threads. On some systems, a truncating conversion may
247 * always be the fastest method.
249 inline ALint
fastf2i(ALfloat f
) noexcept
251 #if defined(HAVE_INTRIN_H) && ((defined(_M_IX86_FP) && (_M_IX86_FP > 0)) || defined(_M_X64))
252 return _mm_cvt_ss2si(_mm_set1_ps(f
));
254 #elif defined(_MSC_VER) && defined(_M_IX86_FP)
261 #elif (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__))
265 __asm__("cvtss2si %1, %0" : "=r"(i
) : "x"(f
));
267 __asm__
__volatile__("fistpl %0" : "=m"(i
) : "t"(f
) : "st");
271 /* On GCC when compiling with -fno-math-errno, lrintf can be inlined to
272 * some simple instructions. Clang does not inline it, always generating a
273 * libc call, while MSVC's implementation is horribly slow, so always fall
274 * back to a normal integer conversion for them.
276 #elif !defined(_MSC_VER) && !defined(__clang__)
286 /* Converts float-to-int using standard behavior (truncation). */
287 inline int float2int(float f
) noexcept
289 #if ((defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__)) && \
290 !defined(__SSE_MATH__)) || (defined(_MSC_VER) && defined(_M_IX86_FP) && _M_IX86_FP == 0)
291 ALint sign
, shift
, mant
;
298 sign
= (conv
.i
>>31) | 1;
299 shift
= ((conv
.i
>>23)&0xff) - (127+23);
302 if(UNLIKELY(shift
>= 31 || shift
< -23))
305 mant
= (conv
.i
&0x7fffff) | 0x800000;
306 if(LIKELY(shift
< 0))
307 return (mant
>> -shift
) * sign
;
308 return (mant
<< shift
) * sign
;
316 /* Rounds a float to the nearest integral value, according to the current
317 * rounding mode. This is essentially an inlined version of rintf, although
318 * it makes fewer promises (e.g. -0 or -0.25 rounded to 0 may result in +0).
320 inline float fast_roundf(float f
) noexcept
322 #if (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__)) && \
323 !defined(__SSE_MATH__)
326 __asm__
__volatile__("frndint" : "=t"(out
) : "0"(f
));
331 /* Integral limit, where sub-integral precision is not available for
334 static const float ilim
[2] = {
335 8388608.0f
/* 0x1.0p+23 */,
336 -8388608.0f
/* -0x1.0p+23 */
345 sign
= (conv
.i
>>31)&0x01;
346 expo
= (conv
.i
>>23)&0xff;
348 if(UNLIKELY(expo
>= 150/*+23*/))
350 /* An exponent (base-2) of 23 or higher is incapable of sub-integral
351 * precision, so it's already an integral value. We don't need to worry
352 * about infinity or NaN here.
356 /* Adding the integral limit to the value (with a matching sign) forces a
357 * result that has no sub-integral precision, and is consequently forced to
358 * round to an integral value. Removing the integral limit then restores
359 * the initial value rounded to the integral. The compiler should not
360 * optimize this out because of non-associative rules on floating-point
361 * math (as long as you don't use -fassociative-math,
362 * -funsafe-math-optimizations, -ffast-math, or -Ofast, in which case this
366 return f
- ilim
[sign
];
420 DevFmtByte
= ALC_BYTE_SOFT
,
421 DevFmtUByte
= ALC_UNSIGNED_BYTE_SOFT
,
422 DevFmtShort
= ALC_SHORT_SOFT
,
423 DevFmtUShort
= ALC_UNSIGNED_SHORT_SOFT
,
424 DevFmtInt
= ALC_INT_SOFT
,
425 DevFmtUInt
= ALC_UNSIGNED_INT_SOFT
,
426 DevFmtFloat
= ALC_FLOAT_SOFT
,
428 DevFmtTypeDefault
= DevFmtFloat
430 enum DevFmtChannels
{
431 DevFmtMono
= ALC_MONO_SOFT
,
432 DevFmtStereo
= ALC_STEREO_SOFT
,
433 DevFmtQuad
= ALC_QUAD_SOFT
,
434 DevFmtX51
= ALC_5POINT1_SOFT
,
435 DevFmtX61
= ALC_6POINT1_SOFT
,
436 DevFmtX71
= ALC_7POINT1_SOFT
,
437 DevFmtAmbi3D
= ALC_BFORMAT3D_SOFT
,
439 /* Similar to 5.1, except using rear channels instead of sides */
440 DevFmtX51Rear
= 0x80000000,
442 DevFmtChannelsDefault
= DevFmtStereo
444 #define MAX_OUTPUT_CHANNELS (16)
446 /* DevFmtType traits, providing the type, etc given a DevFmtType. */
447 template<DevFmtType T
>
448 struct DevFmtTypeTraits
{ };
451 struct DevFmtTypeTraits
<DevFmtByte
> { using Type
= ALbyte
; };
453 struct DevFmtTypeTraits
<DevFmtUByte
> { using Type
= ALubyte
; };
455 struct DevFmtTypeTraits
<DevFmtShort
> { using Type
= ALshort
; };
457 struct DevFmtTypeTraits
<DevFmtUShort
> { using Type
= ALushort
; };
459 struct DevFmtTypeTraits
<DevFmtInt
> { using Type
= ALint
; };
461 struct DevFmtTypeTraits
<DevFmtUInt
> { using Type
= ALuint
; };
463 struct DevFmtTypeTraits
<DevFmtFloat
> { using Type
= ALfloat
; };
466 ALsizei
BytesFromDevFmt(enum DevFmtType type
);
467 ALsizei
ChannelsFromDevFmt(enum DevFmtChannels chans
, ALsizei ambiorder
);
468 inline ALsizei
FrameSizeFromDevFmt(enum DevFmtChannels chans
, enum DevFmtType type
, ALsizei ambiorder
)
470 return ChannelsFromDevFmt(chans
, ambiorder
) * BytesFromDevFmt(type
);
473 enum class AmbiLayout
{
474 FuMa
= ALC_FUMA_SOFT
, /* FuMa channel order */
475 ACN
= ALC_ACN_SOFT
, /* ACN channel order */
480 enum class AmbiNorm
{
481 FuMa
= ALC_FUMA_SOFT
, /* FuMa normalization */
482 SN3D
= ALC_SN3D_SOFT
, /* SN3D normalization */
483 N3D
= ALC_N3D_SOFT
, /* N3D normalization */
503 /* The maximum number of Ambisonics coefficients. For a given order (o), the
504 * size needed will be (o+1)**2, thus zero-order has 1, first-order has 4,
505 * second-order has 9, third-order has 16, and fourth-order has 25.
507 #define MAX_AMBI_ORDER 3
508 #define MAX_AMBI_COEFFS ((MAX_AMBI_ORDER+1) * (MAX_AMBI_ORDER+1))
510 /* A bitmask of ambisonic channels with height information. If none of these
511 * channels are used/needed, there's no height (e.g. with most surround sound
512 * speaker setups). This only specifies up to 4th order, which is the highest
513 * order a 32-bit mask value can specify (a 64-bit mask could handle up to 7th
514 * order). This is ACN ordering, with bit 0 being ACN 0, etc.
516 #define AMBI_PERIPHONIC_MASK (0xfe7ce4)
518 /* The maximum number of Ambisonic coefficients for 2D (non-periphonic)
519 * representation. This is 2 per each order above zero-order, plus 1 for zero-
520 * order. Or simply, o*2 + 1.
522 #define MAX_AMBI2D_COEFFS (MAX_AMBI_ORDER*2 + 1)
525 typedef ALfloat ChannelConfig
[MAX_AMBI_COEFFS
];
526 typedef struct BFChannelConfig
{
531 typedef union AmbiConfig
{
532 /* Ambisonic coefficients for mixing to the dry buffer. */
533 ChannelConfig Coeffs
[MAX_OUTPUT_CHANNELS
];
534 /* Coefficient channel mapping for mixing to the dry buffer. */
535 BFChannelConfig Map
[MAX_OUTPUT_CHANNELS
];
539 typedef struct BufferSubList
{
540 ALuint64 FreeMask
{0u};
541 struct ALbuffer
*Buffers
{nullptr}; /* 64 */
544 typedef struct EffectSubList
{
545 ALuint64 FreeMask
{0u};
546 struct ALeffect
*Effects
{nullptr}; /* 64 */
549 typedef struct FilterSubList
{
550 ALuint64 FreeMask
{0u};
551 struct ALfilter
*Filters
{nullptr}; /* 64 */
555 typedef struct EnumeratedHrtf
{
558 struct HrtfEntry
*hrtf
;
562 /* Maximum delay in samples for speaker distance compensation. */
563 #define MAX_DELAY_LENGTH 1024
568 ALsizei Length
{0}; /* Valid range is [0...MAX_DELAY_LENGTH). */
569 ALfloat
*Buffer
{nullptr};
570 } mChannel
[MAX_OUTPUT_CHANNELS
];
571 al::vector
<ALfloat
,16> mSamples
;
574 void resize(size_t amt
) { mSamples
.resize(amt
); }
575 void shrink_to_fit() { mSamples
.shrink_to_fit(); }
576 void clear() noexcept
578 for(auto &chan
: mChannel
)
582 chan
.Buffer
= nullptr;
587 ALfloat
*data() noexcept
{ return mSamples
.data(); }
588 const ALfloat
*data() const noexcept
{ return mSamples
.data(); }
590 DistData
& operator[](size_t o
) noexcept
{ return mChannel
[o
]; }
591 const DistData
& operator[](size_t o
) const noexcept
{ return mChannel
[o
]; }
594 /* Size for temporary storage of buffer data, in ALfloats. Larger values need
595 * more memory, while smaller values may need more iterations. The value needs
596 * to be a sensible size, however, as it constrains the max stepping value used
597 * for mixing, as well as the maximum number of samples per mixing iteration.
599 #define BUFFERSIZE 2048
601 typedef struct MixParams
{
603 /* Number of coefficients in each Ambi.Coeffs to mix together (4 for first-
604 * order, 9 for second-order, etc). If the count is 0, Ambi.Map is used
605 * instead to map each output to a coefficient index.
607 ALsizei CoeffCount
{0};
609 ALfloat (*Buffer
)[BUFFERSIZE
]{nullptr};
610 ALsizei NumChannels
{0};
613 typedef struct RealMixParams
{
614 enum Channel ChannelName
[MAX_OUTPUT_CHANNELS
]{};
616 ALfloat (*Buffer
)[BUFFERSIZE
]{nullptr};
617 ALsizei NumChannels
{0};
620 typedef void (*POSTPROCESS
)(ALCdevice
*device
, ALsizei SamplesToDo
);
622 struct ALCdevice_struct
{
625 ATOMIC(ALenum
) Connected
{AL_TRUE
};
631 DevFmtChannels FmtChans
{};
632 DevFmtType FmtType
{};
633 ALboolean IsHeadphones
{AL_FALSE
};
634 ALsizei mAmbiOrder
{0};
635 /* For DevFmtAmbi* output only, specifies the channel order and
638 AmbiLayout mAmbiLayout
{AmbiLayout::Default
};
639 AmbiNorm mAmbiScale
{AmbiNorm::Default
};
641 ALCenum LimiterState
{ALC_DONT_CARE_SOFT
};
643 std::string DeviceName
;
645 ATOMIC(ALCenum
) LastError
{ALC_NO_ERROR
};
647 // Maximum number of sources that can be created
649 // Maximum number of slots that can be created
650 ALuint AuxiliaryEffectSlotMax
{};
652 ALCuint NumMonoSources
{};
653 ALCuint NumStereoSources
{};
654 ALsizei NumAuxSends
{};
656 // Map of Buffers for this device
657 al::vector
<BufferSubList
> BufferList
;
660 // Map of Effects for this device
661 al::vector
<EffectSubList
> EffectList
;
664 // Map of Filters for this device
665 al::vector
<FilterSubList
> FilterList
;
668 POSTPROCESS PostProcess
{};
670 /* HRTF state and info */
671 std::unique_ptr
<DirectHrtfState
> mHrtfState
;
672 std::string HrtfName
;
673 Hrtf
*HrtfHandle
{nullptr};
674 al::vector
<EnumeratedHrtf
> HrtfList
;
675 ALCenum HrtfStatus
{ALC_FALSE
};
677 /* UHJ encoder state */
678 std::unique_ptr
<Uhj2Encoder
> Uhj_Encoder
;
680 /* High quality Ambisonic decoder */
681 std::unique_ptr
<BFormatDec
> AmbiDecoder
;
683 /* Stereo-to-binaural filter */
684 std::unique_ptr
<bs2b
> Bs2b
;
686 /* First-order ambisonic upsampler for higher-order output */
687 std::unique_ptr
<AmbiUpsampler
> AmbiUp
;
689 /* Rendering mode. */
690 RenderMode Render_Mode
{NormalRender
};
695 ALuint SamplesDone
{0u};
696 std::chrono::nanoseconds ClockBase
{0};
697 std::chrono::nanoseconds FixedLatency
{0};
699 /* Temp storage used for mixer processing. */
700 alignas(16) ALfloat TempBuffer
[4][BUFFERSIZE
];
702 /* Mixing buffer used by the Dry mix, FOAOut, and Real out. */
703 al::vector
<std::array
<ALfloat
,BUFFERSIZE
>, 16> MixBuffer
;
705 /* The "dry" path corresponds to the main output. */
707 ALsizei NumChannelsPerOrder
[MAX_AMBI_ORDER
+1]{};
709 /* First-order ambisonics output, to be upsampled to the dry buffer if different. */
712 /* "Real" output, which will be written to the device buffer. May alias the
715 RealMixParams RealOut
;
717 std::unique_ptr
<FrontStablizer
> Stablizer
;
719 std::unique_ptr
<Compressor
> Limiter
;
721 /* The average speaker distance as determined by the ambdec configuration
722 * (or alternatively, by the NFC-HOA reference delay). Only used for NFC.
724 ALfloat AvgSpeakerDist
{0.0f
};
726 /* Delay buffers used to compensate for speaker distances. */
727 DistanceComp ChannelDelay
;
729 /* Dithering control. */
730 ALfloat DitherDepth
{0.0f
};
731 ALuint DitherSeed
{0u};
733 /* Running count of the mixer invocations, in 31.1 fixed point. This
734 * actually increments *twice* when mixing, first at the start and then at
735 * the end, so the bottom bit indicates if the device is currently mixing
736 * and the upper bits indicate how many mixes have been done.
738 RefCount MixCount
{0u};
740 // Contexts created on this device
741 ATOMIC(ALCcontext
*) ContextList
{nullptr};
744 ALCbackend
*Backend
{nullptr};
746 ATOMIC(ALCdevice
*) next
{nullptr};
749 ALCdevice_struct(DeviceType type
);
750 ALCdevice_struct(const ALCdevice_struct
&) = delete;
751 ALCdevice_struct
& operator=(const ALCdevice_struct
&) = delete;
754 DEF_NEWDEL(ALCdevice
)
757 // Frequency was requested by the app or config file
758 #define DEVICE_FREQUENCY_REQUEST (1u<<1)
759 // Channel configuration was requested by the config file
760 #define DEVICE_CHANNELS_REQUEST (1u<<2)
761 // Sample type was requested by the config file
762 #define DEVICE_SAMPLE_TYPE_REQUEST (1u<<3)
764 // Specifies if the DSP is paused at user request
765 #define DEVICE_PAUSED (1u<<30)
767 // Specifies if the device is currently running
768 #define DEVICE_RUNNING (1u<<31)
771 /* Nanosecond resolution for the device clock time. */
772 #define DEVICE_CLOCK_RES U64(1000000000)
775 /* Must be at most 15 characters (16 including terminating null) for
776 * compatibility with pthread_setname_np limitations. */
777 #define MIXER_THREAD_NAME "alsoft-mixer"
779 #define RECORD_THREAD_NAME "alsoft-record"
783 /* End event thread processing. */
784 EventType_KillThread
= 0,
786 /* User event types. */
787 EventType_SourceStateChange
= 1<<0,
788 EventType_BufferCompleted
= 1<<1,
789 EventType_Error
= 1<<2,
790 EventType_Performance
= 1<<3,
791 EventType_Deprecated
= 1<<4,
792 EventType_Disconnected
= 1<<5,
794 /* Internal events. */
795 EventType_ReleaseEffectState
= 65536,
798 typedef struct AsyncEvent
{
799 unsigned int EnumType
;
808 EffectState
*mEffectState
;
811 #define ASYNC_EVENT(t) { t, { 0 } }
814 void AllocateVoices(ALCcontext
*context
, ALsizei num_voices
, ALsizei old_sends
);
817 extern ALint RTPrioLevel
;
818 void SetRTPriority(void);
820 void SetDefaultChannelOrder(ALCdevice
*device
);
821 void SetDefaultWFXChannelOrder(ALCdevice
*device
);
823 const ALCchar
*DevFmtTypeString(enum DevFmtType type
);
824 const ALCchar
*DevFmtChannelsString(enum DevFmtChannels chans
);
826 inline ALint
GetChannelIndex(const enum Channel (&names
)[MAX_OUTPUT_CHANNELS
], enum Channel chan
)
828 auto iter
= std::find(std::begin(names
), std::end(names
), chan
);
829 if(iter
== std::end(names
)) return -1;
830 return std::distance(names
, iter
);
833 * GetChannelIdxByName
835 * Returns the index for the given channel name (e.g. FrontCenter), or -1 if it
838 inline ALint
GetChannelIdxByName(const RealMixParams
*real
, enum Channel chan
)
839 { return GetChannelIndex(real
->ChannelName
, chan
); }
842 inline void LockBufferList(ALCdevice
*device
) { almtx_lock(&device
->BufferLock
); }
843 inline void UnlockBufferList(ALCdevice
*device
) { almtx_unlock(&device
->BufferLock
); }
845 inline void LockEffectList(ALCdevice
*device
) { almtx_lock(&device
->EffectLock
); }
846 inline void UnlockEffectList(ALCdevice
*device
) { almtx_unlock(&device
->EffectLock
); }
848 inline void LockFilterList(ALCdevice
*device
) { almtx_lock(&device
->FilterLock
); }
849 inline void UnlockFilterList(ALCdevice
*device
) { almtx_unlock(&device
->FilterLock
); }
852 void StartEventThrd(ALCcontext
*ctx
);
853 void StopEventThrd(ALCcontext
*ctx
);
856 std::vector
<std::string
> SearchDataFiles(const char *match
, const char *subdir
);