27 #include "inprogext.h"
29 #include "polymorphism.h"
36 template<typename T
, size_t N
>
37 constexpr inline size_t countof(const T(&)[N
]) noexcept
39 #define COUNTOF countof
50 #define HAS_BUILTIN __has_builtin
52 #define HAS_BUILTIN(x) (0)
56 /* LIKELY optimizes the case where the condition is true. The condition is not
57 * required to be true, but it can result in more optimal code for the true
58 * path at the expense of a less optimal false path.
60 #define LIKELY(x) __builtin_expect(!!(x), !0)
61 /* The opposite of LIKELY, optimizing the case where the condition is false. */
62 #define UNLIKELY(x) __builtin_expect(!!(x), 0)
63 /* Unlike LIKELY, ASSUME requires the condition to be true or else it invokes
64 * undefined behavior. It's essentially an assert without actually checking the
65 * condition at run-time, allowing for stronger optimizations than LIKELY.
67 #if HAS_BUILTIN(__builtin_assume)
68 #define ASSUME __builtin_assume
70 #define ASSUME(x) do { if(!(x)) __builtin_unreachable(); } while(0)
75 #define LIKELY(x) (!!(x))
76 #define UNLIKELY(x) (!!(x))
78 #define ASSUME __assume
80 #define ASSUME(x) ((void)0)
85 #define UINT64_MAX U64(18446744073709551615)
89 #if defined(__cplusplus)
91 #elif defined(__GNUC__)
92 #define UNUSED(x) UNUSED_##x __attribute__((unused))
93 #elif defined(__LCLINT__)
94 #define UNUSED(x) /*@unused@*/ x
100 /* Calculates the size of a struct with N elements of a flexible array member.
101 * GCC and Clang allow offsetof(Type, fam[N]) for this, but MSVC seems to have
102 * trouble, so a bit more verbose workaround is needed.
104 #define FAM_SIZE(T, M, N) (offsetof(T, M) + sizeof(((T*)NULL)->M[0])*(N))
107 typedef ALint64SOFT ALint64
;
108 typedef ALuint64SOFT ALuint64
;
111 #if defined(_MSC_VER)
112 #define U64(x) ((ALuint64)(x##ui64))
113 #elif SIZEOF_LONG == 8
114 #define U64(x) ((ALuint64)(x##ul))
115 #elif SIZEOF_LONG_LONG == 8
116 #define U64(x) ((ALuint64)(x##ull))
121 #if defined(_MSC_VER)
122 #define I64(x) ((ALint64)(x##i64))
123 #elif SIZEOF_LONG == 8
124 #define I64(x) ((ALint64)(x##l))
125 #elif SIZEOF_LONG_LONG == 8
126 #define I64(x) ((ALint64)(x##ll))
130 /* Define a CTZ64 macro (count trailing zeros, for 64-bit integers). The result
131 * is *UNDEFINED* if the value is 0.
136 #define CTZ64 __builtin_ctzl
138 #define CTZ64 __builtin_ctzll
141 #elif defined(HAVE_BITSCANFORWARD64_INTRINSIC)
143 inline int msvc64_ctz64(ALuint64 v
)
145 unsigned long idx
= 64;
146 _BitScanForward64(&idx
, v
);
149 #define CTZ64 msvc64_ctz64
151 #elif defined(HAVE_BITSCANFORWARD_INTRINSIC)
153 inline int msvc_ctz64(ALuint64 v
)
155 unsigned long idx
= 64;
156 if(!_BitScanForward(&idx
, v
&0xffffffff))
158 if(_BitScanForward(&idx
, v
>>32))
163 #define CTZ64 msvc_ctz64
167 /* There be black magics here. The popcnt64 method is derived from
168 * https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
169 * while the ctz-utilizing-popcnt algorithm is shown here
170 * http://www.hackersdelight.org/hdcodetxt/ntz.c.txt
171 * as the ntz2 variant. These likely aren't the most efficient methods, but
172 * they're good enough if the GCC or MSVC intrinsics aren't available.
174 inline int fallback_popcnt64(ALuint64 v
)
176 v
= v
- ((v
>> 1) & U64(0x5555555555555555));
177 v
= (v
& U64(0x3333333333333333)) + ((v
>> 2) & U64(0x3333333333333333));
178 v
= (v
+ (v
>> 4)) & U64(0x0f0f0f0f0f0f0f0f);
179 return (int)((v
* U64(0x0101010101010101)) >> 56);
182 inline int fallback_ctz64(ALuint64 value
)
184 return fallback_popcnt64(~value
& (value
- 1));
186 #define CTZ64 fallback_ctz64
189 #if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__)
190 #define IS_LITTLE_ENDIAN (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
194 ALubyte b
[sizeof(ALuint
)];
195 } EndianTest
= { 1 };
196 #define IS_LITTLE_ENDIAN (EndianTest.b[0] == 1)
202 struct DirectHrtfState
;
203 struct FrontStablizer
;
213 #define DEFAULT_OUTPUT_RATE (44100)
214 #define MIN_OUTPUT_RATE (8000)
217 /* Find the next power-of-2 for non-power-of-2 numbers. */
218 inline ALuint
NextPowerOf2(ALuint value
)
/**
 * Round up a value to the next multiple of r.
 *
 * A value that is already a multiple of r is returned unchanged. r must be
 * non-zero, since the remainder is taken with it.
 */
inline size_t RoundUp(size_t value, size_t r)
{
    /* Bias the value so that dropping the remainder lands on the next
     * multiple instead of the previous one.
     */
    value += r-1;
    return value - (value%r);
}
239 /* Fast float-to-int conversion. No particular rounding mode is assumed; the
240 * IEEE-754 default is round-to-nearest with ties-to-even, though an app could
241 * change it on its own threads. On some systems, a truncating conversion may
242 * always be the fastest method.
244 inline ALint
fastf2i(ALfloat f
)
246 #if defined(HAVE_INTRIN_H) && ((defined(_M_IX86_FP) && (_M_IX86_FP > 0)) || defined(_M_X64))
247 return _mm_cvt_ss2si(_mm_set1_ps(f
));
249 #elif defined(_MSC_VER) && defined(_M_IX86_FP)
256 #elif (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__))
260 __asm__("cvtss2si %1, %0" : "=r"(i
) : "x"(f
));
262 __asm__
__volatile__("fistpl %0" : "=m"(i
) : "t"(f
) : "st");
266 /* On GCC when compiling with -fno-math-errno, lrintf can be inlined to
267 * some simple instructions. Clang does not inline it, always generating a
268 * libc call, while MSVC's implementation is horribly slow, so always fall
269 * back to a normal integer conversion for them.
271 #elif !defined(_MSC_VER) && !defined(__clang__)
281 /* Converts float-to-int using standard behavior (truncation). */
282 inline int float2int(float f
)
284 #if ((defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__)) && \
285 !defined(__SSE_MATH__)) || (defined(_MSC_VER) && defined(_M_IX86_FP) && _M_IX86_FP == 0)
286 ALint sign
, shift
, mant
;
293 sign
= (conv
.i
>>31) | 1;
294 shift
= ((conv
.i
>>23)&0xff) - (127+23);
297 if(UNLIKELY(shift
>= 31 || shift
< -23))
300 mant
= (conv
.i
&0x7fffff) | 0x800000;
301 if(LIKELY(shift
< 0))
302 return (mant
>> -shift
) * sign
;
303 return (mant
<< shift
) * sign
;
311 /* Rounds a float to the nearest integral value, according to the current
312 * rounding mode. This is essentially an inlined version of rintf, although
313 * it makes fewer promises (e.g. -0 or -0.25 rounded to 0 may result in +0).
315 inline float fast_roundf(float f
)
317 #if (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__)) && \
318 !defined(__SSE_MATH__)
321 __asm__
__volatile__("frndint" : "=t"(out
) : "0"(f
));
326 /* Integral limit, where sub-integral precision is not available for
329 static const float ilim
[2] = {
330 8388608.0f
/* 0x1.0p+23 */,
331 -8388608.0f
/* -0x1.0p+23 */
340 sign
= (conv
.i
>>31)&0x01;
341 expo
= (conv
.i
>>23)&0xff;
343 if(UNLIKELY(expo
>= 150/*+23*/))
345 /* An exponent (base-2) of 23 or higher is incapable of sub-integral
346 * precision, so it's already an integral value. We don't need to worry
347 * about infinity or NaN here.
351 /* Adding the integral limit to the value (with a matching sign) forces a
352 * result that has no sub-integral precision, and is consequently forced to
353 * round to an integral value. Removing the integral limit then restores
354 * the initial value rounded to the integral. The compiler should not
355 * optimize this out because of non-associative rules on floating-point
356 * math (as long as you don't use -fassociative-math,
357 * -funsafe-math-optimizations, -ffast-math, or -Ofast, in which case this
361 return f
- ilim
[sign
];
415 DevFmtByte
= ALC_BYTE_SOFT
,
416 DevFmtUByte
= ALC_UNSIGNED_BYTE_SOFT
,
417 DevFmtShort
= ALC_SHORT_SOFT
,
418 DevFmtUShort
= ALC_UNSIGNED_SHORT_SOFT
,
419 DevFmtInt
= ALC_INT_SOFT
,
420 DevFmtUInt
= ALC_UNSIGNED_INT_SOFT
,
421 DevFmtFloat
= ALC_FLOAT_SOFT
,
423 DevFmtTypeDefault
= DevFmtFloat
425 enum DevFmtChannels
{
426 DevFmtMono
= ALC_MONO_SOFT
,
427 DevFmtStereo
= ALC_STEREO_SOFT
,
428 DevFmtQuad
= ALC_QUAD_SOFT
,
429 DevFmtX51
= ALC_5POINT1_SOFT
,
430 DevFmtX61
= ALC_6POINT1_SOFT
,
431 DevFmtX71
= ALC_7POINT1_SOFT
,
432 DevFmtAmbi3D
= ALC_BFORMAT3D_SOFT
,
434 /* Similar to 5.1, except using rear channels instead of sides */
435 DevFmtX51Rear
= 0x80000000,
437 DevFmtChannelsDefault
= DevFmtStereo
439 #define MAX_OUTPUT_CHANNELS (16)
441 /* DevFmtType traits, providing the type, etc given a DevFmtType. */
442 template<DevFmtType T
>
443 struct DevFmtTypeTraits
{ };
446 struct DevFmtTypeTraits
<DevFmtByte
> { using Type
= ALbyte
; };
448 struct DevFmtTypeTraits
<DevFmtUByte
> { using Type
= ALubyte
; };
450 struct DevFmtTypeTraits
<DevFmtShort
> { using Type
= ALshort
; };
452 struct DevFmtTypeTraits
<DevFmtUShort
> { using Type
= ALushort
; };
454 struct DevFmtTypeTraits
<DevFmtInt
> { using Type
= ALint
; };
456 struct DevFmtTypeTraits
<DevFmtUInt
> { using Type
= ALuint
; };
458 struct DevFmtTypeTraits
<DevFmtFloat
> { using Type
= ALfloat
; };
461 ALsizei
BytesFromDevFmt(enum DevFmtType type
);
462 ALsizei
ChannelsFromDevFmt(enum DevFmtChannels chans
, ALsizei ambiorder
);
463 inline ALsizei
FrameSizeFromDevFmt(enum DevFmtChannels chans
, enum DevFmtType type
, ALsizei ambiorder
)
465 return ChannelsFromDevFmt(chans
, ambiorder
) * BytesFromDevFmt(type
);
468 enum class AmbiLayout
{
469 FuMa
= ALC_FUMA_SOFT
, /* FuMa channel order */
470 ACN
= ALC_ACN_SOFT
, /* ACN channel order */
475 enum class AmbiNorm
{
476 FuMa
= ALC_FUMA_SOFT
, /* FuMa normalization */
477 SN3D
= ALC_SN3D_SOFT
, /* SN3D normalization */
478 N3D
= ALC_N3D_SOFT
, /* N3D normalization */
498 /* The maximum number of Ambisonics coefficients. For a given order (o), the
499 * size needed will be (o+1)**2, thus zero-order has 1, first-order has 4,
500 * second-order has 9, third-order has 16, and fourth-order has 25.
502 #define MAX_AMBI_ORDER 3
503 #define MAX_AMBI_COEFFS ((MAX_AMBI_ORDER+1) * (MAX_AMBI_ORDER+1))
505 /* A bitmask of ambisonic channels with height information. If none of these
506 * channels are used/needed, there's no height (e.g. with most surround sound
507 * speaker setups). This only specifies up to 4th order, which is the highest
508 * order a 32-bit mask value can specify (a 64-bit mask could handle up to 7th
509 * order). This is ACN ordering, with bit 0 being ACN 0, etc.
511 #define AMBI_PERIPHONIC_MASK (0xfe7ce4)
513 /* The maximum number of Ambisonic coefficients for 2D (non-periphonic)
514 * representation. This is 2 per each order above zero-order, plus 1 for zero-
515 * order. Or simply, o*2 + 1.
517 #define MAX_AMBI2D_COEFFS (MAX_AMBI_ORDER*2 + 1)
520 typedef ALfloat ChannelConfig
[MAX_AMBI_COEFFS
];
521 typedef struct BFChannelConfig
{
526 typedef union AmbiConfig
{
527 /* Ambisonic coefficients for mixing to the dry buffer. */
528 ChannelConfig Coeffs
[MAX_OUTPUT_CHANNELS
];
529 /* Coefficient channel mapping for mixing to the dry buffer. */
530 BFChannelConfig Map
[MAX_OUTPUT_CHANNELS
];
534 typedef struct BufferSubList
{
535 ALuint64 FreeMask
{0u};
536 struct ALbuffer
*Buffers
{nullptr}; /* 64 */
539 typedef struct EffectSubList
{
540 ALuint64 FreeMask
{0u};
541 struct ALeffect
*Effects
{nullptr}; /* 64 */
544 typedef struct FilterSubList
{
545 ALuint64 FreeMask
{0u};
546 struct ALfilter
*Filters
{nullptr}; /* 64 */
550 typedef struct EnumeratedHrtf
{
553 struct HrtfEntry
*hrtf
;
557 /* Maximum delay in samples for speaker distance compensation. */
558 #define MAX_DELAY_LENGTH 1024
563 ALsizei Length
{0}; /* Valid range is [0...MAX_DELAY_LENGTH). */
564 ALfloat
*Buffer
{nullptr};
565 } mChannel
[MAX_OUTPUT_CHANNELS
];
566 al::vector
<ALfloat
,16> mSamples
;
569 void resize(size_t amt
) { mSamples
.resize(amt
); }
570 void shrink_to_fit() { mSamples
.shrink_to_fit(); }
571 void clear() noexcept
573 for(auto &chan
: mChannel
)
577 chan
.Buffer
= nullptr;
582 ALfloat
*data() noexcept
{ return mSamples
.data(); }
583 const ALfloat
*data() const noexcept
{ return mSamples
.data(); }
585 DistData
& operator[](size_t o
) noexcept
{ return mChannel
[o
]; }
586 const DistData
& operator[](size_t o
) const noexcept
{ return mChannel
[o
]; }
589 /* Size for temporary storage of buffer data, in ALfloats. Larger values need
590 * more memory, while smaller values may need more iterations. The value needs
591 * to be a sensible size, however, as it constrains the max stepping value used
592 * for mixing, as well as the maximum number of samples per mixing iteration.
594 #define BUFFERSIZE 2048
596 typedef struct MixParams
{
598 /* Number of coefficients in each Ambi.Coeffs to mix together (4 for first-
599 * order, 9 for second-order, etc). If the count is 0, Ambi.Map is used
600 * instead to map each output to a coefficient index.
602 ALsizei CoeffCount
{0};
604 ALfloat (*Buffer
)[BUFFERSIZE
]{nullptr};
605 ALsizei NumChannels
{0};
608 typedef struct RealMixParams
{
609 enum Channel ChannelName
[MAX_OUTPUT_CHANNELS
]{};
611 ALfloat (*Buffer
)[BUFFERSIZE
]{nullptr};
612 ALsizei NumChannels
{0};
615 typedef void (*POSTPROCESS
)(ALCdevice
*device
, ALsizei SamplesToDo
);
617 struct ALCdevice_struct
{
620 ATOMIC(ALenum
) Connected
{AL_TRUE
};
626 DevFmtChannels FmtChans
{};
627 DevFmtType FmtType
{};
628 ALboolean IsHeadphones
{AL_FALSE
};
629 ALsizei mAmbiOrder
{0};
630 /* For DevFmtAmbi* output only, specifies the channel order and
633 AmbiLayout mAmbiLayout
{AmbiLayout::Default
};
634 AmbiNorm mAmbiScale
{AmbiNorm::Default
};
636 ALCenum LimiterState
{ALC_DONT_CARE_SOFT
};
638 std::string DeviceName
;
640 ATOMIC(ALCenum
) LastError
{ALC_NO_ERROR
};
642 // Maximum number of sources that can be created
644 // Maximum number of slots that can be created
645 ALuint AuxiliaryEffectSlotMax
{};
647 ALCuint NumMonoSources
{};
648 ALCuint NumStereoSources
{};
649 ALsizei NumAuxSends
{};
651 // Map of Buffers for this device
652 al::vector
<BufferSubList
> BufferList
;
655 // Map of Effects for this device
656 al::vector
<EffectSubList
> EffectList
;
659 // Map of Filters for this device
660 al::vector
<FilterSubList
> FilterList
;
663 POSTPROCESS PostProcess
{};
665 /* HRTF state and info */
666 struct DirectHrtfState
*Hrtf
{nullptr};
667 std::string HrtfName
;
668 struct Hrtf
*HrtfHandle
{nullptr};
669 al::vector
<EnumeratedHrtf
> HrtfList
;
670 ALCenum HrtfStatus
{ALC_FALSE
};
672 /* UHJ encoder state */
673 std::unique_ptr
<Uhj2Encoder
> Uhj_Encoder
;
675 /* High quality Ambisonic decoder */
676 struct BFormatDec
*AmbiDecoder
{nullptr};
678 /* Stereo-to-binaural filter */
679 struct bs2b
*Bs2b
{nullptr};
681 /* First-order ambisonic upsampler for higher-order output */
682 struct AmbiUpsampler
*AmbiUp
{nullptr};
684 /* Rendering mode. */
685 RenderMode Render_Mode
{NormalRender
};
690 ALuint64 ClockBase
{0u};
691 ALuint SamplesDone
{0u};
692 ALuint FixedLatency
{0u};
694 /* Temp storage used for mixer processing. */
695 alignas(16) ALfloat TempBuffer
[4][BUFFERSIZE
];
697 /* Mixing buffer used by the Dry mix, FOAOut, and Real out. */
698 al::vector
<std::array
<ALfloat
,BUFFERSIZE
>, 16> MixBuffer
;
700 /* The "dry" path corresponds to the main output. */
702 ALsizei NumChannelsPerOrder
[MAX_AMBI_ORDER
+1]{};
704 /* First-order ambisonics output, to be upsampled to the dry buffer if different. */
707 /* "Real" output, which will be written to the device buffer. May alias the
710 RealMixParams RealOut
;
712 std::unique_ptr
<FrontStablizer
> Stablizer
;
714 std::unique_ptr
<Compressor
> Limiter
;
716 /* The average speaker distance as determined by the ambdec configuration
717 * (or alternatively, by the NFC-HOA reference delay). Only used for NFC.
719 ALfloat AvgSpeakerDist
{0.0f
};
721 /* Delay buffers used to compensate for speaker distances. */
722 DistanceComp ChannelDelay
;
724 /* Dithering control. */
725 ALfloat DitherDepth
{0.0f
};
726 ALuint DitherSeed
{0u};
728 /* Running count of the mixer invocations, in 31.1 fixed point. This
729 * actually increments *twice* when mixing, first at the start and then at
730 * the end, so the bottom bit indicates if the device is currently mixing
731 * and the upper bits indicate how many mixes have been done.
733 RefCount MixCount
{0u};
735 // Contexts created on this device
736 ATOMIC(ALCcontext
*) ContextList
{nullptr};
739 struct ALCbackend
*Backend
{nullptr};
741 ATOMIC(ALCdevice
*) next
{nullptr};
744 ALCdevice_struct(DeviceType type
);
745 ALCdevice_struct(const ALCdevice_struct
&) = delete;
746 ALCdevice_struct
& operator=(const ALCdevice_struct
&) = delete;
749 DEF_NEWDEL(ALCdevice
)
752 // Frequency was requested by the app or config file
753 #define DEVICE_FREQUENCY_REQUEST (1u<<1)
754 // Channel configuration was requested by the config file
755 #define DEVICE_CHANNELS_REQUEST (1u<<2)
756 // Sample type was requested by the config file
757 #define DEVICE_SAMPLE_TYPE_REQUEST (1u<<3)
759 // Specifies if the DSP is paused at user request
760 #define DEVICE_PAUSED (1u<<30)
762 // Specifies if the device is currently running
763 #define DEVICE_RUNNING (1u<<31)
766 /* Nanosecond resolution for the device clock time. */
767 #define DEVICE_CLOCK_RES U64(1000000000)
770 /* Must be no more than 15 characters (16 including the terminating null)
771 * for compatibility with pthread_setname_np limitations. */
772 #define MIXER_THREAD_NAME "alsoft-mixer"
774 #define RECORD_THREAD_NAME "alsoft-record"
778 /* End event thread processing. */
779 EventType_KillThread
= 0,
781 /* User event types. */
782 EventType_SourceStateChange
= 1<<0,
783 EventType_BufferCompleted
= 1<<1,
784 EventType_Error
= 1<<2,
785 EventType_Performance
= 1<<3,
786 EventType_Deprecated
= 1<<4,
787 EventType_Disconnected
= 1<<5,
789 /* Internal events. */
790 EventType_ReleaseEffectState
= 65536,
793 typedef struct AsyncEvent
{
794 unsigned int EnumType
;
803 EffectState
*mEffectState
;
806 #define ASYNC_EVENT(t) { t, { 0 } }
809 void AllocateVoices(ALCcontext
*context
, ALsizei num_voices
, ALsizei old_sends
);
812 extern ALint RTPrioLevel
;
813 void SetRTPriority(void);
815 void SetDefaultChannelOrder(ALCdevice
*device
);
816 void SetDefaultWFXChannelOrder(ALCdevice
*device
);
818 const ALCchar
*DevFmtTypeString(enum DevFmtType type
);
819 const ALCchar
*DevFmtChannelsString(enum DevFmtChannels chans
);
821 inline ALint
GetChannelIndex(const enum Channel names
[MAX_OUTPUT_CHANNELS
], enum Channel chan
)
824 for(i
= 0;i
< MAX_OUTPUT_CHANNELS
;i
++)
832 * GetChannelIdxByName
834 * Returns the index for the given channel name (e.g. FrontCenter), or -1 if it
837 inline ALint
GetChannelIdxByName(const RealMixParams
*real
, enum Channel chan
)
838 { return GetChannelIndex(real
->ChannelName
, chan
); }
841 inline void LockBufferList(ALCdevice
*device
) { almtx_lock(&device
->BufferLock
); }
842 inline void UnlockBufferList(ALCdevice
*device
) { almtx_unlock(&device
->BufferLock
); }
844 inline void LockEffectList(ALCdevice
*device
) { almtx_lock(&device
->EffectLock
); }
845 inline void UnlockEffectList(ALCdevice
*device
) { almtx_unlock(&device
->EffectLock
); }
847 inline void LockFilterList(ALCdevice
*device
) { almtx_lock(&device
->FilterLock
); }
848 inline void UnlockFilterList(ALCdevice
*device
) { almtx_unlock(&device
->FilterLock
); }
851 void StartEventThrd(ALCcontext
*ctx
);
852 void StopEventThrd(ALCcontext
*ctx
);
855 std::vector
<std::string
> SearchDataFiles(const char *match
, const char *subdir
);