27 #include "inprogext.h"
29 #include "polymorphism.h"
36 template<typename T
, size_t N
>
37 constexpr inline size_t countof(const T(&)[N
]) noexcept
39 #define COUNTOF countof
50 #define HAS_BUILTIN __has_builtin
52 #define HAS_BUILTIN(x) (0)
56 /* LIKELY optimizes the case where the condition is true. The condition is not
57 * required to be true, but it can result in more optimal code for the true
58 * path at the expense of a less optimal false path.
60 #define LIKELY(x) __builtin_expect(!!(x), !0)
61 /* The opposite of LIKELY, optimizing the case where the condition is false. */
62 #define UNLIKELY(x) __builtin_expect(!!(x), 0)
63 /* Unlike LIKELY, ASSUME requires the condition to be true or else it invokes
64 * undefined behavior. It's essentially an assert without actually checking the
65 * condition at run-time, allowing for stronger optimizations than LIKELY.
67 #if HAS_BUILTIN(__builtin_assume)
68 #define ASSUME __builtin_assume
70 #define ASSUME(x) do { if(!(x)) __builtin_unreachable(); } while(0)
75 #define LIKELY(x) (!!(x))
76 #define UNLIKELY(x) (!!(x))
78 #define ASSUME __assume
80 #define ASSUME(x) ((void)0)
85 #define UINT64_MAX U64(18446744073709551615)
89 #if defined(__cplusplus)
91 #elif defined(__GNUC__)
92 #define UNUSED(x) UNUSED_##x __attribute__((unused))
93 #elif defined(__LCLINT__)
94 #define UNUSED(x) /*@unused@*/ x
100 /* Calculates the size of a struct with N elements of a flexible array member.
101 * GCC and Clang allow offsetof(Type, fam[N]) for this, but MSVC seems to have
102 * trouble, so a bit more verbose workaround is needed.
104 #define FAM_SIZE(T, M, N) (offsetof(T, M) + sizeof(((T*)NULL)->M[0])*(N))
107 typedef ALint64SOFT ALint64
;
108 typedef ALuint64SOFT ALuint64
;
111 #if defined(_MSC_VER)
112 #define U64(x) ((ALuint64)(x##ui64))
113 #elif SIZEOF_LONG == 8
114 #define U64(x) ((ALuint64)(x##ul))
115 #elif SIZEOF_LONG_LONG == 8
116 #define U64(x) ((ALuint64)(x##ull))
121 #if defined(_MSC_VER)
122 #define I64(x) ((ALint64)(x##i64))
123 #elif SIZEOF_LONG == 8
124 #define I64(x) ((ALint64)(x##l))
125 #elif SIZEOF_LONG_LONG == 8
126 #define I64(x) ((ALint64)(x##ll))
130 /* Define a CTZ64 macro (count trailing zeros, for 64-bit integers). The result
131 * is *UNDEFINED* if the value is 0.
136 #define CTZ64 __builtin_ctzl
138 #define CTZ64 __builtin_ctzll
141 #elif defined(HAVE_BITSCANFORWARD64_INTRINSIC)
143 inline int msvc64_ctz64(ALuint64 v
)
145 unsigned long idx
= 64;
146 _BitScanForward64(&idx
, v
);
149 #define CTZ64 msvc64_ctz64
151 #elif defined(HAVE_BITSCANFORWARD_INTRINSIC)
153 inline int msvc_ctz64(ALuint64 v
)
155 unsigned long idx
= 64;
156 if(!_BitScanForward(&idx
, v
&0xffffffff))
158 if(_BitScanForward(&idx
, v
>>32))
163 #define CTZ64 msvc_ctz64
167 /* There be black magics here. The popcnt64 method is derived from
168 * https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
169 * while the ctz-utilizing-popcnt algorithm is shown here
170 * http://www.hackersdelight.org/hdcodetxt/ntz.c.txt
171 * as the ntz2 variant. These likely aren't the most efficient methods, but
172 * they're good enough if the GCC or MSVC intrinsics aren't available.
174 inline int fallback_popcnt64(ALuint64 v
)
176 v
= v
- ((v
>> 1) & U64(0x5555555555555555));
177 v
= (v
& U64(0x3333333333333333)) + ((v
>> 2) & U64(0x3333333333333333));
178 v
= (v
+ (v
>> 4)) & U64(0x0f0f0f0f0f0f0f0f);
179 return (int)((v
* U64(0x0101010101010101)) >> 56);
182 inline int fallback_ctz64(ALuint64 value
)
184 return fallback_popcnt64(~value
& (value
- 1));
186 #define CTZ64 fallback_ctz64
189 #if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__)
190 #define IS_LITTLE_ENDIAN (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
194 ALubyte b
[sizeof(ALuint
)];
195 } EndianTest
= { 1 };
196 #define IS_LITTLE_ENDIAN (EndianTest.b[0] == 1)
202 struct DirectHrtfState
;
203 struct FrontStablizer
;
212 struct AmbiUpsampler
;
215 #define DEFAULT_OUTPUT_RATE (44100)
216 #define MIN_OUTPUT_RATE (8000)
219 /* Find the next power-of-2 for non-power-of-2 numbers. */
220 inline ALuint
NextPowerOf2(ALuint value
)
234 /** Round up a value to the next multiple. */
235 inline size_t RoundUp(size_t value
, size_t r
)
238 return value
- (value
%r
);
241 /* Fast float-to-int conversion. No particular rounding mode is assumed; the
242 * IEEE-754 default is round-to-nearest with ties-to-even, though an app could
243 * change it on its own threads. On some systems, a truncating conversion may
244 * always be the fastest method.
246 inline ALint
fastf2i(ALfloat f
)
248 #if defined(HAVE_INTRIN_H) && ((defined(_M_IX86_FP) && (_M_IX86_FP > 0)) || defined(_M_X64))
249 return _mm_cvt_ss2si(_mm_set1_ps(f
));
251 #elif defined(_MSC_VER) && defined(_M_IX86_FP)
258 #elif (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__))
262 __asm__("cvtss2si %1, %0" : "=r"(i
) : "x"(f
));
264 __asm__
__volatile__("fistpl %0" : "=m"(i
) : "t"(f
) : "st");
268 /* On GCC when compiling with -fno-math-errno, lrintf can be inlined to
269 * some simple instructions. Clang does not inline it, always generating a
270 * libc call, while MSVC's implementation is horribly slow, so always fall
271 * back to a normal integer conversion for them.
273 #elif !defined(_MSC_VER) && !defined(__clang__)
283 /* Converts float-to-int using standard behavior (truncation). */
284 inline int float2int(float f
)
286 #if ((defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__)) && \
287 !defined(__SSE_MATH__)) || (defined(_MSC_VER) && defined(_M_IX86_FP) && _M_IX86_FP == 0)
288 ALint sign
, shift
, mant
;
295 sign
= (conv
.i
>>31) | 1;
296 shift
= ((conv
.i
>>23)&0xff) - (127+23);
299 if(UNLIKELY(shift
>= 31 || shift
< -23))
302 mant
= (conv
.i
&0x7fffff) | 0x800000;
303 if(LIKELY(shift
< 0))
304 return (mant
>> -shift
) * sign
;
305 return (mant
<< shift
) * sign
;
313 /* Rounds a float to the nearest integral value, according to the current
 * rounding mode. This is essentially an inlined version of rintf, although it
 * makes fewer promises (e.g. -0 or -0.25 rounded to 0 may result in +0).
317 inline float fast_roundf(float f
)
319 #if (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__)) && \
320 !defined(__SSE_MATH__)
323 __asm__
__volatile__("frndint" : "=t"(out
) : "0"(f
));
328 /* Integral limit, where sub-integral precision is not available for
331 static const float ilim
[2] = {
332 8388608.0f
/* 0x1.0p+23 */,
333 -8388608.0f
/* -0x1.0p+23 */
342 sign
= (conv
.i
>>31)&0x01;
343 expo
= (conv
.i
>>23)&0xff;
345 if(UNLIKELY(expo
>= 150/*+23*/))
347 /* An exponent (base-2) of 23 or higher is incapable of sub-integral
348 * precision, so it's already an integral value. We don't need to worry
349 * about infinity or NaN here.
353 /* Adding the integral limit to the value (with a matching sign) forces a
354 * result that has no sub-integral precision, and is consequently forced to
355 * round to an integral value. Removing the integral limit then restores
356 * the initial value rounded to the integral. The compiler should not
357 * optimize this out because of non-associative rules on floating-point
358 * math (as long as you don't use -fassociative-math,
359 * -funsafe-math-optimizations, -ffast-math, or -Ofast, in which case this
363 return f
- ilim
[sign
];
417 DevFmtByte
= ALC_BYTE_SOFT
,
418 DevFmtUByte
= ALC_UNSIGNED_BYTE_SOFT
,
419 DevFmtShort
= ALC_SHORT_SOFT
,
420 DevFmtUShort
= ALC_UNSIGNED_SHORT_SOFT
,
421 DevFmtInt
= ALC_INT_SOFT
,
422 DevFmtUInt
= ALC_UNSIGNED_INT_SOFT
,
423 DevFmtFloat
= ALC_FLOAT_SOFT
,
425 DevFmtTypeDefault
= DevFmtFloat
427 enum DevFmtChannels
{
428 DevFmtMono
= ALC_MONO_SOFT
,
429 DevFmtStereo
= ALC_STEREO_SOFT
,
430 DevFmtQuad
= ALC_QUAD_SOFT
,
431 DevFmtX51
= ALC_5POINT1_SOFT
,
432 DevFmtX61
= ALC_6POINT1_SOFT
,
433 DevFmtX71
= ALC_7POINT1_SOFT
,
434 DevFmtAmbi3D
= ALC_BFORMAT3D_SOFT
,
436 /* Similar to 5.1, except using rear channels instead of sides */
437 DevFmtX51Rear
= 0x80000000,
439 DevFmtChannelsDefault
= DevFmtStereo
441 #define MAX_OUTPUT_CHANNELS (16)
443 /* DevFmtType traits, providing the type, etc given a DevFmtType. */
444 template<DevFmtType T
>
445 struct DevFmtTypeTraits
{ };
448 struct DevFmtTypeTraits
<DevFmtByte
> { using Type
= ALbyte
; };
450 struct DevFmtTypeTraits
<DevFmtUByte
> { using Type
= ALubyte
; };
452 struct DevFmtTypeTraits
<DevFmtShort
> { using Type
= ALshort
; };
454 struct DevFmtTypeTraits
<DevFmtUShort
> { using Type
= ALushort
; };
456 struct DevFmtTypeTraits
<DevFmtInt
> { using Type
= ALint
; };
458 struct DevFmtTypeTraits
<DevFmtUInt
> { using Type
= ALuint
; };
460 struct DevFmtTypeTraits
<DevFmtFloat
> { using Type
= ALfloat
; };
463 ALsizei
BytesFromDevFmt(enum DevFmtType type
);
464 ALsizei
ChannelsFromDevFmt(enum DevFmtChannels chans
, ALsizei ambiorder
);
465 inline ALsizei
FrameSizeFromDevFmt(enum DevFmtChannels chans
, enum DevFmtType type
, ALsizei ambiorder
)
467 return ChannelsFromDevFmt(chans
, ambiorder
) * BytesFromDevFmt(type
);
470 enum class AmbiLayout
{
471 FuMa
= ALC_FUMA_SOFT
, /* FuMa channel order */
472 ACN
= ALC_ACN_SOFT
, /* ACN channel order */
477 enum class AmbiNorm
{
478 FuMa
= ALC_FUMA_SOFT
, /* FuMa normalization */
479 SN3D
= ALC_SN3D_SOFT
, /* SN3D normalization */
480 N3D
= ALC_N3D_SOFT
, /* N3D normalization */
500 /* The maximum number of Ambisonics coefficients. For a given order (o), the
501 * size needed will be (o+1)**2, thus zero-order has 1, first-order has 4,
502 * second-order has 9, third-order has 16, and fourth-order has 25.
504 #define MAX_AMBI_ORDER 3
505 #define MAX_AMBI_COEFFS ((MAX_AMBI_ORDER+1) * (MAX_AMBI_ORDER+1))
507 /* A bitmask of ambisonic channels with height information. If none of these
508 * channels are used/needed, there's no height (e.g. with most surround sound
509 * speaker setups). This only specifies up to 4th order, which is the highest
510 * order a 32-bit mask value can specify (a 64-bit mask could handle up to 7th
511 * order). This is ACN ordering, with bit 0 being ACN 0, etc.
513 #define AMBI_PERIPHONIC_MASK (0xfe7ce4)
515 /* The maximum number of Ambisonic coefficients for 2D (non-periphonic)
516 * representation. This is 2 per each order above zero-order, plus 1 for zero-
517 * order. Or simply, o*2 + 1.
519 #define MAX_AMBI2D_COEFFS (MAX_AMBI_ORDER*2 + 1)
522 typedef ALfloat ChannelConfig
[MAX_AMBI_COEFFS
];
523 typedef struct BFChannelConfig
{
528 typedef union AmbiConfig
{
529 /* Ambisonic coefficients for mixing to the dry buffer. */
530 ChannelConfig Coeffs
[MAX_OUTPUT_CHANNELS
];
531 /* Coefficient channel mapping for mixing to the dry buffer. */
532 BFChannelConfig Map
[MAX_OUTPUT_CHANNELS
];
536 typedef struct BufferSubList
{
537 ALuint64 FreeMask
{0u};
538 struct ALbuffer
*Buffers
{nullptr}; /* 64 */
541 typedef struct EffectSubList
{
542 ALuint64 FreeMask
{0u};
543 struct ALeffect
*Effects
{nullptr}; /* 64 */
546 typedef struct FilterSubList
{
547 ALuint64 FreeMask
{0u};
548 struct ALfilter
*Filters
{nullptr}; /* 64 */
552 typedef struct EnumeratedHrtf
{
555 struct HrtfEntry
*hrtf
;
559 /* Maximum delay in samples for speaker distance compensation. */
560 #define MAX_DELAY_LENGTH 1024
565 ALsizei Length
{0}; /* Valid range is [0...MAX_DELAY_LENGTH). */
566 ALfloat
*Buffer
{nullptr};
567 } mChannel
[MAX_OUTPUT_CHANNELS
];
568 al::vector
<ALfloat
,16> mSamples
;
571 void resize(size_t amt
) { mSamples
.resize(amt
); }
572 void shrink_to_fit() { mSamples
.shrink_to_fit(); }
573 void clear() noexcept
575 for(auto &chan
: mChannel
)
579 chan
.Buffer
= nullptr;
584 ALfloat
*data() noexcept
{ return mSamples
.data(); }
585 const ALfloat
*data() const noexcept
{ return mSamples
.data(); }
587 DistData
& operator[](size_t o
) noexcept
{ return mChannel
[o
]; }
588 const DistData
& operator[](size_t o
) const noexcept
{ return mChannel
[o
]; }
591 /* Size for temporary storage of buffer data, in ALfloats. Larger values need
592 * more memory, while smaller values may need more iterations. The value needs
593 * to be a sensible size, however, as it constrains the max stepping value used
594 * for mixing, as well as the maximum number of samples per mixing iteration.
596 #define BUFFERSIZE 2048
598 typedef struct MixParams
{
600 /* Number of coefficients in each Ambi.Coeffs to mix together (4 for first-
601 * order, 9 for second-order, etc). If the count is 0, Ambi.Map is used
602 * instead to map each output to a coefficient index.
604 ALsizei CoeffCount
{0};
606 ALfloat (*Buffer
)[BUFFERSIZE
]{nullptr};
607 ALsizei NumChannels
{0};
610 typedef struct RealMixParams
{
611 enum Channel ChannelName
[MAX_OUTPUT_CHANNELS
]{};
613 ALfloat (*Buffer
)[BUFFERSIZE
]{nullptr};
614 ALsizei NumChannels
{0};
617 typedef void (*POSTPROCESS
)(ALCdevice
*device
, ALsizei SamplesToDo
);
619 struct ALCdevice_struct
{
622 ATOMIC(ALenum
) Connected
{AL_TRUE
};
628 DevFmtChannels FmtChans
{};
629 DevFmtType FmtType
{};
630 ALboolean IsHeadphones
{AL_FALSE
};
631 ALsizei mAmbiOrder
{0};
632 /* For DevFmtAmbi* output only, specifies the channel order and
635 AmbiLayout mAmbiLayout
{AmbiLayout::Default
};
636 AmbiNorm mAmbiScale
{AmbiNorm::Default
};
638 ALCenum LimiterState
{ALC_DONT_CARE_SOFT
};
640 std::string DeviceName
;
642 ATOMIC(ALCenum
) LastError
{ALC_NO_ERROR
};
644 // Maximum number of sources that can be created
646 // Maximum number of slots that can be created
647 ALuint AuxiliaryEffectSlotMax
{};
649 ALCuint NumMonoSources
{};
650 ALCuint NumStereoSources
{};
651 ALsizei NumAuxSends
{};
653 // Map of Buffers for this device
654 al::vector
<BufferSubList
> BufferList
;
657 // Map of Effects for this device
658 al::vector
<EffectSubList
> EffectList
;
661 // Map of Filters for this device
662 al::vector
<FilterSubList
> FilterList
;
665 POSTPROCESS PostProcess
{};
667 /* HRTF state and info */
668 struct DirectHrtfState
*Hrtf
{nullptr};
669 std::string HrtfName
;
670 struct Hrtf
*HrtfHandle
{nullptr};
671 al::vector
<EnumeratedHrtf
> HrtfList
;
672 ALCenum HrtfStatus
{ALC_FALSE
};
674 /* UHJ encoder state */
675 std::unique_ptr
<Uhj2Encoder
> Uhj_Encoder
;
677 /* High quality Ambisonic decoder */
678 std::unique_ptr
<BFormatDec
> AmbiDecoder
;
680 /* Stereo-to-binaural filter */
681 struct bs2b
*Bs2b
{nullptr};
683 /* First-order ambisonic upsampler for higher-order output */
684 std::unique_ptr
<AmbiUpsampler
> AmbiUp
;
686 /* Rendering mode. */
687 RenderMode Render_Mode
{NormalRender
};
692 ALuint64 ClockBase
{0u};
693 ALuint SamplesDone
{0u};
694 ALuint FixedLatency
{0u};
696 /* Temp storage used for mixer processing. */
697 alignas(16) ALfloat TempBuffer
[4][BUFFERSIZE
];
699 /* Mixing buffer used by the Dry mix, FOAOut, and Real out. */
700 al::vector
<std::array
<ALfloat
,BUFFERSIZE
>, 16> MixBuffer
;
702 /* The "dry" path corresponds to the main output. */
704 ALsizei NumChannelsPerOrder
[MAX_AMBI_ORDER
+1]{};
706 /* First-order ambisonics output, to be upsampled to the dry buffer if different. */
709 /* "Real" output, which will be written to the device buffer. May alias the
712 RealMixParams RealOut
;
714 std::unique_ptr
<FrontStablizer
> Stablizer
;
716 std::unique_ptr
<Compressor
> Limiter
;
718 /* The average speaker distance as determined by the ambdec configuration
719 * (or alternatively, by the NFC-HOA reference delay). Only used for NFC.
721 ALfloat AvgSpeakerDist
{0.0f
};
723 /* Delay buffers used to compensate for speaker distances. */
724 DistanceComp ChannelDelay
;
726 /* Dithering control. */
727 ALfloat DitherDepth
{0.0f
};
728 ALuint DitherSeed
{0u};
730 /* Running count of the mixer invocations, in 31.1 fixed point. This
731 * actually increments *twice* when mixing, first at the start and then at
732 * the end, so the bottom bit indicates if the device is currently mixing
 * and the upper bits indicate how many mixes have been done.
735 RefCount MixCount
{0u};
737 // Contexts created on this device
738 ATOMIC(ALCcontext
*) ContextList
{nullptr};
741 struct ALCbackend
*Backend
{nullptr};
743 ATOMIC(ALCdevice
*) next
{nullptr};
746 ALCdevice_struct(DeviceType type
);
747 ALCdevice_struct(const ALCdevice_struct
&) = delete;
748 ALCdevice_struct
& operator=(const ALCdevice_struct
&) = delete;
751 DEF_NEWDEL(ALCdevice
)
754 // Frequency was requested by the app or config file
755 #define DEVICE_FREQUENCY_REQUEST (1u<<1)
756 // Channel configuration was requested by the config file
757 #define DEVICE_CHANNELS_REQUEST (1u<<2)
758 // Sample type was requested by the config file
759 #define DEVICE_SAMPLE_TYPE_REQUEST (1u<<3)
761 // Specifies if the DSP is paused at user request
762 #define DEVICE_PAUSED (1u<<30)
764 // Specifies if the device is currently running
765 #define DEVICE_RUNNING (1u<<31)
768 /* Nanosecond resolution for the device clock time. */
769 #define DEVICE_CLOCK_RES U64(1000000000)
/* Must be no more than 15 characters (16 including the terminating null) for
 * compatibility with pthread_setname_np limitations. */
774 #define MIXER_THREAD_NAME "alsoft-mixer"
776 #define RECORD_THREAD_NAME "alsoft-record"
780 /* End event thread processing. */
781 EventType_KillThread
= 0,
783 /* User event types. */
784 EventType_SourceStateChange
= 1<<0,
785 EventType_BufferCompleted
= 1<<1,
786 EventType_Error
= 1<<2,
787 EventType_Performance
= 1<<3,
788 EventType_Deprecated
= 1<<4,
789 EventType_Disconnected
= 1<<5,
791 /* Internal events. */
792 EventType_ReleaseEffectState
= 65536,
795 typedef struct AsyncEvent
{
796 unsigned int EnumType
;
805 EffectState
*mEffectState
;
808 #define ASYNC_EVENT(t) { t, { 0 } }
811 void AllocateVoices(ALCcontext
*context
, ALsizei num_voices
, ALsizei old_sends
);
814 extern ALint RTPrioLevel
;
815 void SetRTPriority(void);
817 void SetDefaultChannelOrder(ALCdevice
*device
);
818 void SetDefaultWFXChannelOrder(ALCdevice
*device
);
820 const ALCchar
*DevFmtTypeString(enum DevFmtType type
);
821 const ALCchar
*DevFmtChannelsString(enum DevFmtChannels chans
);
823 inline ALint
GetChannelIndex(const enum Channel names
[MAX_OUTPUT_CHANNELS
], enum Channel chan
)
826 for(i
= 0;i
< MAX_OUTPUT_CHANNELS
;i
++)
834 * GetChannelIdxByName
836 * Returns the index for the given channel name (e.g. FrontCenter), or -1 if it
839 inline ALint
GetChannelIdxByName(const RealMixParams
*real
, enum Channel chan
)
840 { return GetChannelIndex(real
->ChannelName
, chan
); }
843 inline void LockBufferList(ALCdevice
*device
) { almtx_lock(&device
->BufferLock
); }
844 inline void UnlockBufferList(ALCdevice
*device
) { almtx_unlock(&device
->BufferLock
); }
846 inline void LockEffectList(ALCdevice
*device
) { almtx_lock(&device
->EffectLock
); }
847 inline void UnlockEffectList(ALCdevice
*device
) { almtx_unlock(&device
->EffectLock
); }
849 inline void LockFilterList(ALCdevice
*device
) { almtx_lock(&device
->FilterLock
); }
850 inline void UnlockFilterList(ALCdevice
*device
) { almtx_unlock(&device
->FilterLock
); }
853 void StartEventThrd(ALCcontext
*ctx
);
854 void StopEventThrd(ALCcontext
*ctx
);
857 std::vector
<std::string
> SearchDataFiles(const char *match
, const char *subdir
);