27 #include "inprogext.h"
29 #include "polymorphism.h"
36 template<typename T
, size_t N
>
37 constexpr inline size_t countof(const T(&)[N
]) noexcept
39 #define COUNTOF countof
50 #define HAS_BUILTIN __has_builtin
52 #define HAS_BUILTIN(x) (0)
56 /* LIKELY optimizes the case where the condition is true. The condition is not
57 * required to be true, but it can result in more optimal code for the true
58 * path at the expense of a less optimal false path.
60 #define LIKELY(x) __builtin_expect(!!(x), !0)
61 /* The opposite of LIKELY, optimizing the case where the condition is false. */
62 #define UNLIKELY(x) __builtin_expect(!!(x), 0)
63 /* Unlike LIKELY, ASSUME requires the condition to be true or else it invokes
64 * undefined behavior. It's essentially an assert without actually checking the
65 * condition at run-time, allowing for stronger optimizations than LIKELY.
67 #if HAS_BUILTIN(__builtin_assume)
68 #define ASSUME __builtin_assume
70 #define ASSUME(x) do { if(!(x)) __builtin_unreachable(); } while(0)
75 #define LIKELY(x) (!!(x))
76 #define UNLIKELY(x) (!!(x))
78 #define ASSUME __assume
80 #define ASSUME(x) ((void)0)
85 #define UINT64_MAX U64(18446744073709551615)
89 #if defined(__cplusplus)
91 #elif defined(__GNUC__)
92 #define UNUSED(x) UNUSED_##x __attribute__((unused))
93 #elif defined(__LCLINT__)
94 #define UNUSED(x) /*@unused@*/ x
100 /* Calculates the size of a struct with N elements of a flexible array member.
101 * GCC and Clang allow offsetof(Type, fam[N]) for this, but MSVC seems to have
102 * trouble, so a bit more verbose workaround is needed.
104 #define FAM_SIZE(T, M, N) (offsetof(T, M) + sizeof(((T*)NULL)->M[0])*(N))
107 typedef ALint64SOFT ALint64
;
108 typedef ALuint64SOFT ALuint64
;
111 #if defined(_MSC_VER)
112 #define U64(x) ((ALuint64)(x##ui64))
113 #elif SIZEOF_LONG == 8
114 #define U64(x) ((ALuint64)(x##ul))
115 #elif SIZEOF_LONG_LONG == 8
116 #define U64(x) ((ALuint64)(x##ull))
121 #if defined(_MSC_VER)
122 #define I64(x) ((ALint64)(x##i64))
123 #elif SIZEOF_LONG == 8
124 #define I64(x) ((ALint64)(x##l))
125 #elif SIZEOF_LONG_LONG == 8
126 #define I64(x) ((ALint64)(x##ll))
130 /* Define a CTZ64 macro (count trailing zeros, for 64-bit integers). The result
131 * is *UNDEFINED* if the value is 0.
136 #define CTZ64 __builtin_ctzl
138 #define CTZ64 __builtin_ctzll
141 #elif defined(HAVE_BITSCANFORWARD64_INTRINSIC)
143 inline int msvc64_ctz64(ALuint64 v
)
145 unsigned long idx
= 64;
146 _BitScanForward64(&idx
, v
);
149 #define CTZ64 msvc64_ctz64
151 #elif defined(HAVE_BITSCANFORWARD_INTRINSIC)
153 inline int msvc_ctz64(ALuint64 v
)
155 unsigned long idx
= 64;
156 if(!_BitScanForward(&idx
, v
&0xffffffff))
158 if(_BitScanForward(&idx
, v
>>32))
163 #define CTZ64 msvc_ctz64
167 /* There be black magics here. The popcnt64 method is derived from
168 * https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
169 * while the ctz-utilizing-popcnt algorithm is shown here
170 * http://www.hackersdelight.org/hdcodetxt/ntz.c.txt
171 * as the ntz2 variant. These likely aren't the most efficient methods, but
172 * they're good enough if the GCC or MSVC intrinsics aren't available.
174 inline int fallback_popcnt64(ALuint64 v
)
176 v
= v
- ((v
>> 1) & U64(0x5555555555555555));
177 v
= (v
& U64(0x3333333333333333)) + ((v
>> 2) & U64(0x3333333333333333));
178 v
= (v
+ (v
>> 4)) & U64(0x0f0f0f0f0f0f0f0f);
179 return (int)((v
* U64(0x0101010101010101)) >> 56);
182 inline int fallback_ctz64(ALuint64 value
)
184 return fallback_popcnt64(~value
& (value
- 1));
186 #define CTZ64 fallback_ctz64
189 #if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__)
190 #define IS_LITTLE_ENDIAN (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
194 ALubyte b
[sizeof(ALuint
)];
195 } EndianTest
= { 1 };
196 #define IS_LITTLE_ENDIAN (EndianTest.b[0] == 1)
202 struct DirectHrtfState
;
203 struct FrontStablizer
;
212 struct AmbiUpsampler
;
216 #define DEFAULT_OUTPUT_RATE (44100)
217 #define MIN_OUTPUT_RATE (8000)
220 /* Find the next power-of-2 for non-power-of-2 numbers. */
221 inline ALuint
NextPowerOf2(ALuint value
)
/**
 * Round up a value to the next multiple of r.
 *
 * Returns value unchanged when it is already a multiple of r, otherwise the
 * smallest multiple of r that is greater than value. r must be non-zero, as
 * the remainder operation divides by it.
 */
inline size_t RoundUp(size_t value, size_t r)
{
    const size_t remainder{value % r};
    if(remainder == 0)
        return value;
    /* Advance to the next multiple rather than subtracting the remainder,
     * which would round down. Adding only the missing amount also avoids the
     * intermediate overflow of computing value + r - 1 first.
     */
    return value + (r - remainder);
}
242 /* Fast float-to-int conversion. No particular rounding mode is assumed; the
243 * IEEE-754 default is round-to-nearest with ties-to-even, though an app could
244 * change it on its own threads. On some systems, a truncating conversion may
245 * always be the fastest method.
247 inline ALint
fastf2i(ALfloat f
)
249 #if defined(HAVE_INTRIN_H) && ((defined(_M_IX86_FP) && (_M_IX86_FP > 0)) || defined(_M_X64))
250 return _mm_cvt_ss2si(_mm_set1_ps(f
));
252 #elif defined(_MSC_VER) && defined(_M_IX86_FP)
259 #elif (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__))
263 __asm__("cvtss2si %1, %0" : "=r"(i
) : "x"(f
));
265 __asm__
__volatile__("fistpl %0" : "=m"(i
) : "t"(f
) : "st");
269 /* On GCC when compiling with -fno-math-errno, lrintf can be inlined to
270 * some simple instructions. Clang does not inline it, always generating a
271 * libc call, while MSVC's implementation is horribly slow, so always fall
272 * back to a normal integer conversion for them.
274 #elif !defined(_MSC_VER) && !defined(__clang__)
284 /* Converts float-to-int using standard behavior (truncation). */
285 inline int float2int(float f
)
287 #if ((defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__)) && \
288 !defined(__SSE_MATH__)) || (defined(_MSC_VER) && defined(_M_IX86_FP) && _M_IX86_FP == 0)
289 ALint sign
, shift
, mant
;
296 sign
= (conv
.i
>>31) | 1;
297 shift
= ((conv
.i
>>23)&0xff) - (127+23);
300 if(UNLIKELY(shift
>= 31 || shift
< -23))
303 mant
= (conv
.i
&0x7fffff) | 0x800000;
304 if(LIKELY(shift
< 0))
305 return (mant
>> -shift
) * sign
;
306 return (mant
<< shift
) * sign
;
314 /* Rounds a float to the nearest integral value, according to the current
315 * rounding mode. This is essentially an inlined version of rintf, although
316 * it makes fewer promises (e.g. -0 or -0.25 rounded to 0 may result in +0).
318 inline float fast_roundf(float f
)
320 #if (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__)) && \
321 !defined(__SSE_MATH__)
324 __asm__
__volatile__("frndint" : "=t"(out
) : "0"(f
));
329 /* Integral limit, where sub-integral precision is not available for
332 static const float ilim
[2] = {
333 8388608.0f
/* 0x1.0p+23 */,
334 -8388608.0f
/* -0x1.0p+23 */
343 sign
= (conv
.i
>>31)&0x01;
344 expo
= (conv
.i
>>23)&0xff;
346 if(UNLIKELY(expo
>= 150/*+23*/))
348 /* An exponent (base-2) of 23 or higher is incapable of sub-integral
349 * precision, so it's already an integral value. We don't need to worry
350 * about infinity or NaN here.
354 /* Adding the integral limit to the value (with a matching sign) forces a
355 * result that has no sub-integral precision, and is consequently forced to
356 * round to an integral value. Removing the integral limit then restores
357 * the initial value rounded to the integral. The compiler should not
358 * optimize this out because of non-associative rules on floating-point
359 * math (as long as you don't use -fassociative-math,
360 * -funsafe-math-optimizations, -ffast-math, or -Ofast, in which case this
364 return f
- ilim
[sign
];
418 DevFmtByte
= ALC_BYTE_SOFT
,
419 DevFmtUByte
= ALC_UNSIGNED_BYTE_SOFT
,
420 DevFmtShort
= ALC_SHORT_SOFT
,
421 DevFmtUShort
= ALC_UNSIGNED_SHORT_SOFT
,
422 DevFmtInt
= ALC_INT_SOFT
,
423 DevFmtUInt
= ALC_UNSIGNED_INT_SOFT
,
424 DevFmtFloat
= ALC_FLOAT_SOFT
,
426 DevFmtTypeDefault
= DevFmtFloat
428 enum DevFmtChannels
{
429 DevFmtMono
= ALC_MONO_SOFT
,
430 DevFmtStereo
= ALC_STEREO_SOFT
,
431 DevFmtQuad
= ALC_QUAD_SOFT
,
432 DevFmtX51
= ALC_5POINT1_SOFT
,
433 DevFmtX61
= ALC_6POINT1_SOFT
,
434 DevFmtX71
= ALC_7POINT1_SOFT
,
435 DevFmtAmbi3D
= ALC_BFORMAT3D_SOFT
,
437 /* Similar to 5.1, except using rear channels instead of sides */
438 DevFmtX51Rear
= 0x80000000,
440 DevFmtChannelsDefault
= DevFmtStereo
442 #define MAX_OUTPUT_CHANNELS (16)
444 /* DevFmtType traits, providing the type, etc given a DevFmtType. */
445 template<DevFmtType T
>
446 struct DevFmtTypeTraits
{ };
449 struct DevFmtTypeTraits
<DevFmtByte
> { using Type
= ALbyte
; };
451 struct DevFmtTypeTraits
<DevFmtUByte
> { using Type
= ALubyte
; };
453 struct DevFmtTypeTraits
<DevFmtShort
> { using Type
= ALshort
; };
455 struct DevFmtTypeTraits
<DevFmtUShort
> { using Type
= ALushort
; };
457 struct DevFmtTypeTraits
<DevFmtInt
> { using Type
= ALint
; };
459 struct DevFmtTypeTraits
<DevFmtUInt
> { using Type
= ALuint
; };
461 struct DevFmtTypeTraits
<DevFmtFloat
> { using Type
= ALfloat
; };
464 ALsizei
BytesFromDevFmt(enum DevFmtType type
);
465 ALsizei
ChannelsFromDevFmt(enum DevFmtChannels chans
, ALsizei ambiorder
);
466 inline ALsizei
FrameSizeFromDevFmt(enum DevFmtChannels chans
, enum DevFmtType type
, ALsizei ambiorder
)
468 return ChannelsFromDevFmt(chans
, ambiorder
) * BytesFromDevFmt(type
);
471 enum class AmbiLayout
{
472 FuMa
= ALC_FUMA_SOFT
, /* FuMa channel order */
473 ACN
= ALC_ACN_SOFT
, /* ACN channel order */
478 enum class AmbiNorm
{
479 FuMa
= ALC_FUMA_SOFT
, /* FuMa normalization */
480 SN3D
= ALC_SN3D_SOFT
, /* SN3D normalization */
481 N3D
= ALC_N3D_SOFT
, /* N3D normalization */
501 /* The maximum number of Ambisonics coefficients. For a given order (o), the
502 * size needed will be (o+1)**2, thus zero-order has 1, first-order has 4,
503 * second-order has 9, third-order has 16, and fourth-order has 25.
505 #define MAX_AMBI_ORDER 3
506 #define MAX_AMBI_COEFFS ((MAX_AMBI_ORDER+1) * (MAX_AMBI_ORDER+1))
508 /* A bitmask of ambisonic channels with height information. If none of these
509 * channels are used/needed, there's no height (e.g. with most surround sound
510 * speaker setups). This only specifies up to 4th order, which is the highest
511 * order a 32-bit mask value can specify (a 64-bit mask could handle up to 7th
512 * order). This is ACN ordering, with bit 0 being ACN 0, etc.
514 #define AMBI_PERIPHONIC_MASK (0xfe7ce4)
516 /* The maximum number of Ambisonic coefficients for 2D (non-periphonic)
517 * representation. This is 2 per each order above zero-order, plus 1 for zero-
518 * order. Or simply, o*2 + 1.
520 #define MAX_AMBI2D_COEFFS (MAX_AMBI_ORDER*2 + 1)
523 typedef ALfloat ChannelConfig
[MAX_AMBI_COEFFS
];
524 typedef struct BFChannelConfig
{
529 typedef union AmbiConfig
{
530 /* Ambisonic coefficients for mixing to the dry buffer. */
531 ChannelConfig Coeffs
[MAX_OUTPUT_CHANNELS
];
532 /* Coefficient channel mapping for mixing to the dry buffer. */
533 BFChannelConfig Map
[MAX_OUTPUT_CHANNELS
];
537 typedef struct BufferSubList
{
538 ALuint64 FreeMask
{0u};
539 struct ALbuffer
*Buffers
{nullptr}; /* 64 */
542 typedef struct EffectSubList
{
543 ALuint64 FreeMask
{0u};
544 struct ALeffect
*Effects
{nullptr}; /* 64 */
547 typedef struct FilterSubList
{
548 ALuint64 FreeMask
{0u};
549 struct ALfilter
*Filters
{nullptr}; /* 64 */
553 typedef struct EnumeratedHrtf
{
556 struct HrtfEntry
*hrtf
;
560 /* Maximum delay in samples for speaker distance compensation. */
561 #define MAX_DELAY_LENGTH 1024
566 ALsizei Length
{0}; /* Valid range is [0...MAX_DELAY_LENGTH). */
567 ALfloat
*Buffer
{nullptr};
568 } mChannel
[MAX_OUTPUT_CHANNELS
];
569 al::vector
<ALfloat
,16> mSamples
;
572 void resize(size_t amt
) { mSamples
.resize(amt
); }
573 void shrink_to_fit() { mSamples
.shrink_to_fit(); }
574 void clear() noexcept
576 for(auto &chan
: mChannel
)
580 chan
.Buffer
= nullptr;
585 ALfloat
*data() noexcept
{ return mSamples
.data(); }
586 const ALfloat
*data() const noexcept
{ return mSamples
.data(); }
588 DistData
& operator[](size_t o
) noexcept
{ return mChannel
[o
]; }
589 const DistData
& operator[](size_t o
) const noexcept
{ return mChannel
[o
]; }
592 /* Size for temporary storage of buffer data, in ALfloats. Larger values need
593 * more memory, while smaller values may need more iterations. The value needs
594 * to be a sensible size, however, as it constrains the max stepping value used
595 * for mixing, as well as the maximum number of samples per mixing iteration.
597 #define BUFFERSIZE 2048
599 typedef struct MixParams
{
601 /* Number of coefficients in each Ambi.Coeffs to mix together (4 for first-
602 * order, 9 for second-order, etc). If the count is 0, Ambi.Map is used
603 * instead to map each output to a coefficient index.
605 ALsizei CoeffCount
{0};
607 ALfloat (*Buffer
)[BUFFERSIZE
]{nullptr};
608 ALsizei NumChannels
{0};
611 typedef struct RealMixParams
{
612 enum Channel ChannelName
[MAX_OUTPUT_CHANNELS
]{};
614 ALfloat (*Buffer
)[BUFFERSIZE
]{nullptr};
615 ALsizei NumChannels
{0};
618 typedef void (*POSTPROCESS
)(ALCdevice
*device
, ALsizei SamplesToDo
);
620 struct ALCdevice_struct
{
623 ATOMIC(ALenum
) Connected
{AL_TRUE
};
629 DevFmtChannels FmtChans
{};
630 DevFmtType FmtType
{};
631 ALboolean IsHeadphones
{AL_FALSE
};
632 ALsizei mAmbiOrder
{0};
633 /* For DevFmtAmbi* output only, specifies the channel order and
636 AmbiLayout mAmbiLayout
{AmbiLayout::Default
};
637 AmbiNorm mAmbiScale
{AmbiNorm::Default
};
639 ALCenum LimiterState
{ALC_DONT_CARE_SOFT
};
641 std::string DeviceName
;
643 ATOMIC(ALCenum
) LastError
{ALC_NO_ERROR
};
645 // Maximum number of sources that can be created
647 // Maximum number of slots that can be created
648 ALuint AuxiliaryEffectSlotMax
{};
650 ALCuint NumMonoSources
{};
651 ALCuint NumStereoSources
{};
652 ALsizei NumAuxSends
{};
654 // Map of Buffers for this device
655 al::vector
<BufferSubList
> BufferList
;
658 // Map of Effects for this device
659 al::vector
<EffectSubList
> EffectList
;
662 // Map of Filters for this device
663 al::vector
<FilterSubList
> FilterList
;
666 POSTPROCESS PostProcess
{};
668 /* HRTF state and info */
669 std::unique_ptr
<DirectHrtfState
> mHrtfState
;
670 std::string HrtfName
;
671 Hrtf
*HrtfHandle
{nullptr};
672 al::vector
<EnumeratedHrtf
> HrtfList
;
673 ALCenum HrtfStatus
{ALC_FALSE
};
675 /* UHJ encoder state */
676 std::unique_ptr
<Uhj2Encoder
> Uhj_Encoder
;
678 /* High quality Ambisonic decoder */
679 std::unique_ptr
<BFormatDec
> AmbiDecoder
;
681 /* Stereo-to-binaural filter */
682 std::unique_ptr
<bs2b
> Bs2b
;
684 /* First-order ambisonic upsampler for higher-order output */
685 std::unique_ptr
<AmbiUpsampler
> AmbiUp
;
687 /* Rendering mode. */
688 RenderMode Render_Mode
{NormalRender
};
693 ALuint64 ClockBase
{0u};
694 ALuint SamplesDone
{0u};
695 ALuint FixedLatency
{0u};
697 /* Temp storage used for mixer processing. */
698 alignas(16) ALfloat TempBuffer
[4][BUFFERSIZE
];
700 /* Mixing buffer used by the Dry mix, FOAOut, and Real out. */
701 al::vector
<std::array
<ALfloat
,BUFFERSIZE
>, 16> MixBuffer
;
703 /* The "dry" path corresponds to the main output. */
705 ALsizei NumChannelsPerOrder
[MAX_AMBI_ORDER
+1]{};
707 /* First-order ambisonics output, to be upsampled to the dry buffer if different. */
710 /* "Real" output, which will be written to the device buffer. May alias the
713 RealMixParams RealOut
;
715 std::unique_ptr
<FrontStablizer
> Stablizer
;
717 std::unique_ptr
<Compressor
> Limiter
;
719 /* The average speaker distance as determined by the ambdec configuration
720 * (or alternatively, by the NFC-HOA reference delay). Only used for NFC.
722 ALfloat AvgSpeakerDist
{0.0f
};
724 /* Delay buffers used to compensate for speaker distances. */
725 DistanceComp ChannelDelay
;
727 /* Dithering control. */
728 ALfloat DitherDepth
{0.0f
};
729 ALuint DitherSeed
{0u};
731 /* Running count of the mixer invocations, in 31.1 fixed point. This
732 * actually increments *twice* when mixing, first at the start and then at
733 * the end, so the bottom bit indicates if the device is currently mixing
734 * and the upper bits indicate how many mixes have been done.
736 RefCount MixCount
{0u};
738 // Contexts created on this device
739 ATOMIC(ALCcontext
*) ContextList
{nullptr};
742 ALCbackend
*Backend
{nullptr};
744 ATOMIC(ALCdevice
*) next
{nullptr};
747 ALCdevice_struct(DeviceType type
);
748 ALCdevice_struct(const ALCdevice_struct
&) = delete;
749 ALCdevice_struct
& operator=(const ALCdevice_struct
&) = delete;
752 DEF_NEWDEL(ALCdevice
)
755 // Frequency was requested by the app or config file
756 #define DEVICE_FREQUENCY_REQUEST (1u<<1)
757 // Channel configuration was requested by the config file
758 #define DEVICE_CHANNELS_REQUEST (1u<<2)
759 // Sample type was requested by the config file
760 #define DEVICE_SAMPLE_TYPE_REQUEST (1u<<3)
762 // Specifies if the DSP is paused at user request
763 #define DEVICE_PAUSED (1u<<30)
765 // Specifies if the device is currently running
766 #define DEVICE_RUNNING (1u<<31)
769 /* Nanosecond resolution for the device clock time. */
770 #define DEVICE_CLOCK_RES U64(1000000000)
773 /* Must be at most 15 characters (16 including the terminating null) for
774 * compatibility with pthread_setname_np limitations. */
775 #define MIXER_THREAD_NAME "alsoft-mixer"
777 #define RECORD_THREAD_NAME "alsoft-record"
781 /* End event thread processing. */
782 EventType_KillThread
= 0,
784 /* User event types. */
785 EventType_SourceStateChange
= 1<<0,
786 EventType_BufferCompleted
= 1<<1,
787 EventType_Error
= 1<<2,
788 EventType_Performance
= 1<<3,
789 EventType_Deprecated
= 1<<4,
790 EventType_Disconnected
= 1<<5,
792 /* Internal events. */
793 EventType_ReleaseEffectState
= 65536,
796 typedef struct AsyncEvent
{
797 unsigned int EnumType
;
806 EffectState
*mEffectState
;
809 #define ASYNC_EVENT(t) { t, { 0 } }
812 void AllocateVoices(ALCcontext
*context
, ALsizei num_voices
, ALsizei old_sends
);
815 extern ALint RTPrioLevel
;
816 void SetRTPriority(void);
818 void SetDefaultChannelOrder(ALCdevice
*device
);
819 void SetDefaultWFXChannelOrder(ALCdevice
*device
);
821 const ALCchar
*DevFmtTypeString(enum DevFmtType type
);
822 const ALCchar
*DevFmtChannelsString(enum DevFmtChannels chans
);
824 inline ALint
GetChannelIndex(const enum Channel names
[MAX_OUTPUT_CHANNELS
], enum Channel chan
)
827 for(i
= 0;i
< MAX_OUTPUT_CHANNELS
;i
++)
835 * GetChannelIdxByName
837 * Returns the index for the given channel name (e.g. FrontCenter), or -1 if it
840 inline ALint
GetChannelIdxByName(const RealMixParams
*real
, enum Channel chan
)
841 { return GetChannelIndex(real
->ChannelName
, chan
); }
844 inline void LockBufferList(ALCdevice
*device
) { almtx_lock(&device
->BufferLock
); }
845 inline void UnlockBufferList(ALCdevice
*device
) { almtx_unlock(&device
->BufferLock
); }
847 inline void LockEffectList(ALCdevice
*device
) { almtx_lock(&device
->EffectLock
); }
848 inline void UnlockEffectList(ALCdevice
*device
) { almtx_unlock(&device
->EffectLock
); }
850 inline void LockFilterList(ALCdevice
*device
) { almtx_lock(&device
->FilterLock
); }
851 inline void UnlockFilterList(ALCdevice
*device
) { almtx_unlock(&device
->FilterLock
); }
854 void StartEventThrd(ALCcontext
*ctx
);
855 void StopEventThrd(ALCcontext
*ctx
);
858 std::vector
<std::string
> SearchDataFiles(const char *match
, const char *subdir
);