26 #include "inprogext.h"
28 #include "polymorphism.h"
35 template<typename T
, size_t N
>
36 constexpr inline size_t countof(const T(&)[N
]) noexcept
38 #define COUNTOF countof
49 #define HAS_BUILTIN __has_builtin
51 #define HAS_BUILTIN(x) (0)
55 /* LIKELY optimizes the case where the condition is true. The condition is not
56 * required to be true, but it can result in more optimal code for the true
57 * path at the expense of a less optimal false path.
59 #define LIKELY(x) __builtin_expect(!!(x), !0)
60 /* The opposite of LIKELY, optimizing the case where the condition is false. */
61 #define UNLIKELY(x) __builtin_expect(!!(x), 0)
62 /* Unlike LIKELY, ASSUME requires the condition to be true or else it invokes
63 * undefined behavior. It's essentially an assert without actually checking the
64 * condition at run-time, allowing for stronger optimizations than LIKELY.
66 #if HAS_BUILTIN(__builtin_assume)
67 #define ASSUME __builtin_assume
69 #define ASSUME(x) do { if(!(x)) __builtin_unreachable(); } while(0)
74 #define LIKELY(x) (!!(x))
75 #define UNLIKELY(x) (!!(x))
77 #define ASSUME __assume
79 #define ASSUME(x) ((void)0)
84 #define UINT64_MAX U64(18446744073709551615)
88 #if defined(__cplusplus)
90 #elif defined(__GNUC__)
91 #define UNUSED(x) UNUSED_##x __attribute__((unused))
92 #elif defined(__LCLINT__)
93 #define UNUSED(x) /*@unused@*/ x
99 /* Calculates the size of a struct with N elements of a flexible array member.
100 * GCC and Clang allow offsetof(Type, fam[N]) for this, but MSVC seems to have
101 * trouble, so a bit more verbose workaround is needed.
103 #define FAM_SIZE(T, M, N) (offsetof(T, M) + sizeof(((T*)NULL)->M[0])*(N))
106 typedef ALint64SOFT ALint64
;
107 typedef ALuint64SOFT ALuint64
;
110 #if defined(_MSC_VER)
111 #define U64(x) ((ALuint64)(x##ui64))
112 #elif SIZEOF_LONG == 8
113 #define U64(x) ((ALuint64)(x##ul))
114 #elif SIZEOF_LONG_LONG == 8
115 #define U64(x) ((ALuint64)(x##ull))
120 #if defined(_MSC_VER)
121 #define I64(x) ((ALint64)(x##i64))
122 #elif SIZEOF_LONG == 8
123 #define I64(x) ((ALint64)(x##l))
124 #elif SIZEOF_LONG_LONG == 8
125 #define I64(x) ((ALint64)(x##ll))
129 /* Define a CTZ64 macro (count trailing zeros, for 64-bit integers). The result
130 * is *UNDEFINED* if the value is 0.
135 #define CTZ64 __builtin_ctzl
137 #define CTZ64 __builtin_ctzll
140 #elif defined(HAVE_BITSCANFORWARD64_INTRINSIC)
142 inline int msvc64_ctz64(ALuint64 v
)
144 unsigned long idx
= 64;
145 _BitScanForward64(&idx
, v
);
148 #define CTZ64 msvc64_ctz64
150 #elif defined(HAVE_BITSCANFORWARD_INTRINSIC)
152 inline int msvc_ctz64(ALuint64 v
)
154 unsigned long idx
= 64;
155 if(!_BitScanForward(&idx
, v
&0xffffffff))
157 if(_BitScanForward(&idx
, v
>>32))
162 #define CTZ64 msvc_ctz64
166 /* There be black magics here. The popcnt64 method is derived from
167 * https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
168 * while the ctz-utilizing-popcnt algorithm is shown here
169 * http://www.hackersdelight.org/hdcodetxt/ntz.c.txt
170 * as the ntz2 variant. These likely aren't the most efficient methods, but
171 * they're good enough if the GCC or MSVC intrinsics aren't available.
173 inline int fallback_popcnt64(ALuint64 v
)
175 v
= v
- ((v
>> 1) & U64(0x5555555555555555));
176 v
= (v
& U64(0x3333333333333333)) + ((v
>> 2) & U64(0x3333333333333333));
177 v
= (v
+ (v
>> 4)) & U64(0x0f0f0f0f0f0f0f0f);
178 return (int)((v
* U64(0x0101010101010101)) >> 56);
181 inline int fallback_ctz64(ALuint64 value
)
183 return fallback_popcnt64(~value
& (value
- 1));
185 #define CTZ64 fallback_ctz64
188 #if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__)
189 #define IS_LITTLE_ENDIAN (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
193 ALubyte b
[sizeof(ALuint
)];
194 } EndianTest
= { 1 };
195 #define IS_LITTLE_ENDIAN (EndianTest.b[0] == 1)
201 struct DirectHrtfState
;
202 struct FrontStablizer
;
210 #define DEFAULT_OUTPUT_RATE (44100)
211 #define MIN_OUTPUT_RATE (8000)
214 /* Find the next power-of-2 for non-power-of-2 numbers. */
215 inline ALuint
NextPowerOf2(ALuint value
)
229 /** Round up a value to the next multiple. */
230 inline size_t RoundUp(size_t value
, size_t r
)
233 return value
- (value
%r
);
236 /* Fast float-to-int conversion. No particular rounding mode is assumed; the
237 * IEEE-754 default is round-to-nearest with ties-to-even, though an app could
238 * change it on its own threads. On some systems, a truncating conversion may
239 * always be the fastest method.
241 inline ALint
fastf2i(ALfloat f
)
243 #if defined(HAVE_INTRIN_H) && ((defined(_M_IX86_FP) && (_M_IX86_FP > 0)) || defined(_M_X64))
244 return _mm_cvt_ss2si(_mm_set1_ps(f
));
246 #elif defined(_MSC_VER) && defined(_M_IX86_FP)
253 #elif (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__))
257 __asm__("cvtss2si %1, %0" : "=r"(i
) : "x"(f
));
259 __asm__
__volatile__("fistpl %0" : "=m"(i
) : "t"(f
) : "st");
263 /* On GCC when compiling with -fno-math-errno, lrintf can be inlined to
264 * some simple instructions. Clang does not inline it, always generating a
265 * libc call, while MSVC's implementation is horribly slow, so always fall
266 * back to a normal integer conversion for them.
268 #elif !defined(_MSC_VER) && !defined(__clang__)
278 /* Converts float-to-int using standard behavior (truncation). */
279 inline int float2int(float f
)
281 #if ((defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__)) && \
282 !defined(__SSE_MATH__)) || (defined(_MSC_VER) && defined(_M_IX86_FP) && _M_IX86_FP == 0)
283 ALint sign
, shift
, mant
;
290 sign
= (conv
.i
>>31) | 1;
291 shift
= ((conv
.i
>>23)&0xff) - (127+23);
294 if(UNLIKELY(shift
>= 31 || shift
< -23))
297 mant
= (conv
.i
&0x7fffff) | 0x800000;
298 if(LIKELY(shift
< 0))
299 return (mant
>> -shift
) * sign
;
300 return (mant
<< shift
) * sign
;
308 /* Rounds a float to the nearest integral value, according to the current
309 * rounding mode. This is essentially an inlined version of rintf, although it
310 * makes fewer promises (e.g. -0 or -0.25 rounded to 0 may result in +0).
312 inline float fast_roundf(float f
)
314 #if (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__)) && \
315 !defined(__SSE_MATH__)
318 __asm__
__volatile__("frndint" : "=t"(out
) : "0"(f
));
323 /* Integral limit, where sub-integral precision is not available for
326 static const float ilim
[2] = {
327 8388608.0f
/* 0x1.0p+23 */,
328 -8388608.0f
/* -0x1.0p+23 */
337 sign
= (conv
.i
>>31)&0x01;
338 expo
= (conv
.i
>>23)&0xff;
340 if(UNLIKELY(expo
>= 150/*+23*/))
342 /* An exponent (base-2) of 23 or higher is incapable of sub-integral
343 * precision, so it's already an integral value. We don't need to worry
344 * about infinity or NaN here.
348 /* Adding the integral limit to the value (with a matching sign) forces a
349 * result that has no sub-integral precision, and is consequently forced to
350 * round to an integral value. Removing the integral limit then restores
351 * the initial value rounded to the integral. The compiler should not
352 * optimize this out because of non-associative rules on floating-point
353 * math (as long as you don't use -fassociative-math,
354 * -funsafe-math-optimizations, -ffast-math, or -Ofast, in which case this
358 return f
- ilim
[sign
];
412 DevFmtByte
= ALC_BYTE_SOFT
,
413 DevFmtUByte
= ALC_UNSIGNED_BYTE_SOFT
,
414 DevFmtShort
= ALC_SHORT_SOFT
,
415 DevFmtUShort
= ALC_UNSIGNED_SHORT_SOFT
,
416 DevFmtInt
= ALC_INT_SOFT
,
417 DevFmtUInt
= ALC_UNSIGNED_INT_SOFT
,
418 DevFmtFloat
= ALC_FLOAT_SOFT
,
420 DevFmtTypeDefault
= DevFmtFloat
422 enum DevFmtChannels
{
423 DevFmtMono
= ALC_MONO_SOFT
,
424 DevFmtStereo
= ALC_STEREO_SOFT
,
425 DevFmtQuad
= ALC_QUAD_SOFT
,
426 DevFmtX51
= ALC_5POINT1_SOFT
,
427 DevFmtX61
= ALC_6POINT1_SOFT
,
428 DevFmtX71
= ALC_7POINT1_SOFT
,
429 DevFmtAmbi3D
= ALC_BFORMAT3D_SOFT
,
431 /* Similar to 5.1, except using rear channels instead of sides */
432 DevFmtX51Rear
= 0x80000000,
434 DevFmtChannelsDefault
= DevFmtStereo
436 #define MAX_OUTPUT_CHANNELS (16)
438 ALsizei
BytesFromDevFmt(enum DevFmtType type
);
439 ALsizei
ChannelsFromDevFmt(enum DevFmtChannels chans
, ALsizei ambiorder
);
440 inline ALsizei
FrameSizeFromDevFmt(enum DevFmtChannels chans
, enum DevFmtType type
, ALsizei ambiorder
)
442 return ChannelsFromDevFmt(chans
, ambiorder
) * BytesFromDevFmt(type
);
446 AmbiLayout_FuMa
= ALC_FUMA_SOFT
, /* FuMa channel order */
447 AmbiLayout_ACN
= ALC_ACN_SOFT
, /* ACN channel order */
449 AmbiLayout_Default
= AmbiLayout_ACN
453 AmbiNorm_FuMa
= ALC_FUMA_SOFT
, /* FuMa normalization */
454 AmbiNorm_SN3D
= ALC_SN3D_SOFT
, /* SN3D normalization */
455 AmbiNorm_N3D
= ALC_N3D_SOFT
, /* N3D normalization */
457 AmbiNorm_Default
= AmbiNorm_SN3D
475 /* The maximum number of Ambisonics coefficients. For a given order (o), the
476 * size needed will be (o+1)**2, thus zero-order has 1, first-order has 4,
477 * second-order has 9, third-order has 16, and fourth-order has 25.
479 #define MAX_AMBI_ORDER 3
480 #define MAX_AMBI_COEFFS ((MAX_AMBI_ORDER+1) * (MAX_AMBI_ORDER+1))
482 /* A bitmask of ambisonic channels with height information. If none of these
483 * channels are used/needed, there's no height (e.g. with most surround sound
484 * speaker setups). This only specifies up to 4th order, which is the highest
485 * order a 32-bit mask value can specify (a 64-bit mask could handle up to 7th
486 * order). This is ACN ordering, with bit 0 being ACN 0, etc.
488 #define AMBI_PERIPHONIC_MASK (0xfe7ce4)
490 /* The maximum number of Ambisonic coefficients for 2D (non-periphonic)
491 * representation. This is 2 per each order above zero-order, plus 1 for zero-
492 * order. Or simply, o*2 + 1.
494 #define MAX_AMBI2D_COEFFS (MAX_AMBI_ORDER*2 + 1)
497 typedef ALfloat ChannelConfig
[MAX_AMBI_COEFFS
];
498 typedef struct BFChannelConfig
{
503 typedef union AmbiConfig
{
504 /* Ambisonic coefficients for mixing to the dry buffer. */
505 ChannelConfig Coeffs
[MAX_OUTPUT_CHANNELS
];
506 /* Coefficient channel mapping for mixing to the dry buffer. */
507 BFChannelConfig Map
[MAX_OUTPUT_CHANNELS
];
511 typedef struct BufferSubList
{
512 ALuint64 FreeMask
{0u};
513 struct ALbuffer
*Buffers
{nullptr}; /* 64 */
516 typedef struct EffectSubList
{
517 ALuint64 FreeMask
{0u};
518 struct ALeffect
*Effects
{nullptr}; /* 64 */
521 typedef struct FilterSubList
{
522 ALuint64 FreeMask
{0u};
523 struct ALfilter
*Filters
{nullptr}; /* 64 */
527 typedef struct EnumeratedHrtf
{
530 struct HrtfEntry
*hrtf
;
534 /* Maximum delay in samples for speaker distance compensation. */
535 #define MAX_DELAY_LENGTH 1024
537 typedef struct DistanceComp
{
539 ALsizei Length
{0}; /* Valid range is [0...MAX_DELAY_LENGTH). */
540 ALfloat
*Buffer
{nullptr};
543 /* Size for temporary storage of buffer data, in ALfloats. Larger values need
544 * more memory, while smaller values may need more iterations. The value needs
545 * to be a sensible size, however, as it constrains the max stepping value used
546 * for mixing, as well as the maximum number of samples per mixing iteration.
548 #define BUFFERSIZE 2048
550 typedef struct MixParams
{
552 /* Number of coefficients in each Ambi.Coeffs to mix together (4 for first-
553 * order, 9 for second-order, etc). If the count is 0, Ambi.Map is used
554 * instead to map each output to a coefficient index.
556 ALsizei CoeffCount
{0};
558 ALfloat (*Buffer
)[BUFFERSIZE
]{nullptr};
559 ALsizei NumChannels
{0};
562 typedef struct RealMixParams
{
563 enum Channel ChannelName
[MAX_OUTPUT_CHANNELS
]{};
565 ALfloat (*Buffer
)[BUFFERSIZE
]{nullptr};
566 ALsizei NumChannels
{0};
569 typedef void (*POSTPROCESS
)(ALCdevice
*device
, ALsizei SamplesToDo
);
571 struct ALCdevice_struct
{
574 ATOMIC(ALenum
) Connected
{AL_TRUE
};
580 DevFmtChannels FmtChans
{};
581 DevFmtType FmtType
{};
582 ALboolean IsHeadphones
{};
583 ALsizei mAmbiOrder
{};
584 /* For DevFmtAmbi* output only, specifies the channel order and
587 AmbiLayout mAmbiLayout
{};
588 AmbiNorm mAmbiScale
{};
590 ALCenum LimiterState
{ALC_DONT_CARE_SOFT
};
592 std::string DeviceName
;
594 ATOMIC(ALCenum
) LastError
{ALC_NO_ERROR
};
596 // Maximum number of sources that can be created
598 // Maximum number of slots that can be created
599 ALuint AuxiliaryEffectSlotMax
{};
601 ALCuint NumMonoSources
{};
602 ALCuint NumStereoSources
{};
603 ALsizei NumAuxSends
{};
605 // Map of Buffers for this device
606 al::vector
<BufferSubList
> BufferList
;
609 // Map of Effects for this device
610 al::vector
<EffectSubList
> EffectList
;
613 // Map of Filters for this device
614 al::vector
<FilterSubList
> FilterList
;
617 POSTPROCESS PostProcess
{};
619 /* HRTF state and info */
620 struct DirectHrtfState
*Hrtf
{nullptr};
621 std::string HrtfName
;
622 struct Hrtf
*HrtfHandle
{nullptr};
623 al::vector
<EnumeratedHrtf
> HrtfList
;
624 ALCenum HrtfStatus
{ALC_FALSE
};
626 /* UHJ encoder state */
627 struct Uhj2Encoder
*Uhj_Encoder
{nullptr};
629 /* High quality Ambisonic decoder */
630 struct BFormatDec
*AmbiDecoder
{nullptr};
632 /* Stereo-to-binaural filter */
633 struct bs2b
*Bs2b
{nullptr};
635 /* First-order ambisonic upsampler for higher-order output */
636 struct AmbiUpsampler
*AmbiUp
{nullptr};
638 /* Rendering mode. */
639 RenderMode Render_Mode
{NormalRender
};
644 ALuint64 ClockBase
{0u};
645 ALuint SamplesDone
{0u};
646 ALuint FixedLatency
{0u};
648 /* Temp storage used for mixer processing. */
649 alignas(16) ALfloat TempBuffer
[4][BUFFERSIZE
];
651 /* The "dry" path corresponds to the main output. */
653 ALsizei NumChannelsPerOrder
[MAX_AMBI_ORDER
+1]{};
655 /* First-order ambisonics output, to be upsampled to the dry buffer if different. */
658 /* "Real" output, which will be written to the device buffer. May alias the
661 RealMixParams RealOut
;
663 struct FrontStablizer
*Stablizer
{nullptr};
665 struct Compressor
*Limiter
{nullptr};
667 /* The average speaker distance as determined by the ambdec configuration
668 * (or alternatively, by the NFC-HOA reference delay). Only used for NFC.
670 ALfloat AvgSpeakerDist
{0.0f
};
672 /* Delay buffers used to compensate for speaker distances. */
673 DistanceComp ChannelDelay
[MAX_OUTPUT_CHANNELS
];
675 /* Dithering control. */
676 ALfloat DitherDepth
{0.0f
};
677 ALuint DitherSeed
{0u};
679 /* Running count of the mixer invocations, in 31.1 fixed point. This
680 * actually increments *twice* when mixing, first at the start and then at
681 * the end, so the bottom bit indicates if the device is currently mixing
682 * and the upper bits indicate how many mixes have been done.
684 RefCount MixCount
{0u};
686 // Contexts created on this device
687 ATOMIC(ALCcontext
*) ContextList
{nullptr};
690 struct ALCbackend
*Backend
{nullptr};
692 ATOMIC(ALCdevice
*) next
{nullptr};
695 ALCdevice_struct(DeviceType type
);
696 ALCdevice_struct(const ALCdevice_struct
&) = delete;
697 ALCdevice_struct
& operator=(const ALCdevice_struct
&) = delete;
700 DEF_NEWDEL(ALCdevice
)
703 // Frequency was requested by the app or config file
704 #define DEVICE_FREQUENCY_REQUEST (1u<<1)
705 // Channel configuration was requested by the config file
706 #define DEVICE_CHANNELS_REQUEST (1u<<2)
707 // Sample type was requested by the config file
708 #define DEVICE_SAMPLE_TYPE_REQUEST (1u<<3)
710 // Specifies if the DSP is paused at user request
711 #define DEVICE_PAUSED (1u<<30)
713 // Specifies if the device is currently running
714 #define DEVICE_RUNNING (1u<<31)
717 /* Nanosecond resolution for the device clock time. */
718 #define DEVICE_CLOCK_RES U64(1000000000)
721 /* Must be at most 15 characters (16 including the terminating null) for
722 * compatibility with pthread_setname_np limitations. */
723 #define MIXER_THREAD_NAME "alsoft-mixer"
725 #define RECORD_THREAD_NAME "alsoft-record"
729 /* End event thread processing. */
730 EventType_KillThread
= 0,
732 /* User event types. */
733 EventType_SourceStateChange
= 1<<0,
734 EventType_BufferCompleted
= 1<<1,
735 EventType_Error
= 1<<2,
736 EventType_Performance
= 1<<3,
737 EventType_Deprecated
= 1<<4,
738 EventType_Disconnected
= 1<<5,
740 /* Internal events. */
741 EventType_ReleaseEffectState
= 65536,
744 typedef struct AsyncEvent
{
745 unsigned int EnumType
;
754 struct ALeffectState
*EffectState
;
757 #define ASYNC_EVENT(t) { t, { 0 } }
760 void AllocateVoices(ALCcontext
*context
, ALsizei num_voices
, ALsizei old_sends
);
763 extern ALint RTPrioLevel
;
764 void SetRTPriority(void);
766 void SetDefaultChannelOrder(ALCdevice
*device
);
767 void SetDefaultWFXChannelOrder(ALCdevice
*device
);
769 const ALCchar
*DevFmtTypeString(enum DevFmtType type
);
770 const ALCchar
*DevFmtChannelsString(enum DevFmtChannels chans
);
772 inline ALint
GetChannelIndex(const enum Channel names
[MAX_OUTPUT_CHANNELS
], enum Channel chan
)
775 for(i
= 0;i
< MAX_OUTPUT_CHANNELS
;i
++)
783 * GetChannelIdxByName
785 * Returns the index for the given channel name (e.g. FrontCenter), or -1 if it
/* Convenience wrapper: forwards real->ChannelName and chan to GetChannelIndex
 * and returns that call's result unchanged. 'real' is dereferenced without a
 * null check, so callers must pass a valid pointer. */
788 inline ALint
GetChannelIdxByName(const RealMixParams
*real
, enum Channel chan
)
789 { return GetChannelIndex(real
->ChannelName
, chan
); }
/* Calls almtx_lock on device->BufferLock. 'device' is dereferenced without a
 * null check. NOTE(review): presumably this guards the device's BufferList;
 * the BufferLock member is not visible here -- confirm. */
792 inline void LockBufferList(ALCdevice
*device
) { almtx_lock(&device
->BufferLock
); }
/* Calls almtx_unlock on device->BufferLock; the counterpart to
 * LockBufferList. 'device' is dereferenced without a null check. */
793 inline void UnlockBufferList(ALCdevice
*device
) { almtx_unlock(&device
->BufferLock
); }
/* Calls almtx_lock on device->EffectLock. 'device' is dereferenced without a
 * null check. NOTE(review): presumably this guards the device's EffectList;
 * the EffectLock member is not visible here -- confirm. */
795 inline void LockEffectList(ALCdevice
*device
) { almtx_lock(&device
->EffectLock
); }
/* Calls almtx_unlock on device->EffectLock; the counterpart to
 * LockEffectList. 'device' is dereferenced without a null check. */
796 inline void UnlockEffectList(ALCdevice
*device
) { almtx_unlock(&device
->EffectLock
); }
/* Calls almtx_lock on device->FilterLock. 'device' is dereferenced without a
 * null check. NOTE(review): presumably this guards the device's FilterList;
 * the FilterLock member is not visible here -- confirm. */
798 inline void LockFilterList(ALCdevice
*device
) { almtx_lock(&device
->FilterLock
); }
/* Calls almtx_unlock on device->FilterLock; the counterpart to
 * LockFilterList. 'device' is dereferenced without a null check. */
799 inline void UnlockFilterList(ALCdevice
*device
) { almtx_unlock(&device
->FilterLock
); }
802 void StartEventThrd(ALCcontext
*ctx
);
803 void StopEventThrd(ALCcontext
*ctx
);
806 std::vector
<std::string
> SearchDataFiles(const char *match
, const char *subdir
);