Add an SSE-enhanced path for applying the mixer matrix
[openal-soft/openal-hmr.git] / Alc / mixer_inc.c
blob bc817993b63ccb1bad39aac67494d52a799cd2af
1 #include "config.h"
3 #include "AL/alc.h"
4 #include "AL/al.h"
5 #include "alMain.h"
6 #include "alSource.h"
7 #include "alAuxEffectSlot.h"
8 #include "mixer_defs.h"
/* Branch-prediction hints.
 *
 * LIKELY(x) marks a condition expected to be true, UNLIKELY(x) one expected
 * to be false. With GCC-compatible compilers the hint is passed on through
 * __builtin_expect; elsewhere the macros only test the condition.
 *
 * Both variants normalise the condition with `!!`, so the macro evaluates to
 * 0 or 1 regardless of which branch of the #ifdef is active.
 */
#ifdef __GNUC__
#define LIKELY(x) __builtin_expect(!!(x), 1)
#define UNLIKELY(x) __builtin_expect(!!(x), 0)
#else
#define LIKELY(x) (!!(x))
#define UNLIKELY(x) (!!(x))
#endif
/* Token-pasting helpers used to build the mixer function names.
 *
 * MERGE2/MERGE4 forward to the REAL_ variants so that their arguments are
 * macro-expanded first; pasting directly with ## would glue the unexpanded
 * argument names (e.g. the literal tokens SAMPLER and SUFFIX) together.
 */
#define REAL_MERGE2(lhs,rhs) lhs##rhs
#define MERGE2(lhs,rhs) REAL_MERGE2(lhs,rhs)
#define REAL_MERGE4(p0,p1,p2,p3) p0##p1##p2##p3
#define MERGE4(p0,p1,p2,p3) REAL_MERGE4(p0,p1,p2,p3)
24 static __inline void ApplyCoeffsStep(ALuint Offset, ALfloat (*RESTRICT Values)[2],
25 ALfloat (*RESTRICT Coeffs)[2],
26 ALfloat (*RESTRICT CoeffStep)[2],
27 ALfloat left, ALfloat right);
28 static __inline void ApplyCoeffs(ALuint Offset, ALfloat (*RESTRICT Values)[2],
29 ALfloat (*RESTRICT Coeffs)[2],
30 ALfloat left, ALfloat right);
31 static __inline void ApplyValue(ALfloat *RESTRICT Output, ALfloat value,
32 const ALfloat *DrySend);
35 void MERGE4(MixDirect_Hrtf_,SAMPLER,_,SUFFIX)(
36 ALsource *Source, ALCdevice *Device, DirectParams *params,
37 const ALfloat *RESTRICT data, ALuint srcfrac,
38 ALuint OutPos, ALuint SamplesToDo, ALuint BufferSize)
40 const ALuint NumChannels = Source->NumChannels;
41 const ALint *RESTRICT DelayStep = params->Hrtf.DelayStep;
42 ALfloat (*RESTRICT DryBuffer)[MaxChannels];
43 ALfloat *RESTRICT ClickRemoval, *RESTRICT PendingClicks;
44 ALfloat (*RESTRICT CoeffStep)[2] = params->Hrtf.CoeffStep;
45 ALuint pos, frac;
46 FILTER *DryFilter;
47 ALuint BufferIdx;
48 ALuint increment;
49 ALfloat value;
50 ALuint i, c;
52 increment = Source->Params.Step;
54 DryBuffer = Device->DryBuffer;
55 ClickRemoval = Device->ClickRemoval;
56 PendingClicks = Device->PendingClicks;
57 DryFilter = &params->iirFilter;
59 for(i = 0;i < NumChannels;i++)
61 ALfloat (*RESTRICT TargetCoeffs)[2] = params->Hrtf.Coeffs[i];
62 ALuint *RESTRICT TargetDelay = params->Hrtf.Delay[i];
63 ALfloat *RESTRICT History = Source->Hrtf.History[i];
64 ALfloat (*RESTRICT Values)[2] = Source->Hrtf.Values[i];
65 ALint Counter = maxu(Source->Hrtf.Counter, OutPos) - OutPos;
66 ALuint Offset = Source->Hrtf.Offset + OutPos;
67 ALIGN(16) ALfloat Coeffs[HRIR_LENGTH][2];
68 ALuint Delay[2];
69 ALfloat left, right;
71 pos = 0;
72 frac = srcfrac;
74 for(c = 0;c < HRIR_LENGTH;c++)
76 Coeffs[c][0] = TargetCoeffs[c][0] - (CoeffStep[c][0]*Counter);
77 Coeffs[c][1] = TargetCoeffs[c][1] - (CoeffStep[c][1]*Counter);
80 Delay[0] = TargetDelay[0] - (DelayStep[0]*Counter);
81 Delay[1] = TargetDelay[1] - (DelayStep[1]*Counter);
83 if(LIKELY(OutPos == 0))
85 value = SAMPLER(data + pos*NumChannels + i, NumChannels, frac);
86 value = lpFilter2PC(DryFilter, i, value);
88 History[Offset&SRC_HISTORY_MASK] = value;
89 left = lerp(History[(Offset-(Delay[0]>>HRTFDELAY_BITS))&SRC_HISTORY_MASK],
90 History[(Offset-(Delay[0]>>HRTFDELAY_BITS)-1)&SRC_HISTORY_MASK],
91 (Delay[0]&HRTFDELAY_MASK)*(1.0f/HRTFDELAY_FRACONE));
92 right = lerp(History[(Offset-(Delay[1]>>HRTFDELAY_BITS))&SRC_HISTORY_MASK],
93 History[(Offset-(Delay[1]>>HRTFDELAY_BITS)-1)&SRC_HISTORY_MASK],
94 (Delay[1]&HRTFDELAY_MASK)*(1.0f/HRTFDELAY_FRACONE));
96 ClickRemoval[FrontLeft] -= Values[(Offset+1)&HRIR_MASK][0] +
97 Coeffs[0][0] * left;
98 ClickRemoval[FrontRight] -= Values[(Offset+1)&HRIR_MASK][1] +
99 Coeffs[0][1] * right;
101 for(BufferIdx = 0;BufferIdx < BufferSize && Counter > 0;BufferIdx++)
103 value = SAMPLER(data + pos*NumChannels + i, NumChannels, frac);
104 value = lpFilter2P(DryFilter, i, value);
106 History[Offset&SRC_HISTORY_MASK] = value;
107 left = lerp(History[(Offset-(Delay[0]>>HRTFDELAY_BITS))&SRC_HISTORY_MASK],
108 History[(Offset-(Delay[0]>>HRTFDELAY_BITS)-1)&SRC_HISTORY_MASK],
109 (Delay[0]&HRTFDELAY_MASK)*(1.0f/HRTFDELAY_FRACONE));
110 right = lerp(History[(Offset-(Delay[1]>>HRTFDELAY_BITS))&SRC_HISTORY_MASK],
111 History[(Offset-(Delay[1]>>HRTFDELAY_BITS)-1)&SRC_HISTORY_MASK],
112 (Delay[1]&HRTFDELAY_MASK)*(1.0f/HRTFDELAY_FRACONE));
114 Delay[0] += DelayStep[0];
115 Delay[1] += DelayStep[1];
117 Values[Offset&HRIR_MASK][0] = 0.0f;
118 Values[Offset&HRIR_MASK][1] = 0.0f;
119 Offset++;
121 ApplyCoeffsStep(Offset, Values, Coeffs, CoeffStep, left, right);
122 DryBuffer[OutPos][FrontLeft] += Values[Offset&HRIR_MASK][0];
123 DryBuffer[OutPos][FrontRight] += Values[Offset&HRIR_MASK][1];
125 frac += increment;
126 pos += frac>>FRACTIONBITS;
127 frac &= FRACTIONMASK;
128 OutPos++;
129 Counter--;
132 Delay[0] >>= HRTFDELAY_BITS;
133 Delay[1] >>= HRTFDELAY_BITS;
134 for(;BufferIdx < BufferSize;BufferIdx++)
136 value = SAMPLER(data + pos*NumChannels + i, NumChannels, frac);
137 value = lpFilter2P(DryFilter, i, value);
139 History[Offset&SRC_HISTORY_MASK] = value;
140 left = History[(Offset-Delay[0])&SRC_HISTORY_MASK];
141 right = History[(Offset-Delay[1])&SRC_HISTORY_MASK];
143 Values[Offset&HRIR_MASK][0] = 0.0f;
144 Values[Offset&HRIR_MASK][1] = 0.0f;
145 Offset++;
147 ApplyCoeffs(Offset, Values, Coeffs, left, right);
148 DryBuffer[OutPos][FrontLeft] += Values[Offset&HRIR_MASK][0];
149 DryBuffer[OutPos][FrontRight] += Values[Offset&HRIR_MASK][1];
151 frac += increment;
152 pos += frac>>FRACTIONBITS;
153 frac &= FRACTIONMASK;
154 OutPos++;
156 if(LIKELY(OutPos == SamplesToDo))
158 value = SAMPLER(data + pos*NumChannels + i, NumChannels, frac);
159 value = lpFilter2PC(DryFilter, i, value);
161 History[Offset&SRC_HISTORY_MASK] = value;
162 left = History[(Offset-Delay[0])&SRC_HISTORY_MASK];
163 right = History[(Offset-Delay[1])&SRC_HISTORY_MASK];
165 PendingClicks[FrontLeft] += Values[(Offset+1)&HRIR_MASK][0] +
166 Coeffs[0][0] * left;
167 PendingClicks[FrontRight] += Values[(Offset+1)&HRIR_MASK][1] +
168 Coeffs[0][1] * right;
170 OutPos -= BufferSize;
175 void MERGE4(MixDirect_,SAMPLER,_,SUFFIX)(
176 ALsource *Source, ALCdevice *Device, DirectParams *params,
177 const ALfloat *RESTRICT data, ALuint srcfrac,
178 ALuint OutPos, ALuint SamplesToDo, ALuint BufferSize)
180 const ALuint NumChannels = Source->NumChannels;
181 ALfloat (*RESTRICT DryBuffer)[MaxChannels];
182 ALfloat *RESTRICT ClickRemoval, *RESTRICT PendingClicks;
183 ALIGN(16) ALfloat DrySend[MaxChannels];
184 FILTER *DryFilter;
185 ALuint pos, frac;
186 ALuint BufferIdx;
187 ALuint increment;
188 ALfloat value;
189 ALuint i, c;
191 increment = Source->Params.Step;
193 DryBuffer = Device->DryBuffer;
194 ClickRemoval = Device->ClickRemoval;
195 PendingClicks = Device->PendingClicks;
196 DryFilter = &params->iirFilter;
198 for(i = 0;i < NumChannels;i++)
200 for(c = 0;c < MaxChannels;c++)
201 DrySend[c] = params->Gains[i][c];
203 pos = 0;
204 frac = srcfrac;
206 if(OutPos == 0)
208 value = SAMPLER(data + pos*NumChannels + i, NumChannels, frac);
210 value = lpFilter2PC(DryFilter, i, value);
211 ApplyValue(ClickRemoval, -value, DrySend);
213 for(BufferIdx = 0;BufferIdx < BufferSize;BufferIdx++)
215 value = SAMPLER(data + pos*NumChannels + i, NumChannels, frac);
217 value = lpFilter2P(DryFilter, i, value);
218 ApplyValue(DryBuffer[OutPos], value, DrySend);
220 frac += increment;
221 pos += frac>>FRACTIONBITS;
222 frac &= FRACTIONMASK;
223 OutPos++;
225 if(OutPos == SamplesToDo)
227 value = SAMPLER(data + pos*NumChannels + i, NumChannels, frac);
229 value = lpFilter2PC(DryFilter, i, value);
230 ApplyValue(PendingClicks, value, DrySend);
232 OutPos -= BufferSize;
237 void MERGE4(MixSend_,SAMPLER,_,SUFFIX)(
238 ALsource *Source, ALuint sendidx, SendParams *params,
239 const ALfloat *RESTRICT data, ALuint srcfrac,
240 ALuint OutPos, ALuint SamplesToDo, ALuint BufferSize)
242 const ALuint NumChannels = Source->NumChannels;
243 ALeffectslot *Slot;
244 ALfloat WetSend;
245 ALfloat *WetBuffer;
246 ALfloat *WetClickRemoval;
247 ALfloat *WetPendingClicks;
248 FILTER *WetFilter;
249 ALuint pos, frac;
250 ALuint BufferIdx;
251 ALuint increment;
252 ALfloat value;
253 ALuint i;
255 increment = Source->Params.Step;
257 Slot = Source->Params.Slot[sendidx];
258 WetBuffer = Slot->WetBuffer;
259 WetClickRemoval = Slot->ClickRemoval;
260 WetPendingClicks = Slot->PendingClicks;
261 WetFilter = &params->iirFilter;
262 WetSend = params->Gain;
264 for(i = 0;i < NumChannels;i++)
266 pos = 0;
267 frac = srcfrac;
269 if(OutPos == 0)
271 value = SAMPLER(data + pos*NumChannels + i, NumChannels, frac);
273 value = lpFilter2PC(WetFilter, i, value);
274 WetClickRemoval[0] -= value * WetSend;
276 for(BufferIdx = 0;BufferIdx < BufferSize;BufferIdx++)
278 value = SAMPLER(data + pos*NumChannels + i, NumChannels, frac);
280 value = lpFilter2P(WetFilter, i, value);
281 WetBuffer[OutPos] += value * WetSend;
283 frac += increment;
284 pos += frac>>FRACTIONBITS;
285 frac &= FRACTIONMASK;
286 OutPos++;
288 if(OutPos == SamplesToDo)
290 value = SAMPLER(data + pos*NumChannels + i, NumChannels, frac);
292 value = lpFilter2PC(WetFilter, i, value);
293 WetPendingClicks[0] += value * WetSend;
295 OutPos -= BufferSize;
/* Drop the helper macros defined at the top of this file so they do not
 * remain defined past its end. */
#undef LIKELY
#undef UNLIKELY

#undef REAL_MERGE2
#undef MERGE2
#undef REAL_MERGE4
#undef MERGE4