Add SSE2 and SSE4.1 cubic resamplers
[openal-soft.git] / Alc / mixer.c
blob 87d94fff3a44ba62e1ddab1294e8d667d1608d0b
/**
 * OpenAL cross platform audio library
 * Copyright (C) 1999-2007 by authors.
 * This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 *  License along with this library; if not, write to the
 *  Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 * Or go to http://www.gnu.org/copyleft/lgpl.html
 */
21 #include "config.h"
23 #include <math.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <ctype.h>
27 #include <assert.h>
29 #include "alMain.h"
30 #include "AL/al.h"
31 #include "AL/alc.h"
32 #include "alSource.h"
33 #include "alBuffer.h"
34 #include "alListener.h"
35 #include "alAuxEffectSlot.h"
36 #include "alu.h"
38 #include "mixer_defs.h"
41 extern inline void InitiatePositionArrays(ALuint frac, ALuint increment, ALuint *frac_arr, ALuint *pos_arr, ALuint size);
43 alignas(16) ALfloat CubicLUT[FRACTIONONE][4];
46 void aluInitResamplers(void)
48 ALuint i;
49 for(i = 0;i < FRACTIONONE;i++)
51 ALfloat mu = (ALfloat)i / FRACTIONONE;
52 ALfloat mu2 = mu*mu, mu3 = mu*mu*mu;
53 CubicLUT[i][0] = -0.5f*mu3 + mu2 + -0.5f*mu;
54 CubicLUT[i][1] = 1.5f*mu3 + -2.5f*mu2 + 1.0f;
55 CubicLUT[i][2] = -1.5f*mu3 + 2.0f*mu2 + 0.5f*mu;
56 CubicLUT[i][3] = 0.5f*mu3 + -0.5f*mu2;
61 static inline HrtfMixerFunc SelectHrtfMixer(void)
63 #ifdef HAVE_SSE
64 if((CPUCapFlags&CPU_CAP_SSE))
65 return MixHrtf_SSE;
66 #endif
67 #ifdef HAVE_NEON
68 if((CPUCapFlags&CPU_CAP_NEON))
69 return MixHrtf_Neon;
70 #endif
72 return MixHrtf_C;
75 static inline MixerFunc SelectMixer(void)
77 #ifdef HAVE_SSE
78 if((CPUCapFlags&CPU_CAP_SSE))
79 return Mix_SSE;
80 #endif
81 #ifdef HAVE_NEON
82 if((CPUCapFlags&CPU_CAP_NEON))
83 return Mix_Neon;
84 #endif
86 return Mix_C;
89 static inline ResamplerFunc SelectResampler(enum Resampler resampler)
91 switch(resampler)
93 case PointResampler:
94 return Resample_point32_C;
95 case LinearResampler:
96 #ifdef HAVE_SSE4_1
97 if((CPUCapFlags&CPU_CAP_SSE4_1))
98 return Resample_lerp32_SSE41;
99 #endif
100 #ifdef HAVE_SSE2
101 if((CPUCapFlags&CPU_CAP_SSE2))
102 return Resample_lerp32_SSE2;
103 #endif
104 return Resample_lerp32_C;
105 case CubicResampler:
106 #ifdef HAVE_SSE4_1
107 if((CPUCapFlags&CPU_CAP_SSE4_1))
108 return Resample_cubic32_SSE41;
109 #endif
110 #ifdef HAVE_SSE2
111 if((CPUCapFlags&CPU_CAP_SSE2))
112 return Resample_cubic32_SSE2;
113 #endif
114 return Resample_cubic32_C;
115 case ResamplerMax:
116 /* Shouldn't happen */
117 break;
120 return Resample_point32_C;
124 static inline ALfloat Sample_ALbyte(ALbyte val)
125 { return val * (1.0f/127.0f); }
127 static inline ALfloat Sample_ALshort(ALshort val)
128 { return val * (1.0f/32767.0f); }
130 static inline ALfloat Sample_ALfloat(ALfloat val)
131 { return val; }
133 #define DECL_TEMPLATE(T) \
134 static void Load_##T(ALfloat *dst, const T *src, ALuint srcstep, ALuint samples)\
136 ALuint i; \
137 for(i = 0;i < samples;i++) \
138 dst[i] = Sample_##T(src[i*srcstep]); \
141 DECL_TEMPLATE(ALbyte)
142 DECL_TEMPLATE(ALshort)
143 DECL_TEMPLATE(ALfloat)
145 #undef DECL_TEMPLATE
147 static void LoadSamples(ALfloat *dst, const ALvoid *src, ALuint srcstep, enum FmtType srctype, ALuint samples)
149 switch(srctype)
151 case FmtByte:
152 Load_ALbyte(dst, src, srcstep, samples);
153 break;
154 case FmtShort:
155 Load_ALshort(dst, src, srcstep, samples);
156 break;
157 case FmtFloat:
158 Load_ALfloat(dst, src, srcstep, samples);
159 break;
163 static void SilenceSamples(ALfloat *dst, ALuint samples)
165 ALuint i;
166 for(i = 0;i < samples;i++)
167 dst[i] = 0.0f;
171 static const ALfloat *DoFilters(ALfilterState *lpfilter, ALfilterState *hpfilter,
172 ALfloat *restrict dst, const ALfloat *restrict src,
173 ALuint numsamples, enum ActiveFilters type)
175 ALuint i;
176 switch(type)
178 case AF_None:
179 break;
181 case AF_LowPass:
182 ALfilterState_process(lpfilter, dst, src, numsamples);
183 return dst;
184 case AF_HighPass:
185 ALfilterState_process(hpfilter, dst, src, numsamples);
186 return dst;
188 case AF_BandPass:
189 for(i = 0;i < numsamples;)
191 ALfloat temp[64];
192 ALuint todo = minu(64, numsamples-i);
194 ALfilterState_process(lpfilter, temp, src+i, todo);
195 ALfilterState_process(hpfilter, dst+i, temp, todo);
196 i += todo;
198 return dst;
200 return src;
204 ALvoid MixSource(ALvoice *voice, ALsource *Source, ALCdevice *Device, ALuint SamplesToDo)
206 MixerFunc Mix;
207 HrtfMixerFunc HrtfMix;
208 ResamplerFunc Resample;
209 ALbufferlistitem *BufferListItem;
210 ALuint DataPosInt, DataPosFrac;
211 ALboolean isbformat = AL_FALSE;
212 ALboolean Looping;
213 ALuint increment;
214 enum Resampler Resampler;
215 ALenum State;
216 ALuint OutPos;
217 ALuint NumChannels;
218 ALuint SampleSize;
219 ALint64 DataSize64;
220 ALuint IrSize;
221 ALuint chan, j;
223 /* Get source info */
224 State = Source->state;
225 BufferListItem = ATOMIC_LOAD(&Source->current_buffer);
226 DataPosInt = Source->position;
227 DataPosFrac = Source->position_fraction;
228 Looping = Source->Looping;
229 Resampler = Source->Resampler;
230 NumChannels = Source->NumChannels;
231 SampleSize = Source->SampleSize;
232 increment = voice->Step;
234 while(BufferListItem)
236 ALbuffer *buffer;
237 if((buffer=BufferListItem->buffer) != NULL)
239 isbformat = (buffer->FmtChannels == FmtBFormat2D ||
240 buffer->FmtChannels == FmtBFormat3D);
241 break;
243 BufferListItem = BufferListItem->next;
246 IrSize = (Device->Hrtf ? GetHrtfIrSize(Device->Hrtf) : 0);
248 Mix = SelectMixer();
249 HrtfMix = SelectHrtfMixer();
250 Resample = ((increment == FRACTIONONE && DataPosFrac == 0) ?
251 Resample_copy32_C : SelectResampler(Resampler));
253 OutPos = 0;
254 do {
255 const ALuint BufferPrePadding = ResamplerPrePadding[Resampler];
256 const ALuint BufferPadding = ResamplerPadding[Resampler];
257 ALuint SrcBufferSize, DstBufferSize;
259 /* Figure out how many buffer samples will be needed */
260 DataSize64 = SamplesToDo-OutPos;
261 DataSize64 *= increment;
262 DataSize64 += DataPosFrac+FRACTIONMASK;
263 DataSize64 >>= FRACTIONBITS;
264 DataSize64 += BufferPadding+BufferPrePadding;
266 SrcBufferSize = (ALuint)mini64(DataSize64, BUFFERSIZE);
268 /* Figure out how many samples we can actually mix from this. */
269 DataSize64 = SrcBufferSize;
270 DataSize64 -= BufferPadding+BufferPrePadding;
271 DataSize64 <<= FRACTIONBITS;
272 DataSize64 -= DataPosFrac;
274 DstBufferSize = (ALuint)((DataSize64+(increment-1)) / increment);
275 DstBufferSize = minu(DstBufferSize, (SamplesToDo-OutPos));
277 /* Some mixers like having a multiple of 4, so try to give that unless
278 * this is the last update. */
279 if(OutPos+DstBufferSize < SamplesToDo)
280 DstBufferSize &= ~3;
282 for(chan = 0;chan < NumChannels;chan++)
284 const ALfloat *ResampledData;
285 ALfloat *SrcData = Device->SourceData;
286 ALuint SrcDataSize = 0;
288 if(Source->SourceType == AL_STATIC)
290 const ALbuffer *ALBuffer = BufferListItem->buffer;
291 const ALubyte *Data = ALBuffer->data;
292 ALuint DataSize;
293 ALuint pos;
295 /* If current pos is beyond the loop range, do not loop */
296 if(Looping == AL_FALSE || DataPosInt >= (ALuint)ALBuffer->LoopEnd)
298 Looping = AL_FALSE;
300 if(DataPosInt >= BufferPrePadding)
301 pos = DataPosInt - BufferPrePadding;
302 else
304 DataSize = BufferPrePadding - DataPosInt;
305 DataSize = minu(SrcBufferSize - SrcDataSize, DataSize);
307 SilenceSamples(&SrcData[SrcDataSize], DataSize);
308 SrcDataSize += DataSize;
310 pos = 0;
313 /* Copy what's left to play in the source buffer, and clear the
314 * rest of the temp buffer */
315 DataSize = minu(SrcBufferSize - SrcDataSize, ALBuffer->SampleLen - pos);
317 LoadSamples(&SrcData[SrcDataSize], &Data[(pos*NumChannels + chan)*SampleSize],
318 NumChannels, ALBuffer->FmtType, DataSize);
319 SrcDataSize += DataSize;
321 SilenceSamples(&SrcData[SrcDataSize], SrcBufferSize - SrcDataSize);
322 SrcDataSize += SrcBufferSize - SrcDataSize;
324 else
326 ALuint LoopStart = ALBuffer->LoopStart;
327 ALuint LoopEnd = ALBuffer->LoopEnd;
329 if(DataPosInt >= LoopStart)
331 pos = DataPosInt-LoopStart;
332 while(pos < BufferPrePadding)
333 pos += LoopEnd-LoopStart;
334 pos -= BufferPrePadding;
335 pos += LoopStart;
337 else if(DataPosInt >= BufferPrePadding)
338 pos = DataPosInt - BufferPrePadding;
339 else
341 DataSize = BufferPrePadding - DataPosInt;
342 DataSize = minu(SrcBufferSize - SrcDataSize, DataSize);
344 SilenceSamples(&SrcData[SrcDataSize], DataSize);
345 SrcDataSize += DataSize;
347 pos = 0;
350 /* Copy what's left of this loop iteration, then copy repeats
351 * of the loop section */
352 DataSize = LoopEnd - pos;
353 DataSize = minu(SrcBufferSize - SrcDataSize, DataSize);
355 LoadSamples(&SrcData[SrcDataSize], &Data[(pos*NumChannels + chan)*SampleSize],
356 NumChannels, ALBuffer->FmtType, DataSize);
357 SrcDataSize += DataSize;
359 DataSize = LoopEnd-LoopStart;
360 while(SrcBufferSize > SrcDataSize)
362 DataSize = minu(SrcBufferSize - SrcDataSize, DataSize);
364 LoadSamples(&SrcData[SrcDataSize], &Data[(LoopStart*NumChannels + chan)*SampleSize],
365 NumChannels, ALBuffer->FmtType, DataSize);
366 SrcDataSize += DataSize;
370 else
372 /* Crawl the buffer queue to fill in the temp buffer */
373 ALbufferlistitem *tmpiter = BufferListItem;
374 ALuint pos;
376 if(DataPosInt >= BufferPrePadding)
377 pos = DataPosInt - BufferPrePadding;
378 else
380 pos = BufferPrePadding - DataPosInt;
381 while(pos > 0)
383 ALbufferlistitem *prev;
384 if((prev=tmpiter->prev) != NULL)
385 tmpiter = prev;
386 else if(Looping)
388 while(tmpiter->next)
389 tmpiter = tmpiter->next;
391 else
393 ALuint DataSize = minu(SrcBufferSize - SrcDataSize, pos);
395 SilenceSamples(&SrcData[SrcDataSize], DataSize);
396 SrcDataSize += DataSize;
398 pos = 0;
399 break;
402 if(tmpiter->buffer)
404 if((ALuint)tmpiter->buffer->SampleLen > pos)
406 pos = tmpiter->buffer->SampleLen - pos;
407 break;
409 pos -= tmpiter->buffer->SampleLen;
414 while(tmpiter && SrcBufferSize > SrcDataSize)
416 const ALbuffer *ALBuffer;
417 if((ALBuffer=tmpiter->buffer) != NULL)
419 const ALubyte *Data = ALBuffer->data;
420 ALuint DataSize = ALBuffer->SampleLen;
422 /* Skip the data already played */
423 if(DataSize <= pos)
424 pos -= DataSize;
425 else
427 Data += (pos*NumChannels + chan)*SampleSize;
428 DataSize -= pos;
429 pos -= pos;
431 DataSize = minu(SrcBufferSize - SrcDataSize, DataSize);
432 LoadSamples(&SrcData[SrcDataSize], Data, NumChannels,
433 ALBuffer->FmtType, DataSize);
434 SrcDataSize += DataSize;
437 tmpiter = tmpiter->next;
438 if(!tmpiter && Looping)
439 tmpiter = ATOMIC_LOAD(&Source->queue);
440 else if(!tmpiter)
442 SilenceSamples(&SrcData[SrcDataSize], SrcBufferSize - SrcDataSize);
443 SrcDataSize += SrcBufferSize - SrcDataSize;
448 /* Now resample, then filter and mix to the appropriate outputs. */
449 ResampledData = Resample(
450 &SrcData[BufferPrePadding], DataPosFrac, increment,
451 Device->ResampledData, DstBufferSize
454 DirectParams *parms = &voice->Direct;
455 const ALfloat *samples;
457 samples = DoFilters(
458 &parms->Filters[chan].LowPass, &parms->Filters[chan].HighPass,
459 Device->FilteredData, ResampledData, DstBufferSize,
460 parms->Filters[chan].ActiveType
462 if(!voice->IsHrtf)
463 Mix(samples, parms->OutChannels, parms->OutBuffer, parms->Gains[chan],
464 parms->Counter, OutPos, DstBufferSize);
465 else
466 HrtfMix(parms->OutBuffer, samples, parms->Counter, voice->Offset,
467 OutPos, IrSize, &parms->Hrtf.Params[chan],
468 &parms->Hrtf.State[chan], DstBufferSize);
471 /* Only the first channel for B-Format buffers (W channel) goes to
472 * the send paths. */
473 if(chan > 0 && isbformat)
474 continue;
475 for(j = 0;j < Device->NumAuxSends;j++)
477 SendParams *parms = &voice->Send[j];
478 const ALfloat *samples;
480 if(!parms->OutBuffer)
481 continue;
483 samples = DoFilters(
484 &parms->Filters[chan].LowPass, &parms->Filters[chan].HighPass,
485 Device->FilteredData, ResampledData, DstBufferSize,
486 parms->Filters[chan].ActiveType
488 Mix(samples, 1, parms->OutBuffer, &parms->Gain,
489 parms->Counter, OutPos, DstBufferSize);
492 /* Update positions */
493 DataPosFrac += increment*DstBufferSize;
494 DataPosInt += DataPosFrac>>FRACTIONBITS;
495 DataPosFrac &= FRACTIONMASK;
497 OutPos += DstBufferSize;
498 voice->Offset += DstBufferSize;
499 voice->Direct.Counter = maxu(voice->Direct.Counter, DstBufferSize) - DstBufferSize;
500 for(j = 0;j < Device->NumAuxSends;j++)
501 voice->Send[j].Counter = maxu(voice->Send[j].Counter, DstBufferSize) - DstBufferSize;
503 /* Handle looping sources */
504 while(1)
506 const ALbuffer *ALBuffer;
507 ALuint DataSize = 0;
508 ALuint LoopStart = 0;
509 ALuint LoopEnd = 0;
511 if((ALBuffer=BufferListItem->buffer) != NULL)
513 DataSize = ALBuffer->SampleLen;
514 LoopStart = ALBuffer->LoopStart;
515 LoopEnd = ALBuffer->LoopEnd;
516 if(LoopEnd > DataPosInt)
517 break;
520 if(Looping && Source->SourceType == AL_STATIC)
522 assert(LoopEnd > LoopStart);
523 DataPosInt = ((DataPosInt-LoopStart)%(LoopEnd-LoopStart)) + LoopStart;
524 break;
527 if(DataSize > DataPosInt)
528 break;
530 if(!(BufferListItem=BufferListItem->next))
532 if(Looping)
533 BufferListItem = ATOMIC_LOAD(&Source->queue);
534 else
536 State = AL_STOPPED;
537 BufferListItem = NULL;
538 DataPosInt = 0;
539 DataPosFrac = 0;
540 break;
544 DataPosInt -= DataSize;
546 } while(State == AL_PLAYING && OutPos < SamplesToDo);
548 /* Update source info */
549 Source->state = State;
550 ATOMIC_STORE(&Source->current_buffer, BufferListItem);
551 Source->position = DataPosInt;
552 Source->position_fraction = DataPosFrac;