From f1ce13999725ac068f8d3a2126708bc11c10aa58 Mon Sep 17 00:00:00 2001
From: Chris Robinson
Date: Sun, 9 Sep 2012 04:02:54 -0700
Subject: [PATCH] Use SSE to do 4 samples at once (non-HRTF direct mix),
 instead of to apply a matrix row

MaxChannels no longer needs to be a multiple of 4 now, either.
---
Review note (not part of the commit message): the 4-sample loop tests
"BufferSize-pos > 3" instead of "pos < BufferSize-3". BufferSize is an
ALuint, so BufferSize-3 wraps around for BufferSize < 3 and the loop would
read data[] and write DryBuffer[] far out of bounds.

 Alc/mixer_sse.c        | 70 ++++++++++++++++++++++++++++++++++----------------
 OpenAL32/Include/alu.h | 21 +++++++--------
 2 files changed, 58 insertions(+), 33 deletions(-)

diff --git a/Alc/mixer_sse.c b/Alc/mixer_sse.c
index 101b296e..cc3b52cc 100644
--- a/Alc/mixer_sse.c
+++ b/Alc/mixer_sse.c
@@ -65,20 +65,6 @@ static __inline void ApplyCoeffs(ALuint Offset, ALfloat (*RESTRICT Values)[2],
 }
 
 
-static __inline void ApplyValue(ALfloat *RESTRICT Output, ALfloat value, const ALfloat *DrySend)
-{
-    const __m128 val4 = _mm_set1_ps(value);
-    ALuint c;
-    for(c = 0;c < MaxChannels;c += 4)
-    {
-        const __m128 gains = _mm_load_ps(&DrySend[c]);
-        __m128 out = _mm_load_ps(&Output[c]);
-        out = _mm_add_ps(out, _mm_mul_ps(val4, gains));
-        _mm_store_ps(&Output[c], out);
-    }
-}
-
-
 void MixDirect_SSE(ALsource *Source, ALCdevice *Device, DirectParams *params,
   const ALfloat *RESTRICT data, ALuint srcchan,
   ALuint OutPos, ALuint SamplesToDo, ALuint BufferSize)
@@ -86,9 +72,9 @@ void MixDirect_SSE(ALsource *Source, ALCdevice *Device, DirectParams *params,
     ALfloat (*RESTRICT DryBuffer)[MaxChannels];
     ALfloat *RESTRICT ClickRemoval, *RESTRICT PendingClicks;
     ALIGN(16) ALfloat DrySend[MaxChannels];
+    ALIGN(16) ALfloat value[4];
     FILTER *DryFilter;
     ALuint pos;
-    ALfloat value;
     ALuint c;
     (void)Source;
 
@@ -103,19 +89,59 @@ void MixDirect_SSE(ALsource *Source, ALCdevice *Device, DirectParams *params,
     pos = 0;
     if(OutPos == 0)
     {
-        value = lpFilter2PC(DryFilter, srcchan, data[pos]);
-        ApplyValue(ClickRemoval, -value, DrySend);
+        value[0] = lpFilter2PC(DryFilter, srcchan, data[pos]);
+        for(c = 0;c < MaxChannels;c++)
+            ClickRemoval[c] -= value[0]*DrySend[c];
+    }
+    /* Mix four samples per pass. BufferSize is unsigned, so test the number
+     * of samples remaining instead of BufferSize-3, which wraps around when
+     * fewer than three samples are given. */
+    for(pos = 0;BufferSize-pos > 3;pos += 4)
+    {
+        __m128 val4;
+
+        value[0] = lpFilter2P(DryFilter, srcchan, data[pos  ]);
+        value[1] = lpFilter2P(DryFilter, srcchan, data[pos+1]);
+        value[2] = lpFilter2P(DryFilter, srcchan, data[pos+2]);
+        value[3] = lpFilter2P(DryFilter, srcchan, data[pos+3]);
+        val4 = _mm_load_ps(value);
+
+        for(c = 0;c < MaxChannels;c++)
+        {
+            const __m128 gain = _mm_set1_ps(DrySend[c]);
+            __m128 dry4;
+
+            /* DryBuffer is indexed [sample][channel], so gather this
+             * channel's four samples through the aligned scratch array. */
+            value[0] = DryBuffer[OutPos  ][c];
+            value[1] = DryBuffer[OutPos+1][c];
+            value[2] = DryBuffer[OutPos+2][c];
+            value[3] = DryBuffer[OutPos+3][c];
+            dry4 = _mm_load_ps(value);
+
+            dry4 = _mm_add_ps(dry4, _mm_mul_ps(val4, gain));
+
+            _mm_store_ps(value, dry4);
+            DryBuffer[OutPos  ][c] = value[0];
+            DryBuffer[OutPos+1][c] = value[1];
+            DryBuffer[OutPos+2][c] = value[2];
+            DryBuffer[OutPos+3][c] = value[3];
+        }
+
+        OutPos += 4;
     }
-    for(pos = 0;pos < BufferSize;pos++)
+    for(;pos < BufferSize;pos++)
     {
-        value = lpFilter2P(DryFilter, srcchan, data[pos]);
-        ApplyValue(DryBuffer[OutPos], value, DrySend);
+        value[0] = lpFilter2P(DryFilter, srcchan, data[pos]);
+        for(c = 0;c < MaxChannels;c++)
+            DryBuffer[OutPos][c] += value[0]*DrySend[c];
         OutPos++;
     }
     if(OutPos == SamplesToDo)
     {
-        value = lpFilter2PC(DryFilter, srcchan, data[pos]);
-        ApplyValue(PendingClicks, value, DrySend);
+        value[0] = lpFilter2PC(DryFilter, srcchan, data[pos]);
+        for(c = 0;c < MaxChannels;c++)
+            PendingClicks[c] += value[0]*DrySend[c];
     }
 }
 #define NO_MIXDIRECT
diff --git a/OpenAL32/Include/alu.h b/OpenAL32/Include/alu.h
index d55f7269..7bb579de 100644
--- a/OpenAL32/Include/alu.h
+++ b/OpenAL32/Include/alu.h
@@ -128,17 +128,16 @@ enum Resampler {
 
 enum Channel {
     FrontLeft = 0,
-    FrontRight,   /* 1 */
-    FrontCenter,  /* 2 */
-    LFE,          /* 3 */
-    BackLeft,     /* 4 */
-    BackRight,    /* 5 */
-    BackCenter,   /* 6 */
-    SideLeft,     /* 7 */
-    SideRight,    /* 8 */
-
-    /* Must be a multiple of 4 */
-    MaxChannels = 12,
+    FrontRight,
+    FrontCenter,
+    LFE,
+    BackLeft,
+    BackRight,
+    BackCenter,
+    SideLeft,
+    SideRight,
+
+    MaxChannels,
 };
 
 enum DistanceModel {
-- 
2.11.4.GIT