From 904cdfda32d11f5c3376a20363051b98bc1d3033 Mon Sep 17 00:00:00 2001
From: Chris Robinson
Date: Wed, 30 Sep 2015 17:25:28 -0700
Subject: [PATCH] Avoid double-checks for the stepping mixer loops

---
 Review note: in Mix_Neon and Mix_SSE the loop that follows the gain
 stepping must be bounded by pos rounded up to the next multiple of 4,
 i.e. (pos+3)&~3. Masking with &3 instead yields a value in 0..3 rather
 than a position, so the loop no longer matched the (pos&3) != 0 test it
 replaces and pos would generally not end up aligned to 4. Because the
 added lines changed, the post-image blob ids on the "index" lines of
 mixer_neon.c and mixer_sse.c are informational only.

 Alc/mixer_c.c    |  3 ++-
 Alc/mixer_neon.c |  7 +++++--
 Alc/mixer_sse.c  | 14 +++++++++-----
 3 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/Alc/mixer_c.c b/Alc/mixer_c.c
index 59ee5ebf..0d3e99a6 100644
--- a/Alc/mixer_c.c
+++ b/Alc/mixer_c.c
@@ -122,7 +122,8 @@ void Mix_C(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[B
         step = Gains[c].Step;
         if(step != 0.0f && Counter > 0)
         {
-            for(;pos < BufferSize && pos < Counter;pos++)
+            ALuint minsize = minu(BufferSize, Counter);
+            for(;pos < minsize;pos++)
             {
                 OutBuffer[c][OutPos+pos] += data[pos]*gain;
                 gain += step;
diff --git a/Alc/mixer_neon.c b/Alc/mixer_neon.c
index 6078a40d..0c50140e 100644
--- a/Alc/mixer_neon.c
+++ b/Alc/mixer_neon.c
@@ -107,7 +107,8 @@ void Mix_Neon(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer
         step = Gains[c].Step;
         if(step != 0.0f && Counter > 0)
         {
-            for(;pos < BufferSize && pos < Counter;pos++)
+            ALuint minsize = minu(BufferSize, Counter);
+            for(;pos < minsize;pos++)
             {
                 OutBuffer[c][OutPos+pos] += data[pos]*gain;
                 gain += step;
@@ -115,8 +116,10 @@ void Mix_Neon(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer
             if(pos == Counter)
                 gain = Gains[c].Target;
             Gains[c].Current = gain;
+
             /* Mix until pos is aligned with 4 or the mix is done. */
-            for(;pos < BufferSize && (pos&3) != 0;pos++)
+            minsize = minu(BufferSize, (pos+3)&~3);
+            for(;pos < minsize;pos++)
                 OutBuffer[c][OutPos+pos] += data[pos]*gain;
         }
 
diff --git a/Alc/mixer_sse.c b/Alc/mixer_sse.c
index 208542ff..12765486 100644
--- a/Alc/mixer_sse.c
+++ b/Alc/mixer_sse.c
@@ -152,7 +152,7 @@ void Mix_SSE(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)
              MixGains *Gains, ALuint Counter, ALuint OutPos, ALuint BufferSize)
 {
     ALfloat gain, step;
-    __m128 gain4, step4;
+    __m128 gain4;
     ALuint c;
 
     for(c = 0;c < OutChans;c++)
@@ -162,9 +162,11 @@ void Mix_SSE(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)
         step = Gains[c].Step;
         if(step != 0.0f && Counter > 0)
         {
+            ALuint minsize = minu(BufferSize, Counter);
             /* Mix with applying gain steps in aligned multiples of 4. */
-            if(BufferSize-pos > 3 && Counter-pos > 3)
+            if(minsize-pos > 3)
             {
+                __m128 step4;
                 gain4 = _mm_setr_ps(
                     gain,
                     gain + step,
@@ -179,11 +181,11 @@ void Mix_SSE(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)
                     gain4 = _mm_add_ps(gain4, step4);
                     _mm_store_ps(&OutBuffer[c][OutPos+pos], dry4);
                     pos += 4;
-                } while(BufferSize-pos > 3 && Counter-pos > 3);
+                } while(minsize-pos > 3);
                 gain = _mm_cvtss_f32(gain4);
             }
             /* Mix with applying left over gain steps that aren't aligned multiples of 4. */
-            for(;pos < BufferSize && pos < Counter;pos++)
+            for(;pos < minsize;pos++)
             {
                 OutBuffer[c][OutPos+pos] += data[pos]*gain;
                 gain += step;
@@ -191,8 +193,10 @@ void Mix_SSE(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)
             if(pos == Counter)
                 gain = Gains[c].Target;
             Gains[c].Current = gain;
+
             /* Mix until pos is aligned with 4 or the mix is done. */
-            for(;pos < BufferSize && (pos&3) != 0;pos++)
+            minsize = minu(BufferSize, (pos+3)&~3);
+            for(;pos < minsize;pos++)
                 OutBuffer[c][OutPos+pos] += data[pos]*gain;
         }
 
-- 
2.11.4.GIT