Alc/mixer_neon.c

   1 #include "config.h"
   2
   3 #include <arm_neon.h>
   4
   5 #include "AL/al.h"
   6 #include "AL/alc.h"
   7 #include "alMain.h"
   8 #include "alu.h"
   9 #include "hrtf.h"
  10
  11
  /* Accumulates a stereo impulse response, scaled by a left/right sample
   * pair, into the Values accumulation buffer.
   *
   * Offset - index in Values where the first tap is accumulated; indices are
   *          wrapped with HRIR_MASK.
   * Values - [left, right] accumulators, read and written in place.
   * IrSize - number of taps to apply. Taps are processed in pairs, so an odd
   *          count would also touch Coeffs[IrSize]; assumes callers pass an
   *          even value - TODO confirm.
   * Coeffs - [left, right] coefficient pairs, one per tap (read only).
   * left   - scale applied to element [0] of every coefficient pair.
   * right  - scale applied to element [1] of every coefficient pair.
   */
  static inline void ApplyCoeffs(ALuint Offset, ALfloat (*restrict Values)[2],
                                 const ALuint IrSize,
                                 ALfloat (*restrict Coeffs)[2],
                                 ALfloat left, ALfloat right)
  {
      /* Lane layout {left, right, left, right}, matching two consecutive
       * [left, right] coefficient pairs loaded as one quad vector. */
      const float32x2_t gain_pair = vset_lane_f32(right, vdup_n_f32(left), 1);
      const float32x4_t gain_quad = vcombine_f32(gain_pair, gain_pair);
      ALuint tap = 0;

      while(tap < IrSize)
      {
          /* The two destination slots are wrapped independently, so a pair
           * may straddle the end of the Values buffer. */
          const ALuint first = (Offset+tap) & HRIR_MASK;
          const ALuint second = (first+1) & HRIR_MASK;
          float32_t *const slot0 = (float32_t*)&Values[first][0];
          float32_t *const slot1 = (float32_t*)&Values[second][0];

          const float32x4_t accum = vcombine_f32(vld1_f32(slot0), vld1_f32(slot1));
          const float32x4_t taps = vld1q_f32((float32_t*)&Coeffs[tap][0]);
          const float32x4_t mixed = vmlaq_f32(accum, taps, gain_quad);

          vst1_f32(slot0, vget_low_f32(mixed));
          vst1_f32(slot1, vget_high_f32(mixed));

          tap += 2;
      }
  }
  39
  40
  /* Textually include mixer_inc.c with SUFFIX set to Neon, then drop the
   * macro again so it does not leak into the rest of this file.
   * NOTE(review): presumably mixer_inc.c uses SUFFIX to name the routines it
   * defines and calls the ApplyCoeffs defined above - confirm against
   * mixer_inc.c. */
  41 #define SUFFIX Neon
  42 #include "mixer_inc.c"
  43 #undef SUFFIX
  44
  45
  /* Adds the samples in data, scaled by a per-channel gain, to each output
   * channel's buffer.
   *
   * data       - input samples; BufferSize of them are read.
   * OutChans   - number of output channels (rows of OutBuffer and entries of
   *              Gains) to process.
   * OutBuffer  - per-channel output buffers, accumulated into in place.
   * Gains      - per-channel gain state. Current is read as the starting gain
   *              and is rewritten only when gain stepping is applied.
   * Counter    - number of samples over which the gain is multiplied by Step.
   * OutPos     - offset into each output buffer where mixing starts.
   * BufferSize - number of samples to mix.
   */
  void Mix_Neon(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE],
                MixGains *Gains, ALuint Counter, ALuint OutPos, ALuint BufferSize)
  {
      ALuint chan;

      for(chan = 0;chan < OutChans;chan++)
      {
          MixGains *const state = &Gains[chan];
          ALfloat *const row = OutBuffer[chan];
          const ALfloat step = state->Step;
          ALfloat gain = state->Current;
          ALuint pos = 0;

          if(step != 1.0f && Counter > 0)
          {
              /* Stepping phase: scalar mix, scaling the gain after every
               * sample. */
              while(pos < BufferSize && pos < Counter)
              {
                  row[OutPos+pos] += data[pos]*gain;
                  gain *= step;
                  pos++;
              }
              /* Once all Counter steps have been taken, snap to the target
               * gain. */
              if(pos == Counter)
                  gain = state->Target;
              state->Current = gain;

              /* Continue one sample at a time until pos is a multiple of 4
               * or the mix is done. */
              while(pos < BufferSize && (pos&3) != 0)
              {
                  row[OutPos+pos] += data[pos]*gain;
                  pos++;
              }
          }

          /* Mix the remainder only when the gain magnitude exceeds the
           * silence threshold; a NaN gain also fails this test and is
           * skipped. */
          if(fabsf(gain) > GAIN_SILENCE_THRESHOLD)
          {
              const float32x4_t gain_quad = vdupq_n_f32(gain);

              /* Four samples per iteration while at least four remain. */
              while(BufferSize-pos >= 4)
              {
                  const float32x4_t in_quad = vld1q_f32(&data[pos]);
                  const float32x4_t out_quad = vld1q_f32(&row[OutPos+pos]);
                  const float32x4_t scaled = vmulq_f32(in_quad, gain_quad);

                  vst1q_f32(&row[OutPos+pos], vaddq_f32(out_quad, scaled));
                  pos += 4;
              }

              /* Scalar tail for the last zero to three samples. */
              while(pos < BufferSize)
              {
                  row[OutPos+pos] += data[pos]*gain;
                  pos++;
              }
          }
      }
  }