Better organize the reverb code into separate labeled sections
[openal-soft.git] / Alc / mixer_neon.c
blob a89caeaefc5341ab51a767c7e55e8e29507feeec
1 #include "config.h"
3 #include <arm_neon.h>
5 #include "AL/al.h"
6 #include "AL/alc.h"
7 #include "alMain.h"
8 #include "alu.h"
9 #include "hrtf.h"
/* Derives a set of working HRIR coefficients from the target coefficients and
 * their per-sample step.
 *
 * For every left/right pair i in [0, IrSize) this stores
 *     OutCoeffs[i] = hrtfparams->Coeffs[i] - hrtfparams->CoeffStep[i]*Counter
 *
 * OutCoeffs  - destination array of left/right coefficient pairs.
 * hrtfparams - supplies the Coeffs and CoeffStep arrays that are read.
 * IrSize     - number of coefficient pairs to produce. Pairs are handled two
 *              at a time, so an odd count touches one extra pair.
 * Counter    - number of steps to back the coefficients off by.
 */
static inline void SetupCoeffs(ALfloat (*restrict OutCoeffs)[2],
                               const HrtfParams *hrtfparams,
                               ALuint IrSize, ALuint Counter)
{
    /* All four lanes carry -Counter, so the multiply-accumulate below
     * subtracts Counter whole steps from each coefficient. */
    const float32x4_t neg_count = vdupq_n_f32(-(float)Counter);
    ALuint i;

    /* One 128-bit vector spans two consecutive left/right pairs. */
    for(i = 0;i < IrSize;i += 2)
    {
        const float32x4_t delta = vld1q_f32((const float32_t*)hrtfparams->CoeffStep[i]);
        const float32x4_t base = vld1q_f32((const float32_t*)hrtfparams->Coeffs[i]);

        vst1q_f32((float32_t*)OutCoeffs[i], vmlaq_f32(base, delta, neg_count));
    }
}
/* Accumulates one left/right input sample pair through the HRIR coefficients
 * into the Values ring, then advances the coefficients by one step.
 *
 * For every pair i in [0, IrSize):
 *     Values[(Offset+i)&HRIR_MASK][0] += Coeffs[i][0]*left
 *     Values[(Offset+i)&HRIR_MASK][1] += Coeffs[i][1]*right
 *     Coeffs[i] += CoeffStep[i]
 *
 * Offset    - starting index into Values; indices wrap with HRIR_MASK.
 * Values    - accumulation buffer of left/right pairs, updated in place.
 * IrSize    - number of coefficient pairs to apply. Pairs are handled two at
 *             a time, so an odd count touches one extra pair.
 * Coeffs    - current coefficients, updated in place.
 * CoeffStep - per-call increment added to each coefficient.
 * left      - input scaled by the first element of each coefficient pair.
 * right     - input scaled by the second element of each coefficient pair.
 */
static inline void ApplyCoeffsStep(ALuint Offset, ALfloat (*restrict Values)[2],
                                   const ALuint IrSize,
                                   ALfloat (*restrict Coeffs)[2],
                                   const ALfloat (*restrict CoeffStep)[2],
                                   ALfloat left, ALfloat right)
{
    /* Lane layout { left, right, left, right }, matching two interleaved
     * coefficient pairs held in one vector. */
    const float32x2_t input2 = vset_lane_f32(right, vdup_n_f32(left), 1);
    const float32x4_t input4 = vcombine_f32(input2, input2);
    ALuint i;

    for(i = 0;i < IrSize;i += 2)
    {
        /* The two output slots are masked separately because the second one
         * may wrap around the end of the ring. */
        const ALuint idx0 = (Offset+i)&HRIR_MASK;
        const ALuint idx1 = (idx0+1)&HRIR_MASK;
        float32_t *const slot0 = (float32_t*)&Values[idx0][0];
        float32_t *const slot1 = (float32_t*)&Values[idx1][0];
        float32_t *const coeff_ptr = (float32_t*)&Coeffs[i][0];

        /* Fetch every operand before anything is written back. */
        const float32x4_t accum = vcombine_f32(vld1_f32(slot0), vld1_f32(slot1));
        const float32x4_t coeffs = vld1q_f32(coeff_ptr);
        const float32x4_t step = vld1q_f32(&CoeffStep[i][0]);

        const float32x4_t mixed = vmlaq_f32(accum, coeffs, input4);

        vst1_f32(slot0, vget_low_f32(mixed));
        vst1_f32(slot1, vget_high_f32(mixed));
        vst1q_f32(coeff_ptr, vaddq_f32(coeffs, step));
    }
}
/* Accumulates one left/right input sample pair through fixed HRIR
 * coefficients into the Values ring.
 *
 * For every pair i in [0, IrSize):
 *     Values[(Offset+i)&HRIR_MASK][0] += Coeffs[i][0]*left
 *     Values[(Offset+i)&HRIR_MASK][1] += Coeffs[i][1]*right
 *
 * Offset - starting index into Values; indices wrap with HRIR_MASK.
 * Values - accumulation buffer of left/right pairs, updated in place.
 * IrSize - number of coefficient pairs to apply. Pairs are handled two at a
 *          time, so an odd count touches one extra pair.
 * Coeffs - coefficients to apply; only read here.
 * left   - input scaled by the first element of each coefficient pair.
 * right  - input scaled by the second element of each coefficient pair.
 */
static inline void ApplyCoeffs(ALuint Offset, ALfloat (*restrict Values)[2],
                               const ALuint IrSize,
                               ALfloat (*restrict Coeffs)[2],
                               ALfloat left, ALfloat right)
{
    /* Lane layout { left, right, left, right }, matching two interleaved
     * coefficient pairs held in one vector. */
    const float32x2_t input2 = vset_lane_f32(right, vdup_n_f32(left), 1);
    const float32x4_t input4 = vcombine_f32(input2, input2);
    ALuint i;

    for(i = 0;i < IrSize;i += 2)
    {
        /* The two output slots are masked separately because the second one
         * may wrap around the end of the ring. */
        const ALuint idx0 = (Offset+i)&HRIR_MASK;
        const ALuint idx1 = (idx0+1)&HRIR_MASK;
        float32_t *const slot0 = (float32_t*)&Values[idx0][0];
        float32_t *const slot1 = (float32_t*)&Values[idx1][0];

        const float32x4_t accum = vcombine_f32(vld1_f32(slot0), vld1_f32(slot1));
        const float32x4_t coeffs = vld1q_f32((const float32_t*)&Coeffs[i][0]);
        const float32x4_t mixed = vmlaq_f32(accum, coeffs, input4);

        vst1_f32(slot0, vget_low_f32(mixed));
        vst1_f32(slot1, vget_high_f32(mixed));
    }
}
/* Pull in the shared mixer template with the identifier MixHrtf renamed to
 * MixHrtf_Neon, so whatever mixer_inc.c defines under that name gets a
 * NEON-specific symbol. The macro is removed again right after the include.
 * NOTE(review): mixer_inc.c is not visible here; presumably it calls the
 * static SetupCoeffs/ApplyCoeffsStep/ApplyCoeffs helpers defined earlier in
 * this file -- confirm. */
91 #define MixHrtf MixHrtf_Neon
92 #include "mixer_inc.c"
93 #undef MixHrtf
/* Adds one input buffer, scaled by a per-channel gain, into each output
 * channel.
 *
 * data       - BufferSize input samples.
 * OutChans   - number of output channels (rows of OutBuffer, entries of Gains).
 * OutBuffer  - output channels; samples are accumulated starting at OutPos.
 * Gains      - per-channel gain state. Current and Step are read, and Current
 *              is written back when a gain ramp was applied.
 * Counter    - number of samples over which a non-zero Step is applied.
 * OutPos     - offset of the first output sample within each channel.
 * BufferSize - number of samples to mix.
 *
 * While a channel has a non-zero Step and Counter is positive, its gain is
 * advanced by Step after every sample for up to Counter samples. If the ramp
 * completes within this call the gain is set to the channel's Target. The
 * remaining samples use a constant gain and are skipped entirely when that
 * gain's magnitude is not above GAIN_SILENCE_THRESHOLD.
 */
void Mix_Neon(const ALfloat *data, ALuint OutChans, ALfloat (*restrict OutBuffer)[BUFFERSIZE],
              MixGains *Gains, ALuint Counter, ALuint OutPos, ALuint BufferSize)
{
    ALuint chan;

    for(chan = 0;chan < OutChans;chan++)
    {
        ALfloat cur_gain = Gains[chan].Current;
        const ALfloat gain_step = Gains[chan].Step;
        ALuint i = 0;

        if(gain_step != 0.0f && Counter > 0)
        {
            const ALuint ramp_end = minu(BufferSize, Counter);
            ALuint align_end;

            /* Scalar ramp: the gain changes on every sample. */
            while(i < ramp_end)
            {
                OutBuffer[chan][OutPos+i] += data[i]*cur_gain;
                cur_gain += gain_step;
                i++;
            }
            /* Only snap to the target if the whole ramp fit in this call. */
            if(i == Counter)
                cur_gain = Gains[chan].Target;
            Gains[chan].Current = cur_gain;

            /* Keep mixing one sample at a time until the position is a
             * multiple of 4 or the buffer is exhausted. */
            align_end = minu(BufferSize, (i+3)&~3);
            while(i < align_end)
            {
                OutBuffer[chan][OutPos+i] += data[i]*cur_gain;
                i++;
            }
        }

        /* Written as a positive test so a NaN gain is treated as silent. */
        if(fabsf(cur_gain) > GAIN_SILENCE_THRESHOLD)
        {
            const float32x4_t gain4 = vdupq_n_f32(cur_gain);

            /* Constant gain: four samples per iteration. */
            while(BufferSize-i > 3)
            {
                ALfloat *const out = &OutBuffer[chan][OutPos+i];
                const float32x4_t in4 = vld1q_f32(&data[i]);

                vst1q_f32(out, vmlaq_f32(vld1q_f32(out), in4, gain4));
                i += 4;
            }
            /* Up to three leftover samples. */
            for(;i < BufferSize;i++)
                OutBuffer[chan][OutPos+i] += data[i]*cur_gain;
        }
    }
}