12 static inline void SetupCoeffs(ALfloat (*restrict OutCoeffs
)[2],
13 const HrtfParams
*hrtfparams
,
14 ALuint IrSize
, ALuint Counter
)
19 float32x2_t counter2
= vdup_n_f32(-(float)Counter
);
20 counter4
= vcombine_f32(counter2
, counter2
);
22 for(c
= 0;c
< IrSize
;c
+= 2)
24 float32x4_t step4
= vld1q_f32((float32_t
*)hrtfparams
->CoeffStep
[c
]);
25 float32x4_t coeffs
= vld1q_f32((float32_t
*)hrtfparams
->Coeffs
[c
]);
26 coeffs
= vmlaq_f32(coeffs
, step4
, counter4
);
27 vst1q_f32((float32_t
*)OutCoeffs
[c
], coeffs
);
31 static inline void ApplyCoeffsStep(ALuint Offset
, ALfloat (*restrict Values
)[2],
33 ALfloat (*restrict Coeffs
)[2],
34 const ALfloat (*restrict CoeffStep
)[2],
35 ALfloat left
, ALfloat right
)
38 float32x4_t leftright4
;
40 float32x2_t leftright2
= vdup_n_f32(0.0);
41 leftright2
= vset_lane_f32(left
, leftright2
, 0);
42 leftright2
= vset_lane_f32(right
, leftright2
, 1);
43 leftright4
= vcombine_f32(leftright2
, leftright2
);
45 for(c
= 0;c
< IrSize
;c
+= 2)
47 const ALuint o0
= (Offset
+c
)&HRIR_MASK
;
48 const ALuint o1
= (o0
+1)&HRIR_MASK
;
49 float32x4_t vals
= vcombine_f32(vld1_f32((float32_t
*)&Values
[o0
][0]),
50 vld1_f32((float32_t
*)&Values
[o1
][0]));
51 float32x4_t coefs
= vld1q_f32((float32_t
*)&Coeffs
[c
][0]);
52 float32x4_t deltas
= vld1q_f32(&CoeffStep
[c
][0]);
54 vals
= vmlaq_f32(vals
, coefs
, leftright4
);
55 coefs
= vaddq_f32(coefs
, deltas
);
57 vst1_f32((float32_t
*)&Values
[o0
][0], vget_low_f32(vals
));
58 vst1_f32((float32_t
*)&Values
[o1
][0], vget_high_f32(vals
));
59 vst1q_f32(&Coeffs
[c
][0], coefs
);
63 static inline void ApplyCoeffs(ALuint Offset
, ALfloat (*restrict Values
)[2],
65 ALfloat (*restrict Coeffs
)[2],
66 ALfloat left
, ALfloat right
)
69 float32x4_t leftright4
;
71 float32x2_t leftright2
= vdup_n_f32(0.0);
72 leftright2
= vset_lane_f32(left
, leftright2
, 0);
73 leftright2
= vset_lane_f32(right
, leftright2
, 1);
74 leftright4
= vcombine_f32(leftright2
, leftright2
);
76 for(c
= 0;c
< IrSize
;c
+= 2)
78 const ALuint o0
= (Offset
+c
)&HRIR_MASK
;
79 const ALuint o1
= (o0
+1)&HRIR_MASK
;
80 float32x4_t vals
= vcombine_f32(vld1_f32((float32_t
*)&Values
[o0
][0]),
81 vld1_f32((float32_t
*)&Values
[o1
][0]));
82 float32x4_t coefs
= vld1q_f32((float32_t
*)&Coeffs
[c
][0]);
84 vals
= vmlaq_f32(vals
, coefs
, leftright4
);
86 vst1_f32((float32_t
*)&Values
[o0
][0], vget_low_f32(vals
));
87 vst1_f32((float32_t
*)&Values
[o1
][0], vget_high_f32(vals
));
91 #define MixHrtf MixHrtf_Neon
92 #include "mixer_inc.c"
96 void Mix_Neon(const ALfloat
*data
, ALuint OutChans
, ALfloat (*restrict OutBuffer
)[BUFFERSIZE
],
97 MixGains
*Gains
, ALuint Counter
, ALuint OutPos
, ALuint BufferSize
)
103 for(c
= 0;c
< OutChans
;c
++)
106 gain
= Gains
[c
].Current
;
107 step
= Gains
[c
].Step
;
108 if(step
!= 0.0f
&& Counter
> 0)
110 ALuint minsize
= minu(BufferSize
, Counter
);
111 for(;pos
< minsize
;pos
++)
113 OutBuffer
[c
][OutPos
+pos
] += data
[pos
]*gain
;
117 gain
= Gains
[c
].Target
;
118 Gains
[c
].Current
= gain
;
120 /* Mix until pos is aligned with 4 or the mix is done. */
121 minsize
= minu(BufferSize
, (pos
+3)&~3);
122 for(;pos
< minsize
;pos
++)
123 OutBuffer
[c
][OutPos
+pos
] += data
[pos
]*gain
;
126 if(!(fabsf(gain
) > GAIN_SILENCE_THRESHOLD
))
128 gain4
= vdupq_n_f32(gain
);
129 for(;BufferSize
-pos
> 3;pos
+= 4)
131 const float32x4_t val4
= vld1q_f32(&data
[pos
]);
132 float32x4_t dry4
= vld1q_f32(&OutBuffer
[c
][OutPos
+pos
]);
133 dry4
= vmlaq_f32(dry4
, val4
, gain4
);
134 vst1q_f32(&OutBuffer
[c
][OutPos
+pos
], dry4
);
136 for(;pos
< BufferSize
;pos
++)
137 OutBuffer
[c
][OutPos
+pos
] += data
[pos
]*gain
;