/* ApplyCoeffsStep: NEON helper that processes coefficient rows two at a
 * time. For each pair it adds Coeffs[c] and Coeffs[c+1], scaled by
 * (left, right), into the two Values rows at HRIR_MASK-wrapped positions
 * starting at Offset+c, and then adds CoeffStep[c] and CoeffStep[c+1] to
 * the same Coeffs rows in place.
 *
 * Offset     - base index into Values; wrapped with HRIR_MASK.
 * Values     - rows of two floats; read, accumulated into and written back.
 * Coeffs     - rows of two floats; read, then updated with CoeffStep.
 * CoeffStep  - rows of two floats; read only.
 * left/right - scale applied to element 0 / element 1 of every row.
 *
 * NOTE(review): this text looks like a lossy listing of the original file:
 * the embedded line numbers skip values, no braces are visible, and `c` and
 * `IrSize` are used without a visible declaration (presumably a local and
 * a parameter that were dropped). Confirm against the real source.
 */
12 static inline void ApplyCoeffsStep(ALuint Offset
, ALfloat (*restrict Values
)[2],
14 ALfloat (*restrict Coeffs
)[2],
15 const ALfloat (*restrict CoeffStep
)[2],
16 ALfloat left
, ALfloat right
)
19 float32x4_t leftright4
;
/* Build the 4-lane scale vector {left, right, left, right}. */
21 float32x2_t leftright2
= vdup_n_f32(0.0);
22 leftright2
= vset_lane_f32(left
, leftright2
, 0);
23 leftright2
= vset_lane_f32(right
, leftright2
, 1);
24 leftright4
= vcombine_f32(leftright2
, leftright2
);
/* Two rows (four floats) are handled per iteration, hence the step of 2. */
26 for(c
= 0;c
< IrSize
;c
+= 2)
/* o0 and o1 are the two destination rows in Values; each index is wrapped
 * with HRIR_MASK, so the two rows need not be adjacent in memory. */
28 const ALuint o0
= (Offset
+c
)&HRIR_MASK
;
29 const ALuint o1
= (o0
+1)&HRIR_MASK
;
/* The Values rows are loaded separately and combined into one vector. */
30 float32x4_t vals
= vcombine_f32(vld1_f32((float32_t
*)&Values
[o0
][0]),
31 vld1_f32((float32_t
*)&Values
[o1
][0]));
/* These 4-float loads start at row c, so they also cover row c+1. */
32 float32x4_t coefs
= vld1q_f32((float32_t
*)&Coeffs
[c
][0]);
33 float32x4_t deltas
= vld1q_f32(&CoeffStep
[c
][0]);
/* vals += coefs * leftright4, using the coefficients as loaded; the
 * coefficients are advanced by their step only afterwards. */
35 vals
= vmlaq_f32(vals
, coefs
, leftright4
);
36 coefs
= vaddq_f32(coefs
, deltas
);
/* Write the low half back to row o0, the high half to row o1, and store
 * the stepped coefficients over Coeffs[c] and Coeffs[c+1]. */
38 vst1_f32((float32_t
*)&Values
[o0
][0], vget_low_f32(vals
));
39 vst1_f32((float32_t
*)&Values
[o1
][0], vget_high_f32(vals
));
40 vst1q_f32(&Coeffs
[c
][0], coefs
);
/* ApplyCoeffs: NEON helper that processes coefficient rows two at a time.
 * For each pair it adds Coeffs[c] and Coeffs[c+1], scaled by (left, right),
 * into the two Values rows at HRIR_MASK-wrapped positions starting at
 * Offset+c. The visible code only reads Coeffs; it never stores to it.
 *
 * Offset     - base index into Values; wrapped with HRIR_MASK.
 * Values     - rows of two floats; read, accumulated into and written back.
 * Coeffs     - rows of two floats; read only in the visible code.
 * left/right - scale applied to element 0 / element 1 of every row.
 *
 * NOTE(review): this text looks like a lossy listing of the original file:
 * the embedded line numbers skip values, no braces are visible, and `c` and
 * `IrSize` are used without a visible declaration (presumably a local and
 * a parameter that were dropped). Confirm against the real source.
 */
44 static inline void ApplyCoeffs(ALuint Offset
, ALfloat (*restrict Values
)[2],
46 ALfloat (*restrict Coeffs
)[2],
47 ALfloat left
, ALfloat right
)
50 float32x4_t leftright4
;
/* Build the 4-lane scale vector {left, right, left, right}. */
52 float32x2_t leftright2
= vdup_n_f32(0.0);
53 leftright2
= vset_lane_f32(left
, leftright2
, 0);
54 leftright2
= vset_lane_f32(right
, leftright2
, 1);
55 leftright4
= vcombine_f32(leftright2
, leftright2
);
/* Two rows (four floats) are handled per iteration, hence the step of 2. */
57 for(c
= 0;c
< IrSize
;c
+= 2)
/* o0 and o1 are the two destination rows in Values; each index is wrapped
 * with HRIR_MASK, so the two rows need not be adjacent in memory. */
59 const ALuint o0
= (Offset
+c
)&HRIR_MASK
;
60 const ALuint o1
= (o0
+1)&HRIR_MASK
;
/* The Values rows are loaded separately and combined into one vector. */
61 float32x4_t vals
= vcombine_f32(vld1_f32((float32_t
*)&Values
[o0
][0]),
62 vld1_f32((float32_t
*)&Values
[o1
][0]));
/* This 4-float load starts at row c, so it also covers row c+1. */
63 float32x4_t coefs
= vld1q_f32((float32_t
*)&Coeffs
[c
][0]);
/* vals += coefs * leftright4 */
65 vals
= vmlaq_f32(vals
, coefs
, leftright4
);
/* Write the low half back to row o0 and the high half to row o1. */
67 vst1_f32((float32_t
*)&Values
[o0
][0], vget_low_f32(vals
));
68 vst1_f32((float32_t
*)&Values
[o1
][0], vget_high_f32(vals
));
/* Map the name MixHrtf to MixHrtf_Neon for the text pulled in from
 * mixer_inc.c, so anything that file declares as MixHrtf becomes
 * MixHrtf_Neon in this translation unit.
 * NOTE(review): presumably mixer_inc.c defines MixHrtf in terms of the
 * ApplyCoeffs / ApplyCoeffsStep helpers defined earlier in this file;
 * confirm against that file. */
72 #define MixHrtf MixHrtf_Neon
73 #include "mixer_inc.c"
/* Mix_Neon: for every channel c below OutChans, adds the samples in `data`,
 * scaled by that channel's gain, into OutBuffer[c] starting at OutPos.
 *
 * data       - input samples, indexed by pos.
 * OutChans   - number of output channels (rows of OutBuffer) to mix into.
 * OutBuffer  - rows of BUFFERSIZE floats; accumulated into in place.
 * Gains      - per-channel gain state; .Current and .Target are read and
 *              .Current is written in the visible code.
 * Counter    - with BufferSize, bounds the first scalar loop (via minu).
 * OutPos     - offset added to pos when indexing an OutBuffer row.
 * BufferSize - upper bound on pos, i.e. samples mixed per channel.
 *
 * NOTE(review): this text looks like a lossy listing of the original file.
 * The embedded line numbers skip values, no braces are visible, and `c`,
 * `pos`, `gain`, `step` and `gain4` are used without a visible declaration
 * or (for `pos` and `step`) a visible initial assignment. The comments
 * below describe only what is visible; confirm the full control flow
 * against the real source.
 */
77 void Mix_Neon(const ALfloat
*data
, ALuint OutChans
, ALfloat (*restrict OutBuffer
)[BUFFERSIZE
],
78 MixGains
*Gains
, ALuint Counter
, ALuint OutPos
, ALuint BufferSize
)
84 for(c
= 0;c
< OutChans
;c
++)
87 gain
= Gains
[c
].Current
;
/* This branch is taken only while a non-zero step is pending and Counter
 * is positive. NOTE(review): where `step` gets its value is not visible. */
89 if(step
!= 0.0f
&& Counter
> 0)
/* Scalar mix over the first minu(BufferSize, Counter) samples.
 * NOTE(review): presumably gain is advanced by step inside this loop; no
 * such statement is visible here - confirm. */
91 ALuint minsize
= minu(BufferSize
, Counter
);
92 for(;pos
< minsize
;pos
++)
94 OutBuffer
[c
][OutPos
+pos
] += data
[pos
]*gain
;
/* gain is set to the channel's Target and stored back as its Current.
 * NOTE(review): any condition guarding the first assignment is not
 * visible here - confirm. */
98 gain
= Gains
[c
].Target
;
99 Gains
[c
].Current
= gain
;
101 /* Mix until pos is aligned with 4 or the mix is done. */
102 minsize
= minu(BufferSize
, (pos
+3)&~3);
103 for(;pos
< minsize
;pos
++)
104 OutBuffer
[c
][OutPos
+pos
] += data
[pos
]*gain
;
/* Tests for |gain| not exceeding GAIN_SILENCE_THRESHOLD.
 * NOTE(review): the statement this test controls is not visible;
 * presumably it skips the rest of the work for this channel when the gain
 * is effectively silent - confirm. */
107 if(!(fabsf(gain
) > GAIN_SILENCE_THRESHOLD
))
/* Vector mix: gain is broadcast to all four lanes, then four samples are
 * processed per iteration while at least four remain
 * (dry4 += val4 * gain4). */
109 gain4
= vdupq_n_f32(gain
);
110 for(;BufferSize
-pos
> 3;pos
+= 4)
112 const float32x4_t val4
= vld1q_f32(&data
[pos
]);
113 float32x4_t dry4
= vld1q_f32(&OutBuffer
[c
][OutPos
+pos
]);
114 dry4
= vmlaq_f32(dry4
, val4
, gain4
);
115 vst1q_f32(&OutBuffer
[c
][OutPos
+pos
], dry4
);
/* Scalar tail for the samples left over after the 4-wide loop. */
117 for(;pos
< BufferSize
;pos
++)
118 OutBuffer
[c
][OutPos
+pos
] += data
[pos
]*gain
;