/* Accumulates a left/right value pair into the Values buffer through the
 * coefficient table, then advances the coefficients by their step.
 *
 * For every row c visited (two rows per loop iteration, c < IrSize):
 *   Values[o][0] += Coeffs[c][0] * left
 *   Values[o][1] += Coeffs[c][1] * right
 *   Coeffs[c][0..1] += CoeffStep[c][0..1]
 * where o is the row index derived from Offset+c and wrapped by HRIR_MASK.
 *
 * Offset    - starting row in Values; row indices are masked by HRIR_MASK.
 * Values    - accumulation buffer of 2-float rows, updated in place.
 * Coeffs    - coefficient rows; read, then overwritten with the stepped
 *             values.
 * CoeffStep - per-row increments added to Coeffs (only read here).
 * left      - multiplier applied to column 0 of each coefficient row.
 * right     - multiplier applied to column 1 of each coefficient row.
 *
 * NOTE(review): rows c and c+1 are always handled together, so this assumes
 * IrSize is even - TODO confirm against callers.
 * NOTE(review): the declaration of `c`, the origin of `IrSize` and the
 * function's braces are not visible in this listing.
 */
12 static inline void ApplyCoeffsStep(ALuint Offset
, ALfloat (*restrict Values
)[2],
14 ALfloat (*restrict Coeffs
)[2],
15 const ALfloat (*restrict CoeffStep
)[2],
16 ALfloat left
, ALfloat right
)
19 float32x4_t leftright4
;
/* Build {left, right} in a 2-lane vector, then duplicate it into
 * {left, right, left, right} so one multiply covers two rows. */
21 float32x2_t leftright2
= vdup_n_f32(0.0);
22 leftright2
= vset_lane_f32(left
, leftright2
, 0);
23 leftright2
= vset_lane_f32(right
, leftright2
, 1);
24 leftright4
= vcombine_f32(leftright2
, leftright2
);
/* Two coefficient rows (four floats) are processed per iteration. */
26 for(c
= 0;c
< IrSize
;c
+= 2)
/* Each Values row index is masked on its own, so o1 is not necessarily the
 * row right after o0 in memory. */
28 const ALuint o0
= (Offset
+c
)&HRIR_MASK
;
29 const ALuint o1
= (o0
+1)&HRIR_MASK
;
/* Load the two Values rows as separate 2-float halves and join them. */
30 float32x4_t vals
= vcombine_f32(vld1_f32((float32_t
*)&Values
[o0
][0]),
31 vld1_f32((float32_t
*)&Values
[o1
][0]));
/* Coeffs and CoeffStep rows c and c+1 are read as four consecutive floats. */
32 float32x4_t coefs
= vld1q_f32((float32_t
*)&Coeffs
[c
][0]);
33 float32x4_t deltas
= vld1q_f32(&CoeffStep
[c
][0]);
/* vals += coefs * {left, right, left, right} */
35 vals
= vmlaq_f32(vals
, coefs
, leftright4
);
/* Advance the coefficients only after they have been applied. */
36 coefs
= vaddq_f32(coefs
, deltas
);
/* Write the two updated Values rows back to their (masked) positions and
 * store the stepped coefficients over rows c and c+1. */
38 vst1_f32((float32_t
*)&Values
[o0
][0], vget_low_f32(vals
));
39 vst1_f32((float32_t
*)&Values
[o1
][0], vget_high_f32(vals
));
40 vst1q_f32(&Coeffs
[c
][0], coefs
);
/* Accumulates a left/right value pair into the Values buffer through the
 * coefficient table, leaving the coefficients unchanged.
 *
 * For every row c visited (two rows per loop iteration, c < IrSize):
 *   Values[o][0] += Coeffs[c][0] * left
 *   Values[o][1] += Coeffs[c][1] * right
 * where o is the row index derived from Offset+c and wrapped by HRIR_MASK.
 *
 * Offset - starting row in Values; row indices are masked by HRIR_MASK.
 * Values - accumulation buffer of 2-float rows, updated in place.
 * Coeffs - coefficient rows; only read by the visible code.
 * left   - multiplier applied to column 0 of each coefficient row.
 * right  - multiplier applied to column 1 of each coefficient row.
 *
 * NOTE(review): rows c and c+1 are always handled together, so this assumes
 * IrSize is even - TODO confirm against callers.
 * NOTE(review): the declaration of `c`, the origin of `IrSize` and the
 * function's braces are not visible in this listing.
 */
44 static inline void ApplyCoeffs(ALuint Offset
, ALfloat (*restrict Values
)[2],
46 ALfloat (*restrict Coeffs
)[2],
47 ALfloat left
, ALfloat right
)
50 float32x4_t leftright4
;
/* Build {left, right} in a 2-lane vector, then duplicate it into
 * {left, right, left, right} so one multiply covers two rows. */
52 float32x2_t leftright2
= vdup_n_f32(0.0);
53 leftright2
= vset_lane_f32(left
, leftright2
, 0);
54 leftright2
= vset_lane_f32(right
, leftright2
, 1);
55 leftright4
= vcombine_f32(leftright2
, leftright2
);
/* Two coefficient rows (four floats) are processed per iteration. */
57 for(c
= 0;c
< IrSize
;c
+= 2)
/* Each Values row index is masked on its own, so o1 is not necessarily the
 * row right after o0 in memory. */
59 const ALuint o0
= (Offset
+c
)&HRIR_MASK
;
60 const ALuint o1
= (o0
+1)&HRIR_MASK
;
/* Load the two Values rows as separate 2-float halves and join them. */
61 float32x4_t vals
= vcombine_f32(vld1_f32((float32_t
*)&Values
[o0
][0]),
62 vld1_f32((float32_t
*)&Values
[o1
][0]));
/* Coeffs rows c and c+1 are read as four consecutive floats. */
63 float32x4_t coefs
= vld1q_f32((float32_t
*)&Coeffs
[c
][0]);
/* vals += coefs * {left, right, left, right} */
65 vals
= vmlaq_f32(vals
, coefs
, leftright4
);
/* Write the two updated Values rows back to their (masked) positions. */
67 vst1_f32((float32_t
*)&Values
[o0
][0], vget_low_f32(vals
));
68 vst1_f32((float32_t
*)&Values
[o1
][0], vget_high_f32(vals
));
74 #include "mixer_inc.c"
/* Adds the input samples, scaled by a per-channel gain, into each output
 * channel's buffer: OutBuffer[c][OutPos+pos] += data[pos]*gain.
 *
 * data       - input samples, indexed by pos.
 * OutChans   - number of output channels iterated over.
 * OutBuffer  - per-channel output rows of BUFFERSIZE floats, accumulated
 *              into starting at OutPos.
 * Gains      - per-channel gain state; the visible code reads .Current and
 *              .Target and writes .Current.
 * Counter    - upper bound on the number of samples handled by the first
 *              scalar loop.
 * OutPos     - offset into each OutBuffer row where mixing starts.
 * BufferSize - number of input samples to mix.
 *
 * NOTE(review): this listing has gaps. The braces, the declarations of `c`,
 * `pos`, `gain`, `step` and `gain4`, and any assignment to `step` are not
 * visible, and the end of the function cannot be confirmed from here. The
 * comments below describe only the statements that are visible.
 */
78 void MixDirect_Neon(const ALfloat
*data
, ALuint OutChans
, ALfloat (*restrict OutBuffer
)[BUFFERSIZE
],
79 MixGains
*Gains
, ALuint Counter
, ALuint OutPos
, ALuint BufferSize
)
85 for(c
= 0;c
< OutChans
;c
++)
/* Start from the channel's current gain. */
88 gain
= Gains
[c
].Current
;
/* Scalar path taken while step differs from 1.0f and Counter is non-zero.
 * NOTE(review): presumably gain is updated by step inside this loop; that
 * statement is not visible in this listing - TODO confirm. */
90 if(step
!= 1.0f
&& Counter
> 0)
92 for(;pos
< BufferSize
&& pos
< Counter
;pos
++)
94 OutBuffer
[c
][OutPos
+pos
] += data
[pos
]*gain
;
/* Take the target gain and write it back as the channel's current gain.
 * NOTE(review): any condition guarding the first assignment is not visible
 * in this listing. */
98 gain
= Gains
[c
].Target
;
99 Gains
[c
].Current
= gain
;
100 /* Mix until pos is aligned with 4 or the mix is done. */
101 for(;pos
< BufferSize
&& (pos
&3) != 0;pos
++)
102 OutBuffer
[c
][OutPos
+pos
] += data
[pos
]*gain
;
/* True when gain is not above GAIN_SILENCE_THRESHOLD; written as a negated
 * comparison, so a NaN gain also takes this branch.
 * NOTE(review): the branch body is not visible in this listing; presumably
 * it skips the rest of the work for this channel - TODO confirm. */
105 if(!(gain
> GAIN_SILENCE_THRESHOLD
))
/* Broadcast the gain to all four lanes for the vector loop. */
107 gain4
= vdupq_n_f32(gain
);
/* Vector loop: runs while at least four samples remain. */
108 for(;BufferSize
-pos
> 3;pos
+= 4)
110 const float32x4_t val4
= vld1q_f32(&data
[pos
]);
111 float32x4_t dry4
= vld1q_f32(&OutBuffer
[c
][OutPos
+pos
]);
/* dry4 += val4 * gain4, then store the four mixed samples back. */
112 dry4
= vaddq_f32(dry4
, vmulq_f32(val4
, gain4
));
113 vst1q_f32(&OutBuffer
[c
][OutPos
+pos
], dry4
);
/* Scalar tail for the samples left over after the vector loop. */
115 for(;pos
< BufferSize
;pos
++)
116 OutBuffer
[c
][OutPos
+pos
] += data
[pos
]*gain
;