/* ApplyCoeffsStep: processes coefficient rows two at a time (c, c+1) while
 * c < IrSize.  For each pair it adds Coeffs[c..c+1][0..1] multiplied by
 * {left, right} into the two Values rows at the masked offsets o0 and o1,
 * then adds CoeffStep[c..c+1] to Coeffs[c..c+1] and stores the stepped
 * coefficients back.
 *
 * NOTE(review): this text looks like an extraction of a numbered source
 * listing.  The leading integers (14, 16, 17, ...) appear to be the listing's
 * own line numbers, and the gaps in them (14 -> 16, 18 -> 21, 26 -> 28, ...)
 * suggest lines are missing here: no braces are visible, and neither `c` nor
 * `IrSize` is declared in what is shown.  Confirm against the original file
 * before relying on this block.
 */
14 static inline void ApplyCoeffsStep(ALuint Offset
, ALfloat (*restrict Values
)[2],
16 ALfloat (*restrict Coeffs
)[2],
17 const ALfloat (*restrict CoeffStep
)[2],
18 ALfloat left
, ALfloat right
)
21 float32x4_t leftright4
;
/* Build the 4-lane multiplier {left, right, left, right}. */
23 float32x2_t leftright2
= vdup_n_f32(0.0);
24 leftright2
= vset_lane_f32(left
, leftright2
, 0);
25 leftright2
= vset_lane_f32(right
, leftright2
, 1);
26 leftright4
= vcombine_f32(leftright2
, leftright2
);
/* Two rows per iteration; the 4-float loads below read rows c and c+1. */
28 for(c
= 0;c
< IrSize
;c
+= 2)
/* o0/o1 are consecutive row indices into Values, each wrapped with HRIR_MASK. */
30 const ALuint o0
= (Offset
+c
)&HRIR_MASK
;
31 const ALuint o1
= (o0
+1)&HRIR_MASK
;
/* Values rows are loaded separately (2 floats each) because o1 may wrap. */
32 float32x4_t vals
= vcombine_f32(vld1_f32((float32_t
*)&Values
[o0
][0]),
33 vld1_f32((float32_t
*)&Values
[o1
][0]));
34 float32x4_t coefs
= vld1q_f32((float32_t
*)&Coeffs
[c
][0]);
35 float32x4_t deltas
= vld1q_f32(&CoeffStep
[c
][0]);
/* vals += coefs * leftright4, using the coefficients before stepping. */
37 vals
= vmlaq_f32(vals
, coefs
, leftright4
);
38 coefs
= vaddq_f32(coefs
, deltas
);
/* Write both Values rows back, then the stepped coefficients. */
40 vst1_f32((float32_t
*)&Values
[o0
][0], vget_low_f32(vals
));
41 vst1_f32((float32_t
*)&Values
[o1
][0], vget_high_f32(vals
));
42 vst1q_f32(&Coeffs
[c
][0], coefs
);
/* ApplyCoeffs: processes coefficient rows two at a time (c, c+1) while
 * c < IrSize.  For each pair it adds Coeffs[c..c+1][0..1] multiplied by
 * {left, right} into the two Values rows at the masked offsets o0 and o1.
 * Unlike ApplyCoeffsStep, Coeffs is only read here; nothing is stored back
 * to it.
 *
 * NOTE(review): this text looks like an extraction of a numbered source
 * listing.  The leading integers (46, 48, 49, ...) appear to be the listing's
 * own line numbers, and the gaps in them (46 -> 48, 49 -> 52, 57 -> 59, ...)
 * suggest lines are missing here: no braces are visible, and neither `c` nor
 * `IrSize` is declared in what is shown.  Confirm against the original file
 * before relying on this block.
 */
46 static inline void ApplyCoeffs(ALuint Offset
, ALfloat (*restrict Values
)[2],
48 ALfloat (*restrict Coeffs
)[2],
49 ALfloat left
, ALfloat right
)
52 float32x4_t leftright4
;
/* Build the 4-lane multiplier {left, right, left, right}. */
54 float32x2_t leftright2
= vdup_n_f32(0.0);
55 leftright2
= vset_lane_f32(left
, leftright2
, 0);
56 leftright2
= vset_lane_f32(right
, leftright2
, 1);
57 leftright4
= vcombine_f32(leftright2
, leftright2
);
/* Two rows per iteration; the 4-float coefficient load reads rows c and c+1. */
59 for(c
= 0;c
< IrSize
;c
+= 2)
/* o0/o1 are consecutive row indices into Values, each wrapped with HRIR_MASK. */
61 const ALuint o0
= (Offset
+c
)&HRIR_MASK
;
62 const ALuint o1
= (o0
+1)&HRIR_MASK
;
/* Values rows are loaded separately (2 floats each) because o1 may wrap. */
63 float32x4_t vals
= vcombine_f32(vld1_f32((float32_t
*)&Values
[o0
][0]),
64 vld1_f32((float32_t
*)&Values
[o1
][0]));
65 float32x4_t coefs
= vld1q_f32((float32_t
*)&Coeffs
[c
][0]);
/* vals += coefs * leftright4 */
67 vals
= vmlaq_f32(vals
, coefs
, leftright4
);
/* Write both Values rows back. */
69 vst1_f32((float32_t
*)&Values
[o0
][0], vget_low_f32(vals
));
70 vst1_f32((float32_t
*)&Values
[o1
][0], vget_high_f32(vals
));
76 #include "mixer_inc.c"
/* MixDirect_Neon: for each output channel c below MaxChannels, reads the gain
 * params->Gains[srcchan][c] and adds data[pos] * gain into
 * OutBuffer[c][OutPos + pos] for pos in [0, BufferSize).  The bulk of the
 * samples is handled four at a time with NEON, the remainder one at a time.
 * ClickRemoval[c] and PendingClicks[c] are also adjusted by gain-scaled
 * samples (see the notes at those statements).
 *
 * NOTE(review): this text looks like an extraction of a numbered source
 * listing.  The leading integers (80, 81, 83, ...) appear to be the listing's
 * own line numbers, and the gaps in them (85 -> 91, 94 -> 98, ...) suggest
 * lines are missing here: no braces are visible and the declarations of
 * `DrySend`, `gain`, `pos` and `c` are not shown.  Confirm against the
 * original file before relying on this block.
 */
80 void MixDirect_Neon(const DirectParams
*params
, const ALfloat
*restrict data
, ALuint srcchan
,
81 ALuint OutPos
, ALuint SamplesToDo
, ALuint BufferSize
)
83 ALfloat (*restrict OutBuffer
)[BUFFERSIZE
] = params
->OutBuffer
;
84 ALfloat
*restrict ClickRemoval
= params
->ClickRemoval
;
85 ALfloat
*restrict PendingClicks
= params
->PendingClicks
;
91 for(c
= 0;c
< MaxChannels
;c
++)
93 DrySend
= params
->Gains
[srcchan
][c
];
/* NOTE(review): the statement controlled by this test is not visible (the
 * listing numbers jump from 94 to 98).  Do not assume the ClickRemoval update
 * below is its body, or that the update is unconditional -- verify. */
94 if(!(DrySend
> GAIN_SILENCE_THRESHOLD
))
98 ClickRemoval
[c
] -= data
[0]*DrySend
;
/* Broadcast the gain to all four lanes for the vector loop. */
100 gain
= vdupq_n_f32(DrySend
);
/* Vector loop: runs while at least four samples remain. */
101 for(pos
= 0;BufferSize
-pos
> 3;pos
+= 4)
103 const float32x4_t val4
= vld1q_f32(&data
[pos
]);
104 float32x4_t dry4
= vld1q_f32(&OutBuffer
[c
][OutPos
+pos
]);
105 dry4
= vaddq_f32(dry4
, vmulq_f32(val4
, gain
));
106 vst1q_f32(&OutBuffer
[c
][OutPos
+pos
], dry4
);
/* Scalar tail for the remaining (fewer than four) samples. */
108 for(;pos
< BufferSize
;pos
++)
109 OutBuffer
[c
][OutPos
+pos
] += data
[pos
]*DrySend
;
/* After the loops pos equals BufferSize, so this reads data[BufferSize],
 * one element past the samples mixed above -- presumably the caller provides
 * that extra sample; verify against the caller. */
111 if(OutPos
+pos
== SamplesToDo
)
112 PendingClicks
[c
] += data
[pos
]*DrySend
;
/* MixSend_Neon: reads the gain params->Gain and adds data[pos] * gain into
 * OutBuffer[0][OutPos + pos] for pos in [0, BufferSize).  The bulk of the
 * samples is handled four at a time with NEON, the remainder one at a time.
 * ClickRemoval[0] and PendingClicks[0] are also adjusted by gain-scaled
 * samples (see the notes at those statements).
 *
 * NOTE(review): this text looks like an extraction of a numbered source
 * listing.  The leading integers (117, 118, 120, ...) appear to be the
 * listing's own line numbers, and the gaps in them (122 -> 127, 128 -> 132,
 * ...) suggest lines are missing here: no braces are visible and the
 * declarations of `WetGain`, `gain` and `pos` are not shown.  Confirm against
 * the original file before relying on this block.
 */
117 void MixSend_Neon(const SendParams
*params
, const ALfloat
*restrict data
,
118 ALuint OutPos
, ALuint SamplesToDo
, ALuint BufferSize
)
120 ALfloat (*restrict OutBuffer
)[BUFFERSIZE
] = params
->OutBuffer
;
121 ALfloat
*restrict ClickRemoval
= params
->ClickRemoval
;
122 ALfloat
*restrict PendingClicks
= params
->PendingClicks
;
127 WetGain
= params
->Gain
;
/* NOTE(review): the statement controlled by this test is not visible (the
 * listing numbers jump from 128 to 132).  Do not assume the ClickRemoval
 * update below is its body, or that the update is unconditional -- verify. */
128 if(!(WetGain
> GAIN_SILENCE_THRESHOLD
))
132 ClickRemoval
[0] -= data
[0] * WetGain
;
/* Broadcast the gain to all four lanes for the vector loop. */
134 gain
= vdupq_n_f32(WetGain
);
/* Vector loop: runs while at least four samples remain. */
135 for(pos
= 0;BufferSize
-pos
> 3;pos
+= 4)
137 const float32x4_t val4
= vld1q_f32(&data
[pos
]);
138 float32x4_t wet4
= vld1q_f32(&OutBuffer
[0][OutPos
+pos
]);
139 wet4
= vaddq_f32(wet4
, vmulq_f32(val4
, gain
));
140 vst1q_f32(&OutBuffer
[0][OutPos
+pos
], wet4
);
/* Scalar tail for the remaining (fewer than four) samples. */
142 for(;pos
< BufferSize
;pos
++)
143 OutBuffer
[0][OutPos
+pos
] += data
[pos
] * WetGain
;
/* After the loops pos equals BufferSize, so this reads data[BufferSize],
 * one element past the samples mixed above -- presumably the caller provides
 * that extra sample; verify against the caller. */
145 if(OutPos
+pos
== SamplesToDo
)
146 PendingClicks
[0] += data
[pos
] * WetGain
;