/* ApplyCoeffsStep: accumulates Coeffs scaled by (left, right) into Values,
 * two entries per iteration, and then advances Coeffs by CoeffStep.
 *
 *   Offset    - starting index into Values; indices are wrapped with
 *               HRIR_MASK.
 *   Values    - array of float pairs that is read, accumulated into and
 *               written back.
 *   Coeffs    - array of float pairs; read, incremented by CoeffStep and
 *               written back.
 *   CoeffStep - per-entry increment added to Coeffs (read only).
 *   left      - scale applied to element [0] of each coefficient pair.
 *   right     - scale applied to element [1] of each coefficient pair.
 *
 * NOTE(review): the original line numbers embedded in this listing skip
 * (15, 19-20, 22, ...), so braces and some declarations -- including those
 * of `c` and `IrSize` used below -- are not visible here. Confirm against
 * the full file before relying on this description.
 */
14 static inline void ApplyCoeffsStep(ALuint Offset
, ALfloat (*restrict Values
)[2],
16 ALfloat (*restrict Coeffs
)[2],
17 const ALfloat (*restrict CoeffStep
)[2],
18 ALfloat left
, ALfloat right
)
/* Build the four-lane scale vector {left, right, left, right}. */
21 float32x4_t leftright4
;
23 float32x2_t leftright2
= vdup_n_f32(0.0);
24 leftright2
= vset_lane_f32(left
, leftright2
, 0);
25 leftright2
= vset_lane_f32(right
, leftright2
, 1);
26 leftright4
= vcombine_f32(leftright2
, leftright2
);
/* Handle entries c and c+1 together; c advances by 2 while c < IrSize. */
28 for(c
= 0;c
< IrSize
;c
+= 2)
/* o0/o1 are the two wrapped Values indices for this iteration. */
30 const ALuint o0
= (Offset
+c
)&HRIR_MASK
;
31 const ALuint o1
= (o0
+1)&HRIR_MASK
;
/* Values[o0] and Values[o1] are loaded separately (two floats each) because
 * the mask may wrap o1 so that it does not directly follow o0. */
32 float32x4_t vals
= vcombine_f32(vld1_f32((float32_t
*)&Values
[o0
][0]),
33 vld1_f32((float32_t
*)&Values
[o1
][0]));
/* Four consecutive floats starting at Coeffs[c][0] / CoeffStep[c][0], i.e.
 * the pairs for entries c and c+1. */
34 float32x4_t coefs
= vld1q_f32((float32_t
*)&Coeffs
[c
][0]);
35 float32x4_t deltas
= vld1q_f32(&CoeffStep
[c
][0]);
/* vals += coefs * leftright4 (uses the coefficients before they are stepped). */
37 vals
= vmlaq_f32(vals
, coefs
, leftright4
);
/* Step the coefficients for the next call. */
38 coefs
= vaddq_f32(coefs
, deltas
);
/* Write the two accumulated pairs back to their wrapped positions, then the
 * stepped coefficients back to Coeffs[c] and Coeffs[c+1]. */
40 vst1_f32((float32_t
*)&Values
[o0
][0], vget_low_f32(vals
));
41 vst1_f32((float32_t
*)&Values
[o1
][0], vget_high_f32(vals
));
42 vst1q_f32(&Coeffs
[c
][0], coefs
);
/* ApplyCoeffs: accumulates Coeffs scaled by (left, right) into Values, two
 * entries per iteration. Unlike ApplyCoeffsStep, Coeffs is only read here.
 *
 *   Offset - starting index into Values; indices are wrapped with HRIR_MASK.
 *   Values - array of float pairs that is read, accumulated into and
 *            written back.
 *   Coeffs - array of float pairs used as multipliers.
 *   left   - scale applied to element [0] of each coefficient pair.
 *   right  - scale applied to element [1] of each coefficient pair.
 *
 * NOTE(review): the original line numbers embedded in this listing skip
 * (47, 50-51, 53, ...), so braces and the declarations of `c` and `IrSize`
 * are not visible here. Confirm against the full file.
 */
46 static inline void ApplyCoeffs(ALuint Offset
, ALfloat (*restrict Values
)[2],
48 ALfloat (*restrict Coeffs
)[2],
49 ALfloat left
, ALfloat right
)
/* Build the four-lane scale vector {left, right, left, right}. */
52 float32x4_t leftright4
;
54 float32x2_t leftright2
= vdup_n_f32(0.0);
55 leftright2
= vset_lane_f32(left
, leftright2
, 0);
56 leftright2
= vset_lane_f32(right
, leftright2
, 1);
57 leftright4
= vcombine_f32(leftright2
, leftright2
);
/* Handle entries c and c+1 together; c advances by 2 while c < IrSize. */
59 for(c
= 0;c
< IrSize
;c
+= 2)
/* o0/o1 are the two wrapped Values indices for this iteration. */
61 const ALuint o0
= (Offset
+c
)&HRIR_MASK
;
62 const ALuint o1
= (o0
+1)&HRIR_MASK
;
/* Loaded as two separate float pairs because the mask may wrap o1. */
63 float32x4_t vals
= vcombine_f32(vld1_f32((float32_t
*)&Values
[o0
][0]),
64 vld1_f32((float32_t
*)&Values
[o1
][0]));
/* Four consecutive floats: the pairs for entries c and c+1. */
65 float32x4_t coefs
= vld1q_f32((float32_t
*)&Coeffs
[c
][0]);
/* vals += coefs * leftright4 */
67 vals
= vmlaq_f32(vals
, coefs
, leftright4
);
/* Write the two accumulated pairs back to their wrapped positions. */
69 vst1_f32((float32_t
*)&Values
[o0
][0], vget_low_f32(vals
));
70 vst1_f32((float32_t
*)&Values
[o1
][0], vget_high_f32(vals
));
76 #include "mixer_inc.c"
/* MixDirect_Neon: adds `data`, scaled by a per-output-channel gain, into
 * params->OutBuffer starting at sample OutPos.
 *
 *   params      - supplies OutBuffer, Counter and the Mix.Gains tables
 *                 (Step, Current, Target), indexed [srcchan][c].
 *   data        - input samples (read only).
 *   srcchan     - source channel index used to select the gain row.
 *   OutPos      - sample offset into each OutBuffer row.
 *   SamplesToDo - wrapped in UNUSED(); not referenced in the visible body.
 *   BufferSize  - number of input samples to mix.
 *
 * NOTE(review): the original line numbers embedded in this listing skip
 * (82, 86-88, 90-91, 99, 101, 103, 105, 112, 120, ...), so braces, the
 * declarations of `c`, `pos` and `gain`, and possibly other statements are
 * not visible here. The comments below describe only what is shown.
 */
80 void MixDirect_Neon(DirectParams
*params
, const ALfloat
*restrict data
, ALuint srcchan
,
81 ALuint OutPos
, ALuint
UNUSED(SamplesToDo
), ALuint BufferSize
)
83 ALfloat (*restrict OutBuffer
)[BUFFERSIZE
] = params
->OutBuffer
;
/* Counter is params->Counter reduced by OutPos, floored at 0 -- presumably
 * maxu is an unsigned max; verify against its definition. */
84 ALuint Counter
= maxu(params
->Counter
, OutPos
) - OutPos
;
85 ALfloat DrySend
, Step
;
/* One pass per output channel. */
89 for(c
= 0;c
< MaxChannels
;c
++)
92 Step
= params
->Mix
.Gains
.Step
[srcchan
][c
];
/* This path is taken only when Step differs from 1.0 and Counter is
 * non-zero; it starts from the stored Current gain. */
93 if(Step
!= 1.0f
&& Counter
> 0)
95 DrySend
= params
->Mix
.Gains
.Current
[srcchan
][c
];
/* Four samples handled individually when at least four remain in both
 * BufferSize and Counter. */
96 if(BufferSize
-pos
> 3 && Counter
-pos
> 3)
98 OutBuffer
[c
][OutPos
+pos
] += data
[pos
]*DrySend
;
100 OutBuffer
[c
][OutPos
+pos
+1] += data
[pos
+1]*DrySend
;
102 OutBuffer
[c
][OutPos
+pos
+2] += data
[pos
+2]*DrySend
;
/* NOTE(review): the destination offset here is +4 while the source offset
 * is +3; the three statements above use matching offsets (0, 1, 2), so +4
 * looks like a typo for +3 -- confirm and fix in the full file. */
104 OutBuffer
[c
][OutPos
+pos
+4] += data
[pos
+3]*DrySend
;
/* Sample-by-sample path, entered only when fewer than four samples remain. */
107 if(!(BufferSize
-pos
> 3))
109 for(;pos
< BufferSize
&& pos
< Counter
;pos
++)
111 OutBuffer
[c
][OutPos
+pos
] += data
[pos
]*DrySend
;
/* Store the gain value back as the new Current gain. */
115 params
->Mix
.Gains
.Current
[srcchan
][c
] = DrySend
;
/* From here on the Target gain is used. */
118 DrySend
= params
->Mix
.Gains
.Target
[srcchan
][c
];
/* The statement controlled by this check is not visible in this listing;
 * presumably it skips channels whose gain is at or below the threshold --
 * TODO confirm. */
119 if(!(DrySend
> GAIN_SILENCE_THRESHOLD
))
/* Broadcast the gain to all four lanes and mix four samples per iteration:
 * OutBuffer[c][OutPos+pos..+3] += data[pos..+3] * gain. */
121 gain
= vdupq_n_f32(DrySend
);
122 for(;BufferSize
-pos
> 3;pos
+= 4)
124 const float32x4_t val4
= vld1q_f32(&data
[pos
]);
125 float32x4_t dry4
= vld1q_f32(&OutBuffer
[c
][OutPos
+pos
]);
126 dry4
= vaddq_f32(dry4
, vmulq_f32(val4
, gain
));
127 vst1q_f32(&OutBuffer
[c
][OutPos
+pos
], dry4
);
/* Scalar tail for the remaining (fewer than four) samples. */
129 for(;pos
< BufferSize
;pos
++)
130 OutBuffer
[c
][OutPos
+pos
] += data
[pos
]*DrySend
;
/* MixSend_Neon: adds `data`, scaled by params->Gain, into row 0 of
 * params->OutBuffer starting at sample OutPos, and updates the first
 * entries of params->ClickRemoval and params->PendingClicks.
 *
 *   params      - supplies OutBuffer, ClickRemoval, PendingClicks and Gain.
 *   data        - input samples (read only).
 *   OutPos      - sample offset into OutBuffer[0].
 *   SamplesToDo - compared against OutPos+pos after mixing.
 *   BufferSize  - number of input samples to mix.
 *
 * NOTE(review): the original line numbers embedded in this listing skip
 * (137, 141-144, 147-149, 151, ...), so braces, the declarations of
 * `WetGain`, `gain` and `pos`, and possibly other statements are not
 * visible here. The comments below describe only what is shown.
 */
135 void MixSend_Neon(SendParams
*params
, const ALfloat
*restrict data
,
136 ALuint OutPos
, ALuint SamplesToDo
, ALuint BufferSize
)
138 ALfloat (*restrict OutBuffer
)[BUFFERSIZE
] = params
->OutBuffer
;
139 ALfloat
*restrict ClickRemoval
= params
->ClickRemoval
;
140 ALfloat
*restrict PendingClicks
= params
->PendingClicks
;
145 WetGain
= params
->Gain
;
/* The statement controlled by this check is not visible in this listing;
 * presumably the function exits early when the gain is at or below the
 * threshold -- TODO confirm. */
146 if(!(WetGain
> GAIN_SILENCE_THRESHOLD
))
/* Subtract the first scaled sample from ClickRemoval[0].
 * NOTE(review): any condition guarding this statement is not visible here. */
150 ClickRemoval
[0] -= data
[0] * WetGain
;
/* Broadcast the gain to all four lanes and mix four samples per iteration:
 * OutBuffer[0][OutPos+pos..+3] += data[pos..+3] * gain. */
152 gain
= vdupq_n_f32(WetGain
);
153 for(pos
= 0;BufferSize
-pos
> 3;pos
+= 4)
155 const float32x4_t val4
= vld1q_f32(&data
[pos
]);
156 float32x4_t wet4
= vld1q_f32(&OutBuffer
[0][OutPos
+pos
]);
157 wet4
= vaddq_f32(wet4
, vmulq_f32(val4
, gain
));
158 vst1q_f32(&OutBuffer
[0][OutPos
+pos
], wet4
);
/* Scalar tail for the remaining (fewer than four) samples. */
160 for(;pos
< BufferSize
;pos
++)
161 OutBuffer
[0][OutPos
+pos
] += data
[pos
] * WetGain
;
/* When mixing ended exactly at SamplesToDo, add one more scaled sample to
 * PendingClicks[0]. After the tail loop pos equals BufferSize, so this reads
 * data[BufferSize]; presumably the caller provides at least one sample past
 * BufferSize -- TODO confirm. */
163 if(OutPos
+pos
== SamplesToDo
)
164 PendingClicks
[0] += data
[pos
] * WetGain
;