/*
 * ApplyCoeffsStep -- NEON accumulate of two filter taps per iteration, with
 * per-tap coefficient stepping.
 *
 * {left, right} is replicated into a 4-lane vector. For each even c below
 * IrSize, the stereo pairs Values[o0] and Values[o1] (o0 = (Offset+c) masked
 * by HRIR_MASK, o1 = the next masked index) receive Coeffs[c] * {left, right}
 * and Coeffs[c+1] * {left, right}. Afterwards the four coefficients are
 * advanced by the matching CoeffStep entries and written back to Coeffs.
 *
 * NOTE(review): this listing carries leading source line numbers and shows
 * no braces, no declaration of `c`, and no declaration of `IrSize`; they are
 * presumably defined on lines not visible here -- confirm against the file.
 */
14 static inline void ApplyCoeffsStep(ALuint Offset
, ALfloat (*restrict Values
)[2],
16 ALfloat (*restrict Coeffs
)[2],
17 const ALfloat (*restrict CoeffStep
)[2],
18 ALfloat left
, ALfloat right
)
21 float32x4_t leftright4
;
/* Build {left, right} in a 2-lane vector, then duplicate it to 4 lanes. */
23 float32x2_t leftright2
= vdup_n_f32(0.0);
24 leftright2
= vset_lane_f32(left
, leftright2
, 0);
25 leftright2
= vset_lane_f32(right
, leftright2
, 1);
26 leftright4
= vcombine_f32(leftright2
, leftright2
);
28 for(c
= 0;c
< IrSize
;c
+= 2)
30 const ALuint o0
= (Offset
+c
)&HRIR_MASK
;
31 const ALuint o1
= (o0
+1)&HRIR_MASK
;
/* The two Values pairs are loaded separately because o1 is masked and
 * therefore need not directly follow o0 in memory. */
32 float32x4_t vals
= vcombine_f32(vld1_f32((float32_t
*)&Values
[o0
][0]),
33 vld1_f32((float32_t
*)&Values
[o1
][0]));
/* One 4-float load covers Coeffs[c][0..1] and Coeffs[c+1][0..1]. */
34 float32x4_t coefs
= vld1q_f32((float32_t
*)&Coeffs
[c
][0]);
35 float32x4_t deltas
= vld1q_f32(&CoeffStep
[c
][0]);
/* vals += coefs * {left, right, left, right} */
37 vals
= vmlaq_f32(vals
, coefs
, leftright4
);
/* Advance the coefficients after they have been applied. */
38 coefs
= vaddq_f32(coefs
, deltas
);
40 vst1_f32((float32_t
*)&Values
[o0
][0], vget_low_f32(vals
));
41 vst1_f32((float32_t
*)&Values
[o1
][0], vget_high_f32(vals
));
42 vst1q_f32(&Coeffs
[c
][0], coefs
);
/*
 * ApplyCoeffs -- NEON accumulate of two filter taps per iteration, with
 * fixed coefficients.
 *
 * {left, right} is replicated into a 4-lane vector. For each even c below
 * IrSize, the stereo pairs Values[o0] and Values[o1] (o0 = (Offset+c) masked
 * by HRIR_MASK, o1 = the next masked index) receive Coeffs[c] * {left, right}
 * and Coeffs[c+1] * {left, right}. Coeffs is only read in the visible code.
 *
 * NOTE(review): this listing carries leading source line numbers and shows
 * no braces, no declaration of `c`, and no declaration of `IrSize`; they are
 * presumably defined on lines not visible here -- confirm against the file.
 */
46 static inline void ApplyCoeffs(ALuint Offset
, ALfloat (*restrict Values
)[2],
48 ALfloat (*restrict Coeffs
)[2],
49 ALfloat left
, ALfloat right
)
52 float32x4_t leftright4
;
/* Build {left, right} in a 2-lane vector, then duplicate it to 4 lanes. */
54 float32x2_t leftright2
= vdup_n_f32(0.0);
55 leftright2
= vset_lane_f32(left
, leftright2
, 0);
56 leftright2
= vset_lane_f32(right
, leftright2
, 1);
57 leftright4
= vcombine_f32(leftright2
, leftright2
);
59 for(c
= 0;c
< IrSize
;c
+= 2)
61 const ALuint o0
= (Offset
+c
)&HRIR_MASK
;
62 const ALuint o1
= (o0
+1)&HRIR_MASK
;
/* The two Values pairs are loaded separately because o1 is masked and
 * therefore need not directly follow o0 in memory. */
63 float32x4_t vals
= vcombine_f32(vld1_f32((float32_t
*)&Values
[o0
][0]),
64 vld1_f32((float32_t
*)&Values
[o1
][0]));
/* One 4-float load covers Coeffs[c][0..1] and Coeffs[c+1][0..1]. */
65 float32x4_t coefs
= vld1q_f32((float32_t
*)&Coeffs
[c
][0]);
/* vals += coefs * {left, right, left, right} */
67 vals
= vmlaq_f32(vals
, coefs
, leftright4
);
69 vst1_f32((float32_t
*)&Values
[o0
][0], vget_low_f32(vals
));
70 vst1_f32((float32_t
*)&Values
[o1
][0], vget_high_f32(vals
));
76 #include "mixer_inc.c"
/*
 * MixDirect_Neon -- add `data` scaled by a per-channel dry gain into each
 * row of params->OutBuffer, starting at column OutPos.
 *
 * For every output channel c below MaxChannels:
 *   - If Gains.Step[srcchan][c] differs from 1.0f and Counter is non-zero,
 *     samples are mixed with the Current gain in scalar code: four samples
 *     per iteration while at least four remain in both the buffer and the
 *     counter, then one sample at a time when fewer than four remain in the
 *     buffer. The gain is then stored back to Gains.Current.
 *   - The remaining samples are mixed with Gains.Target, four at a time
 *     with NEON loads/stores and then one at a time.
 *
 * Counter is params->Counter minus OutPos (via maxu, so presumably clamped
 * at zero -- confirm maxu's definition).
 *
 * NOTE(review): this listing carries leading source line numbers and shows
 * no braces and no declarations of `c`, `pos` or `gain`. No per-sample
 * update of DrySend by Step is visible between the stores although DrySend
 * is written back to Current, and the statement controlled by the
 * GAIN_SILENCE_THRESHOLD test is not visible either. Confirm all of these
 * against the original file.
 */
80 void MixDirect_Neon(DirectParams
*params
, const ALfloat
*restrict data
, ALuint srcchan
,
81 ALuint OutPos
, ALuint BufferSize
)
83 ALfloat (*restrict OutBuffer
)[BUFFERSIZE
] = params
->OutBuffer
;
84 ALuint Counter
= maxu(params
->Counter
, OutPos
) - OutPos
;
85 ALfloat DrySend
, Step
;
89 for(c
= 0;c
< MaxChannels
;c
++)
92 Step
= params
->Mix
.Gains
.Step
[srcchan
][c
];
93 if(Step
!= 1.0f
&& Counter
> 0)
95 DrySend
= params
->Mix
.Gains
.Current
[srcchan
][c
];
/* Groups of four consecutive samples: slots pos .. pos+3. */
96 for(;BufferSize
-pos
> 3 && Counter
-pos
> 3;pos
+=4)
98 OutBuffer
[c
][OutPos
+pos
] += data
[pos
]*DrySend
;
100 OutBuffer
[c
][OutPos
+pos
+1] += data
[pos
+1]*DrySend
;
102 OutBuffer
[c
][OutPos
+pos
+2] += data
[pos
+2]*DrySend
;
/* The fourth sample belongs in slot pos+3, matching data[pos+3]; writing
 * pos+4 would skip a slot and step past the range the loop guard checks. */
104 OutBuffer
[c
][OutPos
+pos
+3] += data
[pos
+3]*DrySend
;
/* Scalar tail of the ramp, taken only when fewer than four samples remain
 * in the buffer. */
107 if(!(BufferSize
-pos
> 3))
109 for(;pos
< BufferSize
&& pos
< Counter
;pos
++)
111 OutBuffer
[c
][OutPos
+pos
] += data
[pos
]*DrySend
;
115 params
->Mix
.Gains
.Current
[srcchan
][c
] = DrySend
;
/* From here on the target gain is used for whatever is left of the buffer. */
118 DrySend
= params
->Mix
.Gains
.Target
[srcchan
][c
];
119 if(!(DrySend
> GAIN_SILENCE_THRESHOLD
))
121 gain
= vdupq_n_f32(DrySend
);
/* NEON path: out[pos..pos+3] += data[pos..pos+3] * gain */
122 for(;BufferSize
-pos
> 3;pos
+= 4)
124 const float32x4_t val4
= vld1q_f32(&data
[pos
]);
125 float32x4_t dry4
= vld1q_f32(&OutBuffer
[c
][OutPos
+pos
]);
126 dry4
= vaddq_f32(dry4
, vmulq_f32(val4
, gain
));
127 vst1q_f32(&OutBuffer
[c
][OutPos
+pos
], dry4
);
/* Leftover samples (fewer than four) are mixed one at a time. */
129 for(;pos
< BufferSize
;pos
++)
130 OutBuffer
[c
][OutPos
+pos
] += data
[pos
]*DrySend
;
/*
 * MixSend_Neon -- add `data` scaled by the send (wet) gain into row 0 of
 * params->OutBuffer, starting at column OutPos. SamplesToDo is marked
 * UNUSED.
 *
 *   - If Gain.Step differs from 1.0f and Counter is non-zero, samples are
 *     mixed with Gain.Current in scalar code: four samples per iteration
 *     while at least four remain in both the buffer and the counter, then
 *     one sample at a time when fewer than four remain in the buffer. The
 *     gain is then stored back to Gain.Current.
 *   - The remaining samples are mixed with Gain.Target, four at a time with
 *     NEON loads/stores and then one at a time.
 *
 * Counter is params->Counter minus OutPos (via maxu, so presumably clamped
 * at zero -- confirm maxu's definition).
 *
 * NOTE(review): this listing carries leading source line numbers and shows
 * no braces and no declarations of `pos` or `gain`. No per-sample update of
 * WetGain by Step is visible between the stores although WetGain is written
 * back to Current, and the statement controlled by the
 * GAIN_SILENCE_THRESHOLD test is not visible either. Confirm all of these
 * against the original file.
 */
135 void MixSend_Neon(SendParams
*params
, const ALfloat
*restrict data
,
136 ALuint OutPos
, ALuint
UNUSED(SamplesToDo
), ALuint BufferSize
)
138 ALfloat (*restrict OutBuffer
)[BUFFERSIZE
] = params
->OutBuffer
;
139 ALuint Counter
= maxu(params
->Counter
, OutPos
) - OutPos
;
140 ALfloat WetGain
, Step
;
145 Step
= params
->Gain
.Step
;
146 if(Step
!= 1.0f
&& Counter
> 0)
148 WetGain
= params
->Gain
.Current
;
/* Groups of four consecutive samples: slots pos .. pos+3. */
149 for(;BufferSize
-pos
> 3 && Counter
-pos
> 3;pos
+=4)
151 OutBuffer
[0][OutPos
+pos
] += data
[pos
]*WetGain
;
153 OutBuffer
[0][OutPos
+pos
+1] += data
[pos
+1]*WetGain
;
155 OutBuffer
[0][OutPos
+pos
+2] += data
[pos
+2]*WetGain
;
/* The fourth sample belongs in slot pos+3, matching data[pos+3]; writing
 * pos+4 would skip a slot and step past the range the loop guard checks. */
157 OutBuffer
[0][OutPos
+pos
+3] += data
[pos
+3]*WetGain
;
/* Scalar tail of the ramp, taken only when fewer than four samples remain
 * in the buffer. */
160 if(!(BufferSize
-pos
> 3))
162 for(;pos
< BufferSize
&& pos
< Counter
;pos
++)
164 OutBuffer
[0][OutPos
+pos
] += data
[pos
]*WetGain
;
168 params
->Gain
.Current
= WetGain
;
/* From here on the target gain is used for whatever is left of the buffer. */
171 WetGain
= params
->Gain
.Target
;
172 if(!(WetGain
> GAIN_SILENCE_THRESHOLD
))
174 gain
= vdupq_n_f32(WetGain
);
/* NEON path: out[pos..pos+3] += data[pos..pos+3] * gain */
175 for(;BufferSize
-pos
> 3;pos
+= 4)
177 const float32x4_t val4
= vld1q_f32(&data
[pos
]);
178 float32x4_t wet4
= vld1q_f32(&OutBuffer
[0][OutPos
+pos
]);
179 wet4
= vaddq_f32(wet4
, vmulq_f32(val4
, gain
));
180 vst1q_f32(&OutBuffer
[0][OutPos
+pos
], wet4
);
/* Leftover samples (fewer than four) are mixed one at a time. */
182 for(;pos
< BufferSize
;pos
++)
183 OutBuffer
[0][OutPos
+pos
] += data
[pos
] * WetGain
;