/* Accumulates coefficient pairs, scaled by left/right, into the wrapped
 * Values buffer, then advances each coefficient by its step.
 *
 * Offset     - starting index into Values; every index is masked with HRIR_MASK.
 * Values     - array of float pairs; read, accumulated into and written back.
 * Coeffs     - array of float pairs; read, incremented by CoeffStep and stored.
 * CoeffStep  - per-coefficient increments (only read here).
 * left/right - scale applied to element [0] / element [1] of each pair.
 *
 * NOTE(review): this listing carries an embedded line number at the start of
 * each statement and those numbers skip (13, 17-18, 20, 25, 27, ...). No
 * braces and no declaration of `c` or `IrSize` are visible here -- confirm
 * the exact block structure against the pristine file.
 */
12 static inline void ApplyCoeffsStep(ALuint Offset
, ALfloat (*restrict Values
)[2],
14 ALfloat (*restrict Coeffs
)[2],
15 const ALfloat (*restrict CoeffStep
)[2],
16 ALfloat left
, ALfloat right
)
/* Four-lane scale vector, filled below as {left, right, left, right}. */
19 float32x4_t leftright4
;
/* Build the two-lane {left, right} half, then duplicate it into both halves. */
21 float32x2_t leftright2
= vdup_n_f32(0.0);
22 leftright2
= vset_lane_f32(left
, leftright2
, 0);
23 leftright2
= vset_lane_f32(right
, leftright2
, 1);
24 leftright4
= vcombine_f32(leftright2
, leftright2
);
/* Two coefficient pairs (four floats) are handled per iteration. */
26 for(c
= 0;c
< IrSize
;c
+= 2)
/* Wrapped indices of the two consecutive Values pairs touched this pass. */
28 const ALuint o0
= (Offset
+c
)&HRIR_MASK
;
29 const ALuint o1
= (o0
+1)&HRIR_MASK
;
/* The two Values pairs are loaded separately because o1 may wrap around. */
30 float32x4_t vals
= vcombine_f32(vld1_f32((float32_t
*)&Values
[o0
][0]),
31 vld1_f32((float32_t
*)&Values
[o1
][0]));
/* Four consecutive floats starting at Coeffs[c][0], i.e. pairs c and c+1. */
32 float32x4_t coefs
= vld1q_f32((float32_t
*)&Coeffs
[c
][0]);
/* Matching four increments starting at CoeffStep[c][0]. */
33 float32x4_t deltas
= vld1q_f32(&CoeffStep
[c
][0]);
/* vals += coefs * {left, right, left, right} */
35 vals
= vmlaq_f32(vals
, coefs
, leftright4
);
/* Advance the coefficients by their steps (after they were used above). */
36 coefs
= vaddq_f32(coefs
, deltas
);
/* Write the low half back to Values[o0] and the high half to Values[o1]. */
38 vst1_f32((float32_t
*)&Values
[o0
][0], vget_low_f32(vals
));
39 vst1_f32((float32_t
*)&Values
[o1
][0], vget_high_f32(vals
));
/* Store the stepped coefficients back in place. */
40 vst1q_f32(&Coeffs
[c
][0], coefs
);
/* Accumulates coefficient pairs, scaled by left/right, into the wrapped
 * Values buffer. In the visible statements Coeffs is only read.
 *
 * Offset     - starting index into Values; every index is masked with HRIR_MASK.
 * Values     - array of float pairs; read, accumulated into and written back.
 * Coeffs     - array of float pairs supplying the multiplicands.
 * left/right - scale applied to element [0] / element [1] of each pair.
 *
 * NOTE(review): this listing carries an embedded line number at the start of
 * each statement and those numbers skip (45, 48-49, 51, 56, 58, ...). No
 * braces and no declaration of `c` or `IrSize` are visible here -- confirm
 * the exact block structure against the pristine file.
 */
44 static inline void ApplyCoeffs(ALuint Offset
, ALfloat (*restrict Values
)[2],
46 ALfloat (*restrict Coeffs
)[2],
47 ALfloat left
, ALfloat right
)
/* Four-lane scale vector, filled below as {left, right, left, right}. */
50 float32x4_t leftright4
;
/* Build the two-lane {left, right} half, then duplicate it into both halves. */
52 float32x2_t leftright2
= vdup_n_f32(0.0);
53 leftright2
= vset_lane_f32(left
, leftright2
, 0);
54 leftright2
= vset_lane_f32(right
, leftright2
, 1);
55 leftright4
= vcombine_f32(leftright2
, leftright2
);
/* Two coefficient pairs (four floats) are handled per iteration. */
57 for(c
= 0;c
< IrSize
;c
+= 2)
/* Wrapped indices of the two consecutive Values pairs touched this pass. */
59 const ALuint o0
= (Offset
+c
)&HRIR_MASK
;
60 const ALuint o1
= (o0
+1)&HRIR_MASK
;
/* The two Values pairs are loaded separately because o1 may wrap around. */
61 float32x4_t vals
= vcombine_f32(vld1_f32((float32_t
*)&Values
[o0
][0]),
62 vld1_f32((float32_t
*)&Values
[o1
][0]));
/* Four consecutive floats starting at Coeffs[c][0], i.e. pairs c and c+1. */
63 float32x4_t coefs
= vld1q_f32((float32_t
*)&Coeffs
[c
][0]);
/* vals += coefs * {left, right, left, right} */
65 vals
= vmlaq_f32(vals
, coefs
, leftright4
);
/* Write the low half back to Values[o0] and the high half to Values[o1]. */
67 vst1_f32((float32_t
*)&Values
[o0
][0], vget_low_f32(vals
));
68 vst1_f32((float32_t
*)&Values
[o1
][0], vget_high_f32(vals
));
74 #include "mixer_inc.c"
/* Adds `data`, scaled by a per-channel gain, into OutBuffer[c] starting at
 * OutPos, for every channel index c below MaxChannels.
 *
 * OutBuffer  - per-channel output rows of BUFFERSIZE floats (accumulated into).
 * data       - input samples; BufferSize of them are read.
 * Gains      - per-channel Current/Step/Target gains; Current[c] is written
 *              on the stepping path.
 * Counter    - upper bound on how many samples the first (stepping) loop mixes.
 * OutPos     - offset into each output row where mixing starts.
 * BufferSize - number of samples to mix.
 *
 * NOTE(review): this listing carries an embedded line number at the start of
 * each statement and those numbers skip (80, 82-84, 86-87, 91, 93, 95-97,
 * 103-104, 106, ...). No braces and no declarations of `c`, `pos` or `gain`
 * are visible -- confirm the block structure against the pristine file.
 */
78 void MixDirect_Neon(ALfloat (*restrict OutBuffer
)[BUFFERSIZE
], const ALfloat
*data
,
79 MixGains
*Gains
, ALuint Counter
, ALuint OutPos
, ALuint BufferSize
)
81 ALfloat DrySend
, Step
;
/* One pass per output channel. */
85 for(c
= 0;c
< MaxChannels
;c
++)
/* Start from this channel's current gain and its step factor. */
88 DrySend
= Gains
->Current
[c
];
89 Step
= Gains
->Step
[c
];
/* A step of exactly 1.0 or a zero Counter bypasses the stepping path. */
90 if(Step
!= 1.0f
&& Counter
> 0)
/* Scalar mix of at most Counter samples, never past BufferSize. */
92 for(;pos
< BufferSize
&& pos
< Counter
;pos
++)
/* NOTE(review): Step is only compared above in the visible text; where it is
 * applied to DrySend is not shown here (embedded lines 95-97 are absent). */
94 OutBuffer
[c
][OutPos
+pos
] += data
[pos
]*DrySend
;
/* Take the target gain and record the gain in use as the new Current[c]. */
98 DrySend
= Gains
->Target
[c
];
99 Gains
->Current
[c
] = DrySend
;
100 /* Mix until pos is aligned with 4 or the mix is done. */
101 for(;pos
< BufferSize
&& (pos
&3) != 0;pos
++)
102 OutBuffer
[c
][OutPos
+pos
] += data
[pos
]*DrySend
;
/* Silence test: true when the gain is not above GAIN_SILENCE_THRESHOLD.
 * NOTE(review): the statement this test controls is not visible in the
 * listing (embedded line 106 is absent) -- confirm against the original. */
105 if(!(DrySend
> GAIN_SILENCE_THRESHOLD
))
/* Broadcast the scalar gain to all four lanes. */
107 gain
= vdupq_n_f32(DrySend
);
/* Vector loop: four samples per iteration while at least four remain. */
108 for(;BufferSize
-pos
> 3;pos
+= 4)
110 const float32x4_t val4
= vld1q_f32(&data
[pos
]);
111 float32x4_t dry4
= vld1q_f32(&OutBuffer
[c
][OutPos
+pos
]);
/* dry4 += val4 * gain, then store back to the same output position. */
112 dry4
= vaddq_f32(dry4
, vmulq_f32(val4
, gain
));
113 vst1q_f32(&OutBuffer
[c
][OutPos
+pos
], dry4
);
/* Scalar tail for the remaining (fewer than four) samples. */
115 for(;pos
< BufferSize
;pos
++)
116 OutBuffer
[c
][OutPos
+pos
] += data
[pos
]*DrySend
;
/* Adds `data`, scaled by the send gain, into row 0 of params->OutBuffer
 * starting at OutPos.
 *
 * params     - supplies OutBuffer, Counter and Gain (Current/Step/Target);
 *              Gain.Current is written on the stepping path.
 * data       - input samples; BufferSize of them are read.
 * OutPos     - offset into the output row where mixing starts.
 * BufferSize - number of samples to mix.
 *
 * NOTE(review): this listing carries an embedded line number at the start of
 * each statement and those numbers skip (123, 127-130, 134, 136, 138-140,
 * 145-146, 148, ...). No braces and no declarations of `pos` or `gain` are
 * visible -- confirm the block structure against the pristine file.
 */
121 void MixSend_Neon(SendParams
*params
, const ALfloat
*restrict data
,
122 ALuint OutPos
, ALuint BufferSize
)
124 ALfloat (*restrict OutBuffer
)[BUFFERSIZE
] = params
->OutBuffer
;
/* Presumably maxu() returns the larger of its unsigned arguments, making this
 * params->Counter - OutPos clamped at zero -- verify against maxu's definition. */
125 ALuint Counter
= maxu(params
->Counter
, OutPos
) - OutPos
;
126 ALfloat WetGain
, Step
;
/* Start from the current send gain and its step factor. */
131 WetGain
= params
->Gain
.Current
;
132 Step
= params
->Gain
.Step
;
/* A step of exactly 1.0 or a zero Counter bypasses the stepping path. */
133 if(Step
!= 1.0f
&& Counter
> 0)
/* Scalar mix of at most Counter samples, never past BufferSize. */
135 for(;pos
< BufferSize
&& pos
< Counter
;pos
++)
/* NOTE(review): Step is only compared above in the visible text; where it is
 * applied to WetGain is not shown here (embedded lines 138-140 are absent). */
137 OutBuffer
[0][OutPos
+pos
] += data
[pos
]*WetGain
;
/* Take the target gain and record the gain in use as the new Gain.Current. */
141 WetGain
= params
->Gain
.Target
;
142 params
->Gain
.Current
= WetGain
;
/* Scalar mix until pos is a multiple of 4 or the buffer is exhausted. */
143 for(;pos
< BufferSize
&& (pos
&3) != 0;pos
++)
144 OutBuffer
[0][OutPos
+pos
] += data
[pos
]*WetGain
;
/* Silence test: true when the gain is not above GAIN_SILENCE_THRESHOLD.
 * NOTE(review): the statement this test controls is not visible in the
 * listing (embedded line 148 is absent) -- confirm against the original. */
147 if(!(WetGain
> GAIN_SILENCE_THRESHOLD
))
/* Broadcast the scalar gain to all four lanes. */
149 gain
= vdupq_n_f32(WetGain
);
/* Vector loop: four samples per iteration while at least four remain. */
150 for(;BufferSize
-pos
> 3;pos
+= 4)
152 const float32x4_t val4
= vld1q_f32(&data
[pos
]);
153 float32x4_t wet4
= vld1q_f32(&OutBuffer
[0][OutPos
+pos
]);
/* wet4 += val4 * gain, then store back to the same output position. */
154 wet4
= vaddq_f32(wet4
, vmulq_f32(val4
, gain
));
155 vst1q_f32(&OutBuffer
[0][OutPos
+pos
], wet4
);
/* Scalar tail for the remaining (fewer than four) samples. */
157 for(;pos
< BufferSize
;pos
++)
158 OutBuffer
[0][OutPos
+pos
] += data
[pos
] * WetGain
;