Workaround MinGW __declspec(align(x)) bug
[openal-soft.git] / Alc / mixer_sse.c
blob 0f80b0952d64b2994c4050fc79d984363a10625e
1 #include "config.h"
3 #ifdef HAVE_XMMINTRIN_H
4 #include <xmmintrin.h>
5 #endif
7 #include "AL/al.h"
8 #include "AL/alc.h"
9 #include "alMain.h"
10 #include "alu.h"
13 static __inline void ApplyCoeffsStep(ALuint Offset, ALfloat (*RESTRICT Values)[2],
14 ALfloat (*RESTRICT Coeffs)[2],
15 ALfloat (*RESTRICT CoeffStep)[2],
16 ALfloat left, ALfloat right)
18 const __m128 lrlr = { left, right, left, right };
19 __m128 vals = { 0.0f, 0.0f, 0.0f, 0.0f };
20 __m128 coeffs, coeffstep;
21 ALuint c;
22 for(c = 0;c < HRIR_LENGTH;c += 2)
24 const ALuint o0 = (Offset++)&HRIR_MASK;
25 const ALuint o1 = (Offset++)&HRIR_MASK;
27 coeffs = _mm_load_ps(&Coeffs[c][0]);
28 vals = _mm_loadl_pi(vals, (__m64*)&Values[o0][0]);
29 vals = _mm_loadh_pi(vals, (__m64*)&Values[o1][0]);
31 vals = _mm_add_ps(vals, _mm_mul_ps(coeffs, lrlr));
32 _mm_storel_pi((__m64*)&Values[o0][0], vals);
33 _mm_storeh_pi((__m64*)&Values[o1][0], vals);
35 coeffstep = _mm_load_ps(&CoeffStep[c][0]);
36 coeffs = _mm_add_ps(coeffs, coeffstep);
37 _mm_store_ps(&Coeffs[c][0], coeffs);
41 static __inline void ApplyCoeffs(ALuint Offset, ALfloat (*RESTRICT Values)[2],
42 ALfloat (*RESTRICT Coeffs)[2],
43 ALfloat left, ALfloat right)
45 const __m128 lrlr = { left, right, left, right };
46 __m128 vals = { 0.0f, 0.0f, 0.0f, 0.0f };
47 __m128 coeffs;
48 ALuint c;
49 for(c = 0;c < HRIR_LENGTH;c += 2)
51 const ALuint o0 = (Offset++)&HRIR_MASK;
52 const ALuint o1 = (Offset++)&HRIR_MASK;
54 coeffs = _mm_load_ps(&Coeffs[c][0]);
55 vals = _mm_loadl_pi(vals, (__m64*)&Values[o0][0]);
56 vals = _mm_loadh_pi(vals, (__m64*)&Values[o1][0]);
58 vals = _mm_add_ps(vals, _mm_mul_ps(coeffs, lrlr));
59 _mm_storel_pi((__m64*)&Values[o0][0], vals);
60 _mm_storeh_pi((__m64*)&Values[o1][0], vals);
65 static __inline void ApplyValue(ALfloat *RESTRICT Output, ALfloat value, const ALfloat *DrySend)
67 const __m128 val4 = _mm_set1_ps(value);
68 ALuint c;
69 for(c = 0;c < MaxChannels;c += 4)
71 const __m128 gains = _mm_load_ps(&DrySend[c]);
72 __m128 out = _mm_load_ps(&Output[c]);
73 out = _mm_add_ps(out, _mm_mul_ps(val4, gains));
74 _mm_store_ps(&Output[c], out);
79 #define SUFFIX SSE
80 #define Sampler point32
81 #include "mixer_inc.c"
82 #undef Sampler
83 #define Sampler lerp32
84 #include "mixer_inc.c"
85 #undef Sampler
86 #define Sampler cubic32
87 #include "mixer_inc.c"
88 #undef Sampler
89 #undef SUFFIX