// utilities for the simd implementation
// Copyright (C) 2008, 2009 Tim Blechmann
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; see the file COPYING. If not, write to
// the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
// Boston, MA 02111-1307, USA.
19 #ifndef SIMD_UTILS_HPP
20 #define SIMD_UTILS_HPP
#include <string.h>

#include <xmmintrin.h>

#ifdef __SSE2__
#include <emmintrin.h>
#endif /* __SSE2__ */

#ifdef __SSE4_1__
#include <smmintrin.h>
#endif /* __SSE41__ */
/*
 * Constant generators.  Each function returns a vector whose four 32-bit
 * lanes all hold the same bit pattern:
 *
 *   gen_sign_mask()  0x80000000  only the IEEE-754 sign bit
 *   gen_abs_mask()   0x7fffffff  every bit except the sign bit
 *   gen_one()        0x3f800000  1.0f
 *   gen_05()         0x3f000000  0.5f
 *
 * When SSE2 is available the patterns are synthesised in registers from an
 * all-ones vector using integer shifts; otherwise they are broadcast from a
 * scalar with plain SSE.
 */
#ifdef __SSE2__

/* Sign-bit mask: 0x80000000 in every lane. */
inline __m128 gen_sign_mask(void)
{
    const __m128i zero = _mm_setzero_si128();
    const __m128i ones = _mm_cmpeq_epi32(zero, zero); /* x == x -> all bits set */

    /* Shifting left by 31 leaves only the top bit of each lane. */
    return _mm_castsi128_ps(_mm_slli_epi32(ones, 31));
}

/* Absolute-value mask: 0x7fffffff in every lane. */
inline __m128 gen_abs_mask(void)
{
    const __m128i zero = _mm_setzero_si128();
    const __m128i ones = _mm_cmpeq_epi32(zero, zero); /* x == x -> all bits set */

    /* A logical shift right by 1 clears only the top bit of each lane. */
    return _mm_castsi128_ps(_mm_srli_epi32(ones, 1));
}

/* 1.0f in every lane. */
inline __m128 gen_one(void)
{
    const __m128i zero = _mm_setzero_si128();
    const __m128i ones = _mm_cmpeq_epi32(zero, zero); /* x == x -> all bits set */

    /* (0xffffffff >> 25) << 23 == 0x7f << 23 == 0x3f800000 == 1.0f */
    return _mm_castsi128_ps(_mm_slli_epi32(_mm_srli_epi32(ones, 25), 23));
}

/* 0.5f in every lane. */
inline __m128 gen_05(void)
{
    const __m128i zero = _mm_setzero_si128();
    const __m128i ones = _mm_cmpeq_epi32(zero, zero); /* x == x -> all bits set */

    /* (0xffffffff >> 26) << 24 == 0x3f << 24 == 0x3f000000 == 0.5f */
    return _mm_castsi128_ps(_mm_slli_epi32(_mm_srli_epi32(ones, 26), 24));
}

#else /* !__SSE2__: the integer intrinsics used above are not available */

/* Sign-bit mask: 0x80000000 in every lane. */
inline __m128 gen_sign_mask(void)
{
    /* Like the rest of this header, assumes 32-bit unsigned int and float. */
    const unsigned int sign_bits = 0x80000000u;
    float as_float;

    /* memcpy reinterprets the bits without an aliasing-violating cast. */
    memcpy(&as_float, &sign_bits, sizeof as_float);
    return _mm_set_ps1(as_float);
}

/* Absolute-value mask: 0x7fffffff in every lane. */
inline __m128 gen_abs_mask(void)
{
    /* Like the rest of this header, assumes 32-bit unsigned int and float. */
    const unsigned int abs_bits = 0x7fffffffu;
    float as_float;

    /* memcpy reinterprets the bits without an aliasing-violating cast. */
    memcpy(&as_float, &abs_bits, sizeof as_float);
    return _mm_set_ps1(as_float);
}

/* 1.0f in every lane. */
inline __m128 gen_one(void)
{
    return _mm_set_ps1(1.f);
}

/* 0.5f in every lane. */
inline __m128 gen_05(void)
{
    return _mm_set_ps1(0.5f);
}

#endif /* __SSE2__ */
/* Returns a vector holding 0.25f in each of its four lanes. */
inline __m128 gen_025(void)
{
    return _mm_set_ps1(0.25f);
}
/* Returns the lowest lane (element 0) of arg as a scalar float. */
inline float extract_0(__m128 arg)
{
    return _mm_cvtss_f32(arg);
}
/* Returns the highest lane (element 3) of arg as a scalar float. */
inline float extract_3(__m128 arg)
{
    /* Rotate the lanes so that element 3 lands in lane 0, then read lane 0. */
    const __m128 rotated = _mm_shuffle_ps(arg, arg, _MM_SHUFFLE(2, 1, 0, 3));
    return _mm_cvtss_f32(rotated);
}
/*
 * Returns the smallest of the four lanes of args.
 *
 * The reduction first folds the upper half onto the lower half and then
 * folds lane 1 onto lane 0, so all four lanes take part in the result.
 * (Broadcasting lane 2 in the first step would leave lane 3 out.)
 */
inline float horizontal_min(__m128 args)
{
    /* lane 0 <- a2, lane 1 <- a3 */
    const __m128 upper = _mm_shuffle_ps(args, args, _MM_SHUFFLE(3, 2, 3, 2));
    /* lane 0 = min(a0, a2), lane 1 = min(a1, a3) */
    const __m128 pairs = _mm_min_ps(args, upper);
    /* bring lane 1 down to lane 0 and combine the two partial minima */
    const __m128 second = _mm_shuffle_ps(pairs, pairs, _MM_SHUFFLE(1, 1, 1, 1));
    return _mm_cvtss_f32(_mm_min_ss(pairs, second));
}
/*
 * Returns the largest of the four lanes of args.
 *
 * The reduction first folds the upper half onto the lower half and then
 * folds lane 1 onto lane 0, so all four lanes take part in the result.
 * (Broadcasting lane 2 in the first step would leave lane 3 out.)
 */
inline float horizontal_max(__m128 args)
{
    /* lane 0 <- a2, lane 1 <- a3 */
    const __m128 upper = _mm_shuffle_ps(args, args, _MM_SHUFFLE(3, 2, 3, 2));
    /* lane 0 = max(a0, a2), lane 1 = max(a1, a3) */
    const __m128 pairs = _mm_max_ps(args, upper);
    /* bring lane 1 down to lane 0 and combine the two partial maxima */
    const __m128 second = _mm_shuffle_ps(pairs, pairs, _MM_SHUFFLE(1, 1, 1, 1));
    return _mm_cvtss_f32(_mm_max_ss(pairs, second));
}
/*
 * Lane-wise select: where the bitmask sel is set the result takes the value
 * from val1, elsewhere the value from val0.
 *
 * sel is expected to be a comparison-style mask (each lane all ones or all
 * zeros).  For such masks both implementations agree; for other masks the
 * SSE4.1 blend looks only at the top bit of each lane, while the fallback
 * selects bit by bit.
 */
#ifdef __SSE4_1__

inline __m128 select_vector(__m128 val0, __m128 val1, __m128 sel)
{
    /* if bitmask is set, return value in val1, else value in val0 */
    return _mm_blendv_ps(val0, val1, sel);
}

#else /* !__SSE4_1__ */

inline __m128 select_vector(__m128 val0, __m128 val1, __m128 sel)
{
    /* if bitmask is set, return value in val1, else value in val0 */
    const __m128 from_val0 = _mm_andnot_ps(sel, val0); /* ~sel & val0 */
    const __m128 from_val1 = _mm_and_ps(val1, sel);    /*  sel & val1 */
    return _mm_or_ps(from_val0, from_val1);
}

#endif /* __SSE4_1__ */
158 } /* namespace detail */
159 } /* namespace nova */
161 #endif /* SIMD_UTILS_HPP */