memcpy: hide some memory latencies
[nova-simd.git] / simd_horizontal_functions.hpp
blob3e8567f2159593b425c2ff78a586fff3e7d19b4e
1 // horizontal simd functions
2 // Copyright (C) 2011 Tim Blechmann
3 //
4 // This program is free software; you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation; either version 2 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
14 // You should have received a copy of the GNU General Public License
15 // along with this program; see the file COPYING. If not, write to
16 // the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 // Boston, MA 02111-1307, USA.
20 #ifndef SIMD_HORIZONTAL_FUNCTIONS_HPP
21 #define SIMD_HORIZONTAL_FUNCTIONS_HPP
#include "vec.hpp"

#include <algorithm> /* for max, min */
#include <limits>    /* for numeric_limits */
/* always_inline: expands to `inline` plus GCC's always_inline attribute when
 * both __GNUC__ and NDEBUG are defined, and to plain `inline` otherwise.
 * The macro is local to this header; it is #undef'd again at the end. */
27 #if defined(__GNUC__) && defined(NDEBUG)
28 #define always_inline inline __attribute__((always_inline))
29 #else
30 #define always_inline inline
31 #endif
33 namespace nova {
35 /* horizontal max */
/**
 * Scalar maximum of the n values starting at `in`.
 *
 * @param in  pointer to the first of n readable values
 * @param n   number of values to scan
 * @return    the largest value found; std::numeric_limits<F>::lowest()
 *            when n is 0
 */
template <typename F>
inline F horizontal_max_vec(const F * in, unsigned int n)
{
    /* enable ADL so a user-provided max for F is still picked up */
    using std::max;

    /* lowest(), not min(): for floating-point F, min() is the smallest
     * positive value and would hide the maximum of all-negative input */
    F result = std::numeric_limits<F>::lowest();

    /* counted loop, so n == 0 reads nothing instead of wrapping around */
    for (const F * const end = in + n; in != end; ++in)
        result = max(result, *in);

    return result;
}
49 template <typename F>
50 inline F horizontal_max_vec_simd(const F * in, unsigned int n)
52 F init = std::numeric_limits<F>::min();
53 vec<F> current(init);
55 /* loop */
56 const size_t vec_size = vec<F>::size;
57 n /= vec_size;
58 do {
59 vec<F> val;
60 val.load_aligned(in);
62 current = max_(current, val);
63 in += vec_size;
64 } while(--n);
66 return current.horizontal_max();
70 /* horizontal min */
/**
 * Scalar minimum of the n values starting at `in`.
 *
 * @param in  pointer to the first of n readable values
 * @param n   number of values to scan
 * @return    the smallest value found; std::numeric_limits<F>::max()
 *            when n is 0
 */
template <typename F>
inline F horizontal_min_vec(const F * in, unsigned int n)
{
    /* enable ADL so a user-provided min for F is still picked up */
    using std::min;

    /* a minimum search has to start from the largest representable value;
     * starting from min() would cap the result at that seed */
    F result = std::numeric_limits<F>::max();

    /* counted loop, so n == 0 reads nothing instead of wrapping around */
    for (const F * const end = in + n; in != end; ++in)
        result = min(result, *in);

    return result;
}
84 template <typename F>
85 inline F horizontal_min_vec_simd(const F * in, unsigned int n)
87 F init = std::numeric_limits<F>::min();
88 vec<F> current(init);
90 /* loop */
91 const size_t vec_size = vec<F>::size;
92 n /= vec_size;
93 do {
94 vec<F> val;
95 val.load_aligned(in);
97 current = min_(current, val);
98 in += vec_size;
99 } while(--n);
101 return current.horizontal_min();
104 /* horizontal sum */
/**
 * Scalar sum of the n values starting at `in`, accumulated front to back.
 *
 * @param in  pointer to the first of n readable values
 * @param n   number of values to add
 * @return    the accumulated sum; 0 when n is 0
 */
template <typename F>
inline F horizontal_sum_vec(const F * in, unsigned int n)
{
    F total = 0;

    /* counted loop, so n == 0 reads nothing instead of wrapping around */
    for (const F * const end = in + n; in != end; ++in)
        total = total + *in;

    return total;
}
118 template <typename F>
119 inline F horizontal_sum_vec_simd(const F * in, unsigned int n)
121 vec<F> current(F(0));
123 /* loop */
124 const size_t vec_size = vec<F>::size;
125 n /= vec_size;
126 do {
127 vec<F> val;
128 val.load_aligned(in);
130 current = current + val;
131 in += vec_size;
132 } while(--n);
134 return current.horizontal_sum();
138 /* horizontal min/max */
/**
 * Scalar minimum and maximum of the n values starting at `in`, found in a
 * single pass.
 *
 * @param rmin  receives the smallest value; numeric_limits<F>::max()
 *              when n is 0
 * @param rmax  receives the largest value; numeric_limits<F>::lowest()
 *              when n is 0
 * @param in    pointer to the first of n readable values
 * @param n     number of values to scan
 */
template <typename F>
inline void horizontal_minmax_vec(F & rmin, F & rmax, const F * in, unsigned int n)
{
    /* enable ADL so user-provided min/max for F are still picked up */
    using std::max;
    using std::min;

    /* lowest(), not min(): for floating-point F, min() is the smallest
     * positive value and would hide the maximum of all-negative input */
    F largest = std::numeric_limits<F>::lowest();
    F smallest = std::numeric_limits<F>::max();

    /* counted loop, so n == 0 reads nothing instead of wrapping around */
    for (const F * const end = in + n; in != end; ++in) {
        largest = max(largest, *in);
        smallest = min(smallest, *in);
    }

    rmax = largest;
    rmin = smallest;
}
155 template <typename F>
156 inline void horizontal_minmax_vec_simd(F & rmin, F & rmax, const F * in, unsigned int n)
158 vec<F> current_max(std::numeric_limits<F>::min());
159 vec<F> current_min(std::numeric_limits<F>::max());
161 /* loop */
162 const size_t vec_size = vec<F>::size;
163 n /= vec_size;
164 do {
165 vec<F> val;
166 val.load_aligned(in);
168 current_max = max_(current_max, val);
169 current_min = min_(current_min, val);
170 in += vec_size;
171 } while(--n);
173 rmin = current_min.horizontal_min();
174 rmax = current_max.horizontal_max();
177 /* horizontal max/sum */
/**
 * Scalar maximum and sum of the n values starting at `in`, computed in a
 * single pass.
 *
 * @param rmax  receives the largest value; numeric_limits<F>::lowest()
 *              when n is 0
 * @param rsum  receives the sum, accumulated front to back; 0 when n is 0
 * @param in    pointer to the first of n readable values
 * @param n     number of values to scan
 */
template <typename F>
inline void horizontal_maxsum_vec(F & rmax, F & rsum, const F * in, unsigned int n)
{
    /* enable ADL so a user-provided max for F is still picked up */
    using std::max;

    /* lowest(), not min(): for floating-point F, min() is the smallest
     * positive value and would hide the maximum of all-negative input */
    F largest = std::numeric_limits<F>::lowest();
    F total = 0;

    /* counted loop, so n == 0 reads nothing instead of wrapping around */
    for (const F * const end = in + n; in != end; ++in) {
        largest = max(largest, *in);
        total = total + *in;
    }

    rmax = largest;
    rsum = total;
}
194 template <typename F>
195 inline void horizontal_maxsum_vec_simd(F & rmax, F & rsum, const F * in, unsigned int n)
197 vec<F> current_max(std::numeric_limits<F>::min());
198 vec<F> current_sum(F(0));
200 /* loop */
201 const size_t vec_size = vec<F>::size;
202 n /= vec_size;
203 do {
204 vec<F> val;
205 val.load_aligned(in);
207 current_max = max_(current_max, val);
208 current_sum = current_sum + val;
209 in += vec_size;
210 } while(--n);
212 rsum = current_sum.horizontal_sum();
213 rmax = current_max.horizontal_max();
217 } /* namespace nova */
219 #undef always_inline
#endif /* SIMD_HORIZONTAL_FUNCTIONS_HPP */