PME-gather: 4xN SIMD
[gromacs/AngularHB.git] / src / gromacs / simd / tests / simd_floatingpoint_util.cpp
blob08ecf7402f40727e4148565b8123c92868ae146b
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2017, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 #include "gmxpre.h"
37 #include <numeric>
39 #include "gromacs/simd/simd.h"
40 #include "gromacs/utility/alignedallocator.h"
41 #include "gromacs/utility/basedefinitions.h"
43 #include "testutils/testasserts.h"
45 #include "simd.h"
47 namespace gmx
49 namespace test
51 namespace
54 /*! \cond internal */
55 /*! \addtogroup module_simd */
56 /*! \{ */
58 #if GMX_SIMD_HAVE_REAL
60 /*! \brief Test fixture for higher-level floating-point utility functions.
62 * Inherit from main SimdTest, add code to generate aligned memory and data.
64 class SimdFloatingpointUtilTest : public SimdTest
66 public:
67 SimdFloatingpointUtilTest()
69 // Resize vectors to get the amount of memory we need
70 integerMemory_.resize(GMX_SIMD_REAL_WIDTH);
72 // The total memory we allocate corresponds to two work arrays
73 // and 4 values each of GMX_SIMD_REAL_WIDTH.
74 realMemory_.resize(2*s_workMemSize_+4*GMX_SIMD_REAL_WIDTH);
76 offset_ = integerMemory_.data();
77 val0_ = realMemory_.data();
78 val1_ = val0_ + GMX_SIMD_REAL_WIDTH;
79 val2_ = val1_ + GMX_SIMD_REAL_WIDTH;
80 val3_ = val2_ + GMX_SIMD_REAL_WIDTH;
81 mem0_ = val3_ + GMX_SIMD_REAL_WIDTH;
82 mem1_ = mem0_ + s_workMemSize_;
84 // Set default values for offset and variables val0_ through val3_
85 // We cannot fill mem_ here since those values depend on the test.
86 for (int i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
88 // Use every third point to avoid a continguous access pattern
89 offset_[i] = 3 * i;
90 // Multiply numbers by 1+100*GMX_REAL_EPS ensures some low bits are
91 // set too, so the tests make sure we read all bits correctly.
92 val0_[i] = (i ) * (1.0 + 100*GMX_REAL_EPS);
93 val1_[i] = (i + 0.1) * (1.0 + 100*GMX_REAL_EPS);
94 val2_[i] = (i + 0.2) * (1.0 + 100*GMX_REAL_EPS);
95 val3_[i] = (i + 0.3) * (1.0 + 100*GMX_REAL_EPS);
99 protected:
100 //! \brief Size of memory work buffers
102 // To have a somewhat odd access pattern, we use every
103 // third entry, so the largest value of offset_[i] is 3*GMX_SIMD_REAL_WIDTH.
104 // Then we also allow alignments up to 16, which means the largest index in mem0_[]
105 // that we might access is 16*3*GMX_SIMD_REAL_WIDTH+3.
106 static const std::size_t s_workMemSize_ = 16*3*GMX_SIMD_REAL_WIDTH+4;
108 std::vector<int, AlignedAllocator<int> > integerMemory_; //!< Aligned integer memory
109 std::vector<real, AlignedAllocator<real> > realMemory_; //!< Aligned real memory
111 int * offset_; //!< Pointer to offset indices, aligned memory
112 real * val0_; //!< Pointer to GMX_SIMD_REAL_WIDTH values, aligned
113 real * val1_; //!< Pointer to GMX_SIMD_REAL_WIDTH values, aligned
114 real * val2_; //!< Pointer to GMX_SIMD_REAL_WIDTH values, aligned
115 real * val3_; //!< Pointer to GMX_SIMD_REAL_WIDTH values, aligned
117 real * mem0_; //!< Pointer to aligned memory, s_workMemSize real values
118 real * mem1_; //!< Pointer to aligned memory, s_workMemSize real values
123 TEST_F(SimdFloatingpointUtilTest, gatherLoadTranspose4)
125 SimdReal v0, v1, v2, v3;
126 SimdReal ref0, ref1, ref2, ref3;
127 const int nalign = 3;
128 int alignmentList[nalign] = { 4, 8, 12 };
129 int i, j, align;
131 for (i = 0; i < nalign; i++)
133 align = alignmentList[i];
134 for (j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
136 mem0_[align * offset_[j] ] = val0_[j];
137 mem0_[align * offset_[j] + 1] = val1_[j];
138 mem0_[align * offset_[j] + 2] = val2_[j];
139 mem0_[align * offset_[j] + 3] = val3_[j];
142 ref0 = load<SimdReal>(val0_);
143 ref1 = load<SimdReal>(val1_);
144 ref2 = load<SimdReal>(val2_);
145 ref3 = load<SimdReal>(val3_);
147 if (align == 4)
149 gatherLoadTranspose<4>(mem0_, offset_, &v0, &v1, &v2, &v3);
151 else if (align == 8)
153 gatherLoadTranspose<8>(mem0_, offset_, &v0, &v1, &v2, &v3);
155 else if (align == 12)
157 gatherLoadTranspose<12>(mem0_, offset_, &v0, &v1, &v2, &v3);
159 else
161 FAIL();
164 GMX_EXPECT_SIMD_REAL_EQ(ref0, v0);
165 GMX_EXPECT_SIMD_REAL_EQ(ref1, v1);
166 GMX_EXPECT_SIMD_REAL_EQ(ref2, v2);
167 GMX_EXPECT_SIMD_REAL_EQ(ref3, v3);
171 TEST_F(SimdFloatingpointUtilTest, gatherLoadTranspose2)
173 SimdReal v0, v1;
174 SimdReal ref0, ref1;
175 const int nalign = 3;
176 int alignmentList[nalign] = { 2, 4, c_simdBestPairAlignment };
177 int i, j, align;
179 EXPECT_TRUE(c_simdBestPairAlignment <= GMX_SIMD_REAL_WIDTH);
181 for (i = 0; i < nalign; i++)
183 align = alignmentList[i];
184 for (j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
186 mem0_[align * offset_[j] ] = val0_[j];
187 mem0_[align * offset_[j] + 1] = val1_[j];
190 ref0 = load<SimdReal>(val0_);
191 ref1 = load<SimdReal>(val1_);
193 if (align == 2)
195 gatherLoadTranspose<2>(mem0_, offset_, &v0, &v1);
197 else if (align == 4)
199 gatherLoadTranspose<4>(mem0_, offset_, &v0, &v1);
201 else if (align == c_simdBestPairAlignment)
203 gatherLoadTranspose<c_simdBestPairAlignment>(mem0_, offset_, &v0, &v1);
205 else
207 FAIL();
210 GMX_EXPECT_SIMD_REAL_EQ(ref0, v0);
211 GMX_EXPECT_SIMD_REAL_EQ(ref1, v1);
215 TEST_F(SimdFloatingpointUtilTest, gatherLoadUTranspose3)
217 SimdReal v0, v1, v2;
218 SimdReal ref0, ref1, ref2;
219 const int nalign = 2;
220 int alignmentList[nalign] = { 3, 4 };
221 int i, j, align;
223 for (i = 0; i < nalign; i++)
225 align = alignmentList[i];
226 for (j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
228 mem0_[align * offset_[j] ] = val0_[j];
229 mem0_[align * offset_[j] + 1] = val1_[j];
230 mem0_[align * offset_[j] + 2] = val2_[j];
233 ref0 = load<SimdReal>(val0_);
234 ref1 = load<SimdReal>(val1_);
235 ref2 = load<SimdReal>(val2_);
237 if (align == 3)
239 gatherLoadUTranspose<3>(mem0_, offset_, &v0, &v1, &v2);
241 else if (align == 4)
243 gatherLoadUTranspose<4>(mem0_, offset_, &v0, &v1, &v2);
245 else
247 FAIL();
250 GMX_EXPECT_SIMD_REAL_EQ(ref0, v0);
251 GMX_EXPECT_SIMD_REAL_EQ(ref1, v1);
252 GMX_EXPECT_SIMD_REAL_EQ(ref2, v2);
256 TEST_F(SimdFloatingpointUtilTest, transposeScatterStoreU3)
258 SimdReal v0, v1, v2;
259 real refmem[s_workMemSize_];
260 const int nalign = 2;
261 int alignmentList[nalign] = { 3, 4 };
262 int i, align;
263 FloatingPointTolerance tolerance(defaultRealTolerance());
265 for (i = 0; i < nalign; i++)
267 align = alignmentList[i];
269 // Set test and reference memory to background value
270 for (std::size_t j = 0; j < s_workMemSize_; j++)
272 // Multiply by 1+100*eps to make sure low bits are also used
273 mem0_[j] = refmem[j] = (1000.0 + j) * (1.0 + 100*GMX_REAL_EPS);
276 for (std::size_t j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
278 // set values in _reference_ memory (we will then test with mem0_, and compare)
279 refmem[align * offset_[j] ] = val0_[j];
280 refmem[align * offset_[j] + 1] = val1_[j];
281 refmem[align * offset_[j] + 2] = val2_[j];
284 v0 = load<SimdReal>(val0_);
285 v1 = load<SimdReal>(val1_);
286 v2 = load<SimdReal>(val2_);
288 if (align == 3)
290 transposeScatterStoreU<3>(mem0_, offset_, v0, v1, v2);
292 else if (align == 4)
294 transposeScatterStoreU<4>(mem0_, offset_, v0, v1, v2);
296 else
298 FAIL();
301 for (std::size_t j = 0; j < s_workMemSize_; j++)
303 EXPECT_REAL_EQ_TOL(refmem[j], mem0_[j], tolerance);
308 TEST_F(SimdFloatingpointUtilTest, transposeScatterIncrU3)
310 SimdReal v0, v1, v2;
311 real refmem[s_workMemSize_];
312 const int nalign = 2;
313 int alignmentList[nalign] = { 3, 4 };
314 int i, align;
315 FloatingPointTolerance tolerance(defaultRealTolerance());
317 for (i = 0; i < nalign; i++)
319 align = alignmentList[i];
321 // Set test and reference memory to background value
322 for (std::size_t j = 0; j < s_workMemSize_; j++)
324 // Multiply by 1+100*eps to make sure low bits are also used
325 mem0_[j] = refmem[j] = (1000.0 + j) * (1.0 + 100*GMX_REAL_EPS);
328 for (std::size_t j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
330 // Add values to _reference_ memory (we will then test with mem0_, and compare)
331 refmem[align * offset_[j] ] += val0_[j];
332 refmem[align * offset_[j] + 1] += val1_[j];
333 refmem[align * offset_[j] + 2] += val2_[j];
336 v0 = load<SimdReal>(val0_);
337 v1 = load<SimdReal>(val1_);
338 v2 = load<SimdReal>(val2_);
340 if (align == 3)
342 transposeScatterIncrU<3>(mem0_, offset_, v0, v1, v2);
344 else if (align == 4)
346 transposeScatterIncrU<4>(mem0_, offset_, v0, v1, v2);
348 else
350 FAIL();
353 for (std::size_t j = 0; j < s_workMemSize_; j++)
355 EXPECT_REAL_EQ_TOL(refmem[j], mem0_[j], tolerance);
360 TEST_F(SimdFloatingpointUtilTest, transposeScatterIncrU3Overlapping)
362 SimdReal v0, v1, v2;
363 real refmem[s_workMemSize_];
364 FloatingPointTolerance tolerance(defaultRealTolerance());
366 // Alter offset_ to make all entries point to the same (first) value, so all entries will overlap
367 for (std::size_t j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
369 offset_[j] = 0;
372 // Set test and reference memory to background value
373 for (std::size_t j = 0; j < s_workMemSize_; j++)
375 // Multiply by 1+100*eps to make sure low bits are also used
376 mem0_[j] = refmem[j] = (1000.0 + j) * (1.0 + 100*GMX_REAL_EPS);
379 for (std::size_t j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
381 // Add values to _reference_ memory (we will then test with mem0_, and compare)
382 refmem[3 * offset_[j] ] += val0_[j];
383 refmem[3 * offset_[j] + 1] += val1_[j];
384 refmem[3 * offset_[j] + 2] += val2_[j];
387 v0 = load<SimdReal>(val0_);
388 v1 = load<SimdReal>(val1_);
389 v2 = load<SimdReal>(val2_);
391 transposeScatterIncrU<3>(mem0_, offset_, v0, v1, v2);
393 for (std::size_t j = 0; j < s_workMemSize_; j++)
395 EXPECT_REAL_EQ_TOL(refmem[j], mem0_[j], tolerance);
399 TEST_F(SimdFloatingpointUtilTest, transposeScatterDecrU3)
401 SimdReal v0, v1, v2;
402 real refmem[s_workMemSize_];
403 const int nalign = 2;
404 int alignmentList[nalign] = { 3, 4 };
405 int i, align;
406 FloatingPointTolerance tolerance(defaultRealTolerance());
408 for (i = 0; i < nalign; i++)
410 align = alignmentList[i];
412 // Set test and reference memory to background value
413 for (std::size_t j = 0; j < s_workMemSize_; j++)
415 // Multiply by 1+100*eps to make sure low bits are also used
416 mem0_[j] = refmem[j] = (1000.0 + j) * (1.0 + 100*GMX_REAL_EPS);
419 for (std::size_t j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
421 // Subtract values from _reference_ memory (we will then test with mem0_, and compare)
422 refmem[align * offset_[j] ] -= val0_[j];
423 refmem[align * offset_[j] + 1] -= val1_[j];
424 refmem[align * offset_[j] + 2] -= val2_[j];
427 v0 = load<SimdReal>(val0_);
428 v1 = load<SimdReal>(val1_);
429 v2 = load<SimdReal>(val2_);
431 if (align == 3)
433 transposeScatterDecrU<3>(mem0_, offset_, v0, v1, v2);
435 else if (align == 4)
437 transposeScatterDecrU<4>(mem0_, offset_, v0, v1, v2);
439 else
441 FAIL();
444 for (std::size_t j = 0; j < s_workMemSize_; j++)
446 EXPECT_REAL_EQ_TOL(refmem[j], mem0_[j], tolerance);
451 TEST_F(SimdFloatingpointUtilTest, transposeScatterDecrU3Overlapping)
453 SimdReal v0, v1, v2;
454 real refmem[s_workMemSize_];
455 FloatingPointTolerance tolerance(defaultRealTolerance());
457 // Alter offset_ to make all entries point to the same (first) value, so all entries will overlap
458 for (std::size_t j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
460 offset_[j] = 0;
463 // Set test and reference memory to background value
464 for (std::size_t j = 0; j < s_workMemSize_; j++)
466 // Multiply by 1+100*eps to make sure low bits are also used
467 mem0_[j] = refmem[j] = (1000.0 + j) * (1.0 + 100*GMX_REAL_EPS);
470 for (std::size_t j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
472 // Subtract values from _reference_ memory (we will then test with mem0_, and compare)
473 refmem[3 * offset_[j] ] -= val0_[j];
474 refmem[3 * offset_[j] + 1] -= val1_[j];
475 refmem[3 * offset_[j] + 2] -= val2_[j];
478 v0 = load<SimdReal>(val0_);
479 v1 = load<SimdReal>(val1_);
480 v2 = load<SimdReal>(val2_);
482 transposeScatterDecrU<3>(mem0_, offset_, v0, v1, v2);
484 for (std::size_t j = 0; j < s_workMemSize_; j++)
486 EXPECT_REAL_EQ_TOL(refmem[j], mem0_[j], tolerance);
490 TEST_F(SimdFloatingpointUtilTest, expandScalarsToTriplets)
492 SimdReal vs, v0, v1, v2;
493 int i;
495 for (i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
497 mem0_[i] = i;
500 vs = load<SimdReal>(mem0_);
502 expandScalarsToTriplets(vs, &v0, &v1, &v2);
504 store(val0_, v0);
505 store(val1_, v1);
506 store(val2_, v2);
508 for (i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
510 EXPECT_EQ(i / 3, val0_[i]);
511 EXPECT_EQ((i + GMX_SIMD_REAL_WIDTH) / 3, val1_[i]);
512 EXPECT_EQ((i + 2 * GMX_SIMD_REAL_WIDTH) / 3, val2_[i]);
517 TEST_F(SimdFloatingpointUtilTest, gatherLoadBySimdIntTranspose4)
519 SimdReal v0, v1, v2, v3;
520 SimdReal ref0, ref1, ref2, ref3;
521 SimdInt32 simdoffset;
522 const int nalign = 3;
523 int alignmentList[nalign] = { 4, 8, 12 };
524 int i, j, align;
526 for (i = 0; i < nalign; i++)
528 align = alignmentList[i];
529 for (j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
531 mem0_[align * offset_[j] ] = val0_[j];
532 mem0_[align * offset_[j] + 1] = val1_[j];
533 mem0_[align * offset_[j] + 2] = val2_[j];
534 mem0_[align * offset_[j] + 3] = val3_[j];
537 simdoffset = load<SimdInt32>(offset_);
538 ref0 = load<SimdReal>(val0_);
539 ref1 = load<SimdReal>(val1_);
540 ref2 = load<SimdReal>(val2_);
541 ref3 = load<SimdReal>(val3_);
543 if (align == 4)
545 gatherLoadBySimdIntTranspose<4>(mem0_, simdoffset, &v0, &v1, &v2, &v3);
547 else if (align == 8)
549 gatherLoadBySimdIntTranspose<8>(mem0_, simdoffset, &v0, &v1, &v2, &v3);
551 else if (align == 12)
553 gatherLoadBySimdIntTranspose<12>(mem0_, simdoffset, &v0, &v1, &v2, &v3);
555 else
557 FAIL();
560 GMX_EXPECT_SIMD_REAL_EQ(ref0, v0);
561 GMX_EXPECT_SIMD_REAL_EQ(ref1, v1);
562 GMX_EXPECT_SIMD_REAL_EQ(ref2, v2);
563 GMX_EXPECT_SIMD_REAL_EQ(ref3, v3);
568 TEST_F(SimdFloatingpointUtilTest, gatherLoadBySimdIntTranspose2)
570 SimdReal v0, v1;
571 SimdReal ref0, ref1;
572 SimdInt32 simdoffset;
573 const int nalign = 3;
574 int alignmentList[nalign] = { 4, 8, 12 };
575 int i, j, align;
577 for (i = 0; i < nalign; i++)
579 align = alignmentList[i];
580 for (j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
582 mem0_[align * offset_[j] ] = val0_[j];
583 mem0_[align * offset_[j] + 1] = val1_[j];
586 simdoffset = load<SimdInt32>(offset_);
587 ref0 = load<SimdReal>(val0_);
588 ref1 = load<SimdReal>(val1_);
590 if (align == 4)
592 gatherLoadBySimdIntTranspose<4>(mem0_, simdoffset, &v0, &v1);
594 else if (align == 8)
596 gatherLoadBySimdIntTranspose<8>(mem0_, simdoffset, &v0, &v1);
598 else if (align == 12)
600 gatherLoadBySimdIntTranspose<12>(mem0_, simdoffset, &v0, &v1);
602 else
604 FAIL();
607 GMX_EXPECT_SIMD_REAL_EQ(ref0, v0);
608 GMX_EXPECT_SIMD_REAL_EQ(ref1, v1);
612 #if GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_REAL
613 TEST_F(SimdFloatingpointUtilTest, gatherLoadUBySimdIntTranspose2)
615 SimdReal v0, v1;
616 SimdReal ref0, ref1;
617 SimdInt32 simdoffset;
618 const int nalign = 3;
619 int alignmentList[nalign] = { 1, 3, 5 };
620 int i, j, align;
622 for (i = 0; i < nalign; i++)
624 align = alignmentList[i];
625 for (j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
627 mem0_[align * offset_[j] ] = val0_[j];
628 mem0_[align * offset_[j] + 1] = val1_[j];
631 simdoffset = load<SimdInt32>(offset_);
632 ref0 = load<SimdReal>(val0_);
633 ref1 = load<SimdReal>(val1_);
635 if (align == 1)
637 gatherLoadUBySimdIntTranspose<1>(mem0_, simdoffset, &v0, &v1);
639 else if (align == 3)
641 gatherLoadUBySimdIntTranspose<3>(mem0_, simdoffset, &v0, &v1);
643 else if (align == 5)
645 gatherLoadUBySimdIntTranspose<5>(mem0_, simdoffset, &v0, &v1);
647 else
649 FAIL();
652 GMX_EXPECT_SIMD_REAL_EQ(ref0, v0);
653 GMX_EXPECT_SIMD_REAL_EQ(ref1, v1);
656 #endif // GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_REAL
658 TEST_F(SimdFloatingpointUtilTest, reduceIncr4Sum)
660 int i;
661 SimdReal v0, v1, v2, v3;
662 real sum0, sum1, sum2, sum3, tstsum;
663 FloatingPointTolerance tolerance(defaultRealTolerance());
665 v0 = load<SimdReal>(val0_);
666 v1 = load<SimdReal>(val1_);
667 v2 = load<SimdReal>(val2_);
668 v3 = load<SimdReal>(val3_);
670 sum0 = sum1 = sum2 = sum3 = 0;
671 for (i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
673 sum0 += val0_[i];
674 sum1 += val1_[i];
675 sum2 += val2_[i];
676 sum3 += val3_[i];
679 // Just put some numbers in memory so we check the addition is correct
680 mem0_[0] = c0;
681 mem0_[1] = c1;
682 mem0_[2] = c2;
683 mem0_[3] = c3;
685 tstsum = reduceIncr4ReturnSum(mem0_, v0, v1, v2, v3);
687 EXPECT_REAL_EQ_TOL(c0 + sum0, mem0_[0], tolerance);
688 EXPECT_REAL_EQ_TOL(c1 + sum1, mem0_[1], tolerance);
689 EXPECT_REAL_EQ_TOL(c2 + sum2, mem0_[2], tolerance);
690 EXPECT_REAL_EQ_TOL(c3 + sum3, mem0_[3], tolerance);
692 EXPECT_REAL_EQ_TOL(sum0 + sum1 + sum2 + sum3, tstsum, tolerance);
695 #if GMX_SIMD_HAVE_HSIMD_UTIL_REAL
697 TEST_F(SimdFloatingpointUtilTest, loadDualHsimd)
699 SimdReal v0, v1;
701 // Point p to the upper half of val0_
702 real * p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
704 v0 = load<SimdReal>(val0_);
705 v1 = loadDualHsimd(val0_, p);
707 GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
710 TEST_F(SimdFloatingpointUtilTest, loadDuplicateHsimd)
712 SimdReal v0, v1;
713 int i;
714 // Point p to the upper half of val0_
715 real * p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
716 // Copy data so upper half is identical to lower
717 for (i = 0; i < GMX_SIMD_REAL_WIDTH / 2; i++)
719 p[i] = val0_[i];
722 v0 = load<SimdReal>(val0_);
723 v1 = loadDuplicateHsimd(val0_);
725 GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
729 TEST_F(SimdFloatingpointUtilTest, loadU1DualHsimd)
731 SimdReal v0, v1;
732 int i;
733 real data[2] = { 1, 2 };
735 // Point p to the upper half of val0_
736 real * p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
737 // Set all low elements to data[0], an high to data[1]
738 for (i = 0; i < GMX_SIMD_REAL_WIDTH / 2; i++)
740 val0_[i] = data[0];
741 p[i] = data[1];
744 v0 = load<SimdReal>(val0_);
745 v1 = loadU1DualHsimd(data);
747 GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
751 TEST_F(SimdFloatingpointUtilTest, storeDualHsimd)
753 SimdReal v0;
754 int i;
756 // Point p to the upper half of val0_
757 real * p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
759 v0 = load<SimdReal>(val2_);
760 storeDualHsimd(val0_, p, v0);
762 for (i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
764 EXPECT_EQ(val2_[i], val0_[i]);
768 TEST_F(SimdFloatingpointUtilTest, incrDualHsimd)
770 real reference[GMX_SIMD_REAL_WIDTH];
771 SimdReal v0;
773 // Create reference values
774 for (std::size_t i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
776 reference[i] = val0_[i] + val2_[i];
779 // Point p to the upper half of val0_
780 real * p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
782 v0 = load<SimdReal>(val2_);
783 incrDualHsimd(val0_, p, v0);
785 for (std::size_t i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
787 EXPECT_EQ(reference[i], val0_[i]);
791 TEST_F(SimdFloatingpointUtilTest, incrDualHsimdOverlapping)
793 real reference[GMX_SIMD_REAL_WIDTH/2];
794 SimdReal v0;
796 // Create reference values
797 for (std::size_t i = 0; i < GMX_SIMD_REAL_WIDTH/2; i++)
799 reference[i] = val0_[i] + val2_[i] + val2_[GMX_SIMD_REAL_WIDTH/2+i];
802 v0 = load<SimdReal>(val2_);
803 incrDualHsimd(val0_, val0_, v0);
805 for (std::size_t i = 0; i < GMX_SIMD_REAL_WIDTH/2; i++)
807 EXPECT_EQ(reference[i], val0_[i]);
811 TEST_F(SimdFloatingpointUtilTest, decrHsimd)
813 SimdReal v0;
814 real ref[GMX_SIMD_REAL_WIDTH / 2];
815 int i;
816 FloatingPointTolerance tolerance(defaultRealTolerance());
818 // Point p to the upper half of val1_
819 real * p = val1_ + GMX_SIMD_REAL_WIDTH / 2;
820 for (i = 0; i < GMX_SIMD_REAL_WIDTH / 2; i++)
822 ref[i] = val0_[i] - ( val1_[i] + p[i] );
825 v0 = load<SimdReal>(val1_);
826 decrHsimd(val0_, v0);
828 for (i = 0; i < GMX_SIMD_REAL_WIDTH / 2; i++)
830 EXPECT_REAL_EQ_TOL(ref[i], val0_[i], tolerance);
835 TEST_F(SimdFloatingpointUtilTest, gatherLoadTranspose2Hsimd)
837 SimdReal v0, v1;
838 SimdReal ref0, ref1;
840 const int nalign = 3;
841 int alignmentList[nalign] = { 2, 4, c_simdBestPairAlignment };
842 int i, j, align;
844 for (i = 0; i < nalign; i++)
846 align = alignmentList[i];
847 for (j = 0; j < GMX_SIMD_REAL_WIDTH / 2; j++)
849 // Use mem0_ as base for lower half
850 mem0_[align * offset_[j] ] = val0_[j];
851 mem0_[align * offset_[j] + 1] = val1_[j];
852 // Use mem1_ as base for upper half
853 mem1_[align * offset_[j] ] = val0_[GMX_SIMD_REAL_WIDTH / 2 + j];
854 mem1_[align * offset_[j] + 1] = val1_[GMX_SIMD_REAL_WIDTH / 2 + j];
858 ref0 = load<SimdReal>(val0_);
859 ref1 = load<SimdReal>(val1_);
861 if (align == 2)
863 gatherLoadTransposeHsimd<2>(mem0_, mem1_, offset_, &v0, &v1);
865 else if (align == 4)
867 gatherLoadTransposeHsimd<4>(mem0_, mem1_, offset_, &v0, &v1);
869 else if (align == c_simdBestPairAlignment)
871 gatherLoadTransposeHsimd<c_simdBestPairAlignment>(mem0_, mem1_, offset_, &v0, &v1);
873 else
875 FAIL();
878 GMX_EXPECT_SIMD_REAL_EQ(ref0, v0);
879 GMX_EXPECT_SIMD_REAL_EQ(ref1, v1);
884 TEST_F(SimdFloatingpointUtilTest, reduceIncr4SumHsimd)
886 int i;
887 SimdReal v0, v1;
888 real sum0, sum1, sum2, sum3, tstsum;
889 FloatingPointTolerance tolerance(defaultRealTolerance());
891 // Use the half-SIMD storage in memory val0_ and val1_.
892 v0 = load<SimdReal>(val0_);
893 v1 = load<SimdReal>(val1_);
895 sum0 = sum1 = sum2 = sum3 = 0;
896 for (i = 0; i < GMX_SIMD_REAL_WIDTH / 2; i++)
898 sum0 += val0_[i];
899 sum1 += val0_[GMX_SIMD_REAL_WIDTH / 2 + i];
900 sum2 += val1_[i];
901 sum3 += val1_[GMX_SIMD_REAL_WIDTH / 2 + i];
904 // Just put some numbers in memory so we check the addition is correct
905 mem0_[0] = c0;
906 mem0_[1] = c1;
907 mem0_[2] = c2;
908 mem0_[3] = c3;
910 tstsum = reduceIncr4ReturnSumHsimd(mem0_, v0, v1);
912 EXPECT_REAL_EQ_TOL(c0 + sum0, mem0_[0], tolerance);
913 EXPECT_REAL_EQ_TOL(c1 + sum1, mem0_[1], tolerance);
914 EXPECT_REAL_EQ_TOL(c2 + sum2, mem0_[2], tolerance);
915 EXPECT_REAL_EQ_TOL(c3 + sum3, mem0_[3], tolerance);
917 EXPECT_REAL_EQ_TOL(sum0 + sum1 + sum2 + sum3, tstsum, tolerance);
920 #endif // GMX_SIMD_HAVE_HSIMD_UTIL_REAL
922 #if GMX_SIMD_HAVE_4NSIMD_UTIL_REAL
924 TEST_F(SimdFloatingpointUtilTest, loadUNDuplicate4)
926 SimdReal v0, v1;
927 int i;
928 real data[GMX_SIMD_REAL_WIDTH/4];
929 std::iota(data, data+GMX_SIMD_REAL_WIDTH/4, 1);
931 for (i = 0; i < GMX_SIMD_REAL_WIDTH / 4; i++)
933 val0_[i*4] = val0_[i*4+1] = val0_[i*4+2] = val0_[i*4+3] = data[i];
936 v0 = load<SimdReal>(val0_);
937 v1 = loadUNDuplicate4(data);
939 GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
942 TEST_F(SimdFloatingpointUtilTest, load4DuplicateN)
944 SimdReal v0, v1;
945 int i;
946 real data[4] = { 1, 2, 3, 4};
948 for (i = 0; i < GMX_SIMD_REAL_WIDTH / 4; i++)
950 val0_[i*4] = data[0];
951 val0_[i*4+1] = data[1];
952 val0_[i*4+2] = data[2];
953 val0_[i*4+3] = data[3];
956 v0 = load<SimdReal>(val0_);
957 v1 = load4DuplicateN(val0_);
959 GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
962 TEST_F(SimdFloatingpointUtilTest, loadU4NOffset)
964 constexpr int offset = 6; //non power of 2
965 constexpr int dataLen = 4+offset*(GMX_SIMD_REAL_WIDTH/4-1);
966 real data[dataLen];
967 std::iota(data, data+dataLen, 1);
969 for (int i = 0; i < GMX_SIMD_REAL_WIDTH / 4; i++)
971 val0_[i*4] = data[0+offset*i];
972 val0_[i*4+1] = data[1+offset*i];
973 val0_[i*4+2] = data[2+offset*i];
974 val0_[i*4+3] = data[3+offset*i];
977 const SimdReal v0 = load<SimdReal>(val0_);
978 const SimdReal v1 = loadU4NOffset(data, offset);
980 GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
983 #endif // GMX_SIMD_HAVE_4NSIMD_UTIL_REAL
985 #endif // GMX_SIMD_HAVE_REAL
987 /*! \} */
988 /*! \endcond */
990 } // namespace
991 } // namespace
992 } // namespace