include/gmx_x86_simd_double.h

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  * This file is part of GROMACS.
   5  * Copyright (c) 2012-
   6  *
   7  * Written by the Gromacs development team under coordination of
   8  * David van der Spoel, Berk Hess, and Erik Lindahl.
   9  *
  10  * This library is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation; either version 2
  13  * of the License, or (at your option) any later version.
  14  *
  15  * To help us fund GROMACS development, we humbly ask that you cite
  16  * the research papers on the package. Check out http://www.gromacs.org
  17  *
  18  * And Hey:
  19  * Gnomes, ROck Monsters And Chili Sauce
  20  */
  21 #ifndef _gmx_x86_simd_double_h_
  22 #define _gmx_x86_simd_double_h_
  23
  24 /* This file includes the highest possible level of x86 (math) acceleration */
  25
  26 #ifdef GMX_X86_AVX_256
  27 #include "gmx_x86_avx_256.h"
  28 #include "gmx_math_x86_avx_256_double.h"
  29 #else
  30 #ifdef GMX_X86_AVX_128_FMA
  31 #include "gmx_x86_avx_128_fma.h"
  32 #include "gmx_math_x86_avx_128_fma_double.h"
  33 #else
  34 #ifdef GMX_X86_SSE4_1
  35 #include "gmx_x86_sse4_1.h"
  36 #include "gmx_math_x86_sse4_1_double.h"
  37 #else
  38 #ifdef GMX_X86_SSE2
  39 #include "gmx_x86_sse2.h"
  40 #include "gmx_math_x86_sse2_double.h"
  41 #else
  42 #error No x86 acceleration defined
  43 #endif
  44 #endif
  45 #endif
  46 #endif
  47
  48 static inline __m128d
  49 gmx_mm_calc_rsq_pd(__m128d dx, __m128d dy, __m128d dz)
  50 {
  51     return _mm_add_pd( _mm_add_pd( _mm_mul_pd(dx,dx), _mm_mul_pd(dy,dy) ), _mm_mul_pd(dz,dz) );
  52 }
  53
  54 /* Normal sum of four __m128d registers */
  55 #define gmx_mm_sum4_pd(t0,t1,t2,t3)  _mm_add_pd(_mm_add_pd(t0,t1),_mm_add_pd(t2,t3))
  56
  57 #ifdef GMX_X86_AVX_256
  58
  59 static inline __m256d
  60 gmx_mm256_calc_rsq_pd(__m256d dx, __m256d dy, __m256d dz)
  61 {
  62     return _mm256_add_pd( _mm256_add_pd( _mm256_mul_pd(dx,dx), _mm256_mul_pd(dy,dy) ), _mm256_mul_pd(dz,dz) );
  63 }
  64
  65 /* Normal sum of four xmm registers */
  66 #define gmx_mm256_sum4_pd(t0,t1,t2,t3)  _mm256_add_pd(_mm256_add_pd(t0,t1),_mm256_add_pd(t2,t3))
  67
  68 #endif
  69
  70 #endif /* _gmx_x86_simd_double_h_ */