include/gmx_x86_simd_single.h

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  * This file is part of GROMACS.
   5  * Copyright (c) 2012-
   6  *
   7  * Written by the Gromacs development team under coordination of
   8  * David van der Spoel, Berk Hess, and Erik Lindahl.
   9  *
  10  * This library is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation; either version 2
  13  * of the License, or (at your option) any later version.
  14  *
  15  * To help us fund GROMACS development, we humbly ask that you cite
  16  * the research papers on the package. Check out http://www.gromacs.org
  17  *
  18  * And Hey:
  19  * Gnomes, ROck Monsters And Chili Sauce
  20  */
  21 #ifndef _gmx_x86_simd256_single_h_
  22 #define _gmx_x86_simd256_single_h_
  23
  24 /* This file includes the highest possible level of x86 (math) acceleration */
  25
  26 #ifdef GMX_X86_AVX_256
  27 #include "gmx_x86_avx_256.h"
  28 #include "gmx_math_x86_avx_256_single.h"
  29 #else
  30 #ifdef GMX_X86_AVX_128_FMA
  31 #include "gmx_x86_avx_128_fma.h"
  32 #include "gmx_math_x86_avx_128_fma_single.h"
  33 #else
  34 #ifdef GMX_X86_SSE4_1
  35 #include "gmx_x86_sse4_1.h"
  36 #include "gmx_math_x86_sse4_1_single.h"
  37 #else
  38 #ifdef GMX_X86_SSE2
  39 #include "gmx_x86_sse2.h"
  40 #include "gmx_math_x86_sse2_single.h"
  41 #else
  42 #error No x86 acceleration defined
  43 #endif
  44 #endif
  45 #endif
  46 #endif
  47
  48
  49 static inline __m128
  50 gmx_mm_calc_rsq_ps(__m128 dx, __m128 dy, __m128 dz)
  51 {
  52     return _mm_add_ps( _mm_add_ps( _mm_mul_ps(dx,dx), _mm_mul_ps(dy,dy) ), _mm_mul_ps(dz,dz) );
  53 }
  54
  55 /* Normal sum of four __m128 registers */
  56 #define gmx_mm_sum4_ps(t0,t1,t2,t3)  _mm_add_ps(_mm_add_ps(t0,t1),_mm_add_ps(t2,t3))
  57
  58 #ifdef GMX_X86_AVX_256
  59
  60 static inline __m256
  61 gmx_mm256_calc_rsq_ps(__m256 dx, __m256 dy, __m256 dz)
  62 {
  63     return _mm256_add_ps( _mm256_add_ps( _mm256_mul_ps(dx,dx), _mm256_mul_ps(dy,dy) ), _mm256_mul_ps(dz,dz) );
  64 }
  65
  66 /* Normal sum of four __m256 registers */
  67 #define gmx_mm256_sum4_ps(t0,t1,t2,t3)  _mm256_add_ps(_mm256_add_ps(t0,t1),_mm256_add_ps(t2,t3))
  68
  69 #endif
  70
  71 #endif /* _gmx_x86_simd256_single_h_ */