2 # This file is part of the GROMACS molecular simulation package.
4 # Copyright (c) 2017,2018, by the GROMACS development team, led by
5 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 # and including many others, as listed in the AUTHORS file in the
7 # top-level source directory and at http://www.gromacs.org.
9 # GROMACS is free software; you can redistribute it and/or
10 # modify it under the terms of the GNU Lesser General Public License
11 # as published by the Free Software Foundation; either version 2.1
12 # of the License, or (at your option) any later version.
14 # GROMACS is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 # Lesser General Public License for more details.
19 # You should have received a copy of the GNU Lesser General Public
20 # License along with GROMACS; if not, see
21 # http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 # If you want to redistribute modifications to GROMACS, please
25 # consider that scientific software is very special. Version
26 # control is crucial - bugs must be traceable. We will be happy to
27 # consider code for inclusion in the official distribution, but
28 # derived work must not be called official GROMACS. Details are found
29 # in the README & COPYING files - if they are missing, get the
30 # official version at http://www.gromacs.org.
32 # To help us fund GROMACS development, we humbly ask that you cite
33 # the research papers on the package. Check out http://www.gromacs.org.
35 include(gmxFindFlagsForSource)
# Gather the toolchain-level flags that have to be in place before any of the
# x86 SIMD flag probes are run.
#
# TOOLCHAIN_C_FLAGS_VARIABLE and TOOLCHAIN_CXX_FLAGS_VARIABLE are the names of
# the variables handed on to gmx_test_cflag() / gmx_test_cxxflag() for the C
# and the C++ compiler. This is a macro, so it executes in the caller's scope.
macro(find_x86_toolchain_flags TOOLCHAIN_C_FLAGS_VARIABLE TOOLCHAIN_CXX_FLAGS_VARIABLE)
    if(APPLE)
        # On OS X gcc is often preferred over clang because gcc supports
        # OpenMP (until clang 3.8 or so, plus whenever Apple supports it in
        # their version). By default gcc drives the external system
        # assembler, which does not support AVX, so the compiler is asked
        # (via "-Wa,-q") to use the clang assembler instead. This has to be
        # settled before the AVX flags are detected.
        if(CMAKE_C_COMPILER_ID STREQUAL "GNU")
            gmx_test_cflag(GNU_C_USE_CLANG_AS "-Wa,-q" ${TOOLCHAIN_C_FLAGS_VARIABLE})
        endif()
        if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
            gmx_test_cxxflag(GNU_CXX_USE_CLANG_AS "-Wa,-q" ${TOOLCHAIN_CXX_FLAGS_VARIABLE})
        endif()
    endif()
endmacro()
# Macro that manages setting the respective C and C++ toolchain
# variables so that subsequent tests for VSX SIMD support can work.
#
# TOOLCHAIN_C_FLAGS_VARIABLE and TOOLCHAIN_CXX_FLAGS_VARIABLE are the names of
# the variables handed on to gmx_test_cflag() / gmx_test_cxxflag() for the C
# and the C++ compiler. This is a macro, so it executes in the caller's scope.
#
# Stops the configuration with a FATAL_ERROR when the compiler version is too
# old to be used with VSX (see the individual checks below).
macro(find_power_vsx_toolchain_flags TOOLCHAIN_C_FLAGS_VARIABLE TOOLCHAIN_CXX_FLAGS_VARIABLE)
    # The compiler IDs are referenced by variable name; an unquoted ${...}
    # expansion would leave a malformed condition if either ID were empty.
    if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "GNU")
        # VSX uses the same function API as Altivec/VMX, so make sure we tune for the current CPU and not VMX.
        # By putting these flags here rather than in the general compiler flags file we can safely assume
        # that we are at least on Power7 since that is when VSX appeared.
        gmx_run_cpu_detection(brand)
        # Each C++ flag is probed with gmx_test_cxxflag() so that it is
        # validated against the C++ compiler rather than the C compiler.
        if(CPU_DETECTION_BRAND MATCHES "POWER7")
            gmx_test_cflag(GNU_C_VSX_POWER7 "-mcpu=power7 -mtune=power7" ${TOOLCHAIN_C_FLAGS_VARIABLE})
            gmx_test_cxxflag(GNU_CXX_VSX_POWER7 "-mcpu=power7 -mtune=power7" ${TOOLCHAIN_CXX_FLAGS_VARIABLE})
        elseif(CPU_DETECTION_BRAND MATCHES "POWER8")
            # Enable power8 vector extensions on such platforms.
            gmx_test_cflag(GNU_C_VSX_POWER8 "-mcpu=power8 -mpower8-vector -mpower8-fusion" ${TOOLCHAIN_C_FLAGS_VARIABLE})
            gmx_test_cxxflag(GNU_CXX_VSX_POWER8 "-mcpu=power8 -mpower8-vector -mpower8-fusion" ${TOOLCHAIN_CXX_FLAGS_VARIABLE})
        elseif(CPU_DETECTION_BRAND MATCHES "POWER9")
            # Enable power9 vector extensions on such platforms.
            # TODO consider whether adding " -mpower9-vector -mpower9-fusion"
            # is worthwhile.
            gmx_test_cflag(GNU_C_VSX_POWER9 "-mcpu=power9 -mtune=power9" ${TOOLCHAIN_C_FLAGS_VARIABLE})
            gmx_test_cxxflag(GNU_CXX_VSX_POWER9 "-mcpu=power9 -mtune=power9" ${TOOLCHAIN_CXX_FLAGS_VARIABLE})
        else()
            # Don't add arch-specific flags for unknown architectures.
        endif()
        # Altivec was originally single-only, and it took a while for compilers
        # to support the double-precision features in VSX.
        if(GMX_DOUBLE AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9")
            message(FATAL_ERROR "Using VSX SIMD in double precision with GCC requires GCC-4.9 or later.")
        endif()
    endif()
    if(CMAKE_CXX_COMPILER_ID MATCHES "XL" OR CMAKE_C_COMPILER_ID MATCHES "XL")
        if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "13.1.5" OR CMAKE_C_COMPILER_VERSION VERSION_LESS "13.1.5")
            message(FATAL_ERROR "Using VSX SIMD requires XL compiler version 13.1.5 or later.")
        endif()
    endif()
endmacro()
# SSE2
#
# Work out which compiler flags enable SSE2 for the C and the C++ compiler.
# Every argument is the *name* of a cache variable that is filled in:
#   C_FLAGS_RESULT / CXX_FLAGS_RESULT     - outcome reported by gmx_find_flags()
#                                           for the C / C++ compiler
#   C_FLAGS_VARIABLE / CXX_FLAGS_VARIABLE - toolchain flags followed by the SSE2
#                                           flags; written only when the matching
#                                           outcome evaluates to true
function(gmx_find_simd_sse2_flags C_FLAGS_RESULT CXX_FLAGS_RESULT C_FLAGS_VARIABLE CXX_FLAGS_VARIABLE)
    # Toolchain prerequisites must be known before the SIMD probe is run.
    find_x86_toolchain_flags(TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS)

    # Arguments: result names, test program, toolchain flag names,
    # output flag names, then the candidate flags.
    gmx_find_flags(
        SIMD_SSE2_C_FLAGS_RESULT SIMD_SSE2_CXX_FLAGS_RESULT
        "#include<xmmintrin.h>
int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_rsqrt_ps(x);return _mm_movemask_ps(x);}"
        TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS
        SIMD_SSE2_C_FLAGS SIMD_SSE2_CXX_FLAGS
        "-msse2" "/arch:SSE2" "-hgnu")

    # Publish the combined flag strings, one language at a time.
    if(${SIMD_SSE2_C_FLAGS_RESULT})
        set(simd_c_flag_string "${TOOLCHAIN_C_FLAGS} ${SIMD_SSE2_C_FLAGS}")
        set(${C_FLAGS_VARIABLE} "${simd_c_flag_string}"
            CACHE INTERNAL "C flags required for SSE2 instructions")
    endif()
    if(${SIMD_SSE2_CXX_FLAGS_RESULT})
        set(simd_cxx_flag_string "${TOOLCHAIN_CXX_FLAGS} ${SIMD_SSE2_CXX_FLAGS}")
        set(${CXX_FLAGS_VARIABLE} "${simd_cxx_flag_string}"
            CACHE INTERNAL "C++ flags required for SSE2 instructions")
    endif()

    # The outcomes are always recorded, whether or not the probes succeeded.
    set(${C_FLAGS_RESULT} ${SIMD_SSE2_C_FLAGS_RESULT}
        CACHE INTERNAL "Result of test for SSE2 C flags" FORCE)
    set(${CXX_FLAGS_RESULT} ${SIMD_SSE2_CXX_FLAGS_RESULT}
        CACHE INTERNAL "Result of test for SSE2 C++ flags" FORCE)
endfunction()
# SSE4.1
#
# Work out which compiler flags enable SSE4.1 for the C and the C++ compiler.
# Every argument is the *name* of a cache variable that is filled in:
#   C_FLAGS_RESULT / CXX_FLAGS_RESULT     - outcome reported by gmx_find_flags()
#                                           for the C / C++ compiler
#   C_FLAGS_VARIABLE / CXX_FLAGS_VARIABLE - toolchain flags followed by the
#                                           SSE4.1 flags; written only when the
#                                           matching outcome evaluates to true
function(gmx_find_simd_sse4_1_flags C_FLAGS_RESULT CXX_FLAGS_RESULT C_FLAGS_VARIABLE CXX_FLAGS_VARIABLE)
    # Toolchain prerequisites must be known before the SIMD probe is run.
    find_x86_toolchain_flags(TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS)

    # MSVC turns on SSE4.1 together with its SSE2 switch, which is why
    # "/arch:SSE2" is among the candidates tried here.
    gmx_find_flags(
        SIMD_SSE4_1_C_FLAGS_RESULT SIMD_SSE4_1_CXX_FLAGS_RESULT
        "#include<smmintrin.h>
int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_dp_ps(x,x,0x77);return _mm_movemask_ps(x);}"
        TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS
        SIMD_SSE4_1_C_FLAGS SIMD_SSE4_1_CXX_FLAGS
        "-msse4.1" "/arch:SSE4.1" "/arch:SSE2" "-hgnu")

    # Publish the combined flag strings, one language at a time.
    if(${SIMD_SSE4_1_C_FLAGS_RESULT})
        set(simd_c_flag_string "${TOOLCHAIN_C_FLAGS} ${SIMD_SSE4_1_C_FLAGS}")
        set(${C_FLAGS_VARIABLE} "${simd_c_flag_string}"
            CACHE INTERNAL "C flags required for SSE4.1 instructions")
    endif()
    if(${SIMD_SSE4_1_CXX_FLAGS_RESULT})
        set(simd_cxx_flag_string "${TOOLCHAIN_CXX_FLAGS} ${SIMD_SSE4_1_CXX_FLAGS}")
        set(${CXX_FLAGS_VARIABLE} "${simd_cxx_flag_string}"
            CACHE INTERNAL "C++ flags required for SSE4.1 instructions")
    endif()

    # The outcomes are always recorded, whether or not the probes succeeded.
    set(${C_FLAGS_RESULT} ${SIMD_SSE4_1_C_FLAGS_RESULT}
        CACHE INTERNAL "Result of test for SSE4.1 C flags" FORCE)
    set(${CXX_FLAGS_RESULT} ${SIMD_SSE4_1_CXX_FLAGS_RESULT}
        CACHE INTERNAL "Result of test for SSE4.1 C++ flags" FORCE)
endfunction()
# AVX, but using only 128-bit instructions and FMA (AMD XOP processors)
#
# Work out which compiler flags enable 128-bit AVX with AMD FMA (and, when
# available, XOP) for the C and the C++ compiler.
# Every argument is the *name* of a cache variable that is filled in:
#   C_FLAGS_RESULT / CXX_FLAGS_RESULT     - outcome of the last AVX/FMA probe that
#                                           was run (stage 2 when stage 1 succeeded,
#                                           otherwise stage 1)
#   C_FLAGS_VARIABLE / CXX_FLAGS_VARIABLE - toolchain, generic AVX, FMA and XOP
#                                           flags combined; written only when the
#                                           matching outcome evaluates to true
function(gmx_find_simd_avx_128_fma_flags C_FLAGS_RESULT CXX_FLAGS_RESULT C_FLAGS_VARIABLE CXX_FLAGS_VARIABLE)
    find_x86_toolchain_flags(TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS)

    # We don't have the full compiler version string yet (BUILD_C_COMPILER),
    # so we can't distinguish vanilla from Apple clang versions, but catering for a few rare AMD
    # hackintoshes is not worth the effort.
    if (APPLE AND (CMAKE_C_COMPILER_ID STREQUAL "Clang" OR
                   CMAKE_CXX_COMPILER_ID STREQUAL "Clang"))
        message(WARNING "Due to a known compiler bug, Clang up to version 3.2 (and Apple Clang up to version 4.1) produces incorrect code with AVX_128_FMA SIMD.")
    endif()

    # clang <=3.2 contains a bug that causes incorrect code to be generated for the
    # vfmaddps instruction and therefore the bug is triggered with AVX_128_FMA.
    # (see: http://llvm.org/bugs/show_bug.cgi?id=15040).
    # We can work around this by not using the integrated assembler (except on OS X
    # which has an outdated assembler that does not support AVX instructions).
    if (CMAKE_C_COMPILER_ID MATCHES "Clang" AND CMAKE_C_COMPILER_VERSION VERSION_LESS "3.3")
        # we assume that we have an external assembler that supports AVX
        message(STATUS "Clang ${CMAKE_C_COMPILER_VERSION} detected, enabling FMA bug workaround")
        set(TOOLCHAIN_C_FLAGS "${TOOLCHAIN_C_FLAGS} -no-integrated-as")
    endif()
    if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "3.3")
        # we assume that we have an external assembler that supports AVX
        message(STATUS "Clang ${CMAKE_CXX_COMPILER_VERSION} detected, enabling FMA bug workaround")
        set(TOOLCHAIN_CXX_FLAGS "${TOOLCHAIN_CXX_FLAGS} -no-integrated-as")
    endif()

    # AVX128/FMA on AMD is a bit complicated. We need to do detection in three stages:
    # 1) Find the flags required for generic AVX support
    # 2) Find the flags necessary to enable fused-multiply add support
    # 3) Optional: Find a flag to enable the AMD XOP instructions

    ### STAGE 1: Find the generic AVX flag, but stick to 128-bit instructions
    gmx_find_flags(SIMD_AVX_128_FMA_C_FLAGS_RESULT SIMD_AVX_128_FMA_CXX_FLAGS_RESULT
        "#include<immintrin.h>
int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_permute_ps(x,1);return 0;}"
        TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS
        SIMD_AVX_GENERIC_C_FLAGS SIMD_AVX_GENERIC_CXX_FLAGS
        "-mavx" "/arch:AVX" "-hgnu")

    if(SIMD_AVX_128_FMA_C_FLAGS_RESULT AND SIMD_AVX_128_FMA_CXX_FLAGS_RESULT)
        set(MERGED_C_FLAGS "${TOOLCHAIN_C_FLAGS} ${SIMD_AVX_GENERIC_C_FLAGS}")
        set(MERGED_CXX_FLAGS "${TOOLCHAIN_CXX_FLAGS} ${SIMD_AVX_GENERIC_CXX_FLAGS}")

        ### STAGE 2: Find the fused-multiply add flag.
        # GCC requires x86intrin.h for FMA support. MSVC 2010 requires intrin.h for FMA support.
        # NOTE(review): SIMD_C_FLAGS is not set anywhere in this function; presumably
        # it is empty or inherited from the calling scope - confirm.
        check_include_file(x86intrin.h HAVE_X86INTRIN_H ${SIMD_C_FLAGS})
        check_include_file(intrin.h HAVE_INTRIN_H ${SIMD_C_FLAGS})
        if(HAVE_X86INTRIN_H)
            set(INCLUDE_X86INTRIN_H "#include <x86intrin.h>")
        endif()
        if(HAVE_INTRIN_H)
            # Include the header whose presence was just verified (intrin.h).
            set(INCLUDE_INTRIN_H "#include <intrin.h>")
        endif()

        # NOTE(review): the candidate flag list of this call ("-mfma4" "-hgnu")
        # was restored from upstream GROMACS - confirm.
        gmx_find_flags(SIMD_AVX_128_FMA_C_FLAGS_RESULT SIMD_AVX_128_FMA_CXX_FLAGS_RESULT
            "#include<immintrin.h>
${INCLUDE_X86INTRIN_H}
${INCLUDE_INTRIN_H}
int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_macc_ps(x,x,x);return _mm_movemask_ps(x);}"
            MERGED_C_FLAGS MERGED_CXX_FLAGS
            SIMD_AVX_AMD_FMA_C_FLAGS SIMD_AVX_AMD_FMA_CXX_FLAGS
            "-mfma4" "-hgnu")

        if(SIMD_AVX_128_FMA_C_FLAGS_RESULT AND SIMD_AVX_128_FMA_CXX_FLAGS_RESULT)
            # Probe XOP on top of everything found so far (generic AVX and
            # FMA), which is the same combination that is published below.
            set(MERGED_C_FLAGS "${TOOLCHAIN_C_FLAGS} ${SIMD_AVX_GENERIC_C_FLAGS} ${SIMD_AVX_AMD_FMA_C_FLAGS}")
            set(MERGED_CXX_FLAGS "${TOOLCHAIN_CXX_FLAGS} ${SIMD_AVX_GENERIC_CXX_FLAGS} ${SIMD_AVX_AMD_FMA_CXX_FLAGS}")
            ### STAGE 3: Find the XOP instruction flag. This is optional.
            # NOTE(review): the candidate flag list of this call ("-mxop" "-hgnu")
            # was restored from upstream GROMACS - confirm.
            gmx_find_flags(SIMD_AVX_XOP_C_FLAGS_RESULT SIMD_AVX_XOP_CXX_FLAGS_RESULT
                "#include<immintrin.h>
${INCLUDE_X86INTRIN_H}
${INCLUDE_INTRIN_H}
int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_frcz_ps(x);return _mm_movemask_ps(x);}"
                MERGED_C_FLAGS MERGED_CXX_FLAGS
                SIMD_AVX_XOP_C_FLAGS SIMD_AVX_XOP_CXX_FLAGS
                "-mxop" "-hgnu")
        endif()
    endif()

    # The XOP result is deliberately not consulted here: XOP is optional, and
    # its flag variables simply expand to nothing when no flag was found.
    if(${SIMD_AVX_128_FMA_C_FLAGS_RESULT})
        set(${C_FLAGS_VARIABLE} "${TOOLCHAIN_C_FLAGS} ${SIMD_AVX_GENERIC_C_FLAGS} ${SIMD_AVX_AMD_FMA_C_FLAGS} ${SIMD_AVX_XOP_C_FLAGS}" CACHE INTERNAL "C flags required for 128-bit AVX with AMD FMA instructions")
    endif()
    if(${SIMD_AVX_128_FMA_CXX_FLAGS_RESULT})
        set(${CXX_FLAGS_VARIABLE} "${TOOLCHAIN_CXX_FLAGS} ${SIMD_AVX_GENERIC_CXX_FLAGS} ${SIMD_AVX_AMD_FMA_CXX_FLAGS} ${SIMD_AVX_XOP_CXX_FLAGS}" CACHE INTERNAL "C++ flags required for 128-bit AVX with AMD FMA instructions")
    endif()
    set(${C_FLAGS_RESULT} ${SIMD_AVX_128_FMA_C_FLAGS_RESULT} CACHE INTERNAL "Result of test for 128-bit AVX with AMD FMA C flags" FORCE)
    set(${CXX_FLAGS_RESULT} ${SIMD_AVX_128_FMA_CXX_FLAGS_RESULT} CACHE INTERNAL "Result of test for 128-bit AVX with AMD FMA C++ flags" FORCE)
endfunction()
# AVX (no AMD extensions)
#
# Work out which compiler flags enable 256-bit AVX for the C and the C++
# compiler. Every argument is the *name* of a cache variable that is filled in:
#   C_FLAGS_RESULT / CXX_FLAGS_RESULT     - outcome reported by gmx_find_flags()
#                                           for the C / C++ compiler
#   C_FLAGS_VARIABLE / CXX_FLAGS_VARIABLE - toolchain flags followed by the AVX
#                                           flags; written only when the matching
#                                           outcome evaluates to true
function(gmx_find_simd_avx_flags C_FLAGS_RESULT CXX_FLAGS_RESULT C_FLAGS_VARIABLE CXX_FLAGS_VARIABLE)
    # Toolchain prerequisites must be known before the SIMD probe is run.
    find_x86_toolchain_flags(TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS)

    gmx_find_flags(
        SIMD_AVX_C_FLAGS_RESULT SIMD_AVX_CXX_FLAGS_RESULT
        "#include<immintrin.h>
int main(){__m256 x=_mm256_set1_ps(0.5);x=_mm256_add_ps(x,x);return _mm256_movemask_ps(x);}"
        TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS
        SIMD_AVX_C_FLAGS SIMD_AVX_CXX_FLAGS
        "-mavx" "/arch:AVX" "-hgnu")

    # Publish the combined flag strings, one language at a time.
    if(${SIMD_AVX_C_FLAGS_RESULT})
        set(simd_c_flag_string "${TOOLCHAIN_C_FLAGS} ${SIMD_AVX_C_FLAGS}")
        set(${C_FLAGS_VARIABLE} "${simd_c_flag_string}"
            CACHE INTERNAL "C flags required for AVX instructions")
    endif()
    if(${SIMD_AVX_CXX_FLAGS_RESULT})
        set(simd_cxx_flag_string "${TOOLCHAIN_CXX_FLAGS} ${SIMD_AVX_CXX_FLAGS}")
        set(${CXX_FLAGS_VARIABLE} "${simd_cxx_flag_string}"
            CACHE INTERNAL "C++ flags required for AVX instructions")
    endif()

    # The outcomes are always recorded, whether or not the probes succeeded.
    set(${C_FLAGS_RESULT} ${SIMD_AVX_C_FLAGS_RESULT}
        CACHE INTERNAL "Result of test for AVX C flags" FORCE)
    set(${CXX_FLAGS_RESULT} ${SIMD_AVX_CXX_FLAGS_RESULT}
        CACHE INTERNAL "Result of test for AVX C++ flags" FORCE)
endfunction()
# AVX2
#
# Work out which compiler flags enable AVX2 for the C and the C++ compiler.
# Every argument is the *name* of a cache variable that is filled in:
#   C_FLAGS_RESULT / CXX_FLAGS_RESULT     - outcome reported by gmx_find_flags()
#                                           for the C / C++ compiler
#   C_FLAGS_VARIABLE / CXX_FLAGS_VARIABLE - toolchain flags followed by the AVX2
#                                           flags; written only when the matching
#                                           outcome evaluates to true
function(gmx_find_simd_avx2_flags C_FLAGS_RESULT CXX_FLAGS_RESULT C_FLAGS_VARIABLE CXX_FLAGS_VARIABLE)
    find_x86_toolchain_flags(TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS)

    # Ideally "AVX2_256" support would just switch on the instructions that
    # are used, i.e. '-mavx2 -mfma'. icc (v16-18) does not allow that and
    # requires '-march=core-avx2' instead; annoyingly it still accepts the
    # former flags, but not silently: it issues warnings that cannot be
    # disabled.
    # Conversely, gcc/clang do not reject Intel's -march=core-avx2 (and are
    # at least silent about it), but there it is an undocumented flag with
    # unclear behavior that might enable some arch-specific optimizations.
    # Flag-acceptance checks therefore cannot tell the compilers apart, so
    # Intel is singled out by compiler ID and everything else gets the
    # gcc/clang spelling.
    set(TOOLCHAIN_FLAG_FOR_AVX2 "-mavx2 -mfma")
    if(CMAKE_C_COMPILER_ID MATCHES "Intel")
        set(TOOLCHAIN_FLAG_FOR_AVX2 "-march=core-avx2")
    endif()

    # There is no AVX2-specific flag for MSVC yet, hence "/arch:AVX".
    gmx_find_flags(
        SIMD_AVX2_C_FLAGS_RESULT SIMD_AVX2_CXX_FLAGS_RESULT
        "#include<immintrin.h>
int main(){__m256i x=_mm256_set1_epi32(5);x=_mm256_add_epi32(x,x);return _mm256_movemask_epi8(x);}"
        TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS
        SIMD_AVX2_C_FLAGS SIMD_AVX2_CXX_FLAGS
        "${TOOLCHAIN_FLAG_FOR_AVX2}" "-mavx2" "/arch:AVX" "-hgnu")

    # Publish the combined flag strings, one language at a time.
    if(${SIMD_AVX2_C_FLAGS_RESULT})
        set(simd_c_flag_string "${TOOLCHAIN_C_FLAGS} ${SIMD_AVX2_C_FLAGS}")
        set(${C_FLAGS_VARIABLE} "${simd_c_flag_string}"
            CACHE INTERNAL "C flags required for AVX2 instructions")
    endif()
    if(${SIMD_AVX2_CXX_FLAGS_RESULT})
        set(simd_cxx_flag_string "${TOOLCHAIN_CXX_FLAGS} ${SIMD_AVX2_CXX_FLAGS}")
        set(${CXX_FLAGS_VARIABLE} "${simd_cxx_flag_string}"
            CACHE INTERNAL "C++ flags required for AVX2 instructions")
    endif()

    # The outcomes are always recorded, whether or not the probes succeeded.
    set(${C_FLAGS_RESULT} ${SIMD_AVX2_C_FLAGS_RESULT}
        CACHE INTERNAL "Result of test for AVX2 C flags" FORCE)
    set(${CXX_FLAGS_RESULT} ${SIMD_AVX2_CXX_FLAGS_RESULT}
        CACHE INTERNAL "Result of test for AVX2 C++ flags" FORCE)
endfunction()
# AVX-512F (Skylake-X)
#
# Work out which compiler flags enable AVX-512 for the C and the C++ compiler.
# Every argument is the *name* of a cache variable that is filled in:
#   C_FLAGS_RESULT / CXX_FLAGS_RESULT     - outcome reported by gmx_find_flags()
#                                           for the C / C++ compiler
#   C_FLAGS_VARIABLE / CXX_FLAGS_VARIABLE - toolchain flags followed by the
#                                           AVX-512 flags; written only when the
#                                           matching outcome evaluates to true
function(gmx_find_simd_avx_512_flags C_FLAGS_RESULT CXX_FLAGS_RESULT C_FLAGS_VARIABLE CXX_FLAGS_VARIABLE)
    # Toolchain prerequisites must be known before the SIMD probe is run.
    find_x86_toolchain_flags(TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS)

    # No AVX_512F flags are known for MSVC yet, hence "/arch:AVX".
    # ICC should use ZMM if the code uses ZMM anyhow.
    gmx_find_flags(
        SIMD_AVX_512_C_FLAGS_RESULT SIMD_AVX_512_CXX_FLAGS_RESULT
        "#include<immintrin.h>
int main(){__m512 x=_mm512_set1_ps(0.5); __m512 y=_mm512_fmadd_ps(x,x,x);
__m512i i = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
__mmask16 mask = (short)(0xffff);
int idata[16]; i = _mm512_maskz_permutexvar_epi32(mask, i, i);
_mm512_storeu_si512(idata, i);
return idata[0]*(int)(_mm512_cmp_ps_mask(x,y,_CMP_LT_OS));}"
        TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS
        SIMD_AVX_512_C_FLAGS SIMD_AVX_512_CXX_FLAGS
        "-xCORE-AVX512 -qopt-zmm-usage=high" "-xCORE-AVX512" "-mavx512f -mfma" "-mavx512f" "/arch:AVX" "-hgnu")

    # Publish the combined flag strings, one language at a time.
    if(${SIMD_AVX_512_C_FLAGS_RESULT})
        set(simd_c_flag_string "${TOOLCHAIN_C_FLAGS} ${SIMD_AVX_512_C_FLAGS}")
        set(${C_FLAGS_VARIABLE} "${simd_c_flag_string}"
            CACHE INTERNAL "C flags required for AVX-512 instructions")
    endif()
    if(${SIMD_AVX_512_CXX_FLAGS_RESULT})
        set(simd_cxx_flag_string "${TOOLCHAIN_CXX_FLAGS} ${SIMD_AVX_512_CXX_FLAGS}")
        set(${CXX_FLAGS_VARIABLE} "${simd_cxx_flag_string}"
            CACHE INTERNAL "C++ flags required for AVX-512 instructions")
    endif()

    # The outcomes are always recorded, whether or not the probes succeeded.
    set(${C_FLAGS_RESULT} ${SIMD_AVX_512_C_FLAGS_RESULT}
        CACHE INTERNAL "Result of test for AVX-512 C flags" FORCE)
    set(${CXX_FLAGS_RESULT} ${SIMD_AVX_512_CXX_FLAGS_RESULT}
        CACHE INTERNAL "Result of test for AVX-512 C++ flags" FORCE)
endfunction()
# AVX-512 for KNL
#
# Work out which compiler flags enable the KNL flavour of AVX-512 for the C
# and the C++ compiler; the test program relies on _mm512_rsqrt28_ps().
# Every argument is the *name* of a cache variable that is filled in:
#   C_FLAGS_RESULT / CXX_FLAGS_RESULT     - outcome reported by gmx_find_flags()
#                                           for the C / C++ compiler
#   C_FLAGS_VARIABLE / CXX_FLAGS_VARIABLE - toolchain flags followed by the KNL
#                                           flags; written only when the matching
#                                           outcome evaluates to true
function(gmx_find_simd_avx_512_knl_flags C_FLAGS_RESULT CXX_FLAGS_RESULT C_FLAGS_VARIABLE CXX_FLAGS_VARIABLE)
    # Toolchain prerequisites must be known before the SIMD probe is run.
    find_x86_toolchain_flags(TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS)

    # No AVX_512ER flags are known for MSVC yet, hence "/arch:AVX".
    gmx_find_flags(
        SIMD_AVX_512_KNL_C_FLAGS_RESULT SIMD_AVX_512_KNL_CXX_FLAGS_RESULT
        "#include<immintrin.h>
int main(){__m512 y,x=_mm512_set1_ps(0.5);y=_mm512_rsqrt28_ps(x);return (int)_mm512_cmp_ps_mask(x,y,_CMP_LT_OS);}"
        TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS
        SIMD_AVX_512_KNL_C_FLAGS SIMD_AVX_512_KNL_CXX_FLAGS
        "-xMIC-AVX512" "-mavx512er -mfma" "-mavx512er" "/arch:AVX" "-hgnu")

    # Publish the combined flag strings, one language at a time.
    if(${SIMD_AVX_512_KNL_C_FLAGS_RESULT})
        set(simd_c_flag_string "${TOOLCHAIN_C_FLAGS} ${SIMD_AVX_512_KNL_C_FLAGS}")
        set(${C_FLAGS_VARIABLE} "${simd_c_flag_string}"
            CACHE INTERNAL "C flags required for AVX-512 for KNL instructions")
    endif()
    if(${SIMD_AVX_512_KNL_CXX_FLAGS_RESULT})
        set(simd_cxx_flag_string "${TOOLCHAIN_CXX_FLAGS} ${SIMD_AVX_512_KNL_CXX_FLAGS}")
        set(${CXX_FLAGS_VARIABLE} "${simd_cxx_flag_string}"
            CACHE INTERNAL "C++ flags required for AVX-512 for KNL instructions")
    endif()

    # The outcomes are always recorded, whether or not the probes succeeded.
    set(${C_FLAGS_RESULT} ${SIMD_AVX_512_KNL_C_FLAGS_RESULT}
        CACHE INTERNAL "Result of test for AVX-512 for KNL C flags" FORCE)
    set(${CXX_FLAGS_RESULT} ${SIMD_AVX_512_KNL_CXX_FLAGS_RESULT}
        CACHE INTERNAL "Result of test for AVX-512 for KNL C++ flags" FORCE)
endfunction()
# Arm Neon (32-bit ARM)
#
# Work out which compiler flags enable Arm Neon for the C and the C++ compiler.
# Every argument is the *name* of a cache variable that is filled in:
#   C_FLAGS_RESULT / CXX_FLAGS_RESULT     - outcome reported by gmx_find_flags()
#                                           for the C / C++ compiler
#   C_FLAGS_VARIABLE / CXX_FLAGS_VARIABLE - toolchain flags followed by the Neon
#                                           flags; written only when the matching
#                                           outcome evaluates to true
#
# NOTE(review): TOOLCHAIN_C_FLAGS / TOOLCHAIN_CXX_FLAGS are not assigned in this
# function; presumably they are empty or inherited from the calling scope -
# confirm.
function(gmx_find_simd_arm_neon_flags C_FLAGS_RESULT CXX_FLAGS_RESULT C_FLAGS_VARIABLE CXX_FLAGS_VARIABLE)

    # The last candidate is the empty string, i.e. no extra flag at all.
    gmx_find_flags(
        SIMD_ARM_NEON_C_FLAGS_RESULT SIMD_ARM_NEON_CXX_FLAGS_RESULT
        "#include<arm_neon.h>
int main(){float32x4_t x=vdupq_n_f32(0.5);x=vmlaq_f32(x,x,x);return vgetq_lane_f32(x,0)>0;}"
        TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS
        SIMD_ARM_NEON_C_FLAGS SIMD_ARM_NEON_CXX_FLAGS
        "-mfpu=neon-vfpv4" "-mfpu=neon" "")

    # Publish the combined flag strings, one language at a time.
    if(${SIMD_ARM_NEON_C_FLAGS_RESULT})
        set(simd_c_flag_string "${TOOLCHAIN_C_FLAGS} ${SIMD_ARM_NEON_C_FLAGS}")
        set(${C_FLAGS_VARIABLE} "${simd_c_flag_string}"
            CACHE INTERNAL "C flags required for Arm Neon instructions")
    endif()
    if(${SIMD_ARM_NEON_CXX_FLAGS_RESULT})
        set(simd_cxx_flag_string "${TOOLCHAIN_CXX_FLAGS} ${SIMD_ARM_NEON_CXX_FLAGS}")
        set(${CXX_FLAGS_VARIABLE} "${simd_cxx_flag_string}"
            CACHE INTERNAL "C++ flags required for Arm Neon instructions")
    endif()

    # The outcomes are always recorded, whether or not the probes succeeded.
    set(${C_FLAGS_RESULT} ${SIMD_ARM_NEON_C_FLAGS_RESULT}
        CACHE INTERNAL "Result of test for Arm Neon C flags" FORCE)
    set(${CXX_FLAGS_RESULT} ${SIMD_ARM_NEON_CXX_FLAGS_RESULT}
        CACHE INTERNAL "Result of test for Arm Neon C++ flags" FORCE)
endfunction()
348 # Arm Neon Asimd (64-bit ARM)
# Probes, through gmx_find_flags(), for the compiler flags needed to build a
# test program that uses double-precision Neon intrinsics from arm_neon.h
# (vdupq_n_f64, vfmaq_f64, vrndnq_f64, vgetq_lane_f64).
# All four arguments are names of cache variables that this function sets:
#   C_FLAGS_RESULT / CXX_FLAGS_RESULT     - receive the probe outcome for C / C++
#   C_FLAGS_VARIABLE / CXX_FLAGS_VARIABLE - receive the toolchain flags followed
#                                           by the detected flags, only when the
#                                           matching outcome evaluates to true
# NOTE(review): TOOLCHAIN_C_FLAGS / TOOLCHAIN_CXX_FLAGS are not assigned in this
# function; presumably they are empty or inherited from the caller - confirm.
349 function(gmx_find_simd_arm_neon_asimd_flags C_FLAGS_RESULT CXX_FLAGS_RESULT C_FLAGS_VARIABLE CXX_FLAGS_VARIABLE)
# Arguments: result variable names, test program source, toolchain flag
# variable names, output flag variable names.
351 gmx_find_flags(SIMD_ARM_NEON_ASIMD_C_FLAGS_RESULT SIMD_ARM_NEON_ASIMD_CXX_FLAGS_RESULT
352 "#include<arm_neon.h>
353 int main(){float64x2_t x=vdupq_n_f64(0.5);x=vfmaq_f64(x,x,x);x=vrndnq_f64(x);return vgetq_lane_f64(x,0)>0;}"
354 TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS
355 SIMD_ARM_NEON_ASIMD_C_FLAGS SIMD_ARM_NEON_ASIMD_CXX_FLAGS
# Publish the combined C flag string only when the C probe outcome is true.
358 if(${SIMD_ARM_NEON_ASIMD_C_FLAGS_RESULT})
359 set(${C_FLAGS_VARIABLE} "${TOOLCHAIN_C_FLAGS} ${SIMD_ARM_NEON_ASIMD_C_FLAGS}" CACHE INTERNAL "C flags required for Arm Neon Asimd instructions")
# Same for the C++ compiler.
361 if(${SIMD_ARM_NEON_ASIMD_CXX_FLAGS_RESULT})
362 set(${CXX_FLAGS_VARIABLE} "${TOOLCHAIN_CXX_FLAGS} ${SIMD_ARM_NEON_ASIMD_CXX_FLAGS}" CACHE INTERNAL "C++ flags required for Arm Neon Asimd instructions")
# The probe outcomes are written to the cache with FORCE, overwriting any
# previously cached value.
364 set(${C_FLAGS_RESULT} ${SIMD_ARM_NEON_ASIMD_C_FLAGS_RESULT} CACHE INTERNAL "Result of test for Arm Neon Asimd C flags" FORCE)
365 set(${CXX_FLAGS_RESULT} ${SIMD_ARM_NEON_ASIMD_CXX_FLAGS_RESULT} CACHE INTERNAL "Result of test for Arm Neon Asimd C++ flags" FORCE)
# Probes, through gmx_find_flags(), for the compiler flags needed to build a
# test program that uses single-precision vector intrinsics (vec_ctf,
# vec_splat_s32, vec_madd, vec_all_ge) on "vector float" values.
# All four arguments are names of cache variables that this function sets:
#   C_FLAGS_RESULT / CXX_FLAGS_RESULT     - receive the probe outcome for C / C++
#   C_FLAGS_VARIABLE / CXX_FLAGS_VARIABLE - receive the toolchain flags followed
#                                           by the detected flags, only when the
#                                           matching outcome evaluates to true
# NOTE(review): TOOLCHAIN_C_FLAGS / TOOLCHAIN_CXX_FLAGS are not assigned in this
# function; presumably they are empty or inherited from the caller - confirm.
369 function(gmx_find_simd_ibm_vmx_flags C_FLAGS_RESULT CXX_FLAGS_RESULT C_FLAGS_VARIABLE CXX_FLAGS_VARIABLE)
# Arguments: result variable names, test program source, toolchain flag
# variable names, output flag variable names, then the candidate flag sets.
371 gmx_find_flags(SIMD_IBM_VMX_C_FLAGS_RESULT SIMD_IBM_VMX_CXX_FLAGS_RESULT
373 int main(){vector float x,y=vec_ctf(vec_splat_s32(1),0);x=vec_madd(y,y,y);return vec_all_ge(y,x);}"
374 TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS
375 SIMD_IBM_VMX_C_FLAGS SIMD_IBM_VMX_CXX_FLAGS
376 "-maltivec -mabi=altivec" "-qarch=auto -qaltivec")
# Publish the combined C flag string only when the C probe outcome is true.
378 if(${SIMD_IBM_VMX_C_FLAGS_RESULT})
379 set(${C_FLAGS_VARIABLE} "${TOOLCHAIN_C_FLAGS} ${SIMD_IBM_VMX_C_FLAGS}" CACHE INTERNAL "C flags required for IBM VMX instructions")
# Same for the C++ compiler.
381 if(${SIMD_IBM_VMX_CXX_FLAGS_RESULT})
382 set(${CXX_FLAGS_VARIABLE} "${TOOLCHAIN_CXX_FLAGS} ${SIMD_IBM_VMX_CXX_FLAGS}" CACHE INTERNAL "C++ flags required for IBM VMX instructions")
# The probe outcomes are written to the cache with FORCE, overwriting any
# previously cached value.
384 set(${C_FLAGS_RESULT} ${SIMD_IBM_VMX_C_FLAGS_RESULT} CACHE INTERNAL "Result of test for IBM VMX C flags" FORCE)
385 set(${CXX_FLAGS_RESULT} ${SIMD_IBM_VMX_CXX_FLAGS_RESULT} CACHE INTERNAL "Result of test for IBM VMX C++ flags" FORCE)
388 # IBM VSX (power7 and later)
# Probes, through gmx_find_flags(), for the compiler flags needed to build a
# test program that uses double-precision vector intrinsics (vec_splats,
# vec_madd, vec_all_ge) on "vector double" values.
# All four arguments are names of cache variables that this function sets:
#   C_FLAGS_RESULT / CXX_FLAGS_RESULT     - receive the probe outcome for C / C++
#   C_FLAGS_VARIABLE / CXX_FLAGS_VARIABLE - receive the toolchain flags followed
#                                           by the detected flags, only when the
#                                           matching outcome evaluates to true
389 function(gmx_find_simd_ibm_vsx_flags C_FLAGS_RESULT CXX_FLAGS_RESULT C_FLAGS_VARIABLE CXX_FLAGS_VARIABLE)
# Run the Power/VSX toolchain macro first; it receives the names
# TOOLCHAIN_C_FLAGS / TOOLCHAIN_CXX_FLAGS, which the probe below also uses.
390 find_power_vsx_toolchain_flags(TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS)
# Arguments: result variable names, test program source, toolchain flag
# variable names, output flag variable names, then the candidate flag sets.
391 gmx_find_flags(SIMD_IBM_VSX_C_FLAGS_RESULT SIMD_IBM_VSX_CXX_FLAGS_RESULT
393 int main(){vector double x,y=vec_splats(1.0);x=vec_madd(y,y,y);return vec_all_ge(y,x);}"
394 TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS
395 SIMD_IBM_VSX_C_FLAGS SIMD_IBM_VSX_CXX_FLAGS
396 "-mvsx" "-maltivec -mabi=altivec" "-qarch=auto -qaltivec")
# Publish the combined C flag string only when the C probe outcome is true.
398 if(${SIMD_IBM_VSX_C_FLAGS_RESULT})
399 set(${C_FLAGS_VARIABLE} "${TOOLCHAIN_C_FLAGS} ${SIMD_IBM_VSX_C_FLAGS}" CACHE INTERNAL "C flags required for IBM VSX instructions")
# Same for the C++ compiler.
401 if(${SIMD_IBM_VSX_CXX_FLAGS_RESULT})
402 set(${CXX_FLAGS_VARIABLE} "${TOOLCHAIN_CXX_FLAGS} ${SIMD_IBM_VSX_CXX_FLAGS}" CACHE INTERNAL "C++ flags required for IBM VSX instructions")
# The probe outcomes are written to the cache with FORCE, overwriting any
# previously cached value.
404 set(${C_FLAGS_RESULT} ${SIMD_IBM_VSX_C_FLAGS_RESULT} CACHE INTERNAL "Result of test for IBM VSX C flags" FORCE)
405 set(${CXX_FLAGS_RESULT} ${SIMD_IBM_VSX_CXX_FLAGS_RESULT} CACHE INTERNAL "Result of test for IBM VSX C++ flags" FORCE)