From 6302f1a0bd2840ed609feb24900a756db4d53423 Mon Sep 17 00:00:00 2001 From: Erik Lindahl Date: Sat, 3 Oct 2015 16:55:43 +0200 Subject: [PATCH] Added AlignedAllocator class Intended for use with standard library containers so they can hold SIMD data and still guarantee aligned load/store. Memory will currently be aligned to 128 bytes, and padded with the same amount after the area to avoid false cache sharing. Change-Id: I9d5b119d17f179793bb5252fad10e6dd89ef60c7 --- CMakeLists.txt | 14 +- cmake/TestMMMalloc.cpp | 14 ++ cmake/gmxTestMMMalloc.cmake | 75 ++++++++ src/config.h.cmakein | 21 ++- src/gromacs/utility/alignedallocator.cpp | 201 +++++++++++++++++++++ src/gromacs/utility/alignedallocator.h | 238 +++++++++++++++++++++++++ src/gromacs/utility/smalloc.cpp | 79 ++------ src/gromacs/utility/tests/CMakeLists.txt | 1 + src/gromacs/utility/tests/alignedallocator.cpp | 85 +++++++++ 9 files changed, 658 insertions(+), 70 deletions(-) create mode 100644 cmake/TestMMMalloc.cpp create mode 100644 cmake/gmxTestMMMalloc.cmake create mode 100644 src/gromacs/utility/alignedallocator.cpp create mode 100644 src/gromacs/utility/alignedallocator.h create mode 100644 src/gromacs/utility/tests/alignedallocator.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index fb81bd193a..d8e3d6e0a4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -357,9 +357,6 @@ check_include_files(regex.h HAVE_POSIX_REGEX) # as selections won't be fully functional. include(CheckCXXSymbolExists) -check_cxx_symbol_exists(posix_memalign stdlib.h HAVE_POSIX_MEMALIGN) -check_cxx_symbol_exists(memalign stdlib.h HAVE_MEMALIGN) -check_cxx_symbol_exists(_aligned_malloc stdlib.h HAVE__ALIGNED_MALLOC) check_cxx_symbol_exists(gettimeofday sys/time.h HAVE_GETTIMEOFDAY) check_cxx_symbol_exists(sysconf unistd.h HAVE_SYSCONF) check_cxx_symbol_exists(nice unistd.h HAVE_NICE) @@ -380,6 +377,17 @@ check_library_exists(m feenableexcept "" HAVE_FEENABLEEXCEPT) include(TestSchedAffinity) test_sched_affinity(HAVE_SCHED_AFFINITY) +# Aligned memory allocation. We need to check for both mm_malloc(), +# posix_memalign(), memalign(), and on windows also _aligned_malloc() +include(gmxTestMMMalloc) +gmx_test_mm_malloc(HAVE__MM_MALLOC) +check_cxx_symbol_exists(posix_memalign stdlib.h HAVE_POSIX_MEMALIGN) +check_cxx_symbol_exists(memalign stdlib.h HAVE_MEMALIGN) +if(MSVC) + # No need to waste time on this test on platforms where it will never be true + check_cxx_symbol_exists(_aligned_malloc stdlib.h HAVE__ALIGNED_MALLOC) +endif() + include(TestBigEndian) test_big_endian(GMX_INTEGER_BIG_ENDIAN) diff --git a/cmake/TestMMMalloc.cpp b/cmake/TestMMMalloc.cpp new file mode 100644 index 0000000000..64eab19eec --- /dev/null +++ b/cmake/TestMMMalloc.cpp @@ -0,0 +1,14 @@ +#if HAVE_MM_MALLOC_H +# include +#elif HAVE_MALLOC_H +# include +#elif HAVE_XMMINTRIN_H +# include +#endif + +int +main() +{ + void *p = _mm_malloc(8*sizeof(float),64); + _mm_free(p); +} diff --git a/cmake/gmxTestMMMalloc.cmake b/cmake/gmxTestMMMalloc.cmake new file mode 100644 index 0000000000..83e1a7e9f5 --- /dev/null +++ b/cmake/gmxTestMMMalloc.cmake @@ -0,0 +1,75 @@ +# +# This file is part of the GROMACS molecular simulation package. +# +# Copyright (c) 2009,2010,2012,2013,2014,2015, by the GROMACS development team, led by +# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, +# and including many others, as listed in the AUTHORS file in the +# top-level source directory and at http://www.gromacs.org. +# +# GROMACS is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public License +# as published by the Free Software Foundation; either version 2.1 +# of the License, or (at your option) any later version. +# +# GROMACS is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with GROMACS; if not, see +# http://www.gnu.org/licenses, or write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# If you want to redistribute modifications to GROMACS, please +# consider that scientific software is very special. Version +# control is crucial - bugs must be traceable. We will be happy to +# consider code for inclusion in the official distribution, but +# derived work must not be called official GROMACS. Details are found +# in the README & COPYING files - if they are missing, get the +# official version at http://www.gromacs.org. +# +# To help us fund GROMACS development, we humbly ask that you cite +# the research papers on the package. Check out http://www.gromacs.org. + +# +# GMX_TEST_MM_MALLOC(VARIABLE) +# +# VARIABLE will be set to true if we find _mm_malloc() and _mm_free(). +# +# The macro will also check whether the headers mm_malloc.h, malloc.h and +# xmmintrin.h are present, since the routines might be defined in either, +# and set the corresponding HAVE_MM_MALLOC_H, HAVE_MALLOC_H, and +# HAVE_XMMINTRIN_H if it hasn't already been done outside this file. +# +MACRO(GMX_TEST_MM_MALLOC VARIABLE) + IF(NOT DEFINED ${VARIABLE}) + + check_include_files(mm_malloc.h HAVE_MM_MALLOC_H) + check_include_files(malloc.h HAVE_MALLOC_H) + check_include_files(xmmintrin.h HAVE_XMMINTRIN_H) + + MESSAGE(STATUS "Checking for _mm_malloc()") + + TRY_COMPILE(${VARIABLE} "${CMAKE_BINARY_DIR}" + "${CMAKE_SOURCE_DIR}/cmake/TestMMMalloc.cpp" + COMPILE_DEFINITIONS + "-DHAVE_MM_MALLOC_H=${HAVE_MM_MALLOC_H}" + "-DHAVE_MALLOC_H=${HAVE_MALLOC_H}" + "-DHAVE_XMMINTRIN_H=${HAVE_XMMINTRIN_H}" + OUTPUT_VARIABLE MM_MALLOC_COMPILE_OUTPUT) + + if(${VARIABLE}) + MESSAGE(STATUS "Checking for _mm_malloc() - supported") + set(${VARIABLE} 1 CACHE INTERNAL "Result of test for _mm_malloc" FORCE) + else() + MESSAGE(STATUS "Checking for _mm_malloc() - not supported") + set(${VARIABLE} 0 CACHE INTERNAL "Result of test for _mm_malloc()" FORCE) + endif() + + ENDIF() +ENDMACRO(GMX_TEST_MM_MALLOC VARIABLE) + + + + diff --git a/src/config.h.cmakein b/src/config.h.cmakein index b6ef776999..bc646afcb6 100644 --- a/src/config.h.cmakein +++ b/src/config.h.cmakein @@ -249,13 +249,13 @@ #cmakedefine HAVE_IO_H /* Define to 1 if you have the posix_memalign() function. */ -#cmakedefine HAVE_POSIX_MEMALIGN +#cmakedefine01 HAVE_POSIX_MEMALIGN /* Define to 1 if you have the memalign() function. */ -#cmakedefine HAVE_MEMALIGN +#cmakedefine01 HAVE_MEMALIGN /* Define to 1 if you have the MSVC _aligned_malloc() function. */ -#cmakedefine HAVE__ALIGNED_MALLOC +#cmakedefine01 HAVE__ALIGNED_MALLOC /* Define to 1 if you have the clock_gettime() function. */ #cmakedefine HAVE_CLOCK_GETTIME @@ -320,6 +320,15 @@ /* Define to 1 if you have the header */ #cmakedefine HAVE_SCHED_H +/* Define to 1 if mm_malloc.h is present, otherwise 0 */ +#cmakedefine01 HAVE_MM_MALLOC_H + +/* Define to 1 if malloc.h is present, otherwise 0 */ +#cmakedefine01 HAVE_MALLOC_H + +/* Define to 1 if xmmintrin.h is present, otherwise 0 */ +#cmakedefine01 HAVE_XMMINTRIN_H + /* Define to 1 if you have the POSIX header file. */ #cmakedefine01 HAVE_POSIX_REGEX @@ -332,6 +341,12 @@ /* Define to 1 if you have the all the affinity functions in sched.h */ #cmakedefine HAVE_SCHED_AFFINITY +/* Define to 1 if _mm_malloc() is present in either mm_malloc.h, + * malloc.h or xmmintrin.h, and 0 otherwise. Note that you need to + * conditionally include the three headers too before using _mm_malloc(). + */ +#cmakedefine01 HAVE__MM_MALLOC + /* Define if SIGUSR1 is present */ #cmakedefine01 HAVE_SIGUSR1 diff --git a/src/gromacs/utility/alignedallocator.cpp b/src/gromacs/utility/alignedallocator.cpp new file mode 100644 index 0000000000..713acd812e --- /dev/null +++ b/src/gromacs/utility/alignedallocator.cpp @@ -0,0 +1,201 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2015, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \internal \file + * \brief + * Implements AlignedAllocator. + * + * \author Erik Lindahl + * \ingroup module_utility + */ +#include "gmxpre.h" + +#include "alignedallocator.h" + +#include "config.h" + +#include + +#if HAVE_MM_MALLOC_H +# include +#elif HAVE_MALLOC_H +# include +#elif HAVE_XMMINTRIN_H +# include +#endif + +#include "gromacs/utility/gmxassert.h" + + +namespace gmx +{ + +namespace internal +{ + +/*! \brief Allocate aligned memory in a fully portable way + * + * \param bytes Amount of memory (bytes) to allocate. The routine will return + * nullptr if the allocation fails. However, note that asking for + * zero bytes will return a pointer that is non-null and properly + * aligned (but obviously you cannot use it, since you promised + * not to access data beyond the 0 bytes you asked for). + * + * \param alignment Alignment specification in bytes, must be a power of 2. + * + * \return Nonzero pointer if the allocation worked, otherwise nullptr. + * This routine should only be called from alignedMalloc(), which also does + * the checking for valid values. This particular function is used for platforms + * where we have no control of the alignment of memory returned by the system. + * Instead, we increase the amount of memory requested internally such that we + * both can create a pointer inside this memory that fulfills the memory + * alignment requested, and that we have room to store the original pointer + * just before this area. + * + * \note This is an internal routine that should only be called from + * gmx::alignedMalloc(). Just like system-provided routines, it provides + * memory that is aligned - but not padded. + */ +static void * +alignedMallocGeneric(std::size_t bytes, std::size_t alignment) +{ + // The amount of extra memory (beyound what the user asked for) we need is: + // - sizeof(void *), to store the original pointer + // - alignment, to make sure we have an aligned pointer in the area + void * pMalloc = malloc(bytes + sizeof(void *) + alignment); + + if (pMalloc == nullptr) + { + return nullptr; + } + + // Convert pMalloc to size_t (so we work with raw bytes), add the space we + // need to save the original pointer, and (alignment-1) bytes, and then mask + // out the lowest bits. + std::size_t mask = ~static_cast(alignment-1); + void * pAligned = reinterpret_cast((reinterpret_cast(pMalloc) + sizeof(void *) + alignment - 1) & mask); + + // Store original pointer. Since we allocated at least sizeof(void *) extra + // space this is always a valid memory location. + reinterpret_cast(pAligned)[-1] = pMalloc; + + return pAligned; +} + + +/*! \brief Free aligned memory + * + * \param p Memory pointer previously returned from + * gmx::internal::alignedFreePortable(). + * + * Since this routine relies on the original pointer being stored just before + * the memory area p points to, bad things will happen if you call this routine + * with a pointer obtained any other way, or if you call the system free() + * with a pointer obtained from std::alignedMalloc(). + * + * \note This is an internal routine that should only be called from + * gmx::alignedFree(). + */ +static void +alignedFreeGeneric(void *p) +{ + if (p) + { + // Pick up the pointer stored just below p, and use that to call free() + free( reinterpret_cast(p)[-1] ); + } +} + + + +void * +alignedMalloc(std::size_t bytes) +{ + // For now we always use 128-byte alignment: + // 1) IBM Power already has cache lines of 128-bytes, and needs it. + // 2) x86 has 64 byte cache lines, but since a future AVX-1024 (rumored?) + // will need 1024/8=128 byte SIMD alignment, it is safer to use that + // already now. + // 3) The old Pentium4 used 256-byte cache prefetching (but 64-byte lines). + // However, it's not worth worrying about performance for P4... + // 4) ARM & Sparc have 64 byte lines, but will be just fine with + // 128-byte alignment (nobody knows what the future brings) + // + // So, for now we're semi-lazy and just align to 128 bytes! + + std::size_t alignment = 128; + + void * p; + + // Pad memory at the end with another alignment bytes to avoid false sharing + bytes += alignment; + +#if HAVE__MM_MALLOC + p = _mm_malloc( bytes, alignment ); +#elif HAVE_POSIX_MEMALIGN + if (posix_memalign(&p, alignment, bytes) != 0) + { + p = nullptr; + } +#elif HAVE_MEMALIGN + p = memalign(alignment, bytes); +#elif HAVE__ALIGNED_MALLOC + p = _aligned_malloc(bytes, alignment); +#else + p = alignedMallocGeneric(bytes, alignment); +#endif + + return p; +} + +void +alignedFree(void *p) +{ + if (p) + { +#if HAVE__MM_MALLOC + _mm_free(p); +#elif HAVE_POSIX_MEMALIGN || HAVE_MEMALIGN + free(p); +#elif HAVE__ALIGNED_MALLOC + _aligned_free(p); +#else + alignedFreeGeneric(p); +#endif + } +} + +} // namespace internal + +} // namespace gmx diff --git a/src/gromacs/utility/alignedallocator.h b/src/gromacs/utility/alignedallocator.h new file mode 100644 index 0000000000..9770451e26 --- /dev/null +++ b/src/gromacs/utility/alignedallocator.h @@ -0,0 +1,238 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2015, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \libinternal \file + * \brief + * Declares gmx::AlignedAllocator that is used to make standard library + * containers compatible with SIMD contents that require aligned load/store. + * + * \author Erik Lindahl + * \inlibraryapi + * \ingroup module_utility + */ +#ifndef GMX_UTILITY_ALIGNEDALLOCATOR_H +#define GMX_UTILITY_ALIGNEDALLOCATOR_H + +#include + +#include +#include + +#include "gromacs/utility/basedefinitions.h" + +namespace gmx +{ + +namespace internal +{ + +/*! \brief Allocate aligned memory + * + * \param bytes Amount of memory (bytes) to allocate. It is valid to ask for + * 0 bytes, which will return a non-null pointer that is properly + * aligned and padded (but that you should not use). + * + * \return Valid pointer if the allocation worked, otherwise nullptr. + * + * The memory will always be aligned to 128 bytes, which is our + * estimate of the longest cache lines on architectures currently in use. + * It will also be padded by the same amount at the end of the + * area, to help avoid false cache sharing. + * + * \note Memory allocated with this routine must be released with + * gmx::internal::alignedFree(), and absolutely not the system free(). + */ +void * +alignedMalloc(std::size_t bytes); + +/*! \brief Free aligned memory + * + * \param p Memory pointer previously returned from gmx::internal::alignedMalloc() + * + * \note This routine should only be called with pointers obtained from + * gmx::internal::alignedMalloc(), and absolutely not any pointers obtained + * the system malloc(). + */ +void +alignedFree(void *p); + +} + +/*! \libinternal \brief Aligned memory allocator. + * + * \tparam T Type of objects to allocate + * + * This class can be used for the optional allocator template parameter + * in standard library containers, which is necessary e.g. to use SIMD + * aligned load and store operations in those containers. The memory will always + * be aligned to 128 bytes, which is our estimate of the longest cache lines on + * architectures currently in use. It will also be padded by the same amount at + * the end of the area, to help avoid false cache sharing. + * + * \throws std::bad_alloc Instead of a GROMACS exception object we throw the + * standard one on allocation failures to make it as compatible as possible with + * the errors expected by code using the standard library containers. + * + * \inlibraryapi + * \ingroup module_utility + */ +template +class AlignedAllocator +{ + public: + // The standard library specification for a custom allocator + // requires these typedefs, with this capitalization/underscoring. + typedef T value_type; //!< Type of allocated elements + typedef T &reference; //!< Reference to allocated elements + typedef const T &const_reference; //!< Constant reference to allocated elements + typedef T * pointer; //!< Pointer to allocated elements + typedef const T * const_pointer; //!< Constant pointer to allocated elements + typedef std::size_t size_type; //!< Integer type to use for size of objects + typedef std::ptrdiff_t difference_type; //!< Type to hold differences between pointers + + /*! \libinternal \brief Standard-required typedef to use allocator with different class. + * + * \tparam U new class + * + * This is used for things like std::list where the size of each link + * is larger than the class stored in the link. + * + * Required by the specification for an allocator. + */ + template + struct rebind + { + typedef AlignedAllocator other; //!< Align class U with our alignment + }; + + /*! \brief Return address of an object + * + * \param r Reference to object of type T + * \return Pointer to T memory + */ + pointer + address(reference r) const { return &r; } + + /*! \brief Return address of a const object + * + * \param r Const reference to object of type T + * \return Pointer to T memory + */ + const_pointer + address(const_reference r) const { return &r; } + + /*! \brief Do the actual memory allocation + * + * \param n Number of elements of type T to allocate. n can be + * 0 bytes, which will return a non-null properly aligned + * and padded pointer that should not be used. + * \param hint Optional value returned from previous call to allocate. + * For now this is not used. + * \return Pointer to allocated memory + * + * \throws std::bad_alloc if the allocation fails. + */ + pointer + allocate(std::size_t n, typename std::allocator::const_pointer gmx_unused hint = 0) + { + void *p = internal::alignedMalloc(n*sizeof(T)); + + if (p == nullptr) + { + throw std::bad_alloc(); + } + else + { + return static_cast(p); + } + } + + /*! \brief Release memory + * + * \param p Pointer to previously allocated memory returned from allocate() + * \param n number of objects previously passed to allocate() + */ + void + deallocate(pointer p, std::size_t gmx_unused n) + { + internal::alignedFree(p); + } + + /*! \brief Construct an object without allocating memory + * + * \param p Adress of memory where to construct object + * \param value Reference to object to copy + */ + void + construct(pointer p, const_reference value) { new (p) value_type(value); } + + /*! \brief Call the destructor of object without releasing memory + * + * \param p Address of memory where to destroy object + */ + void + destroy(pointer p) { p->~value_type(); } + + /*! \brief Return largest number of objects that can be allocated + * + * This will be set such that the number of objects T multiplied by + * the size of each object is the largest value that can be represented + * by size_type. + */ + std::size_t + max_size() const { return (static_cast(0) - static_cast(1)) / sizeof(T); } + + /*! \brief Return true if two allocators are identical + * + * \param rhs Other allocator + * + * This is a member function of the left-hand-side allocator. + */ + template + bool + operator==(const AlignedAllocator &gmx_unused rhs) const { return std::is_same::value; } + + /*! \brief Return true if two allocators are different + * + * \param rhs Other allocator. + * + * This is a member function of the left-hand-side allocator. + */ + bool + operator!=(const AlignedAllocator &rhs) const { return !operator==(rhs); } +}; + +} // namespace gmx + +#endif // GMX_UTILITY_ALIGNEDALLOCATOR_H diff --git a/src/gromacs/utility/smalloc.cpp b/src/gromacs/utility/smalloc.cpp index ac8fd499b0..6757d712af 100644 --- a/src/gromacs/utility/smalloc.cpp +++ b/src/gromacs/utility/smalloc.cpp @@ -47,14 +47,12 @@ #ifdef WITH_DMALLOC #include #endif -#ifdef HAVE__ALIGNED_MALLOC -#include -#endif #include #include "thread_mpi/threads.h" +#include "gromacs/utility/alignedallocator.h" #include "gromacs/utility/dir_separator.h" #include "gromacs/utility/fatalerror.h" #ifdef PRINT_ALLOC_KB @@ -248,18 +246,6 @@ void save_free(const char gmx_unused *name, const char gmx_unused *file, int gmx } } -/* If we don't have useful routines for allocating aligned memory, - * then we have to use the old-style GROMACS approach bitwise-ANDing - * pointers to ensure alignment. We store the pointer to the originally - * allocated region in the space before the returned pointer */ - -/* we create a positive define for the absence of an system-provided memalign */ -#if (!defined HAVE_POSIX_MEMALIGN && !defined HAVE_MEMALIGN && \ - !defined HAVE__ALIGNED_MALLOC) -#define GMX_OWN_MEMALIGN -#endif - - /* Pointers allocated with this routine should only be freed * with save_free_aligned, however this will only matter * on systems that lack posix_memalign() and memalign() when @@ -268,9 +254,7 @@ void save_free(const char gmx_unused *name, const char gmx_unused *file, int gmx void *save_malloc_aligned(const char *name, const char *file, int line, size_t nelem, size_t elsize, size_t alignment) { - void **aligned = NULL; - void *malloced = NULL; - gmx_bool allocate_fail; + void * p; if (alignment == 0) { @@ -278,10 +262,17 @@ void *save_malloc_aligned(const char *name, const char *file, int line, "Cannot allocate aligned memory with alignment of zero!\n(called from file %s, line %d)", file, line); } + // Our new alignedMalloc always returns 128-byte aligned memory. + if (alignment > 128) + { + gmx_fatal(errno, __FILE__, __LINE__, + "Cannot allocate aligned memory with alignment > 128 bytes\n(called from file %s, line %d)", file, line); + } + if (nelem == 0 || elsize == 0) { - aligned = NULL; + p = nullptr; } else { @@ -294,40 +285,15 @@ void *save_malloc_aligned(const char *name, const char *file, int line, } #endif -#ifdef HAVE_POSIX_MEMALIGN - allocate_fail = (0 != posix_memalign(&malloced, alignment, nelem*elsize)); -#elif defined HAVE_MEMALIGN - allocate_fail = ((malloced = memalign(alignment, nelem*elsize)) == NULL); -#elif defined HAVE__ALIGNED_MALLOC - allocate_fail = ((malloced = _aligned_malloc(nelem*elsize, alignment)) - == NULL); -#else - allocate_fail = ((malloced = malloc(nelem*elsize+alignment+ - sizeof(void*))) == NULL); -#endif - if (allocate_fail) + p = gmx::internal::alignedMalloc(nelem*elsize); + + if (p == nullptr) { gmx_fatal(errno, __FILE__, __LINE__, "Not enough memory. Failed to allocate %u aligned elements of size %u for %s\n(called from file %s, line %d)", nelem, elsize, name, file, line); } - /* we start with the original pointer */ - aligned = (void**)malloced; - -#ifdef GMX_OWN_MEMALIGN - /* Make the aligned pointer, and save the underlying pointer that - * we're allowed to free(). */ - - /* we first make space to store that underlying pointer: */ - aligned = aligned + 1; - /* then we apply a bit mask */ - aligned = (void *) (((size_t) aligned + alignment - 1) & - (~((size_t) (alignment-1)))); - /* and we store the original pointer in the area just before the - pointer we're going to return */ - aligned[-1] = malloced; -#endif } - return (void*)aligned; + return p; } void *save_calloc_aligned(const char *name, const char *file, int line, @@ -344,22 +310,7 @@ void *save_calloc_aligned(const char *name, const char *file, int line, /* This routine can NOT be called with any pointer */ void save_free_aligned(const char gmx_unused *name, const char gmx_unused *file, int gmx_unused line, void *ptr) { - void *free = ptr; - - if (NULL != ptr) - { -#ifdef GMX_OWN_MEMALIGN - /* we get the pointer from just before the memaligned pointer */ - free = ((void**)ptr)[-1]; -#endif - -#ifndef HAVE__ALIGNED_MALLOC - /* (Now) we're allowed to use a normal free() on this pointer. */ - save_free(name, file, line, free); -#else - _aligned_free(free); -#endif - } + gmx::internal::alignedFree(ptr); } void set_over_alloc_dd(gmx_bool set) diff --git a/src/gromacs/utility/tests/CMakeLists.txt b/src/gromacs/utility/tests/CMakeLists.txt index c980a24363..92a3b88f87 100644 --- a/src/gromacs/utility/tests/CMakeLists.txt +++ b/src/gromacs/utility/tests/CMakeLists.txt @@ -33,6 +33,7 @@ # the research papers on the package. Check out http://www.gromacs.org. gmx_add_unit_test(UtilityUnitTests utility-test + alignedallocator.cpp arrayref.cpp bitmask32.cpp bitmask64.cpp bitmask128.cpp stringutil.cpp diff --git a/src/gromacs/utility/tests/alignedallocator.cpp b/src/gromacs/utility/tests/alignedallocator.cpp new file mode 100644 index 0000000000..01a23ed7ad --- /dev/null +++ b/src/gromacs/utility/tests/alignedallocator.cpp @@ -0,0 +1,85 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2015, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \internal \file + * \brief Tests for gmx::AlignedAllocator + * + * \author Erik Lindahl + * \ingroup module_utility + */ + +#include "gmxpre.h" + +#include "gromacs/utility/alignedallocator.h" + +#include +#include + +#include + +#include "gromacs/utility/real.h" + +namespace gmx +{ + +TEST(AlignedAllocatorTest, AllocatorAlign) +{ + AlignedAllocator a; + real * p = a.allocate(1000); + + // Mask for 128-byte alignment is 128-1 - these bits should be zero in p + std::size_t mask = static_cast(128-1); + + EXPECT_EQ(0, reinterpret_cast(p) & mask); + a.deallocate(p, 1000); +} + + +TEST(AlignedAllocator, Vector) +{ + // Mask for 128-byte alignment is 128-1 - these bits should be zero in pointers + std::size_t mask = static_cast(128-1); + + std::vector > v(10); + EXPECT_EQ(0, reinterpret_cast(v.data()) & mask); + + for (std::size_t i = 10000; i <= 100000; i += 10000) + { + v.resize(i); + EXPECT_EQ(0, reinterpret_cast(v.data()) & mask); + } +} + + +} -- 2.11.4.GIT