Simplify compiling GPU code for tests
[gromacs.git] / src / gromacs / utility / alignedallocator.cpp
bloba6d073c3c9346762990829ab57995c76218cb632
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 /*! \internal \file
36 * \brief
37 * Implements AlignedAllocator.
39 * \author Erik Lindahl <erik.lindahl@gmail.com>
40 * \author Mark Abraham <mark.j.abraham@gmail.com>
41 * \ingroup module_utility
43 #include "gmxpre.h"
45 #include "alignedallocator.h"
47 #include "config.h"
49 #include <cstdlib>
51 #include <memory>
53 #if HAVE_MM_MALLOC_H
54 # include <mm_malloc.h>
55 #elif HAVE_MALLOC_H
56 # include <malloc.h>
57 #elif HAVE_XMMINTRIN_H
58 # include <xmmintrin.h>
59 #endif
61 #ifdef HAVE_UNISTD_H
62 # include <unistd.h>
63 #endif
65 #if GMX_NATIVE_WINDOWS
66 # include <windows.h> // only for the page size query purposes
67 #endif
69 #include "gromacs/utility/gmxassert.h"
71 namespace gmx
74 namespace
77 /*! \brief Allocate aligned memory in a fully portable way
79 * \param bytes Amount of memory (bytes) to allocate. The routine will return
80 * nullptr if the allocation fails. However, note that asking for
81 * zero bytes will return a pointer that is non-null and properly
82 * aligned (but obviously you cannot use it, since you promised
83 * not to access data beyond the 0 bytes you asked for).
85 * \param alignment Alignment specification in bytes, must be a power of 2.
87 * \return Nonzero pointer if the allocation worked, otherwise nullptr.
88 * This routine should only be called from alignedMalloc(), which also does
89 * the checking for valid values. This particular function is used for platforms
90 * where we have no control of the alignment of memory returned by the system.
91 * Instead, we increase the amount of memory requested internally such that we
92 * both can create a pointer inside this memory that fulfills the memory
93 * alignment requested, and that we have room to store the original pointer
94 * just before this area.
96 * \note This is an internal routine that should only be called from
97 * gmx::alignedMalloc(). Just like system-provided routines, it provides
98 * memory that is aligned - but not padded.
100 gmx_unused void* alignedMallocGeneric(std::size_t bytes, std::size_t alignment)
102 // The amount of extra memory (beyound what the user asked for) we need is:
103 // - sizeof(void *), to store the original pointer
104 // - alignment, to make sure we have an aligned pointer in the area
105 void* pMalloc = malloc(bytes + sizeof(void*) + alignment);
107 if (pMalloc == nullptr)
109 return nullptr;
112 // Convert pMalloc to size_t (so we work with raw bytes), add the space we
113 // need to save the original pointer, and (alignment-1) bytes, and then mask
114 // out the lowest bits.
115 std::size_t mask = ~static_cast<std::size_t>(alignment - 1);
116 void* pAligned = reinterpret_cast<void*>(
117 (reinterpret_cast<std::size_t>(pMalloc) + sizeof(void*) + alignment - 1) & mask);
119 // Store original pointer. Since we allocated at least sizeof(void *) extra
120 // space this is always a valid memory location.
121 reinterpret_cast<void**>(pAligned)[-1] = pMalloc;
123 return pAligned;
127 /*! \brief Free aligned memory
129 * \param p Memory pointer previously returned from
130 * gmx::internal::alignedFreePortable().
132 * Since this routine relies on the original pointer being stored just before
133 * the memory area p points to, bad things will happen if you call this routine
134 * with a pointer obtained any other way, or if you call the system free()
135 * with a pointer obtained from std::alignedMalloc().
137 * \note This is an internal routine that should only be called from
138 * gmx::alignedFree().
140 gmx_unused void alignedFreeGeneric(void* p)
142 if (p)
144 // Pick up the pointer stored just below p, and use that to call free()
145 free(reinterpret_cast<void**>(p)[-1]);
149 //! Implement malloc of \c bytes of memory, aligned to \c alignment.
150 void* mallocImpl(std::size_t bytes, std::size_t alignment)
152 void* p;
154 #if HAVE__MM_MALLOC
155 p = _mm_malloc(bytes, alignment);
156 #elif HAVE_POSIX_MEMALIGN
157 if (posix_memalign(&p, alignment, bytes) != 0)
159 p = nullptr;
161 #elif HAVE_MEMALIGN
162 p = memalign(alignment, bytes);
163 #elif HAVE__ALIGNED_MALLOC
164 p = _aligned_malloc(bytes, alignment);
165 #else
166 p = internal::alignedMallocGeneric(bytes, alignment);
167 #endif
169 return p;
172 //! Free aligned memory allocated with mallocImpl().
173 void freeImpl(void* p)
175 if (p)
177 #if HAVE__MM_MALLOC
178 _mm_free(p);
179 #elif HAVE_POSIX_MEMALIGN || HAVE_MEMALIGN
180 free(p);
181 #elif HAVE__ALIGNED_MALLOC
182 _aligned_free(p);
183 #else
184 internal::alignedFreeGeneric(p);
185 #endif
189 } // namespace
191 // === AlignedAllocationPolicy
193 std::size_t AlignedAllocationPolicy::alignment()
195 // For now we always use 128-byte alignment:
196 // 1) IBM Power already has cache lines of 128-bytes, and needs it.
197 // 2) x86 has 64 byte cache lines, but since a future AVX-1024 (rumored?)
198 // will need 1024/8=128 byte SIMD alignment, it is safer to use that
199 // already now.
200 // 3) The old Pentium4 used 256-byte cache prefetching (but 64-byte lines).
201 // However, it's not worth worrying about performance for P4...
202 // 4) ARM & Sparc have 64 byte lines, but will be just fine with
203 // 128-byte alignment (nobody knows what the future brings)
205 // So, for now we're semi-lazy and just align to 128 bytes!
207 // TODO LINCS code is copying this assumption independently (for now)
208 return 128;
211 void* AlignedAllocationPolicy::malloc(std::size_t bytes)
213 // Pad memory at the end with another alignment bytes to avoid false sharing
214 auto size = alignment();
215 bytes += size;
217 return mallocImpl(bytes, size);
220 void AlignedAllocationPolicy::free(void* p)
222 freeImpl(p);
225 // === PageAlignedAllocationPolicy
//! Return a page size, from a sysconf/WinAPI query if available, or a default guess (4096 bytes).
//! \todo Move this function into sysinfo.cpp where other OS-specific code/includes live
static std::size_t getPageSize()
{
    // -1 means "no usable answer": it is both the value used when no query
    // is compiled in and the value sysconf() returns on its error conditions.
    long queriedPageSize = -1;
#if GMX_NATIVE_WINDOWS
    SYSTEM_INFO systemInfo;
    GetNativeSystemInfo(&systemInfo);
    queriedPageSize = systemInfo.dwPageSize;
#elif defined(_SC_PAGESIZE)
    /* A sysconf error is not handled separately: we don't really need to
       check for it, nor could we really handle it at initialization time.
       It simply ends up in the fallback below. */
    queriedPageSize = sysconf(_SC_PAGESIZE);
#elif defined(_SC_PAGE_SIZE)
    queriedPageSize = sysconf(_SC_PAGE_SIZE);
#endif

    if (queriedPageSize == -1)
    {
        return 4096; // A useful guess
    }
    return static_cast<std::size_t>(queriedPageSize);
}
250 /* Implements the "construct on first use" idiom to avoid the static
251 * initialization order fiasco where a possible static page-aligned
252 * container would be initialized before the alignment variable was.
254 * Note that thread-safety of the initialization is guaranteed by the
255 * C++11 language standard.
257 * The size_t has no destructor, so there is no deinitialization
258 * issue. See https://isocpp.org/wiki/faq/ctors for discussion of
259 * alternatives and trade-offs. */
260 std::size_t PageAlignedAllocationPolicy::alignment()
262 static size_t thePageSize = getPageSize();
263 return thePageSize;
266 void* PageAlignedAllocationPolicy::malloc(std::size_t bytes)
268 return mallocImpl(bytes, alignment());
271 void PageAlignedAllocationPolicy::free(void* p)
273 freeImpl(p);
276 } // namespace gmx