/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2016,2017,2018, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief This file contains internal function definitions for performing the PME calculations on GPU.
 * These are not meant to be exposed outside of the PME GPU code.
 * As of now, their bodies are still in the common pme-gpu.cpp files.
 *
 * \author Aleksei Iupinov <a.yupinov@gmail.com>
 * \ingroup module_ewald
 */
#ifndef GMX_EWALD_PME_GPU_INTERNAL_H
#define GMX_EWALD_PME_GPU_INTERNAL_H

#include "gromacs/fft/fft.h"              // for the gmx_fft_direction enum
#include "gromacs/gpu_utils/gpu_macros.h" // for the GPU_FUNC_ macros
#include "gromacs/utility/arrayref.h"

#include "pme-gpu-types-host.h"           // for the inline functions accessing PmeGpu members

struct gmx_hw_info_t;
struct gmx_gpu_opt_t;
struct gmx_pme_t;                         // only used in pme_gpu_reinit
struct gmx_wallclock_gpu_pme_t;
struct pme_atomcomm_t;
struct t_complex;
namespace gmx
{
class MDLogger;
}
//! Type of spline data
enum class PmeSplineDataType
{
    Values,      // theta
    Derivatives, // dtheta
};               // TODO move this into new and shiny pme.h (pme-types.h?)

//! PME grid dimension ordering (from major to minor)
enum class GridOrdering
{
    YZX,
    XYZ
};
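/* Illustrative sketch, not part of this header: "from major to minor" means that with XYZ
 * ordering X varies slowest and Z fastest, while with YZX ordering Y varies slowest and X
 * fastest. A caller flattening a 3D grid index would do something like the following
 * (the size/coordinate variable names are hypothetical):
 *
 *     int linearIndexXYZ = (x * sizeY + y) * sizeZ + z; // GridOrdering::XYZ
 *     int linearIndexYZX = (y * sizeZ + z) * sizeX + x; // GridOrdering::YZX
 */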
/*! \libinternal \brief
 * Returns the number of atoms per chunk in the atom charges/coordinates data layout.
 * Depends on CUDA-specific block sizes, needed for the atom data padding.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 * \returns   Number of atoms in a single GPU atom data chunk.
 */
int pme_gpu_get_atom_data_alignment(const PmeGpu *pmeGpu);

/*! \libinternal \brief
 * Returns the number of atoms per chunk in the atom spline theta/dtheta data layout.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 * \returns   Number of atoms in a single GPU atom spline data chunk.
 */
int pme_gpu_get_atoms_per_warp(const PmeGpu *pmeGpu);
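/* Illustrative sketch, not part of this header: these chunk sizes are used to round the local
 * atom count up before sizing the padded GPU buffers, e.g. (hypothetical caller code):
 *
 *     const int alignment   = pme_gpu_get_atom_data_alignment(pmeGpu);
 *     const int paddedAtoms = ((nAtoms + alignment - 1) / alignment) * alignment;
 */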
/*! \libinternal \brief
 * Synchronizes the current computation, waiting for the GPU kernels/transfers to finish.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
GPU_FUNC_QUALIFIER void pme_gpu_synchronize(const PmeGpu *GPU_FUNC_ARGUMENT(pmeGpu)) GPU_FUNC_TERM

/*! \libinternal \brief
 * Allocates the fixed size energy and virial buffer both on GPU and CPU.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_alloc_energy_virial(const PmeGpu *pmeGpu);

/*! \libinternal \brief
 * Frees the energy and virial memory both on GPU and CPU.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_free_energy_virial(PmeGpu *pmeGpu);
/*! \libinternal \brief
 * Clears the energy and virial memory on GPU with 0.
 * Should be called at the end of a PME computation that returned energy/virial.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_clear_energy_virial(const PmeGpu *pmeGpu);
/*! \libinternal \brief
 * Reallocates and copies the pre-computed B-spline values to the GPU.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_realloc_and_copy_bspline_values(const PmeGpu *pmeGpu);

/*! \libinternal \brief
 * Frees the pre-computed B-spline values on the GPU (and the transfer CPU buffers).
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_free_bspline_values(const PmeGpu *pmeGpu);

/*! \libinternal \brief
 * Reallocates the GPU buffer for the PME forces.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_realloc_forces(PmeGpu *pmeGpu);

/*! \libinternal \brief
 * Frees the GPU buffer for the PME forces.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_free_forces(const PmeGpu *pmeGpu);
/*! \libinternal \brief
 * Copies the forces from the CPU buffer to the GPU (to reduce them with the PME GPU gathered forces).
 * To be called e.g. after the bonded calculations.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_copy_input_forces(PmeGpu *pmeGpu);

/*! \libinternal \brief
 * Copies the forces from the GPU to the CPU buffer. To be called after the gathering stage.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_copy_output_forces(PmeGpu *pmeGpu);

/*! \libinternal \brief
 * Checks whether work in the PME GPU stream has completed.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 *
 * \returns                     True if work in the PME stream has completed.
 */
bool pme_gpu_stream_query(const PmeGpu *pmeGpu);
/*! \libinternal \brief
 * Reallocates the input coordinates buffer on the GPU (and clears the padded part if needed).
 *
 * \param[in] pmeGpu            The PME GPU structure.
 *
 * Needs to be called at the beginning of the run and on every DD step.
 */
void pme_gpu_realloc_coordinates(const PmeGpu *pmeGpu);
/*! \libinternal \brief
 * Copies the input coordinates from the CPU buffer onto the GPU.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 * \param[in] h_coordinates     Input coordinates (XYZ rvec array).
 *
 * Needs to be called for every PME computation. The coordinates are then used in the spline calculation.
 */
GPU_FUNC_QUALIFIER void pme_gpu_copy_input_coordinates(const PmeGpu *GPU_FUNC_ARGUMENT(pmeGpu),
                                                       const rvec   *GPU_FUNC_ARGUMENT(h_coordinates)) GPU_FUNC_TERM

/*! \libinternal \brief
 * Frees the coordinates on the GPU.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_free_coordinates(const PmeGpu *pmeGpu);
/*! \libinternal \brief
 * Reallocates the buffer on the GPU and copies the charges/coefficients from the CPU buffer.
 * Clears the padded part if needed.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 * \param[in] h_coefficients    The input atom charges/coefficients.
 *
 * Does not need to be done for every PME computation, only whenever the local charges change
 * (i.e. at the beginning of the run, or on a DD step).
 */
void pme_gpu_realloc_and_copy_input_coefficients(const PmeGpu *pmeGpu,
                                                 const float  *h_coefficients);

/*! \libinternal \brief
 * Frees the charges/coefficients on the GPU.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_free_coefficients(const PmeGpu *pmeGpu);
/*! \libinternal \brief
 * Reallocates the buffers on the GPU and the host for the atoms spline data.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_realloc_spline_data(const PmeGpu *pmeGpu);

/*! \libinternal \brief
 * Frees the buffers on the GPU for the atoms spline data.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_free_spline_data(const PmeGpu *pmeGpu);

/*! \libinternal \brief
 * Reallocates the buffers on the GPU and the host for the particle gridline indices.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_realloc_grid_indices(const PmeGpu *pmeGpu);

/*! \libinternal \brief
 * Frees the buffer on the GPU for the particle gridline indices.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_free_grid_indices(const PmeGpu *pmeGpu);
/*! \libinternal \brief
 * Reallocates the real space grid and the complex reciprocal grid (if needed) on the GPU.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_realloc_grids(PmeGpu *pmeGpu);

/*! \libinternal \brief
 * Frees the real space grid and the complex reciprocal grid (if needed) on the GPU.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_free_grids(const PmeGpu *pmeGpu);

/*! \libinternal \brief
 * Clears the real space grid on the GPU.
 * Should be called at the end of each computation.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_clear_grids(const PmeGpu *pmeGpu);
/*! \libinternal \brief
 * Reallocates and copies the pre-computed fractional coordinates' shifts to the GPU.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_realloc_and_copy_fract_shifts(PmeGpu *pmeGpu);

/*! \libinternal \brief
 * Frees the pre-computed fractional coordinates' shifts on the GPU.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_free_fract_shifts(const PmeGpu *pmeGpu);
/*! \libinternal \brief
 * Copies the input real-space grid from the host to the GPU.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 * \param[in] h_grid            The host-side grid buffer.
 */
void pme_gpu_copy_input_gather_grid(const PmeGpu *pmeGpu,
                                    float        *h_grid);

/*! \libinternal \brief
 * Copies the output real-space grid from the GPU to the host.
 *
 * \param[in]  pmeGpu           The PME GPU structure.
 * \param[out] h_grid           The host-side grid buffer.
 */
void pme_gpu_copy_output_spread_grid(const PmeGpu *pmeGpu,
                                     float        *h_grid);

/*! \libinternal \brief
 * Copies the spread output spline data and gridline indices from the GPU to the host.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_copy_output_spread_atom_data(const PmeGpu *pmeGpu);

/*! \libinternal \brief
 * Copies the gather input spline data and gridline indices from the host to the GPU.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_copy_input_gather_atom_data(const PmeGpu *pmeGpu);

/*! \libinternal \brief
 * Waits for the grid copying to the host-side buffer after spreading to finish.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_sync_spread_grid(const PmeGpu *pmeGpu);
/*! \libinternal \brief
 * Does the one-time GPU-framework specific PME initialization.
 * For CUDA, the PME stream is created with the highest priority.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_init_internal(PmeGpu *pmeGpu);

/*! \libinternal \brief
 * Destroys the PME GPU-framework specific data.
 * Should be called last in the PME GPU destructor.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_destroy_specific(const PmeGpu *pmeGpu);

/*! \libinternal \brief
 * Initializes the CUDA FFT structures.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_reinit_3dfft(const PmeGpu *pmeGpu);

/*! \libinternal \brief
 * Destroys the CUDA FFT structures.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_destroy_3dfft(const PmeGpu *pmeGpu);
/* Several GPU event-based timing functions that live in pme-gpu-timings.cpp */

/*! \libinternal \brief
 * Finalizes all the active PME GPU stage timings for the current computation. Should be called at the end of every computation.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_update_timings(const PmeGpu *pmeGpu);

/*! \libinternal \brief
 * Updates the internal list of active PME GPU stages (if timings are enabled).
 *
 * \param[in] pmeGpu            The PME GPU data structure.
 */
void pme_gpu_reinit_timings(const PmeGpu *pmeGpu);

/*! \brief
 * Resets the PME GPU timings. To be called at the reset MD step.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_reset_timings(const PmeGpu *pmeGpu);

/*! \libinternal \brief
 * Copies the PME GPU timings to the gmx_wallclock_gpu_t structure (for log output). To be called at the run end.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 * \param[in] timings           The gmx_wallclock_gpu_pme_t structure.
 */
void pme_gpu_get_timings(const PmeGpu            *pmeGpu,
                         gmx_wallclock_gpu_pme_t *timings);
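/* Illustrative sketch, not part of this header, of where the timing calls above are intended
 * to be made (hypothetical caller code; wallClockGpuPme is a hypothetical pointer):
 *
 *     pme_gpu_reset_timings(pmeGpu);            // at the counter-reset MD step
 *     // ... at the end of each PME computation:
 *     pme_gpu_update_timings(pmeGpu);
 *     // ... at the end of the run, for the log output:
 *     pme_gpu_get_timings(pmeGpu, wallClockGpuPme);
 */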
/* The PME stages themselves */

/*! \libinternal \brief
 * A GPU spline computation and charge spreading function.
 *
 * \param[in]  pmeGpu          The PME GPU structure.
 * \param[in]  gridIndex       Index of the PME grid - unused, assumed to be 0.
 * \param[out] h_grid          The host-side grid buffer (used only if the result of the spread is expected on the host,
 *                             e.g. testing or host-side FFT)
 * \param[in]  computeSplines  Should the computation of spline parameters and gridline indices be performed.
 * \param[in]  spreadCharges   Should the charges/coefficients be spread on the grid.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_spread(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGpu),
                                        int           CUDA_FUNC_ARGUMENT(gridIndex),
                                        real         *CUDA_FUNC_ARGUMENT(h_grid),
                                        bool          CUDA_FUNC_ARGUMENT(computeSplines),
                                        bool          CUDA_FUNC_ARGUMENT(spreadCharges)) CUDA_FUNC_TERM
/*! \libinternal \brief
 * 3D FFT R2C/C2R routine.
 *
 * \param[in] pmeGpu           The PME GPU structure.
 * \param[in] direction        Transform direction (real-to-complex or complex-to-real)
 * \param[in] gridIndex        Index of the PME grid - unused, assumed to be 0.
 */
void pme_gpu_3dfft(const PmeGpu          *pmeGpu,
                   enum gmx_fft_direction direction,
                   const int              gridIndex);

/*! \libinternal \brief
 * A GPU Fourier space solving function.
 *
 * \param[in]     pmeGpu                  The PME GPU structure.
 * \param[in,out] h_grid                  The host-side input and output Fourier grid buffer (used only with testing or host-side FFT)
 * \param[in]     gridOrdering            Specifies the dimension ordering of the complex grid. TODO: store this information?
 * \param[in]     computeEnergyAndVirial  Tells if the energy and virial computation should also be performed.
 */
CUDA_FUNC_QUALIFIER void pme_gpu_solve(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGpu),
                                       t_complex    *CUDA_FUNC_ARGUMENT(h_grid),
                                       GridOrdering  CUDA_FUNC_ARGUMENT(gridOrdering),
                                       bool          CUDA_FUNC_ARGUMENT(computeEnergyAndVirial)) CUDA_FUNC_TERM
/*! \libinternal \brief
 * A GPU force gathering function.
 *
 * \param[in] pmeGpu           The PME GPU structure.
 * \param[in] forceTreatment   Tells how data in h_forces should be treated.
 *                             TODO: determine efficiency/balance of host/device-side reductions.
 * \param[in] h_grid           The host-side grid buffer (used only in testing mode)
 */
CUDA_FUNC_QUALIFIER void pme_gpu_gather(PmeGpu                *CUDA_FUNC_ARGUMENT(pmeGpu),
                                        PmeForceOutputHandling CUDA_FUNC_ARGUMENT(forceTreatment),
                                        const float           *CUDA_FUNC_ARGUMENT(h_grid)
                                        ) CUDA_FUNC_TERM
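/* Illustrative sketch, not part of this header: a single PME GPU computation chains the stage
 * functions above roughly in this order (hypothetical caller code; h_grid, h_complexGrid,
 * gridOrdering, computeEnergyAndVirial and forceTreatment are assumed to be set up by the caller,
 * and the complex-grid ordering depends on which FFT implementation produced it):
 *
 *     pme_gpu_spread(pmeGpu, 0, h_grid, true, true);      // spline computation + spreading
 *     pme_gpu_3dfft(pmeGpu, GMX_FFT_REAL_TO_COMPLEX, 0);  // real-to-complex transform
 *     pme_gpu_solve(pmeGpu, h_complexGrid, gridOrdering, computeEnergyAndVirial);
 *     pme_gpu_3dfft(pmeGpu, GMX_FFT_COMPLEX_TO_REAL, 0);  // complex-to-real transform
 *     pme_gpu_gather(pmeGpu, forceTreatment, h_grid);     // force gathering
 */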
/* The inlined convenience PME GPU status getters */

/*! \libinternal \brief
 * Tells if PME runs on multiple GPUs with the decomposition.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 * \returns                     True if PME runs on multiple GPUs, false otherwise.
 */
inline bool pme_gpu_uses_dd(const PmeGpu *pmeGpu)
{
    return !pmeGpu->settings.useDecomposition;
}

/*! \libinternal \brief
 * Tells if PME performs the gathering stage on GPU.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 * \returns                     True if the gathering is performed on GPU, false otherwise.
 */
inline bool pme_gpu_performs_gather(const PmeGpu *pmeGpu)
{
    return pmeGpu->settings.performGPUGather;
}

/*! \libinternal \brief
 * Tells if PME performs the FFT stages on GPU.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 * \returns                     True if FFT is performed on GPU, false otherwise.
 */
inline bool pme_gpu_performs_FFT(const PmeGpu *pmeGpu)
{
    return pmeGpu->settings.performGPUFFT;
}

/*! \libinternal \brief
 * Tells if PME performs the grid (un-)wrapping on GPU.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 * \returns                     True if (un-)wrapping is performed on GPU, false otherwise.
 */
inline bool pme_gpu_performs_wrapping(const PmeGpu *pmeGpu)
{
    return pmeGpu->settings.useDecomposition;
}

/*! \libinternal \brief
 * Tells if PME performs the grid solving on GPU.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 * \returns                     True if solving is performed on GPU, false otherwise.
 */
inline bool pme_gpu_performs_solve(const PmeGpu *pmeGpu)
{
    return pmeGpu->settings.performGPUSolve;
}
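/* Illustrative sketch, not part of this header: in mixed CPU/GPU mode the higher-level PME code
 * uses these getters to decide which stages stay on the CPU, e.g. (hypothetical caller code):
 *
 *     if (pme_gpu_performs_FFT(pmeGpu))
 *     {
 *         pme_gpu_3dfft(pmeGpu, GMX_FFT_REAL_TO_COMPLEX, gridIndex);
 *     }
 *     else
 *     {
 *         // wait for the spread grid to arrive on the host and run the CPU 3D FFT on it instead
 *     }
 */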
/*! \libinternal \brief
 * Enables or disables the testing mode.
 * Testing mode only implies copying all the outputs, even the intermediate ones, to the host,
 * and also makes the copies synchronous.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 * \param[in] testing           Should the testing mode be enabled, or disabled.
 */
inline void pme_gpu_set_testing(PmeGpu *pmeGpu, bool testing)
{
    if (pmeGpu)
    {
        pmeGpu->settings.copyAllOutputs = testing;
        pmeGpu->settings.transferKind   = testing ? GpuApiCallBehavior::Sync : GpuApiCallBehavior::Async;
    }
}

/*! \libinternal \brief
 * Tells if PME is in the testing mode.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 * \returns                     True if testing mode is enabled, false otherwise.
 */
inline bool pme_gpu_is_testing(const PmeGpu *pmeGpu)
{
    return pmeGpu->settings.copyAllOutputs;
}
/* A block of C++ functions that live in pme-gpu-internal.cpp */

/*! \libinternal \brief
 * Returns the GPU gathering staging forces buffer.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 * \returns                     The input/output forces.
 */
GPU_FUNC_QUALIFIER gmx::ArrayRef<gmx::RVec> pme_gpu_get_forces(PmeGpu *GPU_FUNC_ARGUMENT(pmeGpu)) GPU_FUNC_TERM_WITH_RETURN(gmx::EmptyArrayRef())

/*! \libinternal \brief
 * Returns the output virial and energy of the PME solving.
 *
 * \param[in]  pmeGpu           The PME GPU structure.
 * \param[out] energy           The output energy.
 * \param[out] virial           The output virial matrix.
 */
GPU_FUNC_QUALIFIER void pme_gpu_get_energy_virial(const PmeGpu *GPU_FUNC_ARGUMENT(pmeGpu),
                                                  real         *GPU_FUNC_ARGUMENT(energy),
                                                  matrix        GPU_FUNC_ARGUMENT(virial)) GPU_FUNC_TERM
/*! \libinternal \brief
 * Updates the unit cell parameters. Does not check if update is necessary - that is done in pme_gpu_prepare_computation().
 *
 * \param[in] pmeGpu            The PME GPU structure.
 * \param[in] box               The unit cell box.
 */
GPU_FUNC_QUALIFIER void pme_gpu_update_input_box(PmeGpu      *GPU_FUNC_ARGUMENT(pmeGpu),
                                                 const matrix GPU_FUNC_ARGUMENT(box)) GPU_FUNC_TERM

/*! \libinternal \brief
 * Finishes the PME GPU computation, waiting for the output forces and/or energy/virial to be copied to the host.
 * If forces were computed, they will have arrived at the external host buffer provided to gather.
 * If virial/energy were computed, they will have arrived into the internal staging buffer
 * (even though that should have already happened before even launching the gather).
 * Finally, cudaEvent_t based GPU timers get updated if enabled. They also need stream synchronization for correctness.
 * Additionally, device-side buffers are cleared asynchronously for the next computation.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_finish_computation(const PmeGpu *pmeGpu);
//! A binary enum for spline data layout transformation
enum class PmeLayoutTransform
{
    GpuToHost,
    HostToGpu
};

/*! \libinternal \brief
 * Rearranges the atom spline data between the GPU and host layouts.
 * Only used for test purposes so far, likely to be horribly slow.
 *
 * \param[in]  pmeGpu     The PME GPU structure.
 * \param[out] atc        The PME CPU atom data structure (with a single-threaded layout).
 * \param[in]  type       The spline data type (values or derivatives).
 * \param[in]  dimIndex   Dimension index.
 * \param[in]  transform  Layout transform type
 */
GPU_FUNC_QUALIFIER void pme_gpu_transform_spline_atom_data(const PmeGpu         *GPU_FUNC_ARGUMENT(pmeGpu),
                                                           const pme_atomcomm_t *GPU_FUNC_ARGUMENT(atc),
                                                           PmeSplineDataType     GPU_FUNC_ARGUMENT(type),
                                                           int                   GPU_FUNC_ARGUMENT(dimIndex),
                                                           PmeLayoutTransform    GPU_FUNC_ARGUMENT(transform)) GPU_FUNC_TERM
/*! \libinternal \brief
 * Gets a unique index to an element in a spline parameter buffer (theta/dtheta),
 * which is laid out for GPU spread/gather kernels. The index is wrt the execution block,
 * in range(0, atomsPerBlock * order * DIM).
 * This is a wrapper, only used in unit tests.
 *
 * \param[in] order           PME order
 * \param[in] splineIndex     Spline contribution index (from 0 to \p order - 1)
 * \param[in] dimIndex        Dimension index (from 0 to 2)
 * \param[in] warpIndex       Warp index wrt the block.
 * \param[in] atomWarpIndex   Atom index wrt the warp (0 or 1).
 *
 * \returns Index into theta or dtheta array using GPU layout.
 */
int getSplineParamFullIndex(int order,
                            int splineIndex,
                            int dimIndex,
                            int warpIndex,
                            int atomWarpIndex);
/*! \libinternal \brief
 * Get the normal/padded grid dimensions of the real-space PME grid on GPU. Only used in tests.
 *
 * \param[in]  pmeGpu            The PME GPU structure.
 * \param[out] gridSize          Pointer to the grid dimensions to fill in.
 * \param[out] paddedGridSize    Pointer to the padded grid dimensions to fill in.
 */
GPU_FUNC_QUALIFIER void pme_gpu_get_real_grid_sizes(const PmeGpu *GPU_FUNC_ARGUMENT(pmeGpu),
                                                    gmx::IVec    *GPU_FUNC_ARGUMENT(gridSize),
                                                    gmx::IVec    *GPU_FUNC_ARGUMENT(paddedGridSize)) GPU_FUNC_TERM
/*! \libinternal \brief
 * (Re-)initializes the PME GPU data at the beginning of the run or on DLB.
 *
 * \param[in,out] pme             The PME structure.
 * \param[in,out] gpuInfo         The GPU information structure.
 * \param[in]     pmeGpuProgram   The PME GPU program data
 * \throws gmx::NotImplementedError if this generally valid PME structure is not valid for GPU runs.
 */
GPU_FUNC_QUALIFIER void pme_gpu_reinit(gmx_pme_t          *GPU_FUNC_ARGUMENT(pme),
                                       gmx_device_info_t  *GPU_FUNC_ARGUMENT(gpuInfo),
                                       PmeGpuProgramHandle GPU_FUNC_ARGUMENT(pmeGpuProgram)) GPU_FUNC_TERM

/*! \libinternal \brief
 * Destroys the PME GPU data at the end of the run.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
GPU_FUNC_QUALIFIER void pme_gpu_destroy(PmeGpu *GPU_FUNC_ARGUMENT(pmeGpu)) GPU_FUNC_TERM
/*! \libinternal \brief
 * Reallocates the local atoms data (charges, coordinates, etc.). Copies the charges to the GPU.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 * \param[in] nAtoms            The number of particles.
 * \param[in] charges           The pointer to the host-side array of particle charges.
 *
 * This function should only be called at the beginning of the run and on domain decomposition.
 * It should be called before pme_gpu_set_io_ranges.
 */
GPU_FUNC_QUALIFIER void pme_gpu_reinit_atoms(PmeGpu     *GPU_FUNC_ARGUMENT(pmeGpu),
                                             const int   GPU_FUNC_ARGUMENT(nAtoms),
                                             const real *GPU_FUNC_ARGUMENT(charges)) GPU_FUNC_TERM
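/* Illustrative sketch, not part of this header, of the intended (re-)initialization order at the
 * start of the run or on domain decomposition (hypothetical caller code; the pme->gpu member
 * access is an assumption):
 *
 *     pme_gpu_reinit(pme, gpuInfo, pmeGpuProgram);      // grids, GPU buffers, FFT setup
 *     pme_gpu_reinit_atoms(pme->gpu, nAtoms, charges);  // local atom count and charges
 *     // afterwards, pme_gpu_copy_input_coordinates() is called for every PME computation
 */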
/*! \brief \libinternal
 * The PME GPU reinitialization function that is called both at the end of any PME computation and on any load balancing.
 *
 * This clears the device-side working buffers in preparation for new computation.
 *
 * \param[in] pmeGpu            The PME GPU structure.
 */
void pme_gpu_reinit_computation(const PmeGpu *pmeGpu);

#endif