From dd36b9ad6c43a5d5c81bbad4b5ec0071ddfd367c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Szil=C3=A1rd=20P=C3=A1ll?= Date: Fri, 20 Oct 2017 20:55:45 +0200 Subject: [PATCH] Deduplicate CUDA and OpenCL timer struct The struct is identical in both CUDA/OpenCL so it's better placed in a common header, but this needs to be an internal-only header as it pulls in CUDA dependencies. Change-Id: I907d68b7c298f2ba0e7a1af2baf4819f637e2f2e --- src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_types.h | 22 +------- src/gromacs/mdlib/nbnxn_gpu_types_common.h | 75 +++++++++++++++++++++++++ src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_types.h | 24 ++------ 3 files changed, 83 insertions(+), 38 deletions(-) create mode 100644 src/gromacs/mdlib/nbnxn_gpu_types_common.h diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_types.h b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_types.h index 56e8270e60..06a2dba7a2 100644 --- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_types.h +++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_types.h @@ -48,8 +48,8 @@ #include "gromacs/gpu_utils/cuda_arch_utils.cuh" #include "gromacs/gpu_utils/cudautils.cuh" -#include "gromacs/gpu_utils/gpuregiontimer.cuh" #include "gromacs/mdlib/nbnxn_consts.h" +#include "gromacs/mdlib/nbnxn_gpu_types_common.h" #include "gromacs/mdlib/nbnxn_pairlist.h" #include "gromacs/mdtypes/interaction_const.h" #include "gromacs/timing/gpu_timing.h" @@ -116,7 +116,6 @@ enum evdwCu { typedef struct cu_plist cu_plist_t; typedef struct cu_atomdata cu_atomdata_t; typedef struct cu_nbparam cu_nbparam_t; -typedef struct cu_timers cu_timers_t; typedef struct nb_staging nb_staging_t; /*! \endcond */ @@ -229,24 +228,9 @@ struct cu_plist }; /** \internal - * \brief CUDA timers used for timing GPU kernels and H2D/D2H transfers. - * - * The two-sized arrays hold the local and non-local values and should always - * be indexed with eintLocal/eintNonlocal. + * \brief Typedef of actual timer type. */ -struct cu_timers -{ - GpuRegionTimer atdat; /**< timer for atom data transfer (every PS step) */ - GpuRegionTimer nb_h2d[2]; /**< timer for x/q H2D transfers (l/nl, every step) */ - GpuRegionTimer nb_d2h[2]; /**< timer for f D2H transfer (l/nl, every step) */ - GpuRegionTimer pl_h2d[2]; /**< timer for pair-list H2D transfers (l/nl, every PS step) */ - bool didPairlistH2D[2]; /**< true when a pair-list transfer has been done at this step */ - GpuRegionTimer nb_k[2]; /**< timer for non-bonded kernels (l/nl, every step) */ - GpuRegionTimer prune_k[2]; /**< timer for the 1st pass list pruning kernel (l/nl, every PS step) */ - bool didPrune[2]; /**< true when we timed pruning and the timings need to be accounted for */ - GpuRegionTimer rollingPrune_k[2]; /**< timer for rolling pruning kernels (l/nl, frequency depends on chunk size) */ - bool didRollingPrune[2]; /**< true when we timed rolling pruning (at the previous step) and the timings need to be accounted for */ -}; +typedef struct nbnxn_gpu_timers_t cu_timers_t; /** \internal * \brief Main data structure for CUDA nonbonded force calculations. diff --git a/src/gromacs/mdlib/nbnxn_gpu_types_common.h b/src/gromacs/mdlib/nbnxn_gpu_types_common.h new file mode 100644 index 0000000000..e4d86421cd --- /dev/null +++ b/src/gromacs/mdlib/nbnxn_gpu_types_common.h @@ -0,0 +1,75 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2017, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \internal \file + * \brief Implements common internal types for different NBNXN GPU implementations + * + * \author Szilárd Páll + * \ingroup module_mdlib + */ + +#ifndef GMX_MDLIB_NBNXN_GPU_COMMON_TYPES_H +#define GMX_MDLIB_NBNXN_GPU_COMMON_TYPES_H + +#include "config.h" + +#if GMX_GPU == GMX_GPU_OPENCL +#include "gromacs/gpu_utils/gpuregiontimer_ocl.h" +#endif + +#if GMX_GPU == GMX_GPU_CUDA +#include "gromacs/gpu_utils/gpuregiontimer.cuh" +#endif + +/*! \internal + * \brief GPU region timers used for timing GPU kernels and H2D/D2H transfers. + * + * The two-sized arrays hold the local and non-local values and should always + * be indexed with eintLocal/eintNonlocal. + */ +struct nbnxn_gpu_timers_t +{ + GpuRegionTimer atdat; /**< timer for atom data transfer (every PS step) */ + GpuRegionTimer nb_h2d[2]; /**< timer for x/q H2D transfers (l/nl, every step) */ + GpuRegionTimer nb_d2h[2]; /**< timer for f D2H transfer (l/nl, every step) */ + GpuRegionTimer pl_h2d[2]; /**< timer for pair-list H2D transfers (l/nl, every PS step) */ + bool didPairlistH2D[2]; /**< true when a pair-list transfer has been done at this step */ + GpuRegionTimer nb_k[2]; /**< timer for non-bonded kernels (l/nl, every step) */ + GpuRegionTimer prune_k[2]; /**< timer for the 1st pass list pruning kernel (l/nl, every PS step) */ + bool didPrune[2]; /**< true when we timed pruning and the timings need to be accounted for */ + GpuRegionTimer rollingPrune_k[2]; /**< timer for rolling pruning kernels (l/nl, frequency depends on chunk size) */ + bool didRollingPrune[2]; /**< true when we timed rolling pruning (at the previous step) and the timings need to be accounted for */ +}; + +#endif diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_types.h b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_types.h index 4e1b64c6c0..52be9fd29e 100644 --- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_types.h +++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_types.h @@ -55,8 +55,8 @@ # include #endif -#include "gromacs/gpu_utils/gpuregiontimer_ocl.h" #include "gromacs/gpu_utils/oclutils.h" +#include "gromacs/mdlib/nbnxn_gpu_types_common.h" #include "gromacs/mdlib/nbnxn_pairlist.h" #include "gromacs/mdtypes/interaction_const.h" #include "gromacs/utility/real.h" @@ -298,25 +298,11 @@ typedef struct cl_plist int rollingPruningPart; /**< the next part to which the roling pruning needs to be applied */ }cl_plist_t; -/*! \internal - * \brief OpenCL events used for timing GPU kernels and H2D/D2H transfers. - * - * The two-sized arrays hold the local and non-local values and should always - * be indexed with eintLocal/eintNonlocal. + +/** \internal + * \brief Typedef of actual timer type. */ -typedef struct cl_timers -{ - GpuRegionTimer atdat; /**< timer for atom data transfer (every PS step) */ - GpuRegionTimer nb_h2d[2]; /**< timer for x/q H2D transfers (l/nl, every step) */ - GpuRegionTimer nb_d2h[2]; /**< timer for f D2H transfer (l/nl, every step) */ - GpuRegionTimer pl_h2d[2]; /**< timer for pair-list H2D transfers (l/nl, every PS step) */ - bool didPairlistH2D[2]; /**< true when a pair-list transfer has been done at this step */ - GpuRegionTimer nb_k[2]; /**< timer for non-bonded kernels (l/nl, every step) */ - GpuRegionTimer prune_k[2]; /**< timer for the 1st pass list pruning kernel (l/nl, every PS step) */ - bool didPrune[2]; /**< true when we timed pruning and the timings need to be accounted for */ - GpuRegionTimer rollingPrune_k[2]; /**< timer for rolling pruning kernels (l/nl, frequency depends on chunk size) */ - bool didRollingPrune[2]; /**< true when we timed rolling pruning (at the previous step) and the timings need to be accounted for */ -} cl_timers_t; +typedef struct nbnxn_gpu_timers_t cl_timers_t; /*! \internal * \brief Main data structure for OpenCL nonbonded force calculations. -- 2.11.4.GIT