From 9be378d7b26843283207ef3d13a6e837ce06a58e Mon Sep 17 00:00:00 2001 From: Mark Abraham Date: Thu, 13 Jul 2017 18:55:13 +0200 Subject: [PATCH] Stop duplicate printing of detected GPUs The detected GPUs and their compatibility status are always printed to both stderr and log file immediately after detection, as part of the normal hardware report. When the user made an invalid selection, we should not print the information about the detected GPUs again (differently). When an auto-selection is made, we do not need to print information about the detected GPUs again. sprint_gpus is now local to a single source file. Added a TODO noting that printing the GPU usage report should be separated from the hardware consistency-checking routine. Change-Id: I47ef0da6bdf58d9a61b8577e539effd96c771b8c --- src/gromacs/hardware/detecthardware.cpp | 28 ++++--------------- src/gromacs/hardware/detecthardware.h | 5 ---- src/gromacs/hardware/hardwareassign.cpp | 49 ++------------------------------- src/gromacs/hardware/hardwareassign.h | 11 ++------ src/programs/mdrun/runner.cpp | 4 ++- 5 files changed, 14 insertions(+), 83 deletions(-) diff --git a/src/gromacs/hardware/detecthardware.cpp b/src/gromacs/hardware/detecthardware.cpp index aa2f827bf5..ea1df4cfaf 100644 --- a/src/gromacs/hardware/detecthardware.cpp +++ b/src/gromacs/hardware/detecthardware.cpp @@ -156,7 +156,10 @@ gmx_bool gmx_gpu_sharing_supported() return bGpuSharingSupported; } -std::string sprint_gpus(const gmx_gpu_info_t &gpu_info) +/*! \internal \brief + * Returns the GPU information text, one GPU per line. + */ +static std::string sprint_gpus(const gmx_gpu_info_t &gpu_info) { char stmp[STRLEN]; std::vector gpuStrings; @@ -168,7 +171,7 @@ std::string sprint_gpus(const gmx_gpu_info_t &gpu_info) return gmx::joinStrings(gpuStrings, "\n"); } -// TODO This function should not live in detectharware.cpp +// TODO This function should not live in detecthardware.cpp /*! 
\brief Helper function for reporting GPU usage information * in the mdrun log file @@ -205,27 +208,6 @@ makeGpuUsageReport(const gmx_gpu_info_t &gpu_info, } std::string output; - if (!userSetGpuIds) - { - auto compatibleGpus = getCompatibleGpus(gpu_info); - int numCompatibleGpus = static_cast(compatibleGpus.size()); - std::string gpuIdsString = - formatAndJoin(compatibleGpus, - ",", gmx::StringFormatter("%d")); - bool bPluralGpus = numCompatibleGpus > 1; - - if (bPrintHostName) - { - output += gmx::formatString("On host %s ", host); - } - output += gmx::formatString("%d compatible GPU%s %s present, with ID%s %s\n", - numCompatibleGpus, - bPluralGpus ? "s" : "", - bPluralGpus ? "are" : "is", - bPluralGpus ? "s" : "", - gpuIdsString.c_str()); - } - { std::vector gpuIdsInUse; for (int i = 0; i < ngpu_use; i++) diff --git a/src/gromacs/hardware/detecthardware.h b/src/gromacs/hardware/detecthardware.h index 62a36c265a..53aeca43f5 100644 --- a/src/gromacs/hardware/detecthardware.h +++ b/src/gromacs/hardware/detecthardware.h @@ -65,11 +65,6 @@ gmx_bool gmx_multiple_gpu_per_node_supported(); * example. */ gmx_bool gmx_gpu_sharing_supported(); -/*! \internal \brief - * Returns the GPU information text, one GPU per line. - */ -std::string sprint_gpus(const gmx_gpu_info_t &gpu_info); - /*! \brief Run detection, consistency checks, and make available on all ranks. * * This routine constructs the global hwinfo structure and returns a pointer to diff --git a/src/gromacs/hardware/hardwareassign.cpp b/src/gromacs/hardware/hardwareassign.cpp index 5a8a6a56d2..25d664fce4 100644 --- a/src/gromacs/hardware/hardwareassign.cpp +++ b/src/gromacs/hardware/hardwareassign.cpp @@ -60,44 +60,6 @@ #define HOSTNAMELEN 80 /*! \internal \brief - * Prints GPU information strings on this node into the stderr and log. - * Only used for logging errors in heterogenous MPI configurations. 
- */ -static void print_gpu_detection_stats(const gmx::MDLogger &mdlog, - const gmx_gpu_info_t &gpu_info) -{ - char onhost[HOSTNAMELEN+10]; - int ngpu; - - if (!gpu_info.bDetectGPUs) - { - /* We skipped the detection, so don't print detection stats */ - return; - } - - ngpu = gpu_info.n_dev; - - /* We only print the detection on one, of possibly multiple, nodes */ - std::strncpy(onhost, " on host ", 10); - gmx_gethostname(onhost + 9, HOSTNAMELEN); - - if (ngpu > 0) - { - std::string gpuDesc = sprint_gpus(gpu_info); - GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted( - "%d GPU%s detected%s:\n%s", - ngpu, (ngpu > 1) ? "s" : "", onhost, gpuDesc.c_str()); - } - else - { - GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted("No GPUs detected%s", onhost); - } - // FIXME: This currently only logs on the master rank, which defeats the purpose. - // A new MDLogger option is required for printing to stderr on all ranks. - // There is also a question of MPI reduction of the outputs, see Redmine issue #1505. -} - -/*! \internal \brief * This function is responsible for mapping the GPUs to the processes on a single node * (filling the gpu_opt->dev_use array). 
* @@ -220,10 +182,10 @@ std::vector getCompatibleGpus(const gmx_gpu_info_t &gpu_info) return compatibleGpus; } -void gmx_select_rank_gpu_ids(const gmx::MDLogger &mdlog, const t_commrec *cr, +void gmx_select_rank_gpu_ids(const t_commrec *cr, const gmx_gpu_info_t &gpu_info, - bool userSetGpuIds, - gmx_gpu_opt_t *gpu_opt) + bool userSetGpuIds, + gmx_gpu_opt_t *gpu_opt) { if (!(cr->duty & DUTY_PP)) { @@ -239,11 +201,6 @@ void gmx_select_rank_gpu_ids(const gmx::MDLogger &mdlog, const t_commrec *cr, std::string errorMessage; if (!checkGpuSelection(gpu_info, gpu_opt, &errorMessage)) { - const bool canHaveHeterogeneousNodes = GMX_LIB_MPI && PAR(cr); - if (canHaveHeterogeneousNodes) - { - print_gpu_detection_stats(mdlog, gpu_info); - } gmx_fatal(FARGS, errorMessage.c_str()); } } diff --git a/src/gromacs/hardware/hardwareassign.h b/src/gromacs/hardware/hardwareassign.h index f7b35fac3d..884115d29e 100644 --- a/src/gromacs/hardware/hardwareassign.h +++ b/src/gromacs/hardware/hardwareassign.h @@ -43,11 +43,6 @@ struct gmx_gpu_info_t; struct gmx_gpu_opt_t; struct t_commrec; -namespace gmx -{ -class MDLogger; -} - /*! 
\brief Select the compatible GPUs * * This function filters gpu_info.gpu_dev for compatible GPUs based @@ -57,9 +52,9 @@ class MDLogger; * \return vector of IDs of GPUs already recorded as compatible */ std::vector getCompatibleGpus(const gmx_gpu_info_t &gpu_info); -void gmx_select_rank_gpu_ids(const gmx::MDLogger &mdlog, const t_commrec *cr, +void gmx_select_rank_gpu_ids(const t_commrec *cr, const gmx_gpu_info_t &gpu_info, - bool userSetGpuIds, - gmx_gpu_opt_t *gpu_opt); + bool userSetGpuIds, + gmx_gpu_opt_t *gpu_opt); #endif diff --git a/src/programs/mdrun/runner.cpp b/src/programs/mdrun/runner.cpp index 4fe97de1e4..53fa251381 100644 --- a/src/programs/mdrun/runner.cpp +++ b/src/programs/mdrun/runner.cpp @@ -1193,7 +1193,7 @@ int mdrunner(gmx_hw_opt_t *hw_opt, if (bUseGPU && !emulateGpu) { /* Select GPU id's to use */ - gmx_select_rank_gpu_ids(mdlog, cr, hwinfo->gpu_info, + gmx_select_rank_gpu_ids(cr, hwinfo->gpu_info, userSetGpuIds, &hw_opt->gpu_opt); } else @@ -1204,6 +1204,8 @@ int mdrunner(gmx_hw_opt_t *hw_opt, /* check consistency across ranks of things like SIMD * support and number of GPUs selected */ + // TODO this also makes a GPU usage report, which should be a + // separate responsibility. gmx_check_hw_runconf_consistency(mdlog, hwinfo, cr, hw_opt, userSetGpuIds, bUseGPU && !emulateGpu); /* Now that we know the setup is consistent, check for efficiency */ -- 2.11.4.GIT