From 99f65881311effd767574e073b9b65b96f954141 Mon Sep 17 00:00:00 2001
From: Mark Abraham <mark.j.abraham@gmail.com>
Date: Fri, 4 Aug 2017 18:09:17 +0200
Subject: [PATCH] Clarify data flow in GPU task assignment

Previously, hw_opt.gpu_opt->dev_use could be filled with data at two
widely separated points, which was harder to understand and maintain
than this alternative.

The possible user-supplied task assignment is now copied into the
vector for task assignment that is later used by mdrunner to configure
the modules that do the work. The automated assignment fills the same
data structure, but the semantics are improved by using a vector, and
copying the user assignment is now trivial.

userGpuTaskAssignment is now part of hw_opt like other user options,
and stored in a string. Eventually the ordinary-data type hw_opt will
be used in a const-like manner, so this changes helps with that.
gmx_hw_opt_t now cannot be allocated with snew, but there is no longer
any reason to consider doing so.

Together, these eliminate gmx_gpu_opt_t because it is now merely a
vector (for now).

The parsing of the string from mdrun -gpu_id is simplified now that
its result is stored in a vector (and tests added). This aspect is
likely to get more complicated when a user assignment might look
different, e.g. "01:2,3:45" if/when we have two ranks each doing a two
PP and one PME task on different GPUs. Moved the parsing code from
detecthardware.cpp to hardwareassign.cpp, where it makes a bit more
sense.

Related trivial cleanup in gmx tune_pme, that uses the same parsing
machinery for GPU IIs, so that it can manage suitably.

Removed getCompatibleGpus() from header file, since its usage is now
local to hardwareassign.cpp. Similary, anyGpuIdIsRepeated() is no
longer used.

Change-Id: I1071281d3348c9cce05e7ac39a24775611ccc0dd
---
 src/gromacs/gmxana/gmx_tune_pme.cpp                |  34 +++----
 src/gromacs/hardware/detecthardware.cpp            |  42 ++------
 src/gromacs/hardware/detecthardware.h              |  20 ++--
 src/gromacs/hardware/gpu_hw_info.h                 |   9 --
 src/gromacs/hardware/hardwareassign.cpp            | 110 ++++++++++++++-------
 src/gromacs/hardware/hardwareassign.h              |  51 ++++++----
 src/gromacs/hardware/hw_info.h                     |  44 ++++++---
 src/gromacs/hardware/printhardware.cpp             |  46 +++++----
 src/gromacs/hardware/printhardware.h               |  26 ++---
 .../mdrunutility/tests/threadaffinitytest.cpp      |   4 +-
 .../mdrunutility/tests/threadaffinitytest.h        |  10 +-
 src/gromacs/utility/cstringutil.cpp                |  78 ---------------
 src/gromacs/utility/cstringutil.h                  |  46 ---------
 src/gromacs/utility/stringutil.cpp                 |  23 +++++
 src/gromacs/utility/stringutil.h                   |  12 +++
 src/gromacs/utility/tests/stringutil.cpp           |  17 ++++
 src/programs/mdrun/mdrun.cpp                       |  27 +++--
 src/programs/mdrun/resource-division.cpp           |  41 ++------
 src/programs/mdrun/resource-division.h             |  15 +--
 src/programs/mdrun/runner.cpp                      |  47 +++++----
 src/programs/mdrun/runner.h                        |   5 +-
 21 files changed, 322 insertions(+), 385 deletions(-)

diff --git a/src/gromacs/gmxana/gmx_tune_pme.cpp b/src/gromacs/gmxana/gmx_tune_pme.cpp
index e93b4298f1..295facf30d 100644
--- a/src/gromacs/gmxana/gmx_tune_pme.cpp
+++ b/src/gromacs/gmxana/gmx_tune_pme.cpp
@@ -70,6 +70,7 @@
 #include "gromacs/utility/futil.h"
 #include "gromacs/utility/gmxassert.h"
 #include "gromacs/utility/smalloc.h"
+#include "gromacs/utility/stringutil.h"
 
 /* Enum for situations that can occur during log file parsing, the
  * corresponding string entries can be found in do_the_tests() in
@@ -750,16 +751,8 @@ static void check_mdrun_works(gmx_bool    bThreads,
     sfree(command);
 }
 
-/*! \brief Helper struct so we can parse the string with eligible GPU
-    IDs outside do_the_tests. */
-typedef struct eligible_gpu_ids
-{
-    int  n;        /**< Length of ids */
-    int *ids;      /**< Array of length n. NULL if no GPUs in use */
-} t_eligible_gpu_ids;
-
 /* Handles the no-GPU case by emitting an empty string. */
-static char *make_gpu_id_command_line(int numRanks, int numPmeRanks, const t_eligible_gpu_ids *gpu_ids)
+static char *make_gpu_id_command_line(int numRanks, int numPmeRanks, const std::vector<int> &gpu_ids)
 {
     char       *command_line, *ptr;
     const char *flag = "-gpu_id ";
@@ -775,18 +768,17 @@ static char *make_gpu_id_command_line(int numRanks, int numPmeRanks, const t_eli
     /* If the user has given no eligible GPU IDs, or we're trying the
      * default behaviour, then there is nothing for g_tune_pme to give
      * to mdrun -gpu_id */
-    if (gpu_ids->n > 0 && numPmeRanks > -1)
+    if (!gpu_ids.empty() && numPmeRanks > -1)
     {
-        int   numPpRanks, max_num_ranks_for_each_GPU;
-        int   gpu_id, rank;
+        size_t numPpRanks, max_num_ranks_for_each_GPU;
 
         /* Write the option flag */
         std::strcpy(ptr, flag);
         ptr += flag_length;
 
         numPpRanks                 = numRanks - numPmeRanks;
-        max_num_ranks_for_each_GPU = numPpRanks / gpu_ids->n;
-        if (max_num_ranks_for_each_GPU * gpu_ids->n != numPpRanks)
+        max_num_ranks_for_each_GPU = numPpRanks / gpu_ids.size();
+        if (max_num_ranks_for_each_GPU * gpu_ids.size() != numPpRanks)
         {
             /* Some GPUs will receive more work than others, which
              * we choose to be those with the lowest indices */
@@ -794,16 +786,16 @@ static char *make_gpu_id_command_line(int numRanks, int numPmeRanks, const t_eli
         }
 
         /* Loop over all eligible GPU ids */
-        for (gpu_id = 0, rank = 0; gpu_id < gpu_ids->n; gpu_id++)
+        for (size_t gpu_id = 0, rank = 0; gpu_id < gpu_ids.size(); gpu_id++)
         {
-            int rank_for_this_GPU;
+            size_t rank_for_this_GPU;
             /* Loop over all PP ranks for GPU with ID gpu_id, building the
                assignment string. */
             for (rank_for_this_GPU = 0;
                  rank_for_this_GPU < max_num_ranks_for_each_GPU && rank < numPpRanks;
                  rank++, rank_for_this_GPU++)
             {
-                *ptr = '0' + gpu_ids->ids[gpu_id];
+                *ptr = '0' + gpu_ids[gpu_id];
                 ptr++;
             }
         }
@@ -824,7 +816,7 @@ static void launch_simulation(
         const char               *simulation_tpr, /* This tpr will be simulated */
         int                       nnodes,         /* Number of ranks to use */
         int                       nPMEnodes,      /* Number of PME ranks to use */
-        const t_eligible_gpu_ids *gpu_ids)        /* Struct containing GPU IDs for
+        const std::vector<int>   &gpu_ids)        /* Vector of GPU IDs for
                                                    * constructing mdrun command lines */
 {
     char  *command, *cmd_gpu_ids;
@@ -1443,7 +1435,7 @@ static void do_the_tests(
         int                       presteps,       /* DLB equilibration steps, is checked    */
         gmx_int64_t               cpt_steps,      /* Time step counter in the checkpoint    */
         gmx_bool                  bCheck,         /* Check whether benchmark mdrun works    */
-        const t_eligible_gpu_ids *gpu_ids)        /* Struct containing GPU IDs for
+        const std::vector<int>   &gpu_ids)        /* Vector of GPU IDs for
                                                    * constructing mdrun command lines */
 {
     int      i, nr, k, ret, count = 0, totaltests;
@@ -2222,7 +2214,6 @@ int gmx_tune_pme(int argc, char *argv[])
 
     /* IDs of GPUs that are eligible for computation */
     char               *eligible_gpu_ids = nullptr;
-    t_eligible_gpu_ids *gpu_ids          = nullptr;
 
     t_perf            **perfdata = nullptr;
     t_inputinfo        *info;
@@ -2464,8 +2455,7 @@ int gmx_tune_pme(int argc, char *argv[])
                 bench_nsteps, fnm, NFILE, sim_part, presteps,
                 asize(pa), pa);
     /* Check any GPU IDs passed make sense, and fill the data structure for them */
-    snew(gpu_ids, 1);
-    parse_digits_from_string(eligible_gpu_ids, &gpu_ids->n, &gpu_ids->ids);
+    auto gpu_ids = gmx::parseDigitsFromString(eligible_gpu_ids);
 
     /* Determine the maximum and minimum number of PME nodes to test,
      * the actual list of settings is build in do_the_tests(). */
diff --git a/src/gromacs/hardware/detecthardware.cpp b/src/gromacs/hardware/detecthardware.cpp
index 976a695821..7096bb5faa 100644
--- a/src/gromacs/hardware/detecthardware.cpp
+++ b/src/gromacs/hardware/detecthardware.cpp
@@ -120,12 +120,12 @@ static int                 n_hwinfo = 0;
 /* A lock to protect the hwinfo structure */
 static tMPI_Thread_mutex_t hw_info_lock = TMPI_THREAD_MUTEX_INITIALIZER;
 
-void gmx_check_hw_runconf_consistency(const gmx::MDLogger &mdlog,
-                                      const gmx_hw_info_t *hwinfo,
-                                      const t_commrec     *cr,
-                                      const gmx_hw_opt_t  &hw_opt,
-                                      bool                 userSetGpuIds,
-                                      bool                 willUsePhysicalGpu)
+void gmx_check_hw_runconf_consistency(const gmx::MDLogger    &mdlog,
+                                      const gmx_hw_info_t    *hwinfo,
+                                      const t_commrec        *cr,
+                                      const gmx_hw_opt_t     &hw_opt,
+                                      bool                    userSetGpuIds,
+                                      const std::vector<int> &gpuTaskAssignment)
 {
     int      npppn;
     char     th_or_proc[STRLEN], th_or_proc_plural[STRLEN], pernode[STRLEN];
@@ -151,10 +151,12 @@ void gmx_check_hw_runconf_consistency(const gmx::MDLogger &mdlog,
     bMPI          = TRUE;
     btMPI         = FALSE;
     bNthreadsAuto = FALSE;
+    GMX_UNUSED_VALUE(hw_opt);
 #else
     bMPI          = FALSE;
     btMPI         = FALSE;
     bNthreadsAuto = FALSE;
+    GMX_UNUSED_VALUE(hw_opt);
 #endif
 
     /* Need to ensure that we have enough GPUs:
@@ -189,13 +191,13 @@ void gmx_check_hw_runconf_consistency(const gmx::MDLogger &mdlog,
         sprintf(th_or_proc, "process");
     }
 
-    if (willUsePhysicalGpu)
+    if (!gpuTaskAssignment.empty())
     {
         int  ngpu_comp, ngpu_use;
         char gpu_comp_plural[2], gpu_use_plural[2];
 
         ngpu_comp = hwinfo->gpu_info.n_dev_compatible;
-        ngpu_use  = hw_opt.gpu_opt.n_dev_use;
+        ngpu_use  = static_cast<int>(gpuTaskAssignment.size());
 
         sprintf(gpu_comp_plural, "%s", (ngpu_comp > 1) ? "s" : "");
         sprintf(gpu_use_plural,  "%s", (ngpu_use > 1) ? "s" : "");
@@ -671,35 +673,11 @@ gmx_hw_info_t *gmx_detect_hardware(const gmx::MDLogger &mdlog, const t_commrec *
     return hwinfo_g;
 }
 
-bool hasUserSetGpuIds(const gmx_gpu_opt_t *gpu_opt)
-{
-    return gpu_opt->gpu_id != nullptr;
-}
-
 bool compatibleGpusFound(const gmx_gpu_info_t &gpu_info)
 {
     return gpu_info.n_dev_compatible > 0;
 }
 
-void gmx_parse_gpu_ids(gmx_gpu_opt_t *gpu_opt)
-{
-    if (!hasUserSetGpuIds(gpu_opt))
-    {
-        return;
-    }
-
-    /* Parse a "plain" or comma-separated GPU ID string which contains a
-     * sequence of digits corresponding to GPU IDs; the order will
-     * indicate the process/tMPI thread - GPU assignment. */
-    parse_digits_from_string(gpu_opt->gpu_id, &gpu_opt->n_dev_use, &gpu_opt->dev_use);
-
-    if (gpu_opt->n_dev_use == 0)
-    {
-        gmx_fatal(FARGS, "Empty GPU ID string encountered.\n"
-                  "An empty, delimiter-free, or comma-separated sequence of valid numeric IDs of available GPUs is required.\n");
-    }
-}
-
 void gmx_hardware_info_free(gmx_hw_info_t *hwinfo)
 {
     int ret;
diff --git a/src/gromacs/hardware/detecthardware.h b/src/gromacs/hardware/detecthardware.h
index c500ea6902..842c9c4ad2 100644
--- a/src/gromacs/hardware/detecthardware.h
+++ b/src/gromacs/hardware/detecthardware.h
@@ -38,11 +38,11 @@
 #include <cstdio>
 
 #include <string>
+#include <vector>
 
 #include "gromacs/utility/basedefinitions.h"
 
 struct gmx_gpu_info_t;
-struct gmx_gpu_opt_t;
 struct gmx_hw_info_t;
 struct gmx_hw_opt_t;
 struct t_commrec;
@@ -65,23 +65,17 @@ gmx_hw_info_t *gmx_detect_hardware(const gmx::MDLogger &mdlog,
 
 void gmx_hardware_info_free(gmx_hw_info_t *hwinfo);
 
-/* Return whether the user selected GPU ids */
-bool hasUserSetGpuIds(const gmx_gpu_opt_t *gpu_opt);
-
 //! Return whether compatible GPUs were found.
 bool compatibleGpusFound(const gmx_gpu_info_t &gpu_info);
 
-/* Parse the GPU ids the user may have passed. */
-void gmx_parse_gpu_ids(gmx_gpu_opt_t *gpu_opt);
-
 /*! \brief Check the consistency of hw_opt with hwinfo.
  *
  * This function should be called once on each MPI rank. */
-void gmx_check_hw_runconf_consistency(const gmx::MDLogger &mdlog,
-                                      const gmx_hw_info_t *hwinfo,
-                                      const t_commrec     *cr,
-                                      const gmx_hw_opt_t  &hw_opt,
-                                      bool                 userSetGpuIds,
-                                      bool                 willUsePhysicalGpu);
+void gmx_check_hw_runconf_consistency(const gmx::MDLogger    &mdlog,
+                                      const gmx_hw_info_t    *hwinfo,
+                                      const t_commrec        *cr,
+                                      const gmx_hw_opt_t     &hw_opt,
+                                      bool                    userSetGpuIds,
+                                      const std::vector<int> &gpuSelection);
 
 #endif
diff --git a/src/gromacs/hardware/gpu_hw_info.h b/src/gromacs/hardware/gpu_hw_info.h
index f2c2c64059..77ce7f5afe 100644
--- a/src/gromacs/hardware/gpu_hw_info.h
+++ b/src/gromacs/hardware/gpu_hw_info.h
@@ -69,15 +69,6 @@ struct gmx_gpu_info_t
     int                       n_dev_compatible; /* number of compatible GPUs */
 };
 
-/* GPU device selection information -- includes either CUDA or OpenCL devices */
-typedef struct gmx_gpu_opt_t
-{
-    char     *gpu_id;           /* GPU id's to use, each specified as chars */
-
-    int       n_dev_use;        /* number of GPU devices selected to be used, either by the user or automatically */
-    int      *dev_use;          /* array mapping from PP rank index to GPU device ID; GPU IDs can be listed multiple times when ranks share them */
-} gmx_gpu_opt_t;
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gromacs/hardware/hardwareassign.cpp b/src/gromacs/hardware/hardwareassign.cpp
index 2e965ba64a..bd9d3fc50e 100644
--- a/src/gromacs/hardware/hardwareassign.cpp
+++ b/src/gromacs/hardware/hardwareassign.cpp
@@ -42,6 +42,7 @@
 
 #include <algorithm>
 #include <string>
+#include <vector>
 
 #include "gromacs/gmxlib/network.h"
 #include "gromacs/gpu_utils/gpu_utils.h"
@@ -58,23 +59,51 @@
 
 #define HOSTNAMELEN 80
 
-/*! \internal \brief
- * This function is responsible for mapping the GPUs to the processes on a single node
- * (filling the gpu_opt->dev_use array).
+namespace gmx
+{
+
+std::vector<int> parseGpuTaskAssignment(const std::string &gpuTaskAssignment)
+{
+    std::vector<int> digits;
+    if (gpuTaskAssignment.empty())
+    {
+        return digits;
+    }
+
+    /* Parse a "plain" or comma-separated GPU ID string which contains
+     * a sequence of digits corresponding to GPU IDs; the order will
+     * indicate the assignment of GPU tasks on this node to GPU
+     * device IDs on this node. */
+    try
+    {
+        digits = parseDigitsFromString(gpuTaskAssignment);
+    }
+    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
+
+    if (digits.empty())
+    {
+        gmx_fatal(FARGS, "Empty GPU ID string encountered.\n"
+                  "An empty, delimiter-free, or comma-separated sequence of valid numeric IDs of available GPUs is required.\n");
+    }
+    return digits;
+}
+
+/*! \brief This function is responsible for the automated mapping the
+ * GPUs to the processes on a single node.
+ *
+ * This selects the GPUs we will use. This is an operation local to each physical node.
+ * If we have less MPI ranks than GPUs, we will waste some GPUs.
  *
  * \param[in]        compatibleGpus       Vector of GPUs that are compatible
- * \param[in,out]    gpu_opt              Input/output GPU assignment data.
  * \param[in]        nrank                Number of PP GPU ranks on the node.
  * \param[in]        rank                 Index of PP GPU rank on the node.
  *
- * This selects the GPUs we will use. This is an operation local to each physical node.
- * If we have less MPI ranks than GPUs, we will waste some GPUs.
+ * \returns The assignment of GPU tasks on ranks of this node to GPU devices on this node.
  */
-static void assign_rank_gpu_ids(const std::vector<int> &compatibleGpus,
-                                gmx_gpu_opt_t *gpu_opt, int nrank, int rank)
+static std::vector<int> assign_rank_gpu_ids(const std::vector<int> &compatibleGpus,
+                                            int nrank, int rank)
 {
     int numCompatibleGpus = static_cast<int>(compatibleGpus.size());
-    GMX_RELEASE_ASSERT(gpu_opt, "Invalid gpu_opt pointer passed");
     GMX_RELEASE_ASSERT(nrank >= 1,
                        gmx::formatString("Invalid limit (%d) for the number of GPUs (detected %d compatible GPUs)",
                                          rank, numCompatibleGpus).c_str());
@@ -114,20 +143,20 @@ static void assign_rank_gpu_ids(const std::vector<int> &compatibleGpus,
     }
 
     /* Here we will waste GPUs when nrank < numCompatibleGpus */
-    gpu_opt->n_dev_use = std::min(numCompatibleGpus*nshare, nrank);
-    snew(gpu_opt->dev_use, gpu_opt->n_dev_use);
-    for (int i = 0; i != gpu_opt->n_dev_use; ++i)
+    std::vector<int> taskAssignment;
+    taskAssignment.resize(std::min(numCompatibleGpus*nshare, nrank));
+    for (size_t i = 0; i != taskAssignment.size(); ++i)
     {
         /* TODO: improve this implementation: either sort GPUs or remove the weakest here */
-        gpu_opt->dev_use[i] = compatibleGpus[i/nshare];
+        taskAssignment[i] = compatibleGpus[i/nshare];
     }
+    return taskAssignment;
 }
 
 /*! \brief Check that all user-selected GPUs are compatible.
  *
- * Given the list of selected GPU device IDs in \c gpu_opt and
- * detected GPUs in \c gpu_info, gives a fatal error unless all
- * selected GPUs are compatible
+ * Given the \c userGpuTaskAssignment and \c compatibleGPUs, give a fatal
+ * error if any selected GPUs is not compatible
  *
  * The error is given with a suitable descriptive message, which will
  * have context if this check is done after the hardware detection
@@ -143,20 +172,21 @@ static void assign_rank_gpu_ids(const std::vector<int> &compatibleGpus,
  * infrastructure to do a good job of coordinating error messages and
  * behaviour across MPMD ranks and multiple simulations.
  *
- * \param[in]   gpu_info       GPU information including result of compatibility check.
- * \param[in]   gpu_opt        Mapping of GPU IDs derived from the user, e.g. via mdrun -gpu_id.
+ * \param[in]   gpu_info               GPU information including device description.
+ * \param[in]   compatibleGpus         Vector of compatible GPUs
+ * \param[in]   userGpuTaskAssignment  The GPU selection from the user.
  */
-static void exitUnlessGpuSelectionIsValid(const gmx_gpu_info_t &gpu_info,
-                                          const gmx_gpu_opt_t  &gpu_opt)
+static void exitUnlessUserGpuTaskAssignmentIsValid(const gmx_gpu_info_t   &gpu_info,
+                                                   const std::vector<int> &compatibleGpus,
+                                                   const std::vector<int> &userGpuTaskAssignment)
 {
     int         numIncompatibleGpuIds = 0;
     std::string message
         = "Some of the requested GPUs do not exist, behave strangely, or are not compatible:\n";
 
-    for (int index = 0; index < gpu_opt.n_dev_use; ++index)
+    for (const auto &gpuId : userGpuTaskAssignment)
     {
-        int gpuId = gpu_opt.dev_use[index];
-        if (!isGpuCompatible(gpu_info, gpuId))
+        if (std::find(compatibleGpus.begin(), compatibleGpus.end(), gpuId) == compatibleGpus.end())
         {
             numIncompatibleGpuIds++;
             message += gmx::formatString("    GPU #%d: %s\n",
@@ -171,7 +201,14 @@ static void exitUnlessGpuSelectionIsValid(const gmx_gpu_info_t &gpu_info,
     }
 }
 
-std::vector<int> getCompatibleGpus(const gmx_gpu_info_t &gpu_info)
+/*! \brief Filter the compatible GPUs
+ *
+ * This function filters gpu_info.gpu_dev for compatible GPUs based
+ * on the previously run compatibility tests.
+ *
+ * \param[in]     gpu_info    Information detected about GPUs, including compatibility
+ * \return                    vector of IDs of GPUs already recorded as compatible */
+static std::vector<int> getCompatibleGpus(const gmx_gpu_info_t &gpu_info)
 {
     // Possible minor over-allocation here, but not important for anything
     std::vector<int> compatibleGpus;
@@ -187,24 +224,29 @@ std::vector<int> getCompatibleGpus(const gmx_gpu_info_t &gpu_info)
     return compatibleGpus;
 }
 
-void mapPpRanksToGpus(bool                  rankCanUseGpu,
-                      const t_commrec      *cr,
-                      const gmx_gpu_info_t &gpu_info,
-                      bool                  userSetGpuIds,
-                      gmx_gpu_opt_t        *gpu_opt)
+std::vector<int> mapPpRanksToGpus(bool                    rankCanUseGpu,
+                                  const t_commrec        *cr,
+                                  const gmx_gpu_info_t   &gpu_info,
+                                  const std::vector<int> &userGpuTaskAssignment)
 {
+    std::vector<int> taskAssignment;
+
     if (!rankCanUseGpu)
     {
-        return;
+        return taskAssignment;
     }
 
-    if (userSetGpuIds)
+    auto compatibleGpus = getCompatibleGpus(gpu_info);
+    if (!userGpuTaskAssignment.empty())
     {
-        exitUnlessGpuSelectionIsValid(gpu_info, *gpu_opt);
+        exitUnlessUserGpuTaskAssignmentIsValid(gpu_info, compatibleGpus, userGpuTaskAssignment);
+        taskAssignment = userGpuTaskAssignment;
     }
     else
     {
-        auto compatibleGpus = getCompatibleGpus(gpu_info);
-        assign_rank_gpu_ids(compatibleGpus, gpu_opt, cr->nrank_pp_intranode, cr->rank_pp_intranode);
+        taskAssignment = assign_rank_gpu_ids(compatibleGpus, cr->nrank_pp_intranode, cr->rank_pp_intranode);
     }
+    return taskAssignment;
 }
+
+} // namespace
diff --git a/src/gromacs/hardware/hardwareassign.h b/src/gromacs/hardware/hardwareassign.h
index b7ed713712..e20580c958 100644
--- a/src/gromacs/hardware/hardwareassign.h
+++ b/src/gromacs/hardware/hardwareassign.h
@@ -35,43 +35,52 @@
 #ifndef GMX_HARDWARE_HARDWAREASSIGN_H
 #define GMX_HARDWARE_HARDWAREASSIGN_H
 
+#include <string>
 #include <vector>
 
 #include "gromacs/utility/basedefinitions.h"
 
 struct gmx_gpu_info_t;
-struct gmx_gpu_opt_t;
 struct t_commrec;
 
-/*! \brief Select the compatible GPUs
+namespace gmx
+{
+
+/*! \brief Parse a GPU assignment string into digits
+ *
+ * \param[in]   gpuTaskAssignment  String like "013" or "0,1,3" typically
+ *                                 supplied by the user to mdrun -gpu_id.
  *
- * This function filters gpu_info.gpu_dev for compatible GPUs based
- * on the previously run compatibility tests.
+ * \returns  A vector of integer GPU ids, like {0, 1, 3}.
  *
- * \param[in]     gpu_info    Information detected about GPUs, including compatibility
- * \return                    vector of IDs of GPUs already recorded as compatible */
-std::vector<int> getCompatibleGpus(const gmx_gpu_info_t &gpu_info);
+ * \throws   std::bad_alloc     If out of memory.
+ *           InvalidInputError  If an invalid character is found (ie not a digit or ',').
+ */
+std::vector<int> parseGpuTaskAssignment(const std::string &gpuTaskAssignment);
 
-/*! \brief Map PP ranks to GPU IDs.
+/*! \brief Assign PP ranks to valid GPU IDs.
  *
- * After this call, gpu_opt->dev_use will contain a validated mapping
- * from PP ranks (ie tasks that can run on GPUs) to the device IDs of
- * compatible GPUs on their node.
+ * Will return a validated mapping from PP ranks (ie tasks that can
+ * run on GPUs) to the device IDs of compatible GPUs on their node.
+ * This will be the \c userGpuTaskAssignment if non-empty, otherwise
+ * a default automated mapping is generated.
  *
  * Note that PME-only ranks have always ignored mdrun -gpu_id, so do
  * not attempt to validate -gpu_id. They should continue this behaviour
  * until PME tasks can use GPUs.
  *
- * \param[in]     rankCanUseGpu  Whether this rank can execute a task on a GPU.
- * \param[in]     cr             Communication record.
- * \param[in]     gpu_info       Information detected about GPUs, including compatibility.
- * \param[in]     userSetGpuIds  Whether the user set the GPU IDs to use in the mapping.
- * \param[inout]  gpu_opt        Holds the mapping to validate, or to fill.
+ * \param[in]     rankCanUseGpu          Whether this rank can execute a task on a GPU.
+ * \param[in]     cr                     Communication record.
+ * \param[in]     gpu_info               Information detected about GPUs, including compatibility.
+ * \param[in]     userGpuTaskAssignment  Any GPU IDs required by the user.
+ *
+ * \returns  A valid GPU selection.
  */
-void mapPpRanksToGpus(bool                  rankCanUseGpu,
-                      const t_commrec      *cr,
-                      const gmx_gpu_info_t &gpu_info,
-                      bool                  userSetGpuIds,
-                      gmx_gpu_opt_t        *gpu_opt);
+std::vector<int> mapPpRanksToGpus(bool                    rankCanUseGpu,
+                                  const t_commrec        *cr,
+                                  const gmx_gpu_info_t   &gpu_info,
+                                  const std::vector<int> &userGpuTaskAssignment);
+
+} // namespace
 
 #endif
diff --git a/src/gromacs/hardware/hw_info.h b/src/gromacs/hardware/hw_info.h
index 26ad31ba40..401421b521 100644
--- a/src/gromacs/hardware/hw_info.h
+++ b/src/gromacs/hardware/hw_info.h
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -35,6 +35,8 @@
 #ifndef GMX_HARDWARE_HWINFO_H
 #define GMX_HARDWARE_HWINFO_H
 
+#include <string>
+
 #include "gromacs/hardware/gpu_hw_info.h"
 #include "gromacs/utility/basedefinitions.h"
 
@@ -84,17 +86,33 @@ enum {
     threadaffSEL, threadaffAUTO, threadaffON, threadaffOFF, threadaffNR
 };
 
-/* Threading and GPU options, can be set automatically or by the user */
-typedef struct gmx_hw_opt_t {
-    int           nthreads_tot;        /* Total number of threads requested (TMPI) */
-    int           nthreads_tmpi;       /* Number of TMPI threads requested         */
-    int           nthreads_omp;        /* Number of OpenMP threads requested       */
-    int           nthreads_omp_pme;    /* As nthreads_omp, but for PME only nodes  */
-    int           thread_affinity;     /* Thread affinity switch, see enum above   */
-    int           core_pinning_stride; /* Logical core pinning stride              */
-    int           core_pinning_offset; /* Logical core pinning offset              */
-
-    gmx_gpu_opt_t gpu_opt;             /* The GPU options                          */
-} gmx_hw_opt_t;
+/*! \internal \brief Threading and GPU options, can be set automatically or by the user
+ *
+ * \todo During mdrunner(), if the user has left any of these values
+ * at their defaults (which tends to mean "choose automatically"),
+ * then those values are over-written with the result of such
+ * automation. This creates problems for the subsequent code in
+ * knowing what was done, why, and reporting correctly to the
+ * user. Find a way to improve this.
+ */
+struct gmx_hw_opt_t
+{
+    //! Total number of threads requested (thread-MPI + OpenMP).
+    int           nthreads_tot = 0;
+    //! Number of thread-MPI threads requested.
+    int           nthreads_tmpi = 0;
+    //! Number of OpenMP threads requested.
+    int           nthreads_omp = 0;
+    //! Number of OpenMP threads to use on PME_only ranks.
+    int           nthreads_omp_pme = 0;
+    //! Thread affinity switch, see enum above.
+    int           thread_affinity = threadaffSEL;
+    //! Logical core pinning stride.
+    int           core_pinning_stride = 0;
+    //! Logical core pinning offset.
+    int           core_pinning_offset = 0;
+    //! Empty, or a GPU task-assignment string provided by the user.
+    std::string   gpuIdTaskAssignment = "";
+};
 
 #endif
diff --git a/src/gromacs/hardware/printhardware.cpp b/src/gromacs/hardware/printhardware.cpp
index 734ade8197..d4593ea298 100644
--- a/src/gromacs/hardware/printhardware.cpp
+++ b/src/gromacs/hardware/printhardware.cpp
@@ -68,21 +68,23 @@ static const bool bGPUBinary     = GMX_GPU != GMX_GPU_NONE;
  *
  * Sharing GPUs among multiple PP ranks is possible via either user or
  * automated selection. */
-static int gmx_count_gpu_dev_shared(const gmx_gpu_opt_t *gpu_opt, bool userSetGpuIds)
+static int gmx_count_gpu_dev_shared(const std::vector<int> &gpuTaskAssignment,
+                                    bool                    userSetGpuIds)
 {
     int      same_count    = 0;
-    int      ngpu          = gpu_opt->n_dev_use;
 
     if (userSetGpuIds)
     {
-        int      i, j;
+        GMX_RELEASE_ASSERT(!gpuTaskAssignment.empty(),
+                           "The user cannot choose an empty set of GPU IDs, code is wrong somewhere");
+        size_t ngpu = gpuTaskAssignment.size();
 
-        for (i = 0; i < ngpu - 1; i++)
+        for (size_t i = 0; i < ngpu - 1; i++)
         {
-            for (j = i + 1; j < ngpu; j++)
+            for (size_t j = i + 1; j < ngpu; j++)
             {
-                same_count      += (gpu_opt->dev_use[i] ==
-                                    gpu_opt->dev_use[j]);
+                same_count      += (gpuTaskAssignment[i] ==
+                                    gpuTaskAssignment[j]);
             }
         }
     }
@@ -96,12 +98,10 @@ static int gmx_count_gpu_dev_shared(const gmx_gpu_opt_t *gpu_opt, bool userSetGp
  * GPU IDs, the number of GPUs user (per node) can be different from the
  * number of GPU IDs selected.
  */
-static size_t gmx_count_gpu_dev_unique(const gmx_gpu_opt_t  *gpu_opt)
+static size_t gmx_count_gpu_dev_unique(const std::vector<int> &userGpuTaskAssignment)
 {
-    GMX_RELEASE_ASSERT(gpu_opt, "gpu_opt must be a non-NULL pointer");
-
     std::set<int> uniqIds;
-    for (int deviceId = 0; deviceId < gpu_opt->n_dev_use; deviceId++)
+    for (const auto &deviceId : userGpuTaskAssignment)
     {
         uniqIds.insert(deviceId);
     }
@@ -124,13 +124,12 @@ static std::string sprint_gpus(const gmx_gpu_info_t &gpu_info)
 }
 
 std::string
-makeGpuUsageReport(const gmx_gpu_info_t &gpu_info,
-                   const gmx_gpu_opt_t  *gpu_opt,
-                   bool                  userSetGpuIds,
-                   size_t                numPpRanks,
-                   bool                  bPrintHostName)
+makeGpuUsageReport(const gmx_gpu_info_t   &gpu_info,
+                   bool                    userSetGpuIds,
+                   const std::vector<int> &gpuTaskAssignment,
+                   size_t                  numPpRanks,
+                   bool                    bPrintHostName)
 {
-    int  ngpu_use  = gpu_opt->n_dev_use;
     int  ngpu_comp = gpu_info.n_dev_compatible;
     char host[STRLEN];
 
@@ -140,7 +139,7 @@ makeGpuUsageReport(const gmx_gpu_info_t &gpu_info,
     }
 
     /* Issue a note if GPUs are available but not used */
-    if (ngpu_comp > 0 && ngpu_use < 1)
+    if (ngpu_comp > 0 && gpuTaskAssignment.empty())
     {
         return gmx::formatString("%d compatible GPU%s detected in the system, but none will be used.\n"
                                  "Consider trying GPU acceleration with the Verlet scheme!\n",
@@ -149,11 +148,10 @@ makeGpuUsageReport(const gmx_gpu_info_t &gpu_info,
 
     std::string output;
     {
-        std::vector<int> gpuIdsInUse(gpu_opt->dev_use, gpu_opt->dev_use + ngpu_use);
-        std::string      gpuIdsString =
-            formatAndJoin(gpuIdsInUse, ",", gmx::StringFormatter("%d"));
-        size_t           numGpusInUse = gmx_count_gpu_dev_unique(gpu_opt);
-        bool             bPluralGpus  = numGpusInUse > 1;
+        std::string gpuIdsString =
+            formatAndJoin(gpuTaskAssignment, ",", gmx::StringFormatter("%d"));
+        size_t      numGpusInUse = gmx_count_gpu_dev_unique(gpuTaskAssignment);
+        bool        bPluralGpus  = numGpusInUse > 1;
 
         if (bPrintHostName)
         {
@@ -169,7 +167,7 @@ makeGpuUsageReport(const gmx_gpu_info_t &gpu_info,
                                     gpuIdsString.c_str());
     }
 
-    int same_count = gmx_count_gpu_dev_shared(gpu_opt, userSetGpuIds);
+    int same_count = gmx_count_gpu_dev_shared(gpuTaskAssignment, userSetGpuIds);
 
     if (same_count > 0)
     {
diff --git a/src/gromacs/hardware/printhardware.h b/src/gromacs/hardware/printhardware.h
index 6e3cd4d60d..c90b81ef26 100644
--- a/src/gromacs/hardware/printhardware.h
+++ b/src/gromacs/hardware/printhardware.h
@@ -38,9 +38,9 @@
 #include <cstdio>
 
 #include <string>
+#include <vector>
 
 struct gmx_gpu_info_t;
-struct gmx_gpu_opt_t;
 struct gmx_hw_info_t;
 struct t_commrec;
 
@@ -59,18 +59,18 @@ void gmx_print_detected_hardware(FILE *fplog, const t_commrec *cr,
 /*! \brief Helper function for reporting GPU usage information
  * in the mdrun log file
  *
- * \param[in] gpu_info       Information detected about GPUs
- * \param[in] gpu_opt        Pointer to per-node GPU options struct
- * \param[in] userSetGpuIds  Whether the user selected the GPU ids
- * \param[in] numPpRanks     Number of PP ranks per node
- * \param[in] bPrintHostName Print the hostname in the usage information
- * \return                   String to write to the log file
- * \throws                   std::bad_alloc if out of memory */
+ * \param[in] gpu_info           Information detected about GPUs
+ * \param[in] userSetGpuIds      Whether the user selected the GPU ids
+ * \param[in] gpuTaskAssignment  The selected GPU IDs.
+ * \param[in] numPpRanks         Number of PP ranks per node
+ * \param[in] bPrintHostName     Print the hostname in the usage information
+ * \return                       String to write to the log file
+ * \throws                       std::bad_alloc if out of memory */
 std::string
-makeGpuUsageReport(const gmx_gpu_info_t &gpu_info,
-                   const gmx_gpu_opt_t  *gpu_opt,
-                   bool                  userSetGpuIds,
-                   size_t                numPpRanks,
-                   bool                  bPrintHostName);
+makeGpuUsageReport(const gmx_gpu_info_t   &gpu_info,
+                   bool                    userSetGpuIds,
+                   const std::vector<int> &gpuTaskAssignment,
+                   size_t                  numPpRanks,
+                   bool                    bPrintHostName);
 
 #endif
diff --git a/src/gromacs/mdrunutility/tests/threadaffinitytest.cpp b/src/gromacs/mdrunutility/tests/threadaffinitytest.cpp
index ed7f1d1916..e9ee864fa3 100644
--- a/src/gromacs/mdrunutility/tests/threadaffinitytest.cpp
+++ b/src/gromacs/mdrunutility/tests/threadaffinitytest.cpp
@@ -75,14 +75,12 @@ ThreadAffinityTestHelper::ThreadAffinityTestHelper()
 #if GMX_MPI
     cr_->mpi_comm_mysim = MPI_COMM_WORLD;
 #endif
-    snew(hwOpt_, 1);
-    hwOpt_->thread_affinity = threadaffAUTO;
+    hwOpt_.thread_affinity = threadaffAUTO;
 }
 
 ThreadAffinityTestHelper::~ThreadAffinityTestHelper()
 {
     sfree(cr_);
-    sfree(hwOpt_);
 }
 
 void ThreadAffinityTestHelper::setLogicalProcessorCount(int logicalProcessorCount)
diff --git a/src/gromacs/mdrunutility/tests/threadaffinitytest.h b/src/gromacs/mdrunutility/tests/threadaffinitytest.h
index 71941a2fb4..8a2da2465c 100644
--- a/src/gromacs/mdrunutility/tests/threadaffinitytest.h
+++ b/src/gromacs/mdrunutility/tests/threadaffinitytest.h
@@ -87,12 +87,12 @@ class ThreadAffinityTestHelper
         }
         void setAffinityOption(int affinityOption)
         {
-            hwOpt_->thread_affinity = affinityOption;
+            hwOpt_.thread_affinity = affinityOption;
         }
         void setOffsetAndStride(int offset, int stride)
         {
-            hwOpt_->core_pinning_offset = offset;
-            hwOpt_->core_pinning_stride = stride;
+            hwOpt_.core_pinning_offset = offset;
+            hwOpt_.core_pinning_stride = stride;
         }
 
         void setPhysicalNodeId(int nodeId)
@@ -166,13 +166,13 @@ class ThreadAffinityTestHelper
             {
                 setLogicalProcessorCount(1);
             }
-            gmx_set_thread_affinity(logHelper_.logger(), cr_, hwOpt_, *hwTop_,
+            gmx_set_thread_affinity(logHelper_.logger(), cr_, &hwOpt_, *hwTop_,
                                     nthread_local, &affinityAccess_);
         }
 
     private:
         t_commrec                         *cr_;
-        gmx_hw_opt_t                      *hwOpt_;
+        gmx_hw_opt_t                       hwOpt_;
         std::unique_ptr<HardwareTopology>  hwTop_;
         MockThreadAffinityAccess           affinityAccess_;
         LoggerTestHelper                   logHelper_;
diff --git a/src/gromacs/utility/cstringutil.cpp b/src/gromacs/utility/cstringutil.cpp
index af1d21e9de..764a7711dd 100644
--- a/src/gromacs/utility/cstringutil.cpp
+++ b/src/gromacs/utility/cstringutil.cpp
@@ -46,7 +46,6 @@
 #include <cctype>
 #include <cstring>
 
-#include <sstream>
 #include <string>
 #include <vector>
 
@@ -520,80 +519,3 @@ char *gmx_step_str(gmx_int64_t i, char *buf)
     sprintf(buf, "%" GMX_PRId64, i);
     return buf;
 }
-
-void parse_digits_from_string(const char *digitstring, int *ndigits, int **digitlist)
-{
-    /* TODO use std::string, once gmx_gpu_opt_t is ready for it */
-    if (nullptr == digitstring)
-    {
-        *ndigits   = 0;
-        *digitlist = nullptr;
-        return;
-    }
-
-    if (strstr(digitstring, ",") != nullptr)
-    {
-        parse_digits_from_csv_string(digitstring, ndigits, digitlist);
-    }
-    else
-    {
-        parse_digits_from_plain_string(digitstring, ndigits, digitlist);
-    }
-}
-
-void parse_digits_from_plain_string(const char *digitstring, int *ndigits, int **digitlist)
-{
-    int i;
-
-    if (nullptr == digitstring)
-    {
-        *ndigits   = 0;
-        *digitlist = nullptr;
-        return;
-    }
-
-    *ndigits = strlen(digitstring);
-
-    snew(*digitlist, *ndigits);
-
-    for (i = 0; i < *ndigits; i++)
-    {
-        if (digitstring[i] < '0' || digitstring[i] > '9')
-        {
-            gmx_fatal(FARGS, "Invalid character in digit-only string: '%c'\n",
-                      digitstring[i]);
-        }
-        (*digitlist)[i] = digitstring[i] - '0';
-    }
-}
-
-void parse_digits_from_csv_string(const char *digitstring, int *ndigits, int **digitlist)
-{
-    if (nullptr == digitstring)
-    {
-        *ndigits   = 0;
-        *digitlist = nullptr;
-        return;
-    }
-
-    std::vector<int>   digits;
-    std::istringstream ss(digitstring);
-    std::string        token;
-    while (std::getline(ss, token, ','))
-    {
-        if (token.find_first_not_of("0123456789") != std::string::npos)
-        {
-            gmx_fatal(FARGS, "Invalid token in digit-only string: \"%s\"\n",
-                      token.c_str());
-        }
-        int number = static_cast<int>(str_to_int64_t(token.c_str(), nullptr));
-        digits.push_back(number);
-    }
-
-    *ndigits = digits.size();
-    snew(*digitlist, *ndigits);
-    for (size_t i = 0; i < digits.size(); i++)
-    {
-        (*digitlist)[i] = digits[i];
-    }
-}
diff --git a/src/gromacs/utility/cstringutil.h b/src/gromacs/utility/cstringutil.h
index 62e5037211..a27a0ba353 100644
--- a/src/gromacs/utility/cstringutil.h
+++ b/src/gromacs/utility/cstringutil.h
@@ -186,52 +186,6 @@ gmx_int64_t str_to_int64_t(const char *str, char **endptr);
  */
 char *gmx_step_str(gmx_int64_t i, char *buf);
 
-/*! \brief Construct an array of digits found in the input string
- *
- * \param[in]  digitstring  String that must contain only digits
- * \param[out] ndigits      Size of return array with the values of the digits
- * \param[out] digitlist    Array of digits found in
- *                          digitstring. Allocated by this function
- *                          with size *ndigits. Calling code is
- *                          responsible for deallocation.
- *
- * If digitstring is NULL, then ndigits is set to zero and digitlist
- * to NULL. If digitstring contains a non-digit character, a fatal
- * error results.
- */
-void parse_digits_from_plain_string(const char *digitstring, int *ndigits, int **digitlist);
-
-/*! \brief Construct an array of digits from a comma separated input string
- *
- * \param[in]  digitstring  String that must contain only digits and commas
- * \param[out] ndigits      Size of return array with the values of the digits
- * \param[out] digitlist    Array of digits found in
- *                          digitstring. Allocated by this function
- *                          with size *ndigits. Calling code is
- *                          responsible for deallocation.
- *
- * If digitstring is NULL, then ndigits is set to zero and digitlist
- * to NULL. If digitstring contains a non digit-or-comma character, a fatal
- * error results.
- */
-void parse_digits_from_csv_string(const char *digitstring, int *ndigits, int **digitlist);
-
-/*! \brief Construct an array of digits an input string
- *
- *
- * \param[in]  digitstring  String that must contain only digits, or only
- *                          digits and commas
- * \param[out] ndigits      Size of return array with the values of the digits
- * \param[out] digitlist    Array of digits found in
- *                          digitstring. Allocated by this function
- *                          with size *ndigits. Calling code is
- *                          responsible for deallocation.
- * If digitstring is NULL, then ndigits is set to zero and digitlist
- * to NULL. If digitstring contains a non digit-or-comma character, a fatal
- * error results.
- */
-void parse_digits_from_string(const char *digitstring, int *ndigits, int **digitlist);
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gromacs/utility/stringutil.cpp b/src/gromacs/utility/stringutil.cpp
index 74a9753ec2..386e40aaaf 100644
--- a/src/gromacs/utility/stringutil.cpp
+++ b/src/gromacs/utility/stringutil.cpp
@@ -49,9 +49,11 @@
 #include <cstring>
 
 #include <algorithm>
+#include <sstream>
 #include <string>
 #include <vector>
 
+#include "gromacs/utility/exceptions.h"
 #include "gromacs/utility/gmxassert.h"
 
 namespace gmx
@@ -461,4 +463,25 @@ TextLineWrapper::wrapToVector(const std::string &input) const
     return result;
 }
 
+std::vector<int> parseDigitsFromString(const std::string &input)
+{
+    std::vector<int>   digits;
+    std::istringstream ss(input);
+    std::string        token;
+    digits.reserve(input.length());
+    token.reserve(input.length());
+    while (std::getline(ss, token, ','))
+    {
+        for (const auto &c : token)
+        {
+            if (std::isdigit(c) == 0)
+            {
+                GMX_THROW(InvalidInputError(formatString("Invalid character in digit-only string: \"%c\"\n", c)));
+            }
+            digits.push_back(c - '0');
+        }
+    }
+    return digits;
+}
+
 } // namespace gmx
diff --git a/src/gromacs/utility/stringutil.h b/src/gromacs/utility/stringutil.h
index 2c0be63baa..5250378838 100644
--- a/src/gromacs/utility/stringutil.h
+++ b/src/gromacs/utility/stringutil.h
@@ -686,6 +686,18 @@ class TextLineWrapper
         TextLineWrapperSettings settings_;
 };
 
+/*! \brief Construct a vector of decimal digits parsed from an \c input string.
+ *
+ * \param[in]  input  String that must contain only decimal digits, or only
+ *                    decimal digits separated by comma delimiters.
+ *
+ * \returns           Vector of any digits found in \c input.
+ *
+ * \throws  std::bad_alloc if out of memory
+ *          InvalidInputError if an invalid digit character is found.
+ */
+std::vector<int> parseDigitsFromString(const std::string &input);
+
 //! \}
 
 } // namespace gmx
diff --git a/src/gromacs/utility/tests/stringutil.cpp b/src/gromacs/utility/tests/stringutil.cpp
index cc8fd7a258..675e980286 100644
--- a/src/gromacs/utility/tests/stringutil.cpp
+++ b/src/gromacs/utility/tests/stringutil.cpp
@@ -54,6 +54,7 @@
 #include <gtest/gtest.h>
 
 #include "gromacs/utility/arrayref.h"
+#include "gromacs/utility/exceptions.h"
 
 #include "testutils/refdata.h"
 #include "testutils/stringtest.h"
@@ -430,6 +431,22 @@ TEST_F(TextLineWrapperTest, WrapsCorrectlyWithExtraWhitespace)
               "WrappedAt14WithTrailingWhitespace");
 }
 
+TEST(StringUtilityTest, ParseDigitsFromString)
+{
+    using ::testing::ElementsAre;
+    using ::testing::IsEmpty;
+    EXPECT_THAT(parseDigitsFromString("01"), ElementsAre(0, 1));
+    EXPECT_THAT(parseDigitsFromString("0,1"), ElementsAre(0, 1));
+    EXPECT_THAT(parseDigitsFromString(",0,1"), ElementsAre(0, 1));
+    EXPECT_THAT(parseDigitsFromString("0,1,"), ElementsAre(0, 1));
+    EXPECT_THAT(parseDigitsFromString(",0,1,"), ElementsAre(0, 1));
+    EXPECT_THAT(parseDigitsFromString(","), IsEmpty());
+    EXPECT_THAT(parseDigitsFromString(",,"), IsEmpty());
+    EXPECT_THAT(parseDigitsFromString(""), IsEmpty());
+    EXPECT_THROW(parseDigitsFromString("a"), InvalidInputError);
+    EXPECT_THROW(parseDigitsFromString("0a"), InvalidInputError);
+}
+
 } // namespace
 } // namespace
 } // namespace
diff --git a/src/programs/mdrun/mdrun.cpp b/src/programs/mdrun/mdrun.cpp
index 07b24730f4..dd70f417de 100644
--- a/src/programs/mdrun/mdrun.cpp
+++ b/src/programs/mdrun/mdrun.cpp
@@ -264,6 +264,7 @@ int Mdrunner::mainFunction(int argc, char *argv[])
     gmx_bool          bTryToAppendFiles     = TRUE;
     gmx_bool          bKeepAndNumCPT        = FALSE;
     gmx_bool          bResetCountersHalfWay = FALSE;
+    const char       *gpuIdTaskAssignment   = "";
 
     t_pargs           pa[] = {
 
@@ -287,7 +288,7 @@ int Mdrunner::mainFunction(int argc, char *argv[])
           "The lowest logical core number to which mdrun should pin the first thread" },
         { "-pinstride", FALSE, etINT, {&hw_opt.core_pinning_stride},
           "Pinning distance in logical cores for threads, use 0 to minimize the number of threads per physical core" },
-        { "-gpu_id",  FALSE, etSTR, {&hw_opt.gpu_opt.gpu_id},
+        { "-gpu_id",  FALSE, etSTR, {&gpuIdTaskAssignment},
           "List of GPU device id-s to use, specifies the per-node PP rank to GPU mapping" },
         { "-ddcheck", FALSE, etBOOL, {&bDDBondCheck},
           "Check for all bonded interactions with DD" },
@@ -400,18 +401,24 @@ int Mdrunner::mainFunction(int argc, char *argv[])
         return 0;
     }
 
-    // Handle option that parses GPU ids, which could be in an
-    // environment variable, so that there is a way to customize it
-    // when using MPI in heterogeneous contexts.
+    // Handle the option that permits the user to select a GPU task
+    // assignment, which could be in an environment variable (so that
+    // there is a way to customize it, when using MPI in heterogeneous
+    // contexts).
     {
-        char *env = getenv("GMX_GPU_ID");
-        if (env != nullptr && hw_opt.gpu_opt.gpu_id != nullptr)
-        {
-            gmx_fatal(FARGS, "GMX_GPU_ID and -gpu_id can not be used at the same time");
-        }
+        // TODO Argument parsing can't handle std::string. We should
+        // fix that by changing the parsing, once more of the roles of
+        // handling, validating and implementing defaults for user
+        // command-line options have been seperated.
+        hw_opt.gpuIdTaskAssignment = gpuIdTaskAssignment;
+        const char *env = getenv("GMX_GPU_ID");
         if (env != nullptr)
         {
-            hw_opt.gpu_opt.gpu_id = env;
+            if (!hw_opt.gpuIdTaskAssignment.empty())
+            {
+                gmx_fatal(FARGS, "GMX_GPU_ID and -gpu_id can not be used at the same time");
+            }
+            hw_opt.gpuIdTaskAssignment = env;
         }
     }
 
diff --git a/src/programs/mdrun/resource-division.cpp b/src/programs/mdrun/resource-division.cpp
index 3133dbbfff..b7b9afc38a 100644
--- a/src/programs/mdrun/resource-division.cpp
+++ b/src/programs/mdrun/resource-division.cpp
@@ -46,7 +46,6 @@
 
 #include "gromacs/hardware/cpuinfo.h"
 #include "gromacs/hardware/detecthardware.h"
-#include "gromacs/hardware/gpu_hw_info.h"
 #include "gromacs/hardware/hardwaretopology.h"
 #include "gromacs/hardware/hw_info.h"
 #include "gromacs/mdlib/gmx_omp_nthreads.h"
@@ -336,12 +335,13 @@ class SingleRankChecker
  * Thus all options should be internally consistent and consistent
  * with the hardware, except that ntmpi could be larger than #GPU.
  */
-int get_nthreads_mpi(const gmx_hw_info_t *hwinfo,
-                     gmx_hw_opt_t        *hw_opt,
-                     const t_inputrec    *inputrec,
-                     const gmx_mtop_t    *mtop,
-                     const gmx::MDLogger &mdlog,
-                     bool                 doMembed)
+int get_nthreads_mpi(const gmx_hw_info_t    *hwinfo,
+                     gmx_hw_opt_t           *hw_opt,
+                     const std::vector<int> &userGpuTaskAssignment,
+                     const t_inputrec       *inputrec,
+                     const gmx_mtop_t       *mtop,
+                     const gmx::MDLogger    &mdlog,
+                     bool                    doMembed)
 {
     int                          nthreads_hw, nthreads_tot_max, nrank, ngpu;
     int                          min_atoms_per_mpi_rank;
@@ -354,11 +354,11 @@ int get_nthreads_mpi(const gmx_hw_info_t *hwinfo,
        support multiple ranks. We need also to handle the case where
        the user set multiple GPU IDs for an algorithm that cannot
        handle multiple ranks. */
-    if (hw_opt->nthreads_tmpi < 1 && hasUserSetGpuIds(&hw_opt->gpu_opt))
+    if (hw_opt->nthreads_tmpi < 1 && !userGpuTaskAssignment.empty())
     {
         /* Set the number of thread-MPI ranks equal to the number of GPU
            ranks that the user chose. */
-        int numGpuRanks = hw_opt->gpu_opt.n_dev_use;
+        int numGpuRanks = static_cast<int>(userGpuTaskAssignment.size());
 
         /* If the user chose both mdrun -nt -gpu_id, is that consistent? */
         if (hw_opt->nthreads_tot > 0 &&
@@ -685,28 +685,7 @@ static void print_hw_opt(FILE *fp, const gmx_hw_opt_t *hw_opt)
             hw_opt->nthreads_tmpi,
             hw_opt->nthreads_omp,
             hw_opt->nthreads_omp_pme,
-            hw_opt->gpu_opt.gpu_id != nullptr ? hw_opt->gpu_opt.gpu_id : "");
-}
-
-//! \brief Return if any GPU ID (e.g in a user-supplied string) is repeated
-static gmx_bool anyGpuIdIsRepeated(const gmx_gpu_opt_t *gpu_opt)
-{
-    /* Loop over IDs in the string */
-    for (int i = 0; i < gpu_opt->n_dev_use - 1; ++i)
-    {
-        /* Look for the ID in location i in the following part of the
-           string */
-        for (int j = i + 1; j < gpu_opt->n_dev_use; ++j)
-        {
-            if (gpu_opt->dev_use[i] == gpu_opt->dev_use[j])
-            {
-                /* Same ID found in locations i and j */
-                return TRUE;
-            }
-        }
-    }
-
-    return FALSE;
+            hw_opt->gpuIdTaskAssignment.c_str());
 }
 
 /* Checks we can do when we don't (yet) know the cut-off scheme */
diff --git a/src/programs/mdrun/resource-division.h b/src/programs/mdrun/resource-division.h
index 3f27e68397..ff443fde19 100644
--- a/src/programs/mdrun/resource-division.h
+++ b/src/programs/mdrun/resource-division.h
@@ -38,6 +38,8 @@
 
 #include <cstdio>
 
+#include <vector>
+
 #include "gromacs/utility/basedefinitions.h"
 
 struct gmx_hw_info_t;
@@ -59,12 +61,13 @@ class MDLogger;
  * with the hardware, except that ntmpi could be larger than #GPU.
  * If necessary, this function will modify hw_opt->nthreads_omp.
  */
-int get_nthreads_mpi(const gmx_hw_info_t *hwinfo,
-                     gmx_hw_opt_t        *hw_opt,
-                     const t_inputrec    *inputrec,
-                     const gmx_mtop_t    *mtop,
-                     const gmx::MDLogger &mdlog,
-                     bool                 doMembed);
+int get_nthreads_mpi(const gmx_hw_info_t    *hwinfo,
+                     gmx_hw_opt_t           *hw_opt,
+                     const std::vector<int> &userGpuSelection,
+                     const t_inputrec       *inputrec,
+                     const gmx_mtop_t       *mtop,
+                     const gmx::MDLogger    &mdlog,
+                     bool                    doMembed);
 
 /* Check if the number of OpenMP threads is within reasonable range
  * considering the hardware used. This is a crude check, but mainly
diff --git a/src/programs/mdrun/runner.cpp b/src/programs/mdrun/runner.cpp
index 3b3fe879c8..ee3664cb21 100644
--- a/src/programs/mdrun/runner.cpp
+++ b/src/programs/mdrun/runner.cpp
@@ -720,15 +720,14 @@ int Mdrunner::mdrunner()
     /* Handle GPU-related user options. Later, we check consistency
      * with things like whether support is compiled, or tMPI thread
      * count. */
-    bool emulateGpu          = getenv("GMX_EMULATE_GPU") != nullptr;
-    gmx_parse_gpu_ids(&hw_opt.gpu_opt);
-    bool userSetGpuIds = hasUserSetGpuIds(&hw_opt.gpu_opt);
-    bool forceUseCpu   = (strncmp(nbpu_opt, "cpu", 3) == 0);
-    if (userSetGpuIds && forceUseCpu)
+    bool emulateGpu            = getenv("GMX_EMULATE_GPU") != nullptr;
+    auto userGpuTaskAssignment = parseGpuTaskAssignment(hw_opt.gpuIdTaskAssignment);
+    bool forceUseCpu           = (strncmp(nbpu_opt, "cpu", 3) == 0);
+    if (!userGpuTaskAssignment.empty() && forceUseCpu)
     {
         gmx_fatal(FARGS, "GPU IDs were specified, and short-ranged interactions were assigned to the CPU. Make no more than one of these choices.");
     }
-    bool forceUsePhysicalGpu = (strncmp(nbpu_opt, "gpu", 3) == 0) || userSetGpuIds;
+    bool forceUsePhysicalGpu = (strncmp(nbpu_opt, "gpu", 3) == 0) || !userGpuTaskAssignment.empty();
     bool tryUsePhysicalGpu   = (strncmp(nbpu_opt, "auto", 4) == 0) && !emulateGpu && (GMX_GPU != GMX_GPU_NONE);
 
     // Here we assume that SIMMASTER(cr) does not change even after the
@@ -845,6 +844,7 @@ int Mdrunner::mdrunner()
         // Determine how many thread-MPI ranks to start.
         hw_opt.nthreads_tmpi = get_nthreads_mpi(hwinfo,
                                                 &hw_opt,
+                                                userGpuTaskAssignment,
                                                 inputrec, mtop,
                                                 mdlog,
                                                 doMembed);
@@ -1097,26 +1097,30 @@ int Mdrunner::mdrunner()
     }
 #endif
 
+    // Contains the ID of the GPU used by each PP rank on this node,
+    // indexed by that rank. Empty if no GPUs are selected for use on
+    // this node.
+    std::vector<int> gpuTaskAssignment;
     if (tryUsePhysicalGpu || forceUsePhysicalGpu)
     {
-        /* Currently the DD code assigns duty to ranks that can include PP work
-         * that currently can be executed on a single GPU, if present and compatible.
-         * This has to be coordinated across PP ranks on a node, with possible
-         * multiple devices or sharing devices on a node. */
+        /* Currently the DD code assigns duty to ranks that can
+         * include PP work that currently can be executed on a single
+         * GPU, if present and compatible.  This has to be coordinated
+         * across PP ranks on a node, with possible multiple devices
+         * or sharing devices on a node, either from the user
+         * selection, or automatically. */
         bool rankCanUseGpu = cr->duty & DUTY_PP;
-        /* Map GPU IDs to ranks by filling or validating hw_opt->gpu_opt->dev_use */
-        mapPpRanksToGpus(rankCanUseGpu, cr, hwinfo->gpu_info,
-                         userSetGpuIds, &hw_opt.gpu_opt);
+        gpuTaskAssignment = mapPpRanksToGpus(rankCanUseGpu, cr, hwinfo->gpu_info,
+                                             userGpuTaskAssignment);
     }
-    bool willUsePhysicalGpu = hw_opt.gpu_opt.n_dev_use > 0;
 
     /* If we are using GPUs, report on this rank how they are being
      * used on this node. */
-    if (willUsePhysicalGpu)
+    if (!gpuTaskAssignment.empty())
     {
         auto gpuUsageReport =
-            makeGpuUsageReport(hwinfo->gpu_info, &hw_opt.gpu_opt,
-                               userSetGpuIds, cr->nrank_pp_intranode,
+            makeGpuUsageReport(hwinfo->gpu_info, !userGpuTaskAssignment.empty(),
+                               gpuTaskAssignment, cr->nrank_pp_intranode,
                                cr->nnodes > 1);
 
         /* NOTE: this print is only for and on one physical node */
@@ -1125,7 +1129,8 @@ int Mdrunner::mdrunner()
 
     /* check consistency across ranks of things like SIMD
      * support and number of GPUs selected */
-    gmx_check_hw_runconf_consistency(mdlog, hwinfo, cr, hw_opt, userSetGpuIds, willUsePhysicalGpu);
+    gmx_check_hw_runconf_consistency(mdlog, hwinfo, cr, hw_opt, !userGpuTaskAssignment.empty(), gpuTaskAssignment);
+    /* From now on, the userGpuTaskAssignment should never be used */
 
     /* Prevent other ranks from continuing after an inconsistency was found.
      *
@@ -1142,16 +1147,16 @@ int Mdrunner::mdrunner()
 #endif
 
     /* Now that we know the setup is consistent, check for efficiency */
-    check_resource_division_efficiency(hwinfo, hw_opt.nthreads_tot, hw_opt.gpu_opt.n_dev_use > 0, Flags & MD_NTOMPSET,
+    check_resource_division_efficiency(hwinfo, hw_opt.nthreads_tot, !gpuTaskAssignment.empty(), Flags & MD_NTOMPSET,
                                        cr, mdlog);
 
     gmx_device_info_t *shortRangedDeviceInfo = nullptr;
     int                shortRangedDeviceId   = -1;
     if (cr->duty & DUTY_PP)
     {
-        if (willUsePhysicalGpu)
+        if (!gpuTaskAssignment.empty())
         {
-            shortRangedDeviceId   = hw_opt.gpu_opt.dev_use[cr->nrank_pp_intranode];
+            shortRangedDeviceId   = gpuTaskAssignment[cr->rank_pp_intranode];
             shortRangedDeviceInfo = getDeviceInfo(hwinfo->gpu_info, shortRangedDeviceId);
         }
     }
diff --git a/src/programs/mdrun/runner.h b/src/programs/mdrun/runner.h
index 1cf14fd537..fc33c82ea7 100644
--- a/src/programs/mdrun/runner.h
+++ b/src/programs/mdrun/runner.h
@@ -83,10 +83,7 @@ class Mdrunner
 {
     private:
         //! Parallelism-related user options.
-        gmx_hw_opt_t hw_opt {
-            0, 0, 0, 0, threadaffSEL, 0, 0,
-            { nullptr, 0, nullptr }
-        };
+        gmx_hw_opt_t             hw_opt;
         //! Filenames and properties from command-line argument values.
         std::array<t_filenm, 34> filenames =
         {{{ efTPR, nullptr,     nullptr,     ffREAD },
-- 
2.11.4.GIT