From 6816800950d0a406dfe5ba53bff4395b8eab491c Mon Sep 17 00:00:00 2001
From: Mark Abraham <mark.j.abraham@gmail.com>
Date: Tue, 20 Dec 2016 21:26:56 +1100
Subject: [PATCH] Improved state_change_natoms and used it more

If the t_state control flags indicate that per-atom arrays will be
used, resize them appropriately whenever this routine is called.

CG minimization and non-DD simulations take care of organizing their
own state flags and allocations, rather than relying on code located
elsewhere. Non-DD used to rely on the allocation of x made when
reading the tpr, and perhaps one for v made in set_state_entries (if
not read from the tpr), and one for f made in do_md or init_em.

Commented more widely about why allocations for force arrays are
of size natoms+1.

Change-Id: I803c2695969f78616016520cdd8e9425507ab7c1
---
 src/gromacs/domdec/domdec.cpp          | 21 +++++----------------
 src/gromacs/fileio/tpxio.cpp           | 11 +++++++++++
 src/gromacs/gmxpreprocess/grompp.cpp   |  8 +++++---
 src/gromacs/mdlib/broadcaststructs.cpp |  3 +++
 src/gromacs/mdlib/md_support.cpp       |  8 +-------
 src/gromacs/mdlib/minimize.cpp         | 16 +++++++---------
 src/gromacs/mdlib/sim_util.cpp         |  6 ++++--
 src/gromacs/mdlib/tpi.cpp              |  3 +++
 src/gromacs/mdtypes/state.cpp          | 16 +++++++++++++---
 src/programs/mdrun/md.cpp              |  9 +++++----
 10 files changed, 57 insertions(+), 44 deletions(-)

diff --git a/src/gromacs/domdec/domdec.cpp b/src/gromacs/domdec/domdec.cpp
index b039af0f8a..42163ee117 100644
--- a/src/gromacs/domdec/domdec.cpp
+++ b/src/gromacs/domdec/domdec.cpp
@@ -1347,25 +1347,14 @@ static void dd_resize_state(t_state *state, PaddedRVecVector *f, int natoms)
         fprintf(debug, "Resizing state: currently %d, required %d\n", state->natoms, natoms);
     }
 
-    /* We need to allocate one element extra, since we might use
-     * (unaligned) 4-wide SIMD loads to access rvec entries.
-     */
-    if (state->flags & (1 << estX))
-    {
-        state->x.resize(natoms + 1);
-    }
-    if (state->flags & (1 << estV))
-    {
-        state->v.resize(natoms + 1);
-    }
-    if (state->flags & (1 << estCGP))
-    {
-        state->cg_p.resize(natoms + 1);
-    }
+    state_change_natoms(state, natoms);
 
     if (f != nullptr)
     {
-        (*f).resize(natoms + 1);
+        /* We need to allocate one element extra, since we might use
+         * (unaligned) 4-wide SIMD loads to access rvec entries.
+         */
+        f->resize(natoms + 1);
     }
 }
 
diff --git a/src/gromacs/fileio/tpxio.cpp b/src/gromacs/fileio/tpxio.cpp
index a4ef8718bd..0d43214f63 100644
--- a/src/gromacs/fileio/tpxio.cpp
+++ b/src/gromacs/fileio/tpxio.cpp
@@ -3174,6 +3174,17 @@ static int do_tpx(t_fileio *fio, gmx_bool bRead,
         init_gtc_state(state, tpx.ngtc, 0, 0);
         if (x == nullptr)
         {
+            // v is also nullptr by the above assertion, so we may
+            // need to make memory in state for storing the contents
+            // of the tpx file.
+            if (tpx.bX)
+            {
+                state->flags |= (1 << estX);
+            }
+            if (tpx.bV)
+            {
+                state->flags |= (1 << estV);
+            }
             state_change_natoms(state, tpx.natoms);
         }
     }
diff --git a/src/gromacs/gmxpreprocess/grompp.cpp b/src/gromacs/gmxpreprocess/grompp.cpp
index 9788d69fb0..17f3a44309 100644
--- a/src/gromacs/gmxpreprocess/grompp.cpp
+++ b/src/gromacs/gmxpreprocess/grompp.cpp
@@ -620,7 +620,11 @@ new_status(const char *topfile, const char *topppfile, const char *confin,
      * a priori if the number of atoms in confin matches what we expect.
      */
     state->flags |= (1 << estX);
-    state->x.resize(state->natoms);
+    if (v != NULL)
+    {
+        state->flags |= (1 << estV);
+    }
+    state_change_natoms(state, state->natoms);
     for (int i = 0; i < state->natoms; i++)
     {
         copy_rvec(x[i], state->x[i]);
@@ -628,8 +632,6 @@ new_status(const char *topfile, const char *topppfile, const char *confin,
     sfree(x);
     if (v != nullptr)
     {
-        state->flags |= (1 << estV);
-        state->v.resize(state->natoms);
         for (int i = 0; i < state->natoms; i++)
         {
             copy_rvec(v[i], state->v[i]);
diff --git a/src/gromacs/mdlib/broadcaststructs.cpp b/src/gromacs/mdlib/broadcaststructs.cpp
index adb857578b..7bc0df7216 100644
--- a/src/gromacs/mdlib/broadcaststructs.cpp
+++ b/src/gromacs/mdlib/broadcaststructs.cpp
@@ -260,6 +260,9 @@ static void bc_groups(const t_commrec *cr, t_symtab *symtab,
 
 static void bcastPaddedRVecVector(const t_commrec *cr, PaddedRVecVector *v, unsigned int n)
 {
+    /* We need to allocate one element extra, since we might use
+     * (unaligned) 4-wide SIMD loads to access rvec entries.
+     */
     (*v).resize(n + 1);
     nblock_bc(cr, n, as_rvec_array(v->data()));
 }
diff --git a/src/gromacs/mdlib/md_support.cpp b/src/gromacs/mdlib/md_support.cpp
index 39c7706ee5..3c99652db9 100644
--- a/src/gromacs/mdlib/md_support.cpp
+++ b/src/gromacs/mdlib/md_support.cpp
@@ -559,6 +559,7 @@ void rerun_parallel_comm(t_commrec *cr, t_trxframe *fr,
 
 }
 
+// TODO Most of this logic seems to belong in the respective modules
 void set_state_entries(t_state *state, const t_inputrec *ir)
 {
     /* The entries in the state in the tpx file might not correspond
@@ -575,13 +576,6 @@ void set_state_entries(t_state *state, const t_inputrec *ir)
     if (EI_DYNAMICS(ir->eI))
     {
         state->flags |= (1<<estV);
-        state->v.resize(state->natoms + 1);
-    }
-    if (ir->eI == eiCG)
-    {
-        state->flags |= (1<<estCGP);
-        /* cg_p is not stored in the tpx file, so we need to allocate it */
-        state->cg_p.resize(state->natoms + 1);
     }
 
     state->nnhpres = 0;
diff --git a/src/gromacs/mdlib/minimize.cpp b/src/gromacs/mdlib/minimize.cpp
index 7d655098b4..dd3af64376 100644
--- a/src/gromacs/mdlib/minimize.cpp
+++ b/src/gromacs/mdlib/minimize.cpp
@@ -393,12 +393,13 @@ void init_em(FILE *fplog, const char *title,
     }
     else
     {
+        state_change_natoms(state_global, state_global->natoms);
         /* Just copy the state */
         ems->s = *state_global;
+        state_change_natoms(&ems->s, ems->s.natoms);
         /* We need to allocate one element extra, since we might use
          * (unaligned) 4-wide SIMD loads to access rvec entries.
          */
-        ems->s.x.resize(ems->s.natoms + 1);
         ems->f.resize(ems->s.natoms + 1);
 
         snew(*top, 1);
@@ -572,23 +573,17 @@ static bool do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md,
 
     if (s2->natoms != s1->natoms)
     {
-        s2->natoms = s1->natoms;
+        state_change_natoms(s2, s1->natoms);
         /* We need to allocate one element extra, since we might use
          * (unaligned) 4-wide SIMD loads to access rvec entries.
          */
-        s2->x.resize(s1->natoms + 1);
-        ems2->f.resize(s1->natoms + 1);
-        if (s2->flags & (1<<estCGP))
-        {
-            s2->cg_p.resize(s1->natoms + 1);
-        }
+        ems2->f.resize(s2->natoms + 1);
     }
     if (DOMAINDECOMP(cr) && s2->cg_gl.size() != s1->cg_gl.size())
     {
         s2->cg_gl.resize(s1->cg_gl.size());
     }
 
-    s2->natoms = s1->natoms;
     copy_mat(s1->box, s2->box);
     /* Copy free energy state */
     s2->lambda = s1->lambda;
@@ -1022,6 +1017,9 @@ double do_cg(FILE *fplog, t_commrec *cr, const gmx::MDLogger gmx_unused &mdlog,
 
     step = 0;
 
+    // Ensure the extra per-atom state array gets allocated
+    state_global->flags |= (1<<estCGP);
+
     /* Create 4 states on the stack and extract pointers that we will swap */
     em_state_t  s0 {}, s1 {}, s2 {}, s3 {};
     em_state_t *s_min = &s0;
diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp
index d58b58a894..09fe7ad85f 100644
--- a/src/gromacs/mdlib/sim_util.cpp
+++ b/src/gromacs/mdlib/sim_util.cpp
@@ -1858,8 +1858,10 @@ void do_force(FILE *fplog, t_commrec *cr,
         flags &= ~GMX_FORCE_NONBONDED;
     }
 
-    GMX_ASSERT(coordinates->size() >= static_cast<unsigned int>(fr->natoms_force + 1), "We might need 1 element extra for SIMD");
-    GMX_ASSERT(force->size() >= static_cast<unsigned int>(fr->natoms_force + 1), "We might need 1 element extra for SIMD");
+    GMX_ASSERT(coordinates->size() >= static_cast<unsigned int>(fr->natoms_force + 1) ||
+               fr->natoms_force == 0, "We might need 1 element extra for SIMD");
+    GMX_ASSERT(force->size() >= static_cast<unsigned int>(fr->natoms_force + 1) ||
+               fr->natoms_force == 0, "We might need 1 element extra for SIMD");
 
     rvec *x = as_rvec_array(coordinates->data());
 
diff --git a/src/gromacs/mdlib/tpi.cpp b/src/gromacs/mdlib/tpi.cpp
index 8cc2237958..e2744cb9c2 100644
--- a/src/gromacs/mdlib/tpi.cpp
+++ b/src/gromacs/mdlib/tpi.cpp
@@ -291,6 +291,9 @@ double do_tpi(FILE *fplog, t_commrec *cr, const gmx::MDLogger gmx_unused &mdlog,
 
     snew(enerd, 1);
     init_enerdata(groups->grps[egcENER].nr, inputrec->fepvals->n_lambda, enerd);
+    /* We need to allocate one element extra, since we might use
+     * (unaligned) 4-wide SIMD loads to access rvec entries.
+     */
     f.resize(top_global->natoms + 1);
 
     /* Print to log file  */
diff --git a/src/gromacs/mdtypes/state.cpp b/src/gromacs/mdtypes/state.cpp
index 2c44d7c6ea..31f27f7499 100644
--- a/src/gromacs/mdtypes/state.cpp
+++ b/src/gromacs/mdtypes/state.cpp
@@ -120,15 +120,25 @@ void state_change_natoms(t_state *state, int natoms)
         /* We need to allocate one element extra, since we might use
          * (unaligned) 4-wide SIMD loads to access rvec entries.
          */
-        state->x.resize(state->natoms + 1);
-        state->v.resize(state->natoms + 1);
+        if (state->flags & (1 << estX))
+        {
+            state->x.resize(state->natoms + 1);
+        }
+        if (state->flags & (1 << estV))
+        {
+            state->v.resize(state->natoms + 1);
+        }
+        if (state->flags & (1 << estCGP))
+        {
+            state->cg_p.resize(state->natoms + 1);
+        }
     }
     else
     {
         state->x.resize(0);
         state->v.resize(0);
+        state->cg_p.resize(0);
     }
-    state->cg_p.resize(0);
 }
 
 void init_dfhist_state(t_state *state, int dfhistNumLambda)
diff --git a/src/programs/mdrun/md.cpp b/src/programs/mdrun/md.cpp
index 7fc77b592c..c01e69fc97 100644
--- a/src/programs/mdrun/md.cpp
+++ b/src/programs/mdrun/md.cpp
@@ -364,10 +364,6 @@ double gmx::do_md(FILE *fplog, t_commrec *cr, const gmx::MDLogger &mdlog,
     snew(enerd, 1);
     init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda,
                   enerd);
-    if (!DOMAINDECOMP(cr))
-    {
-        f.resize(top_global->natoms + 1);
-    }
 
     /* Kinetic energy data */
     snew(ekind, 1);
@@ -423,6 +419,11 @@ double gmx::do_md(FILE *fplog, t_commrec *cr, const gmx::MDLogger &mdlog,
     }
     else
     {
+        state_change_natoms(state_global, state_global->natoms);
+        /* We need to allocate one element extra, since we might use
+         * (unaligned) 4-wide SIMD loads to access rvec entries.
+         */
+        f.resize(state_global->natoms + 1);
         /* Copy the pointer to the global state */
         state = state_global;
 
-- 
2.11.4.GIT