From 2b8deca62d6140ea953286e1b99245fb2ea09bc5 Mon Sep 17 00:00:00 2001 From: Aleksei Iupinov Date: Mon, 21 Aug 2017 15:45:03 +0200 Subject: [PATCH] Separate PME spread and gather wallcycle counters Change-Id: If9d1bcac8b07d0ea09ac57c254e1ca30fbe78d31 --- src/gromacs/ewald/pme.cpp | 16 ++++++++-------- src/gromacs/timing/wallcycle.cpp | 2 +- src/gromacs/timing/wallcycle.h | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/gromacs/ewald/pme.cpp b/src/gromacs/ewald/pme.cpp index 6200d4c03b..0ea7ae8fec 100644 --- a/src/gromacs/ewald/pme.cpp +++ b/src/gromacs/ewald/pme.cpp @@ -1118,7 +1118,7 @@ int gmx_pme_do(struct gmx_pme_t *pme, if (flags & GMX_PME_SPREAD) { - wallcycle_start(wcycle, ewcPME_SPREADGATHER); + wallcycle_start(wcycle, ewcPME_SPREAD); /* Spread the coefficients on a grid */ spread_on_grid(pme, &pme->atc[0], pmegrid, bFirst, TRUE, fftgrid, bDoSplines, grid_index); @@ -1146,7 +1146,7 @@ int gmx_pme_do(struct gmx_pme_t *pme, copy_pmegrid_to_fftgrid(pme, grid, fftgrid, grid_index); } - wallcycle_stop(wcycle, ewcPME_SPREADGATHER); + wallcycle_stop(wcycle, ewcPME_SPREAD); /* TODO If the OpenMP and single-threaded implementations converge, then spread_on_grid() and @@ -1238,7 +1238,7 @@ int gmx_pme_do(struct gmx_pme_t *pme, /* Note: this wallcycle region is closed below outside an OpenMP region, so take care if refactoring code here. */ - wallcycle_start(wcycle, ewcPME_SPREADGATHER); + wallcycle_start(wcycle, ewcPME_GATHER); } copy_fftgrid_to_pmegrid(pme, fftgrid, grid, grid_index, pme->nthread, thread); @@ -1293,7 +1293,7 @@ int gmx_pme_do(struct gmx_pme_t *pme, pme->pme_order*pme->pme_order*pme->pme_order*pme->atc[0].n); /* Note: this wallcycle region is opened above inside an OpenMP region, so take care if refactoring code here. 
*/ - wallcycle_stop(wcycle, ewcPME_SPREADGATHER); + wallcycle_stop(wcycle, ewcPME_GATHER); } if (bCalcEnerVir) @@ -1401,7 +1401,7 @@ int gmx_pme_do(struct gmx_pme_t *pme, if (flags & GMX_PME_SPREAD) { - wallcycle_start(wcycle, ewcPME_SPREADGATHER); + wallcycle_start(wcycle, ewcPME_SPREAD); /* Spread the c6 on a grid */ spread_on_grid(pme, &pme->atc[0], pmegrid, bFirst, TRUE, fftgrid, bDoSplines, grid_index); @@ -1425,7 +1425,7 @@ int gmx_pme_do(struct gmx_pme_t *pme, #endif copy_pmegrid_to_fftgrid(pme, grid, fftgrid, grid_index); } - wallcycle_stop(wcycle, ewcPME_SPREADGATHER); + wallcycle_stop(wcycle, ewcPME_SPREAD); } /*Here we start a large thread parallel region*/ #pragma omp parallel num_threads(pme->nthread) private(thread) @@ -1531,7 +1531,7 @@ int gmx_pme_do(struct gmx_pme_t *pme, npme = static_cast<int>(ntot*std::log(ntot)/std::log(2.0)); inc_nrnb(nrnb, eNR_FFT, 2*npme); } - wallcycle_start(wcycle, ewcPME_SPREADGATHER); + wallcycle_start(wcycle, ewcPME_GATHER); } copy_fftgrid_to_pmegrid(pme, fftgrid, grid, grid_index, pme->nthread, thread); @@ -1575,7 +1575,7 @@ int gmx_pme_do(struct gmx_pme_t *pme, inc_nrnb(nrnb, eNR_GATHERFBSP, pme->pme_order*pme->pme_order*pme->pme_order*pme->atc[0].n); } - wallcycle_stop(wcycle, ewcPME_SPREADGATHER); + wallcycle_stop(wcycle, ewcPME_GATHER); bFirst = FALSE; } /* for (grid_index = 8; grid_index >= 2; --grid_index) */ diff --git a/src/gromacs/timing/wallcycle.cpp b/src/gromacs/timing/wallcycle.cpp index 85bc4967fb..a18a271a8e 100644 --- a/src/gromacs/timing/wallcycle.cpp +++ b/src/gromacs/timing/wallcycle.cpp @@ -104,7 +104,7 @@ static const char *wcn[ewcNR] = "Run", "Step", "PP during PME", "Domain decomp.", "DD comm. load", "DD comm. bounds", "Vsite constr.", "Send X to PME", "Neighbor search", "Launch GPU ops.", "Comm. coord.", "Born radii", "Force", "Wait + Comm. F", "PME mesh", - "PME redist. X/F", "PME spread/gather", "PME 3D-FFT", "PME 3D-FFT Comm.", "PME solve LJ", "PME solve Elec", + "PME redist. 
X/F", "PME spread", "PME gather", "PME 3D-FFT", "PME 3D-FFT Comm.", "PME solve LJ", "PME solve Elec", "PME wait for PP", "Wait + Recv. PME F", "Wait GPU nonlocal", "Wait GPU local", "NB X/F buffer ops.", "Vsite spread", "COM pull force", "Write traj.", "Update", "Constraints", "Comm. energies", diff --git a/src/gromacs/timing/wallcycle.h b/src/gromacs/timing/wallcycle.h index d40656e4f8..b1325e21d0 100644 --- a/src/gromacs/timing/wallcycle.h +++ b/src/gromacs/timing/wallcycle.h @@ -51,7 +51,7 @@ enum { ewcRUN, ewcSTEP, ewcPPDURINGPME, ewcDOMDEC, ewcDDCOMMLOAD, ewcDDCOMMBOUND, ewcVSITECONSTR, ewcPP_PMESENDX, ewcNS, ewcLAUNCH_GPU_NB, ewcMOVEX, ewcGB, ewcFORCE, ewcMOVEF, ewcPMEMESH, - ewcPME_REDISTXF, ewcPME_SPREADGATHER, ewcPME_FFT, ewcPME_FFTCOMM, ewcLJPME, ewcPME_SOLVE, + ewcPME_REDISTXF, ewcPME_SPREAD, ewcPME_GATHER, ewcPME_FFT, ewcPME_FFTCOMM, ewcLJPME, ewcPME_SOLVE, ewcPMEWAITCOMM, ewcPP_PMEWAITRECVF, ewcWAIT_GPU_NB_NL, ewcWAIT_GPU_NB_L, ewcNB_XF_BUF_OPS, ewcVSITESPREAD, ewcPULLPOT, ewcTRAJ, ewcUPDATE, ewcCONSTR, ewcMoveE, ewcROT, ewcROTadd, ewcSWAP, ewcIMD, -- 2.11.4.GIT