src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*
  36  * Note: this file was generated by the Verlet kernel generator for
  37  * kernel type 4xn.
  38  */
  39
  40 #include "gmxpre.h"
  41
  42 #include "config.h"
  43
  44 #include "gromacs/legacyheaders/typedefs.h"
  45 #include "gromacs/mdlib/nb_verlet.h"
  46 #include "gromacs/mdlib/nbnxn_simd.h"
  47
  48 #ifdef GMX_NBNXN_SIMD_4XN
  49
  50 #include "gromacs/simd/vector_operations.h"
  51
  52 #if !(GMX_SIMD_REAL_WIDTH == 2 || GMX_SIMD_REAL_WIDTH == 4 || GMX_SIMD_REAL_WIDTH == 8)
  53 #error "unsupported SIMD width"
  54 #endif
  55
  56 #define GMX_SIMD_J_UNROLL_SIZE 1
  57 #include "nbnxn_kernel_simd_4xn.h"
  58
  59 #include "gromacs/legacyheaders/gmx_omp_nthreads.h"
  60 #include "gromacs/legacyheaders/types/force_flags.h"
  61 #include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
  62 #include "gromacs/simd/simd.h"
  63 #include "gromacs/utility/fatalerror.h"
  64
  65 /*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels
  66  */
  67 enum {
  68     coulktRF, coulktTAB, coulktTAB_TWIN, coulktEWALD, coulktEWALD_TWIN, coulktNR
  69 };
  70
  71 /*! \brief Kinds of Van der Waals treatments in SIMD Verlet kernels
  72  */
  73 enum {
  74     vdwktLJCUT_COMBGEOM, vdwktLJCUT_COMBLB, vdwktLJCUT_COMBNONE, vdwktLJFORCESWITCH, vdwktLJPOTSWITCH, vdwktLJEWALDCOMBGEOM, vdwktNR
  75 };
  76
  77 /* Declare and define the kernel function pointer lookup tables.
  78  * The minor index of the array goes over both the LJ combination rules,
  79  * which is only supported by plain cut-off, and the LJ switch/PME functions.
  80  */
  81 static p_nbk_func_noener p_nbk_noener[coulktNR][vdwktNR] =
  82 {
  83     {
  84         nbnxn_kernel_ElecRF_VdwLJCombGeom_F_4xn,
  85         nbnxn_kernel_ElecRF_VdwLJCombLB_F_4xn,
  86         nbnxn_kernel_ElecRF_VdwLJ_F_4xn,
  87         nbnxn_kernel_ElecRF_VdwLJFSw_F_4xn,
  88         nbnxn_kernel_ElecRF_VdwLJPSw_F_4xn,
  89         nbnxn_kernel_ElecRF_VdwLJEwCombGeom_F_4xn,
  90     },
  91     {
  92         nbnxn_kernel_ElecQSTab_VdwLJCombGeom_F_4xn,
  93         nbnxn_kernel_ElecQSTab_VdwLJCombLB_F_4xn,
  94         nbnxn_kernel_ElecQSTab_VdwLJ_F_4xn,
  95         nbnxn_kernel_ElecQSTab_VdwLJFSw_F_4xn,
  96         nbnxn_kernel_ElecQSTab_VdwLJPSw_F_4xn,
  97         nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_F_4xn,
  98     },
  99     {
 100         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_F_4xn,
 101         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_F_4xn,
 102         nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_4xn,
 103         nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_F_4xn,
 104         nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_F_4xn,
 105         nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_F_4xn,
 106     },
 107     {
 108         nbnxn_kernel_ElecEw_VdwLJCombGeom_F_4xn,
 109         nbnxn_kernel_ElecEw_VdwLJCombLB_F_4xn,
 110         nbnxn_kernel_ElecEw_VdwLJ_F_4xn,
 111         nbnxn_kernel_ElecEw_VdwLJFSw_F_4xn,
 112         nbnxn_kernel_ElecEw_VdwLJPSw_F_4xn,
 113         nbnxn_kernel_ElecEw_VdwLJEwCombGeom_F_4xn,
 114     },
 115     {
 116         nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_F_4xn,
 117         nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_F_4xn,
 118         nbnxn_kernel_ElecEwTwinCut_VdwLJ_F_4xn,
 119         nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_F_4xn,
 120         nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_F_4xn,
 121         nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_F_4xn,
 122     },
 123 };
 124
 125 static p_nbk_func_ener p_nbk_ener[coulktNR][vdwktNR] =
 126 {
 127     {
 128         nbnxn_kernel_ElecRF_VdwLJCombGeom_VF_4xn,
 129         nbnxn_kernel_ElecRF_VdwLJCombLB_VF_4xn,
 130         nbnxn_kernel_ElecRF_VdwLJ_VF_4xn,
 131         nbnxn_kernel_ElecRF_VdwLJFSw_VF_4xn,
 132         nbnxn_kernel_ElecRF_VdwLJPSw_VF_4xn,
 133         nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VF_4xn,
 134     },
 135     {
 136         nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VF_4xn,
 137         nbnxn_kernel_ElecQSTab_VdwLJCombLB_VF_4xn,
 138         nbnxn_kernel_ElecQSTab_VdwLJ_VF_4xn,
 139         nbnxn_kernel_ElecQSTab_VdwLJFSw_VF_4xn,
 140         nbnxn_kernel_ElecQSTab_VdwLJPSw_VF_4xn,
 141         nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VF_4xn,
 142     },
 143     {
 144         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VF_4xn,
 145         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VF_4xn,
 146         nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_4xn,
 147         nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VF_4xn,
 148         nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VF_4xn,
 149         nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VF_4xn,
 150     },
 151     {
 152         nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_4xn,
 153         nbnxn_kernel_ElecEw_VdwLJCombLB_VF_4xn,
 154         nbnxn_kernel_ElecEw_VdwLJ_VF_4xn,
 155         nbnxn_kernel_ElecEw_VdwLJFSw_VF_4xn,
 156         nbnxn_kernel_ElecEw_VdwLJPSw_VF_4xn,
 157         nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VF_4xn,
 158     },
 159     {
 160         nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VF_4xn,
 161         nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VF_4xn,
 162         nbnxn_kernel_ElecEwTwinCut_VdwLJ_VF_4xn,
 163         nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VF_4xn,
 164         nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VF_4xn,
 165         nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VF_4xn,
 166     },
 167 };
 168
 169 static p_nbk_func_ener p_nbk_energrp[coulktNR][vdwktNR] =
 170 {
 171     {
 172         nbnxn_kernel_ElecRF_VdwLJCombGeom_VgrpF_4xn,
 173         nbnxn_kernel_ElecRF_VdwLJCombLB_VgrpF_4xn,
 174         nbnxn_kernel_ElecRF_VdwLJ_VgrpF_4xn,
 175         nbnxn_kernel_ElecRF_VdwLJFSw_VgrpF_4xn,
 176         nbnxn_kernel_ElecRF_VdwLJPSw_VgrpF_4xn,
 177         nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VgrpF_4xn,
 178     },
 179     {
 180         nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VgrpF_4xn,
 181         nbnxn_kernel_ElecQSTab_VdwLJCombLB_VgrpF_4xn,
 182         nbnxn_kernel_ElecQSTab_VdwLJ_VgrpF_4xn,
 183         nbnxn_kernel_ElecQSTab_VdwLJFSw_VgrpF_4xn,
 184         nbnxn_kernel_ElecQSTab_VdwLJPSw_VgrpF_4xn,
 185         nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VgrpF_4xn,
 186     },
 187     {
 188         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VgrpF_4xn,
 189         nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VgrpF_4xn,
 190         nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_4xn,
 191         nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VgrpF_4xn,
 192         nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VgrpF_4xn,
 193         nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VgrpF_4xn,
 194     },
 195     {
 196         nbnxn_kernel_ElecEw_VdwLJCombGeom_VgrpF_4xn,
 197         nbnxn_kernel_ElecEw_VdwLJCombLB_VgrpF_4xn,
 198         nbnxn_kernel_ElecEw_VdwLJ_VgrpF_4xn,
 199         nbnxn_kernel_ElecEw_VdwLJFSw_VgrpF_4xn,
 200         nbnxn_kernel_ElecEw_VdwLJPSw_VgrpF_4xn,
 201         nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VgrpF_4xn,
 202     },
 203     {
 204         nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VgrpF_4xn,
 205         nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VgrpF_4xn,
 206         nbnxn_kernel_ElecEwTwinCut_VdwLJ_VgrpF_4xn,
 207         nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VgrpF_4xn,
 208         nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VgrpF_4xn,
 209         nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VgrpF_4xn,
 210     },
 211 };
 212
 213
 214 static void
 215 reduce_group_energies(int ng, int ng_2log,
 216                       const real *VSvdw, const real *VSc,
 217                       real *Vvdw, real *Vc)
 218 {
 219     const int unrollj      = GMX_SIMD_REAL_WIDTH/GMX_SIMD_J_UNROLL_SIZE;
 220     const int unrollj_half = unrollj/2;
 221     int       ng_p2, i, j, j0, j1, c, s;
 222
 223     ng_p2 = (1<<ng_2log);
 224
 225     /* The size of the x86 SIMD energy group buffer array is:
 226      * ng*ng*ng_p2*unrollj_half*simd_width
 227      */
 228     for (i = 0; i < ng; i++)
 229     {
 230         for (j = 0; j < ng; j++)
 231         {
 232             Vvdw[i*ng+j] = 0;
 233             Vc[i*ng+j]   = 0;
 234         }
 235
 236         for (j1 = 0; j1 < ng; j1++)
 237         {
 238             for (j0 = 0; j0 < ng; j0++)
 239             {
 240                 c = ((i*ng + j1)*ng_p2 + j0)*unrollj_half*unrollj;
 241                 for (s = 0; s < unrollj_half; s++)
 242                 {
 243                     Vvdw[i*ng+j0] += VSvdw[c+0];
 244                     Vvdw[i*ng+j1] += VSvdw[c+1];
 245                     Vc  [i*ng+j0] += VSc  [c+0];
 246                     Vc  [i*ng+j1] += VSc  [c+1];
 247                     c             += unrollj + 2;
 248                 }
 249             }
 250         }
 251     }
 252 }
 253
 254 #else /* GMX_NBNXN_SIMD_4XN */
 255
 256 #include "gromacs/utility/fatalerror.h"
 257
 258 #endif /* GMX_NBNXN_SIMD_4XN */
 259
 260 void
 261 nbnxn_kernel_simd_4xn(nbnxn_pairlist_set_t      gmx_unused *nbl_list,
 262                       const nbnxn_atomdata_t    gmx_unused *nbat,
 263                       const interaction_const_t gmx_unused *ic,
 264                       int                       gmx_unused  ewald_excl,
 265                       rvec                      gmx_unused *shift_vec,
 266                       int                       gmx_unused  force_flags,
 267                       int                       gmx_unused  clearF,
 268                       real                      gmx_unused *fshift,
 269                       real                      gmx_unused *Vc,
 270                       real                      gmx_unused *Vvdw)
 271 #ifdef GMX_NBNXN_SIMD_4XN
 272 {
 273     int                nnbl;
 274     nbnxn_pairlist_t **nbl;
 275     int                coulkt, vdwkt = 0;
 276     int                nb;
 277     int                nthreads gmx_unused;
 278
 279     nnbl = nbl_list->nnbl;
 280     nbl  = nbl_list->nbl;
 281
 282     if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
 283     {
 284         coulkt = coulktRF;
 285     }
 286     else
 287     {
 288         if (ewald_excl == ewaldexclTable)
 289         {
 290             if (ic->rcoulomb == ic->rvdw)
 291             {
 292                 coulkt = coulktTAB;
 293             }
 294             else
 295             {
 296                 coulkt = coulktTAB_TWIN;
 297             }
 298         }
 299         else
 300         {
 301             if (ic->rcoulomb == ic->rvdw)
 302             {
 303                 coulkt = coulktEWALD;
 304             }
 305             else
 306             {
 307                 coulkt = coulktEWALD_TWIN;
 308             }
 309         }
 310     }
 311
 312     if (ic->vdwtype == evdwCUT)
 313     {
 314         switch (ic->vdw_modifier)
 315         {
 316             case eintmodNONE:
 317             case eintmodPOTSHIFT:
 318                 switch (nbat->comb_rule)
 319                 {
 320                     case ljcrGEOM: vdwkt = vdwktLJCUT_COMBGEOM; break;
 321                     case ljcrLB:   vdwkt = vdwktLJCUT_COMBLB;   break;
 322                     case ljcrNONE: vdwkt = vdwktLJCUT_COMBNONE; break;
 323                     default:       gmx_incons("Unknown combination rule");
 324                 }
 325                 break;
 326             case eintmodFORCESWITCH:
 327                 vdwkt = vdwktLJFORCESWITCH;
 328                 break;
 329             case eintmodPOTSWITCH:
 330                 vdwkt = vdwktLJPOTSWITCH;
 331                 break;
 332             default:
 333                 gmx_incons("Unsupported VdW interaction modifier");
 334         }
 335     }
 336     else if (ic->vdwtype == evdwPME)
 337     {
 338         if (ic->ljpme_comb_rule == eljpmeLB)
 339         {
 340             gmx_incons("The nbnxn SIMD kernels don't suport LJ-PME with LB");
 341         }
 342         vdwkt = vdwktLJEWALDCOMBGEOM;
 343     }
 344     else
 345     {
 346         gmx_incons("Unsupported VdW interaction type");
 347     }
 348
 349     // cppcheck-suppress unreadVariable
 350     nthreads = gmx_omp_nthreads_get(emntNonbonded);
 351 #pragma omp parallel for schedule(static) num_threads(nthreads)
 352     for (nb = 0; nb < nnbl; nb++)
 353     {
 354         nbnxn_atomdata_output_t *out;
 355         real                    *fshift_p;
 356
 357         out = &nbat->out[nb];
 358
 359         if (clearF == enbvClearFYes)
 360         {
 361             clear_f(nbat, nb, out->f);
 362         }
 363
 364         if ((force_flags & GMX_FORCE_VIRIAL) && nnbl == 1)
 365         {
 366             fshift_p = fshift;
 367         }
 368         else
 369         {
 370             fshift_p = out->fshift;
 371
 372             if (clearF == enbvClearFYes)
 373             {
 374                 clear_fshift(fshift_p);
 375             }
 376         }
 377
 378         if (!(force_flags & GMX_FORCE_ENERGY))
 379         {
 380             /* Don't calculate energies */
 381             p_nbk_noener[coulkt][vdwkt](nbl[nb], nbat,
 382                                         ic,
 383                                         shift_vec,
 384                                         out->f,
 385                                         fshift_p);
 386         }
 387         else if (out->nV == 1)
 388         {
 389             /* No energy groups */
 390             out->Vvdw[0] = 0;
 391             out->Vc[0]   = 0;
 392
 393             p_nbk_ener[coulkt][vdwkt](nbl[nb], nbat,
 394                                       ic,
 395                                       shift_vec,
 396                                       out->f,
 397                                       fshift_p,
 398                                       out->Vvdw,
 399                                       out->Vc);
 400         }
 401         else
 402         {
 403             /* Calculate energy group contributions */
 404             int i;
 405
 406             for (i = 0; i < out->nVS; i++)
 407             {
 408                 out->VSvdw[i] = 0;
 409             }
 410             for (i = 0; i < out->nVS; i++)
 411             {
 412                 out->VSc[i] = 0;
 413             }
 414
 415             p_nbk_energrp[coulkt][vdwkt](nbl[nb], nbat,
 416                                          ic,
 417                                          shift_vec,
 418                                          out->f,
 419                                          fshift_p,
 420                                          out->VSvdw,
 421                                          out->VSc);
 422
 423             reduce_group_energies(nbat->nenergrp, nbat->neg_2log,
 424                                   out->VSvdw, out->VSc,
 425                                   out->Vvdw, out->Vc);
 426         }
 427     }
 428
 429     if (force_flags & GMX_FORCE_ENERGY)
 430     {
 431         reduce_energies_over_lists(nbat, nnbl, Vvdw, Vc);
 432     }
 433 }
 434 #else
 435 {
 436     gmx_incons("nbnxn_kernel_simd_4xn called when such kernels "
 437                " are not enabled.");
 438 }
 439 #endif
 440 #undef GMX_SIMD_J_UNROLL_SIZE