src/mdlib/nbnxn_kernels/nbnxn_kernel_x86_simd256.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  11  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  12  * Copyright (c) 2001-2012, The GROMACS development team,
  13  * check out http://www.gromacs.org for more information.
  14  *
  15  * This program is free software; you can redistribute it and/or
  16  * modify it under the terms of the GNU General Public License
  17  * as published by the Free Software Foundation; either version 2
  18  * of the License, or (at your option) any later version.
  19  *
  20  * If you want to redistribute modifications, please consider that
  21  * scientific software is very special. Version control is crucial -
  22  * bugs must be traceable. We will be happy to consider code for
  23  * inclusion in the official distribution, but derived work must not
  24  * be called official GROMACS. Details are found in the README & COPYING
  25  * files - if they are missing, get the official version at www.gromacs.org.
  26  *
  27  * To help us fund GROMACS development, we humbly ask that you cite
  28  * the papers on the package - you can find them in the top README file.
  29  *
  30  * For more info, check our website at http://www.gromacs.org
  31  *
  32  * And Hey:
  33  * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
  34  */
  35 #ifdef HAVE_CONFIG_H
  36 #include <config.h>
  37 #endif
  38
  39 #include <math.h>
  40
  41 #include "typedefs.h"
  42 #include "vec.h"
  43 #include "smalloc.h"
  44 #include "force.h"
  45 #include "gmx_omp_nthreads.h"
  46 #include "../nbnxn_consts.h"
  47 #include "nbnxn_kernel_common.h"
  48
  49 #ifdef GMX_X86_AVX_256
  50
  51 #include "nbnxn_kernel_x86_simd256.h"
  52
  53 /* Include all flavors of the 256-bit AVX kernel loops */
  54
  55 #define GMX_MM256_HERE
  56
  57 /* Analytical reaction-field kernels */
  58 #define CALC_COUL_RF
  59
  60 #include "nbnxn_kernel_x86_simd_includes.h"
  61
  62 #undef CALC_COUL_RF
  63
  64 /* Tabulated exclusion interaction electrostatics kernels */
  65 #define CALC_COUL_TAB
  66
  67 /* Single cut-off: rcoulomb = rvdw */
  68 #include "nbnxn_kernel_x86_simd_includes.h"
  69
  70 /* Twin cut-off: rcoulomb >= rvdw */
  71 #define VDW_CUTOFF_CHECK
  72 #include "nbnxn_kernel_x86_simd_includes.h"
  73 #undef VDW_CUTOFF_CHECK
  74
  75 #undef CALC_COUL_TAB
  76
  77
  78 typedef void (*p_nbk_func_ener)(const nbnxn_pairlist_t     *nbl,
  79                                 const nbnxn_atomdata_t     *nbat,
  80                                 const interaction_const_t  *ic,
  81                                 rvec                       *shift_vec,
  82                                 real                       *f,
  83                                 real                       *fshift,
  84                                 real                       *Vvdw,
  85                                 real                       *Vc);
  86
  87 typedef void (*p_nbk_func_noener)(const nbnxn_pairlist_t     *nbl,
  88                                   const nbnxn_atomdata_t     *nbat,
  89                                   const interaction_const_t  *ic,
  90                                   rvec                       *shift_vec,
  91                                   real                       *f,
  92                                   real                       *fshift);
  93
  94 enum { coultRF, coultTAB, coultTAB_TWIN, coultNR };
  95
  96
  97 static p_nbk_func_ener p_nbk_ener[coultNR][ljcrNR] =
  98 { { nbnxn_kernel_x86_simd256_rf_comb_geom_ener,
  99     nbnxn_kernel_x86_simd256_rf_comb_lb_ener,
 100     nbnxn_kernel_x86_simd256_rf_comb_none_ener },
 101   { nbnxn_kernel_x86_simd256_tab_comb_geom_ener,
 102     nbnxn_kernel_x86_simd256_tab_comb_lb_ener,
 103     nbnxn_kernel_x86_simd256_tab_twin_comb_none_ener },
 104   { nbnxn_kernel_x86_simd256_tab_twin_comb_geom_ener,
 105     nbnxn_kernel_x86_simd256_tab_twin_comb_lb_ener,
 106     nbnxn_kernel_x86_simd256_tab_twin_comb_none_ener }  };
 107
 108 static p_nbk_func_ener p_nbk_energrp[coultNR][ljcrNR] =
 109 { { nbnxn_kernel_x86_simd256_rf_comb_geom_energrp,
 110     nbnxn_kernel_x86_simd256_rf_comb_lb_energrp,
 111     nbnxn_kernel_x86_simd256_rf_comb_none_energrp },
 112   { nbnxn_kernel_x86_simd256_tab_comb_geom_energrp,
 113     nbnxn_kernel_x86_simd256_tab_comb_lb_energrp,
 114     nbnxn_kernel_x86_simd256_tab_comb_none_energrp },
 115   { nbnxn_kernel_x86_simd256_tab_twin_comb_geom_energrp,
 116     nbnxn_kernel_x86_simd256_tab_twin_comb_lb_energrp,
 117     nbnxn_kernel_x86_simd256_tab_twin_comb_none_energrp } };
 118
 119 static p_nbk_func_noener p_nbk_noener[coultNR][ljcrNR] =
 120 { { nbnxn_kernel_x86_simd256_rf_comb_geom_noener,
 121     nbnxn_kernel_x86_simd256_rf_comb_lb_noener,
 122     nbnxn_kernel_x86_simd256_rf_comb_none_noener },
 123   { nbnxn_kernel_x86_simd256_tab_comb_geom_noener,
 124     nbnxn_kernel_x86_simd256_tab_comb_lb_noener,
 125     nbnxn_kernel_x86_simd256_tab_comb_none_noener },
 126   { nbnxn_kernel_x86_simd256_tab_twin_comb_geom_noener,
 127     nbnxn_kernel_x86_simd256_tab_twin_comb_lb_noener,
 128     nbnxn_kernel_x86_simd256_tab_twin_comb_none_noener } };
 129
 130 #endif /* SSE */
 131
 132
 133 static void reduce_group_energies(int ng,int ng_2log,
 134                                   const real *VSvdw,const real *VSc,
 135                                   real *Vvdw,real *Vc)
 136 {
 137     int ng_p2,i,j,j0,j1,c,s;
 138
 139 #ifndef GMX_DOUBLE
 140 #define SIMD_WIDTH   4
 141 #define SIMD_WIDTH_2 2
 142 #else
 143 #define SIMD_WIDTH   2
 144 #define SIMD_WIDTH_2 1
 145 #endif
 146
 147     ng_p2 = (1<<ng_2log);
 148
 149     /* The size of the SSE energy group buffer array is:
 150      * stride^3*SIMD_WIDTH_2*SIMD_WIDTH
 151      */
 152     for(i=0; i<ng; i++)
 153     {
 154         for(j=0; j<ng; j++)
 155         {
 156             Vvdw[i*ng+j] = 0;
 157             Vc[i*ng+j]   = 0;
 158         }
 159
 160         for(j1=0; j1<ng; j1++)
 161         {
 162             for(j0=0; j0<ng; j0++)
 163             {
 164                 c = ((i*ng + j1)*ng_p2 + j0)*SIMD_WIDTH_2*SIMD_WIDTH;
 165                 for(s=0; s<SIMD_WIDTH_2; s++)
 166                 {
 167                     Vvdw[i*ng+j0] += VSvdw[c+0];
 168                     Vvdw[i*ng+j1] += VSvdw[c+1];
 169                     Vc  [i*ng+j0] += VSc  [c+0];
 170                     Vc  [i*ng+j1] += VSc  [c+1];
 171                     c += SIMD_WIDTH + 2;
 172                 }
 173             }
 174         }
 175     }
 176 }
 177
 178 void
 179 nbnxn_kernel_x86_simd256(nbnxn_pairlist_set_t       *nbl_list,
 180                          const nbnxn_atomdata_t     *nbat,
 181                          const interaction_const_t  *ic,
 182                          rvec                       *shift_vec,
 183                          int                        force_flags,
 184                          int                        clearF,
 185                          real                       *fshift,
 186                          real                       *Vc,
 187                          real                       *Vvdw)
 188 #ifdef GMX_X86_AVX_256
 189 {
 190     int              nnbl;
 191     nbnxn_pairlist_t **nbl;
 192     int coult;
 193     int nb;
 194
 195     nnbl = nbl_list->nnbl;
 196     nbl  = nbl_list->nbl;
 197
 198     if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
 199     {
 200         coult = coultRF;
 201     }
 202     else
 203     {
 204         if (ic->rcoulomb == ic->rvdw)
 205         {
 206             coult = coultTAB;
 207         }
 208         else
 209         {
 210             coult = coultTAB_TWIN;
 211         }
 212     }
 213
 214 #pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded))
 215     for(nb=0; nb<nnbl; nb++)
 216     {
 217         nbnxn_atomdata_output_t *out;
 218         real *fshift_p;
 219
 220         out = &nbat->out[nb];
 221
 222         if (clearF == enbvClearFYes)
 223         {
 224             clear_f(nbat,out->f);
 225         }
 226
 227         if ((force_flags & GMX_FORCE_VIRIAL) && nnbl == 1)
 228         {
 229             fshift_p = fshift;
 230         }
 231         else
 232         {
 233             fshift_p = out->fshift;
 234
 235             if (clearF == enbvClearFYes)
 236             {
 237                 clear_fshift(fshift_p);
 238             }
 239         }
 240
 241         /* With Ewald type electrostatics we the forces for excluded atom pairs
 242          * should not contribute to the virial sum. The exclusion forces
 243          * are not calculate in the energy kernels, but are in _noener.
 244          */
 245         if (!((force_flags & GMX_FORCE_ENERGY) ||
 246               (EEL_FULL(ic->eeltype) && (force_flags & GMX_FORCE_VIRIAL))))
 247         {
 248             /* Don't calculate energies */
 249             p_nbk_noener[coult][nbat->comb_rule](nbl[nb],nbat,
 250                                                  ic,
 251                                                  shift_vec,
 252                                                  out->f,
 253                                                  fshift_p);
 254         }
 255         else if (out->nV == 1 || !(force_flags & GMX_FORCE_ENERGY))
 256         {
 257             /* No energy groups */
 258             out->Vvdw[0] = 0;
 259             out->Vc[0]   = 0;
 260
 261             p_nbk_ener[coult][nbat->comb_rule](nbl[nb],nbat,
 262                                                ic,
 263                                                shift_vec,
 264                                                out->f,
 265                                                fshift_p,
 266                                                out->Vvdw,
 267                                                out->Vc);
 268         }
 269         else
 270         {
 271             /* Calculate energy group contributions */
 272             int i;
 273
 274             for(i=0; i<out->nVS; i++)
 275             {
 276                 out->VSvdw[i] = 0;
 277             }
 278             for(i=0; i<out->nVS; i++)
 279             {
 280                 out->VSc[i] = 0;
 281             }
 282
 283             p_nbk_energrp[coult][nbat->comb_rule](nbl[nb],nbat,
 284                                                   ic,
 285                                                   shift_vec,
 286                                                   out->f,
 287                                                   fshift_p,
 288                                                   out->VSvdw,
 289                                                   out->VSc);
 290
 291             reduce_group_energies(nbat->nenergrp,nbat->neg_2log,
 292                                   out->VSvdw,out->VSc,
 293                                   out->Vvdw,out->Vc);
 294         }
 295     }
 296
 297     if (force_flags & GMX_FORCE_ENERGY)
 298     {
 299         /* Reduce the energies */
 300         for(nb=0; nb<nnbl; nb++)
 301         {
 302             int i;
 303
 304             for(i=0; i<nbat->out[nb].nV; i++)
 305             {
 306                 Vvdw[i] += nbat->out[nb].Vvdw[i];
 307                 Vc[i]   += nbat->out[nb].Vc[i];
 308             }
 309         }
 310     }
 311 }
 312 #else
 313 {
 314     gmx_incons("nbnxn_kernel_x86_simd256 called while GROMACS was configured without AVX enabled");
 315 }
 316 #endif