src/gmxlib/mkinl_calcdist.c

   1 /*
   2  * $Id$
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * GROningen Mixture of Alchemy and Childrens' Stories
  35  */
  36 /* This file is NOT threadsafe, but it is only used to create
  37  * the innerloops during the build process, so it will never be
  38  * executed by multiple threads.
  39  */
  40
  41 #include "mkinl.h"
  42 #include <string.h>
  43
  44
  45 void call_vectorized_routines()
  46 {
  47   /* m was updated a last time after the assignments. Reduce
  48    * it if we are using fortran, so we avoid referencing m-1
  49    * repeatedly (and some functions need an adress to a variable
  50    * rather than a value)
  51    */
  52   if(!bC)
  53     decrement("m","1");
  54
  55   /* always start with vectorized invsqrt if we should do it
  56    */
  57   if(loop.vectorize_invsqrt) {
  58     /* if we need the sqrt (non-reciprocal) later
  59      * we have to save the square values in buf1 and write to buf2,
  60      * otherwise we can gain some speed by writing back to the
  61      * same array buf1
  62      */
  63 #ifdef HAVE_LIBMASSV_ANY
  64     /* IBM provide special fast single and double routines in their vectorized MASS libraries */
  65 #ifdef DOUBLE
  66     code( bC ? "vrsqrt(%s,buf1,&m);" : "call vrsqrt(%s,buf1,m)",
  67           OVERWRITE_RSQ ? "buf1" : "buf2" );
  68 #else
  69     code( bC ? "vsrsqrt(%s,buf1,&m);" : "call vsrsqrt(%s,buf1,m)",
  70           OVERWRITE_RSQ ? "buf1" : "buf2" );
  71 #endif /* End of IBM routines */
  72 #elif defined(USE_AXP_ASM)
  73     /* Compaq provide a math library too, but not an inverse square root or
  74      * a fast reciprocal. Our own assembly routines are much faster! */
  75 #ifdef DOUBLE
  76     code( bC ? "sqrtiv_(buf1,%s,&m);" : "call sqrtiv(buf1,%s,m)",
  77           OVERWRITE_RSQ ? "buf1" : "buf2" );
  78 #else
  79     code( bC ? "ssqrtiv_(buf1,%s,&m);" : "call ssqrtiv(buf1,%s,m)",
  80           OVERWRITE_RSQ ? "buf1" : "buf2" );
  81 #endif /* End of code for compaq alpha */
  82 #else
  83     /* this is a vanilla GMX routine, suitable for most chips w/o hardware sqrt.
  84      * It is quite fast though - about 18 clocks on intel! */
  85
  86     code( bC ? "vecinvsqrt(buf1,%s,m);" : "call f77vecinvsqrt(buf1,%s,m)",
  87           OVERWRITE_RSQ ? "buf1" : "buf2" );
  88
  89 #endif
  90   } else if(loop.vectorize_recip) {
  91       /* Only do this if we didnt do invsqrt. If we did,
  92        * it is probably faster to square the value when
  93        * we use it.
  94        */
  95 #ifdef HAVE_LIBMASSV_ANY
  96     /* IBM stuff again. They provide a nice vectorized reciprocal, too */
  97 #ifdef DOUBLE
  98     code( bC ? "vrec(%s,buf1,&m);" : "call vrec(%s,buf1,m)",
  99           OVERWRITE_RSQ ? "buf1" : "buf2" );
 100 #else
 101     code( bC ? "vsrec(%s,buf1,&m);" : "call vsrec(%s,buf1,m)",
 102           OVERWRITE_RSQ ? "buf1" : "buf2" );
 103 #endif
 104 #else
 105         /* on some architectures, though, the compiler can
 106          * link with appropriate libs if the code is easily recognized as
 107          * vectorizable.
 108          */
 109     vector_pragma();
 110     code( bC ? "vecrecip(buf1,%s,m);" : "call f77vecrecip(buf1,%s,m)",
 111           OVERWRITE_RSQ ? "buf1" : "buf2" );
 112
 113 #endif
 114   }
 115 }
 116
 117
 118 int calc_rsq(int i, int j)
 119 {
 120   char lm[12],lm2[32],sqbuf[64];
 121   int  m;
 122
 123   sqbuf[0]=0;
 124   for(m='x'; (m<='z'); m++) {
 125     sprintf(lm,"d%c%d%d",m,i,j);
 126     sprintf(lm2,"%s*%s",lm,lm);
 127     add_to_buffer(sqbuf,lm2);
 128   }
 129   if(DO_VECTORIZE) {
 130     assign(ARRAY(buf1,m),sqbuf);
 131     increment("m","1");
 132   } else
 133     assign("rsq%d%d",sqbuf,i,j);
 134   return 5;
 135 }
 136
 137
 138 int calc_dist()
 139 {
 140   int nflop=0;
 141   int i,j,m;
 142   char buf[10];
 143   /* First calculate dr, and possibly store it */
 144
 145   comment("Calculate distance vector");
 146
 147   for(i=1;i<=loop.ni;i++)
 148     for(j=1;j<=loop.nj;j++) {
 149       /* first calculate dr */
 150       for(m='x'; (m<='z'); m++) {
 151         subtract("d%c%d%d","i%c%d","j%c%d",m,i,j,m,i,m,j);
 152         nflop ++;
 153         if(DO_VECTORIZE && DO_FORCE) {
 154           assign(ARRAY(drbuf,m3),"d%c%d%d",m,i,j);
 155           increment("m3","1");
 156         }
 157       }
 158       nflop += calc_rsq(i,j);
 159       if(DO_VECTORIZE) /* calc square separately later, but we  */
 160         increment("m","1"); /* need to know the number of items      */
 161     }
 162
 163   return nflop;
 164 }
 165
 166
 167 int calc_rinv_and_rinvsq()
 168 {
 169   int nflop=0;
 170   int i,j;
 171
 172   if(!DO_VECTORIZE && !opt.delay_invsqrt) {
 173     if(loop.invsqrt)
 174       nflop += calc_invsqrt();
 175     else if(loop.recip) {
 176       for(j=1;j<=loop.nj;j++)
 177         for(i=1;i<=loop.ni;i++) {
 178           assign("rinvsq%d", "1.0/rsq%d",i*10+j,i*10+j);
 179 #ifdef DOUBLE
 180           nflop += 6;
 181 #else
 182           nflop += 3;
 183 #endif
 184         }
 185     }
 186   }
 187   return nflop;
 188 }