Upped the version to 3.2.0
[gromacs.git] / src / gmxlib / mkinl_calcdist.c
blob09abb08177678ff70eef0b55a59e83ff100d953f
1 /*
2 * $Id$
3 *
4 * This source code is part of
5 *
6 * G R O M A C S
7 *
8 * GROningen MAchine for Chemical Simulations
9 *
10 * VERSION 3.2.0
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
33 * And Hey:
34 * GROningen Mixture of Alchemy and Childrens' Stories
36 /* This file is NOT threadsafe, but it is only used to create
37 * the innerloops during the build process, so it will never be
38 * executed by multiple threads.
41 #include "mkinl.h"
42 #include <string.h>
45 void call_vectorized_routines()
47 /* m was updated a last time after the assignments. Reduce
48 * it if we are using fortran, so we avoid referencing m-1
49 * repeatedly (and some functions need an adress to a variable
50 * rather than a value)
52 if(!bC)
53 decrement("m","1");
55 /* always start with vectorized invsqrt if we should do it
57 if(loop.vectorize_invsqrt) {
58 /* if we need the sqrt (non-reciprocal) later
59 * we have to save the square values in buf1 and write to buf2,
60 * otherwise we can gain some speed by writing back to the
61 * same array buf1
63 #ifdef HAVE_LIBMASSV_ANY
64 /* IBM provide special fast single and double routines in their vectorized MASS libraries */
65 #ifdef DOUBLE
66 code( bC ? "vrsqrt(%s,buf1,&m);" : "call vrsqrt(%s,buf1,m)",
67 OVERWRITE_RSQ ? "buf1" : "buf2" );
68 #else
69 code( bC ? "vsrsqrt(%s,buf1,&m);" : "call vsrsqrt(%s,buf1,m)",
70 OVERWRITE_RSQ ? "buf1" : "buf2" );
71 #endif /* End of IBM routines */
72 #elif defined(USE_AXP_ASM)
73 /* Compaq provide a math library too, but not an inverse square root or
74 * a fast reciprocal. Our own assembly routines are much faster! */
75 #ifdef DOUBLE
76 code( bC ? "sqrtiv_(buf1,%s,&m);" : "call sqrtiv(buf1,%s,m)",
77 OVERWRITE_RSQ ? "buf1" : "buf2" );
78 #else
79 code( bC ? "ssqrtiv_(buf1,%s,&m);" : "call ssqrtiv(buf1,%s,m)",
80 OVERWRITE_RSQ ? "buf1" : "buf2" );
81 #endif /* End of code for compaq alpha */
82 #else
83 /* this is a vanilla GMX routine, suitable for most chips w/o hardware sqrt.
84 * It is quite fast though - about 18 clocks on intel! */
86 code( bC ? "vecinvsqrt(buf1,%s,m);" : "call f77vecinvsqrt(buf1,%s,m)",
87 OVERWRITE_RSQ ? "buf1" : "buf2" );
89 #endif
90 } else if(loop.vectorize_recip) {
91 /* Only do this if we didnt do invsqrt. If we did,
92 * it is probably faster to square the value when
93 * we use it.
95 #ifdef HAVE_LIBMASSV_ANY
96 /* IBM stuff again. They provide a nice vectorized reciprocal, too */
97 #ifdef DOUBLE
98 code( bC ? "vrec(%s,buf1,&m);" : "call vrec(%s,buf1,m)",
99 OVERWRITE_RSQ ? "buf1" : "buf2" );
100 #else
101 code( bC ? "vsrec(%s,buf1,&m);" : "call vsrec(%s,buf1,m)",
102 OVERWRITE_RSQ ? "buf1" : "buf2" );
103 #endif
104 #else
105 /* on some architectures, though, the compiler can
106 * link with appropriate libs if the code is easily recognized as
107 * vectorizable.
109 vector_pragma();
110 code( bC ? "vecrecip(buf1,%s,m);" : "call f77vecrecip(buf1,%s,m)",
111 OVERWRITE_RSQ ? "buf1" : "buf2" );
113 #endif
118 int calc_rsq(int i, int j)
120 char lm[12],lm2[32],sqbuf[64];
121 int m;
123 sqbuf[0]=0;
124 for(m='x'; (m<='z'); m++) {
125 sprintf(lm,"d%c%d%d",m,i,j);
126 sprintf(lm2,"%s*%s",lm,lm);
127 add_to_buffer(sqbuf,lm2);
129 if(DO_VECTORIZE) {
130 assign(ARRAY(buf1,m),sqbuf);
131 increment("m","1");
132 } else
133 assign("rsq%d%d",sqbuf,i,j);
134 return 5;
138 int calc_dist()
140 int nflop=0;
141 int i,j,m;
142 char buf[10];
143 /* First calculate dr, and possibly store it */
145 comment("Calculate distance vector");
147 for(i=1;i<=loop.ni;i++)
148 for(j=1;j<=loop.nj;j++) {
149 /* first calculate dr */
150 for(m='x'; (m<='z'); m++) {
151 subtract("d%c%d%d","i%c%d","j%c%d",m,i,j,m,i,m,j);
152 nflop ++;
153 if(DO_VECTORIZE && DO_FORCE) {
154 assign(ARRAY(drbuf,m3),"d%c%d%d",m,i,j);
155 increment("m3","1");
158 nflop += calc_rsq(i,j);
159 if(DO_VECTORIZE) /* calc square separately later, but we */
160 increment("m","1"); /* need to know the number of items */
163 return nflop;
167 int calc_rinv_and_rinvsq()
169 int nflop=0;
170 int i,j;
172 if(!DO_VECTORIZE && !opt.delay_invsqrt) {
173 if(loop.invsqrt)
174 nflop += calc_invsqrt();
175 else if(loop.recip) {
176 for(j=1;j<=loop.nj;j++)
177 for(i=1;i<=loop.ni;i++) {
178 assign("rinvsq%d", "1.0/rsq%d",i*10+j,i*10+j);
179 #ifdef DOUBLE
180 nflop += 6;
181 #else
182 nflop += 3;
183 #endif
187 return nflop;