4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * GROningen Mixture of Alchemy and Childrens' Stories
36 /* This file is NOT threadsafe, but it is only used to create
37 * the innerloops during the build process, so it will never be
38 * executed by multiple threads.
45 void call_vectorized_routines()
47 /* m was updated a last time after the assignments. Reduce
48 * it if we are using fortran, so we avoid referencing m-1
49 * repeatedly (and some functions need an adress to a variable
50 * rather than a value)
55 /* always start with vectorized invsqrt if we should do it
57 if(loop
.vectorize_invsqrt
) {
58 /* if we need the sqrt (non-reciprocal) later
59 * we have to save the square values in buf1 and write to buf2,
60 * otherwise we can gain some speed by writing back to the
63 #ifdef HAVE_LIBMASSV_ANY
64 /* IBM provide special fast single and double routines in their vectorized MASS libraries */
66 code( bC
? "vrsqrt(%s,buf1,&m);" : "call vrsqrt(%s,buf1,m)",
67 OVERWRITE_RSQ
? "buf1" : "buf2" );
69 code( bC
? "vsrsqrt(%s,buf1,&m);" : "call vsrsqrt(%s,buf1,m)",
70 OVERWRITE_RSQ
? "buf1" : "buf2" );
71 #endif /* End of IBM routines */
72 #elif defined(USE_AXP_ASM)
73 /* Compaq provide a math library too, but not an inverse square root or
74 * a fast reciprocal. Our own assembly routines are much faster! */
76 code( bC
? "sqrtiv_(buf1,%s,&m);" : "call sqrtiv(buf1,%s,m)",
77 OVERWRITE_RSQ
? "buf1" : "buf2" );
79 code( bC
? "ssqrtiv_(buf1,%s,&m);" : "call ssqrtiv(buf1,%s,m)",
80 OVERWRITE_RSQ
? "buf1" : "buf2" );
81 #endif /* End of code for compaq alpha */
83 /* this is a vanilla GMX routine, suitable for most chips w/o hardware sqrt.
84 * It is quite fast though - about 18 clocks on intel! */
86 code( bC
? "vecinvsqrt(buf1,%s,m);" : "call f77vecinvsqrt(buf1,%s,m)",
87 OVERWRITE_RSQ
? "buf1" : "buf2" );
90 } else if(loop
.vectorize_recip
) {
91 /* Only do this if we didnt do invsqrt. If we did,
92 * it is probably faster to square the value when
95 #ifdef HAVE_LIBMASSV_ANY
96 /* IBM stuff again. They provide a nice vectorized reciprocal, too */
98 code( bC
? "vrec(%s,buf1,&m);" : "call vrec(%s,buf1,m)",
99 OVERWRITE_RSQ
? "buf1" : "buf2" );
101 code( bC
? "vsrec(%s,buf1,&m);" : "call vsrec(%s,buf1,m)",
102 OVERWRITE_RSQ
? "buf1" : "buf2" );
105 /* on some architectures, though, the compiler can
106 * link with appropriate libs if the code is easily recognized as
110 code( bC
? "vecrecip(buf1,%s,m);" : "call f77vecrecip(buf1,%s,m)",
111 OVERWRITE_RSQ
? "buf1" : "buf2" );
118 int calc_rsq(int i
, int j
)
120 char lm
[12],lm2
[32],sqbuf
[64];
124 for(m
='x'; (m
<='z'); m
++) {
125 sprintf(lm
,"d%c%d%d",m
,i
,j
);
126 sprintf(lm2
,"%s*%s",lm
,lm
);
127 add_to_buffer(sqbuf
,lm2
);
130 assign(ARRAY(buf1
,m
),sqbuf
);
133 assign("rsq%d%d",sqbuf
,i
,j
);
143 /* First calculate dr, and possibly store it */
145 comment("Calculate distance vector");
147 for(i
=1;i
<=loop
.ni
;i
++)
148 for(j
=1;j
<=loop
.nj
;j
++) {
149 /* first calculate dr */
150 for(m
='x'; (m
<='z'); m
++) {
151 subtract("d%c%d%d","i%c%d","j%c%d",m
,i
,j
,m
,i
,m
,j
);
153 if(DO_VECTORIZE
&& DO_FORCE
) {
154 assign(ARRAY(drbuf
,m3
),"d%c%d%d",m
,i
,j
);
158 nflop
+= calc_rsq(i
,j
);
159 if(DO_VECTORIZE
) /* calc square separately later, but we */
160 increment("m","1"); /* need to know the number of items */
167 int calc_rinv_and_rinvsq()
172 if(!DO_VECTORIZE
&& !opt
.delay_invsqrt
) {
174 nflop
+= calc_invsqrt();
175 else if(loop
.recip
) {
176 for(j
=1;j
<=loop
.nj
;j
++)
177 for(i
=1;i
<=loop
.ni
;i
++) {
178 assign("rinvsq%d", "1.0/rsq%d",i
*10+j
,i
*10+j
);