Upped the version to 3.2.0
[gromacs.git] / src / gmxlib / mkinl_innerloop.c
blob92eb3629983000ed284a3d5cc923f7b6dd732fd8
1 /*
2 * $Id$
3 *
4 * This source code is part of
5 *
6 * G R O M A C S
7 *
8 * GROningen MAchine for Chemical Simulations
9 *
10 * VERSION 3.2.0
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
33 * And Hey:
34 * GROningen Mixture of Alchemy and Childrens' Stories
/* This file is NOT thread-safe, but it is only used to create
 * the inner loops during the build process, so it will never be
 * executed by multiple threads.
 */
41 #include "mkinl.h"
43 void unpack_inner_data(bool calcdist,bool calcforce)
45 int nflop = 0;
47 assign("jnr", "%s%s", ARRAY(jjnr,k), bC ? "" : "+1");
48 if(DO_FORCE || calcdist)
49 assign("j3",bC ? "3*jnr" : "3*jnr-2");
51 if(arch.vectorcpu && calcforce) /* prefetch x is turned off in this case */
52 assign("kk","%d*(k-nj0)%s",(loop.sol==SOL_WATERWATER) ? 9 : 3 ,bC ? "" : "+1");
57 void fetch_coord(void)
59 int j,offset;
61 comment("Fetching coordinates");
63 for(j=1;j<=loop.nj;j++) {
64 offset=3*(j-1);
65 assign("jx%d",_array("pos", "j3+%d",offset),j);
66 assign("jy%d",_array("pos", "j3+%d",offset+1),j);
67 assign("jz%d",_array("pos", "j3+%d",offset+2),j);
72 void prefetch_forces(void)
75 int j;
76 if(DO_FORCE) {
77 comment("Prefetching forces");
79 for(j=1;j<=loop.nj;j++) {
80 assign("fjx%d", _array("faction","j3+%d",3*(j-1)),j);
81 assign("fjy%d", _array("faction","j3+%d",3*(j-1)+1),j);
82 assign("fjz%d", _array("faction","j3+%d",3*(j-1)+2),j);
85 /* no flops */
90 void unpack_vector_machine_forces(bool calcdist,bool calcforce)
92 /* this shouldnt be used with prefetching */
93 if(DO_FORCE) {
94 comment("Unpack forces for vectorization on a vector machine");
95 vector_pragma();
96 start_loop("k","nj0","nj1");
98 unpack_inner_data(calcdist,calcforce);
100 assign(ARRAY(fbuf,kk), ARRAY(faction,j3));
101 assign(ARRAY(fbuf,kk+1), ARRAY(faction,j3+1));
102 assign(ARRAY(fbuf,kk+2), ARRAY(faction,j3+2));
104 if(loop.sol==SOL_WATERWATER) {
105 assign(ARRAY(fbuf,kk+3), ARRAY(faction,j3+3));
106 assign(ARRAY(fbuf,kk+4), ARRAY(faction,j3+4));
107 assign(ARRAY(fbuf,kk+5), ARRAY(faction,j3+5));
108 assign(ARRAY(fbuf,kk+6), ARRAY(faction,j3+6));
109 assign(ARRAY(fbuf,kk+7), ARRAY(faction,j3+7));
110 assign(ARRAY(fbuf,kk+8), ARRAY(faction,j3+8));
112 end_loop();
118 void inner_loop(bool calcdist, bool calcforce)
120 int nflop=0;
121 char buf[50],buf2[50];
123 if(arch.vectorcpu && calcforce)
124 unpack_vector_machine_forces(calcdist,calcforce);
126 comment("Inner loop (over j-particles) starts right here");
128 vector_pragma();
130 start_loop("k", "nj0" , "nj1");
132 unpack_inner_data(calcdist,calcforce);
134 fetch_coord();
136 /* non-vectorized coordinate prefetching is issued from
137 * calc_interactions() in mkinl_interactions.c due to
138 * timing optimization.
141 if(calcdist)
142 nflop += calc_dist();
144 if(calcforce) {
146 nflop += calc_rinv_and_rinvsq();
148 if(DO_PREFETCH)
149 prefetch_forces();
151 nflop += calc_interactions();
153 /* Only print vanilla loop count */
154 if(!DO_VECTORIZE && !arch.vectorcpu) {
155 sprintf(buf,"Innerloop of %s costs %d flops",loopname,nflop);
156 comment(buf);
159 end_loop();