2 * Author: Abhinav S Bhatele
3 * Date Created: March 09th, 2009
15 * X: blockDimX, arrayDimX --> wrap_x
16 * Y: blockDimY, arrayDimY --> wrap_y
19 #include "jacobi2d.decl.h"
20 #include "TopoManager.h"
22 /*readonly*/ CProxy_Main mainProxy;
23 /*readonly*/ int blockDimX;
24 /*readonly*/ int blockDimY;
25 /*readonly*/ int arrayDimX;
26 /*readonly*/ int arrayDimY;
28 // specify the number of worker chares in each dimension
29 /*readonly*/ int num_chare_x;
30 /*readonly*/ int num_chare_y;
32 /*readonly*/ int globalBarrier;
33 /*readonly*/ int localBarrier;
35 // We want to wrap entries around, and because mod operator %
36 // sometimes misbehaves on negative values. -1 maps to the highest value.
37 #define wrap_x(a) (((a)+num_chare_x)%num_chare_x)
38 #define wrap_y(a) (((a)+num_chare_y)%num_chare_y)
50 class Main : public CBase_Main
57 if ( (m->argc < 4) || (m->argc > 7) ) {
58 CkPrintf("%s [array_size] [block_size] +[no]barrier [+[no]local]\n", m->argv[0]);
59 CkPrintf("%s [array_size_X] [array_size_Y] [block_size_X] [block_size_Y] +[no]barrier [+[no]local]\n", m->argv[0]);
64 arrayDimY = arrayDimX = atoi(m->argv[1]);
65 blockDimY = blockDimX = atoi(m->argv[2]);
66 if(strcasecmp(m->argv[3], "+nobarrier") == 0) {
68 if(strcasecmp(m->argv[4], "+nolocal") == 0)
75 if(globalBarrier == 0) CkPrintf("\nSTENCIL COMPUTATION WITH NO BARRIERS\n");
78 arrayDimX = atoi(m->argv[1]);
79 arrayDimY = atoi(m->argv[2]);
80 blockDimX = atoi(m->argv[3]);
81 blockDimY = atoi(m->argv[4]);
82 if(strcasecmp(m->argv[5], "+nobarrier") == 0) {
84 if(strcasecmp(m->argv[6], "+nolocal") == 0)
91 if(globalBarrier == 0 && localBarrier == 0)
92 CkPrintf("\nSTENCIL COMPUTATION WITH NO BARRIERS\n");
94 CkPrintf("\nSTENCIL COMPUTATION WITH LOCAL BARRIERS\n");
97 if (arrayDimX < blockDimX || arrayDimX % blockDimX != 0)
98 CkAbort("array_size_X % block_size_X != 0!");
99 if (arrayDimY < blockDimY || arrayDimY % blockDimY != 0)
100 CkAbort("array_size_Y % block_size_Y != 0!");
102 // store the main proxy
103 mainProxy = thisProxy;
105 num_chare_x = arrayDimX / blockDimX;
106 num_chare_y = arrayDimY / blockDimY;
109 CkPrintf("Running Jacobi on %d processors with (%d, %d) elements\n", CkNumPes(), num_chare_x, num_chare_y);
110 CkPrintf("Array Dimensions: %d %d\n", arrayDimX, arrayDimY);
111 CkPrintf("Block Dimensions: %d %d\n", blockDimX, blockDimY);
113 // Create new array of worker chares
115 CProxy_JacobiMap map = CProxy_JacobiMap::ckNew(num_chare_x, num_chare_y);
116 CkPrintf("Topology Mapping is being done ... \n");
117 CkArrayOptions opts(num_chare_x, num_chare_y);
119 array = CProxy_Jacobi::ckNew(opts);
121 array = CProxy_Jacobi::ckNew(num_chare_x, num_chare_y);
124 //Start the computation
126 array.begin_iteration();
129 // Each worker reports back to here when it completes an iteration
130 void report(double error) {
132 if(iterations == WARM_ITER)
133 startTime = CmiWallTimer();
135 if((globalBarrier == 1 && iterations < MAX_ITER) || (globalBarrier == 0 && iterations <= WARM_ITER)) {
136 if(iterations > WARM_ITER) CkPrintf("Start of iteration %d\n", iterations);
137 BgPrintf("BgPrint> Start of iteration at %f\n");
138 array.begin_iteration();
140 CkPrintf("Completed %d iterations\n", MAX_ITER-1);
141 endTime = CmiWallTimer();
142 CkPrintf("Time elapsed per iteration: %f\n", (endTime - startTime)/(MAX_ITER-1-WARM_ITER));
149 class Jacobi: public CBase_Jacobi {
157 double **temperature;
158 double **new_temperature;
163 // Constructor, initialize values
166 // allocate two dimensional arrays
167 temperature = new double*[blockDimX+2];
168 new_temperature = new double*[blockDimX+2];
169 for (i=0; i<blockDimX+2; i++) {
170 temperature[i] = new double[blockDimY+2];
171 new_temperature[i] = new double[blockDimY+2];
173 for(i=0;i<blockDimX+2; i++) {
174 for(j=0;j<blockDimY+2; j++) {
175 temperature[i][j] = 0.5;
176 new_temperature[i][j] = 0.5;
189 Jacobi(CkMigrateMessage* m) {}
192 for (int i=0; i<blockDimX+2; i++) {
193 delete [] temperature[i];
194 delete [] new_temperature[i];
196 delete [] temperature;
197 delete [] new_temperature;
200 // Perform one iteration of work
201 void begin_iteration(void) {
202 if(localBarrier == 1 && iterations > WARM_ITER) {
203 _TRACE_BG_TLINE_END(&ackLogs[readyToSend]);
207 if(readyToSend == 5) {
208 if(thisIndex.x == 0 && thisIndex.y == 0 && (globalBarrier == 0 && iterations > WARM_ITER)) {
209 CkPrintf("Start of iteration %d\n", iterations);
210 BgPrintf("BgPrint> Start of iteration at %f\n");
213 if(localBarrier == 1 && iterations > WARM_ITER) {
215 _TRACE_BG_END_EXECUTE(1);
216 _TRACE_BG_BEGIN_EXECUTE_NOMSG("start next iteration", &curLog);
217 for(int i=0; i<5; i++)
218 _TRACE_BG_ADD_BACKWARD_DEP(ackLogs[i]);
220 if(localBarrier == 1 && iterations >= WARM_ITER) readyToSend = 0;
223 // Copy left column and right column into temporary arrays
224 double *left_edge = new double[blockDimX];
225 double *right_edge = new double[blockDimX];
227 for(int i=0; i<blockDimX; i++){
228 left_edge[i] = temperature[i+1][1];
229 right_edge[i] = temperature[i+1][blockDimY];
236 thisProxy(x, wrap_y(y-1)).receiveGhosts(RIGHT, blockDimX, left_edge);
237 // Send my right edge
238 thisProxy(x, wrap_y(y+1)).receiveGhosts(LEFT, blockDimX, right_edge);
240 thisProxy(wrap_x(x-1), y).receiveGhosts(BOTTOM, blockDimY, &temperature[1][1]);
241 // Send my bottom edge
242 thisProxy(wrap_x(x+1), y).receiveGhosts(TOP, blockDimY, &temperature[blockDimX][1]);
244 delete [] right_edge;
250 void receiveGhosts(int dir, int size, double gh[]) {
252 _TRACE_BG_TLINE_END(&sendLogs[dir-1]);
257 for(i=0; i<size; i++)
258 temperature[i+1][0] = gh[i];
262 for(i=0; i<size; i++)
263 temperature[i+1][blockDimY+1] = gh[i];
267 for(j=0; j<size; j++)
268 temperature[0][j+1] = gh[j];
272 for(j=0; j<size; j++)
273 temperature[blockDimX+1][j+1] = gh[j];
281 void check_and_compute() {
282 double error = 0.0, max_error = 0.0;
285 if (arrived_left >=1 && arrived_right >=1 && arrived_top >=1 && arrived_bottom >= 1) {
291 _TRACE_BG_END_EXECUTE(1);
292 _TRACE_BG_BEGIN_EXECUTE_NOMSG("start computation", &curLog);
293 for(int i=0; i<4; i++)
294 _TRACE_BG_ADD_BACKWARD_DEP(sendLogs[i]);
296 if(localBarrier == 1 && iterations > WARM_ITER) {
299 thisProxy(x, wrap_y(y-1)).begin_iteration();
300 thisProxy(x, wrap_y(y+1)).begin_iteration();
301 thisProxy(wrap_x(x-1), y).begin_iteration();
302 thisProxy(wrap_x(x+1), y).begin_iteration();
307 for(int i=1; i<blockDimX+1; i++) {
308 for(int j=1; j<blockDimY+1; j++) {
309 error = fabs(new_temperature[i][j] - temperature[i][j]);
310 if(error > max_error) {
318 temperature = new_temperature;
319 new_temperature = tmp;
323 if(globalBarrier == 1 || (globalBarrier==0 && (iterations <= WARM_ITER || iterations >= MAX_ITER))) {
324 contribute(sizeof(double), &max_error, CkReduction::max_double,
325 CkCallback(CkReductionTarget(Main, report), mainProxy));
332 // Check to see if we have received all neighbor values yet
333 // If all neighbor values have been received, we update our values and proceed
334 void compute_kernel()
336 for(int i=1; i<blockDimX+1; i++) {
337 for(int j=1; j<blockDimY+1; j++) {
338 // update my value based on the surrounding values
339 new_temperature[i][j] = (temperature[i-1][j]+temperature[i+1][j]+temperature[i][j-1]+temperature[i][j+1]+temperature[i][j]) * 0.2;
344 // Enforce some boundary conditions
347 if(thisIndex.y == 0 && thisIndex.x < num_chare_x/2) {
348 for(int i=1; i<=blockDimX; i++)
349 temperature[i][1] = 1.0;
352 if(thisIndex.x == num_chare_x-1 && thisIndex.y >= num_chare_y/2) {
353 for(int j=1; j<=blockDimY; j++)
354 temperature[blockDimX][j] = 0.0;
364 class JacobiMap : public CkArrayMap {
369 JacobiMap(int x, int y) {
372 mapping = new int*[x];
374 mapping[i] = new int[y];
377 // we are assuming that the no. of chares in each dimension is a
378 // multiple of the torus dimension
379 int dimNX = tmgr.getDimNX();
380 int dimNY = tmgr.getDimNY();
381 int dimNZ = tmgr.getDimNZ();
382 int dimNT = tmgr.getDimNT();
384 int numCharesPerPeX = x / dimNX;
385 int numCharesPerPeY = numCharesPerPeX / dimNY;
386 int numCharesPerPeZ = y / dimNZ;
387 int numCharesPerPeT = numCharesPerPeZ / dimNT;
389 if(CkMyPe()==0) CkPrintf("%d %d %d %d : %d %d %d %d\n", dimNX, dimNY, dimNZ, dimNT, numCharesPerPeX, numCharesPerPeY, numCharesPerPeZ, numCharesPerPeT);
391 for(int i=0; i<dimNX; i++)
392 for(int j=0; j<dimNY; j++)
393 for(int k=0; k<dimNZ; k++)
394 for(int l=0; l<dimNT; l++)
395 for(int ci = i*numCharesPerPeX+j*numCharesPerPeY; ci < i*numCharesPerPeX+(j+1)*numCharesPerPeY; ci++)
396 for(int cj = k*numCharesPerPeZ+l*numCharesPerPeT; cj < k*numCharesPerPeZ+(l+1)*numCharesPerPeT; cj++)
397 mapping[ci][cj] = tmgr.coordinatesToRank(i, j, k, l);
399 /*if(CkMyPe() == 0) {
400 for(int ci=0; ci<x; ci++) {
401 for(int cj=0; cj<y; cj++) {
402 CkPrintf("%d ", mapping[ci][cj]);
411 for (int i=0; i<X; i++)
412 delete [] mapping[i];
416 int procNum(int, const CkArrayIndex &idx) {
417 int *index = (int *)idx.data();
418 return mapping[index[0]][index[1]];
422 #include "jacobi2d.def.h"