8 //#include "LBDatabase.h"
12 extern "C" char *_lbtopo; /* topology name string */
14 int LBTopology::get_hop_count(int src,int dest)
22 npe = max_neighbors();
23 visited_srcs = new int[npes];
25 int count = rec_hop_count(src,dest,npe,1,visited_srcs,999999);
26 delete [] visited_srcs;
31 int LBTopology::rec_hop_count(int src,int dest,int max_neigh,int count,int *visited_srcs,int min_hop_cnt)
33 int *pes = new int[max_neigh];
34 //int min_hop_cnt=999999;
40 neighbors(src,pes,neigh_cnt);
42 visited_srcs[count-1]=src;
44 for(i=0;i<neigh_cnt;i++)
49 for(i=0;i<neigh_cnt;i++)
51 for(int j=0;j<count;j++)
52 if(visited_srcs[j]==pes[i])
59 if(min_hop_cnt > count+1){
60 ret_val=rec_hop_count(pes[i],dest,max_neigh,count+1,visited_srcs,min_hop_cnt);
61 if(ret_val < min_hop_cnt)
62 min_hop_cnt = ret_val;
72 double LBTopology::per_hop_delay(int last_hop)
75 return (HOP_LINK_DELAY + HOP_PROC_DELAY);
77 return HOP_LINK_DELAY;
80 void LBTopology::get_pairwise_hop_count(double **distance)
87 queueNode(int i,int d): index(i), dist(d), next(NULL) {}
90 bool *visited=new bool[npes];
91 int *neigh=new int[max_neighbors()];
94 for(int i=0;i<npes;i++)
96 //Init data structures for BFS from i-th proc
97 for(int j=0;j<npes;j++)
100 queueNode *q=new queueNode(i,0);
105 // Perform BFS until queue is empty
108 neighbors(q->index,neigh,num_neighbors);
109 for(int j=0;j<num_neighbors;j++)
111 if(!visited[neigh[j]])
113 visited[neigh[j]]=true;
114 distance[i][neigh[j]]=q->dist+1;
115 queueNode *qnew=new queueNode(neigh[j],q->dist+1);
129 //smp - assume 1,2,3 or 4 processors per node
132 class LBTopo_smp_n: public LBTopology {
134 LBTopo_smp_n(int p): LBTopology(p) {}
135 virtual int max_neighbors() { return npes - 1; }
137 virtual void neighbors(int mype, int* _n, int &nb){
139 for(int i=1; i<=ppn; i++)
141 _n[nb++] = (mype+i)%npes;
145 int get_hop_count(int src,int dest){
147 //CmiPrintf("in smp get_hop_count\n");
152 //CmiPrintf("2 returned\n");
156 //CmiPrintf("1 returned\n");
162 typedef LBTopo_smp_n<1> LBTopo_smp_n_1;
163 typedef LBTopo_smp_n<2> LBTopo_smp_n_2;
164 typedef LBTopo_smp_n<3> LBTopo_smp_n_3;
165 typedef LBTopo_smp_n<4> LBTopo_smp_n_4;
166 typedef LBTopo_smp_n<5> LBTopo_smp_n_5;
167 typedef LBTopo_smp_n<6> LBTopo_smp_n_6;
168 typedef LBTopo_smp_n<7> LBTopo_smp_n_7;
169 typedef LBTopo_smp_n<8> LBTopo_smp_n_8;
170 typedef LBTopo_smp_n<9> LBTopo_smp_n_9;
171 typedef LBTopo_smp_n<10> LBTopo_smp_n_10;
173 LBTOPO_MACRO(LBTopo_smp_n_1)
174 LBTOPO_MACRO(LBTopo_smp_n_2)
175 LBTOPO_MACRO(LBTopo_smp_n_3)
176 LBTOPO_MACRO(LBTopo_smp_n_4)
177 LBTOPO_MACRO(LBTopo_smp_n_5)
178 LBTOPO_MACRO(LBTopo_smp_n_6)
179 LBTOPO_MACRO(LBTopo_smp_n_7)
180 LBTOPO_MACRO(LBTopo_smp_n_8)
181 LBTOPO_MACRO(LBTopo_smp_n_9)
182 LBTOPO_MACRO(LBTopo_smp_n_10)
187 LBTOPO_MACRO(LBTopo_ring)
189 int LBTopo_ring::max_neighbors()
191 if (npes > 2) return 2;
192 else return (npes-1);
195 void LBTopo_ring::neighbors(int mype, int* _n, int &nb)
198 if (npes>1) _n[nb++] = (mype + npes -1) % npes;
199 if (npes>2) _n[nb++] = (mype + 1) % npes;
202 int LBTopo_ring::get_hop_count(int src,int dest){
205 if(dist<0) dist=-dist;
207 if((npes-dist) < dist)
215 LBTOPO_MACRO(LBTopo_torus2d)
217 LBTopo_torus2d::LBTopo_torus2d(int p): LBTopology(p)
219 width = (int)sqrt(p*1.0);
220 if (width * width < npes) width++;
223 int LBTopo_torus2d::max_neighbors()
228 int LBTopo_torus2d::goodcoor(int x, int y)
230 if (x<0 || x>=width) return -1;
231 if (y<0 || y>=width) return -1;
232 int next = x*width + y;
233 if (next<npes && next>=0) return next;
237 static int checkuniq(int *arr, int nb, int val) {
238 for (int i=0;i<nb;i++) if (arr[i]==val) return 0;
242 void LBTopo_torus2d::neighbors(int mype, int* _n, int &nb)
248 for (int i=-1; i<=1; i+=2) {
252 while (goodcoor(x1, y)==-1) x1--;
254 else if (goodcoor(x1, y) == -1) x1=0;
255 next = goodcoor(x1, y);
256 CmiAssert(next != -1);
257 if (next != mype && checkuniq(_n, nb, next)) _n[nb++] = next;
262 while (goodcoor(x, y1)==-1) y1--;
264 else if (goodcoor(x, y1) == -1) y1=0;
265 next = goodcoor(x, y1);
266 CmiAssert(next != -1);
267 if (next != mype && checkuniq(_n, nb, next)) _n[nb++] = next;
271 int LBTopo_torus2d::get_hop_count(int src,int dest){
272 int xpos_src,xpos_dest;
273 int ypos_src,ypos_dest;
287 xpos_dest=dest%width;
288 ypos_dest=dest/width;
290 xdist = xpos_dest-xpos_src;
291 if(xdist<0) xdist=-xdist;
292 if((width-xdist) < xdist)
295 ydist = ypos_dest-ypos_src;
296 if(ydist<0) ydist=-ydist;
298 int lastpos=(npes-1)%width;
301 if(xpos_src<=lastpos && xpos_dest<=lastpos)
302 otherylen=((npes-1)/width)+1-ydist;
304 if(ypos_dest==((npes-1)/width))
305 otherylen=((npes-1)/width)+1-ydist;
307 otherylen=((npes-1)/width)-ydist;
310 if(otherylen < ydist)
314 int sdist=0,adist=0,bdist=0,cdist=0,ddist=0;
316 if(xpos_src>lastpos && xpos_dest>lastpos){
318 if((width-sdist) < sdist)
321 adist = ((npes-1)/width)-ypos_src;
322 if(adist<0) adist=-adist;
323 if(ypos_src+1 < adist)
328 cdist = ((npes-1)/width)-ypos_dest;
329 if(cdist<0) cdist=-cdist;
330 if(ypos_dest+1 < cdist)
333 ddist = xpos_dest-lastpos;
334 if(ddist<0) ddist=-ddist;
335 if((width-ddist) < ddist)
339 if(xpos_src>lastpos){
345 xpos_dest=dest%width;
346 ypos_dest=dest/width;
348 adist = ((npes-1)/width)-ypos_src;
349 if(adist<0) adist=-adist;
350 if(ypos_src+1 < adist)
353 if(xpos_dest<=lastpos){
354 bdist = xpos_dest-xpos_src;
355 if(bdist<0) bdist=-bdist;
356 if((lastpos+1-bdist) < bdist)
357 bdist = lastpos+1-bdist;
359 cdist = ((npes-1)/width)-ypos_dest;
360 if(cdist<0) cdist=-cdist;
361 if(ypos_dest+1 < cdist)
367 bdist = lastpos-xpos_src;
368 if(bdist<0) bdist=-bdist;
369 if((xpos_src+1) < bdist)
372 cdist = ((npes-1)/width)-ypos_dest;
373 if(cdist<0) cdist=-cdist;
374 if(ypos_dest+1 < cdist)
377 ddist = xpos_dest-lastpos;
378 if(ddist<0) ddist=-ddist;
379 if((width-ddist) < ddist)
384 if((sdist+adist+bdist+cdist+ddist) < (xdist+ydist))
385 return (sdist+adist+bdist+cdist+ddist);
387 return (xdist+ydist);
393 LBTOPO_MACRO(LBTopo_torus3d)
395 LBTopo_torus3d::LBTopo_torus3d(int p): LBTopology(p)
398 while ( (width+1) * (width+1) * (width+1) <= npes) width++;
399 if (width * width * width < npes) width++;
402 int LBTopo_torus3d::max_neighbors()
407 int LBTopo_torus3d::goodcoor(int x, int y, int z)
409 if (x<0 || x>=width) return -1;
410 if (y<0 || y>=width) return -1;
411 if (z<0 || z>=width) return -1;
412 int next = x*width*width + y*width + z;
413 if (next<npes && next>=0) return next;
417 void LBTopo_torus3d::neighbors(int mype, int* _n, int &nb)
420 int x = mype/(width*width);
421 int k = mype%(width*width);
426 for (int i=-1; i<=1; i+=2) {
430 while (goodcoor(x1, y, z)==-1) x1--;
432 else if (goodcoor(x1, y, z) == -1) x1=0;
433 next = goodcoor(x1, y, z);
434 CmiAssert(next != -1);
435 if (next != mype && checkuniq(_n, nb, next)) _n[nb++] = next;
440 while (goodcoor(x, y1, z)==-1) y1--;
442 else if (goodcoor(x, y1, z) == -1) y1=0;
443 next = goodcoor(x, y1, z);
444 CmiAssert(next != -1);
445 if (next != mype && checkuniq(_n, nb, next)) _n[nb++] = next;
450 while (goodcoor(x, y, z1)==-1) z1--;
452 else if (goodcoor(x, y, z1) == -1) z1=0;
453 next = goodcoor(x, y, z1);
454 CmiAssert(next != -1);
455 if (next != mype && checkuniq(_n, nb, next)) _n[nb++] = next;
460 // Works only for perfect cube number of processor topologies
461 int LBTopo_torus3d::get_hop_count(int src,int dest){
463 int x_src = src/(width*width);
464 int k_src = src%(width*width);
465 int y_src = k_src/width;
466 int z_src = k_src%width;
468 int x_dest = dest/(width*width);
469 int k_dest = dest%(width*width);
470 int y_dest = k_dest/width;
471 int z_dest = k_dest%width;
473 int xdist=0,ydist=0,zdist=0;
475 //CmiPrintf("just a chk........\n");
476 xdist = x_dest-x_src;
477 if(xdist<0) xdist=-xdist;
478 if((width-xdist) < xdist)
481 ydist = y_dest-y_src;
482 if(ydist<0) ydist=-ydist;
483 if((width-ydist) < ydist)
486 zdist = z_dest-z_src;
487 if(zdist<0) zdist=-zdist;
488 if((width-zdist) < zdist)
491 return (xdist+ydist+zdist);
497 LBTOPO_MACRO(LBTopo_mesh3d)
499 LBTopo_mesh3d::LBTopo_mesh3d(int p): LBTopology(p)
502 while ( (width+1) * (width+1) * (width+1) <= npes) width++;
503 if (width * width * width < npes) width++;
506 int LBTopo_mesh3d::max_neighbors()
511 int LBTopo_mesh3d::goodcoor(int x, int y, int z)
513 if (x<0 || x>=width) return -1;
514 if (y<0 || y>=width) return -1;
515 if (z<0 || z>=width) return -1;
516 int next = z*width*width + y*width + x;
517 if (next<npes && next>=0) return next;
521 void LBTopo_mesh3d::neighbors(int mype, int* _n, int &nb)
524 int z = mype/(width*width);
525 int k = mype%(width*width);
531 for (int i=-1; i<=1; i+=2) {
537 //while (goodcoor(x1, y, z)==-1) x1--;
540 else if (goodcoor(x1, y, z) == -1) { x1=0; isNeigh=0; }
541 next = goodcoor(x1, y, z);
542 CmiAssert(next != -1);
543 if (next != mype && isNeigh && checkuniq(_n, nb, next)) _n[nb++] = next;
549 //while (goodcoor(x, y1, z)==-1) y1--;
553 else if (goodcoor(x, y1, z) == -1) { y1=0; isNeigh=0; }
554 next = goodcoor(x, y1, z);
555 CmiAssert(next != -1);
556 if (next != mype && isNeigh && checkuniq(_n, nb, next)) _n[nb++] = next;
562 //while (goodcoor(x, y, z1)==-1) z1--;
566 else if (goodcoor(x, y, z1) == -1) { z1=0; isNeigh=0; }
567 next = goodcoor(x, y, z1);
568 CmiAssert(next != -1);
569 if (next != mype && isNeigh && checkuniq(_n, nb, next)) _n[nb++] = next;
576 template <int dimension>
577 class LBTopo_torus_nd: public LBTopology {
579 // inherited int npes;
581 int VirtualProcessorCount;
584 int GetNeighborID(int ProcessorID, int number) {
585 CmiAssert(number>=0 && number<max_neighbors());
586 CmiAssert(ProcessorID>=0 && ProcessorID<npes);
587 get_processor_coordinates(ProcessorID, TempCo);
589 int index = number/2;
590 int displacement = (number%2)? -1: 1;
592 TempCo[index] = (TempCo[index] + displacement + Cardinality[index]) % Cardinality[index];
593 get_processor_id(TempCo, &ProcessorID);
594 } while (ProcessorID >= npes);
598 LBTopo_torus_nd(int p): LBTopology(p) /*inherited :npes(p) */ {
600 CmiAssert(dimension>=1 && dimension<=16);
603 Cardinality = new int[dimension];
604 TempCo = new int[dimension];
606 for(i=0;i<dimension;i++) {
607 Cardinality[i] = (int)ceil(pow(pp,1.0/(dimension-i))-1e-5);
608 pp = pp / Cardinality[i];
610 VirtualProcessorCount = 1;
611 for(i=0;i<dimension;i++) {
612 VirtualProcessorCount *= Cardinality[i];
616 delete[] Cardinality;
619 virtual int max_neighbors() {
622 virtual void neighbors(int mype, int* _n, int &nb) {
624 for(int i=0;i<dimension*2;i++) {
625 _n[nb] = GetNeighborID(mype, i);
626 if (_n[nb]!=mype && (nb==0 || _n[nb-1]!=_n[nb]) ) nb++;
629 virtual int get_dimension() {
632 virtual bool get_processor_coordinates(int processor_id, int* processor_coordinates) {
633 CmiAssert(processor_id>=0 && processor_id<VirtualProcessorCount);
634 CmiAssert( processor_coordinates != NULL );
635 for(int i=0;i<dimension;i++) {
636 processor_coordinates[i] = processor_id % Cardinality[i];
637 processor_id = processor_id / Cardinality[i];
641 virtual bool get_processor_id(const int* processor_coordinates, int* processor_id) {
643 CmiAssert( processor_coordinates != NULL );
644 CmiAssert( processor_id != NULL );
645 for(i=dimension-1;i>=0;i--)
646 CmiAssert( 0<=processor_coordinates[i] && processor_coordinates[i]<Cardinality[i]);
648 for(i=dimension-1;i>=0;i--) {
649 (*processor_id) = (*processor_id)* Cardinality[i] + processor_coordinates[i];
653 //Note: if abs(difference)*2 = cardinality, the difference is set to zero
654 virtual bool coordinate_difference(const int* my_coordinates, const int* target_coordinates, int* difference) {
655 // CmiPrintf("[%d] coordiate_difference begin\n", CkMyPe());
656 CmiAssert( my_coordinates != NULL);
657 CmiAssert( target_coordinates != NULL);
658 CmiAssert( difference != NULL);
659 // CmiPrintf("[%d] after assert\n", CkMyPe());
660 for(int i=0;i<dimension;i++) {
661 // CmiPrintf("[%d] coordiate_difference iteration %d\n", i);
662 difference[i] = target_coordinates[i] - my_coordinates[i];
663 if (abs(difference[i])*2 > Cardinality[i]) {
664 difference[i] += (difference[i]>0) ? -Cardinality[i] : Cardinality[i];
665 } else if (abs(difference[i])*2 == Cardinality[i]) {
669 // CmiPrintf("[%d] coordiate_difference just before return\n");
672 //Note: if abs(difference)*2 = cardinality, the difference is set to zero
673 virtual bool coordinate_difference(int my_processor_id, int target_processor_id, int* difference) {
674 CmiAssert( difference != NULL);
675 int my_coordinates[dimension];
676 int target_coordinates[dimension];
677 get_processor_coordinates(my_processor_id, my_coordinates);
678 get_processor_coordinates(target_processor_id, target_coordinates);
679 coordinate_difference(my_coordinates, target_coordinates, difference);
684 typedef LBTopo_torus_nd<1> LBTopo_torus_nd_1;
685 typedef LBTopo_torus_nd<2> LBTopo_torus_nd_2;
686 typedef LBTopo_torus_nd<3> LBTopo_torus_nd_3;
687 typedef LBTopo_torus_nd<4> LBTopo_torus_nd_4;
688 typedef LBTopo_torus_nd<5> LBTopo_torus_nd_5;
689 typedef LBTopo_torus_nd<6> LBTopo_torus_nd_6;
690 typedef LBTopo_torus_nd<7> LBTopo_torus_nd_7;
691 typedef LBTopo_torus_nd<8> LBTopo_torus_nd_8;
692 typedef LBTopo_torus_nd<9> LBTopo_torus_nd_9;
693 typedef LBTopo_torus_nd<10> LBTopo_torus_nd_10;
695 LBTOPO_MACRO(LBTopo_torus_nd_1)
696 LBTOPO_MACRO(LBTopo_torus_nd_2)
697 LBTOPO_MACRO(LBTopo_torus_nd_3)
698 LBTOPO_MACRO(LBTopo_torus_nd_4)
699 LBTOPO_MACRO(LBTopo_torus_nd_5)
700 LBTOPO_MACRO(LBTopo_torus_nd_6)
701 LBTOPO_MACRO(LBTopo_torus_nd_7)
702 LBTOPO_MACRO(LBTopo_torus_nd_8)
703 LBTOPO_MACRO(LBTopo_torus_nd_9)
704 LBTOPO_MACRO(LBTopo_torus_nd_10)
706 // TORUS ND and SMP Awareness
707 // added by Yanhua Sun
711 template <int dimension>
712 class LBTopo_torus_nd_smp: public LBTopology {
714 // inherited int npes;
716 int VirtualNodeCount;
721 int GetNeighborID(int ProcessorID, int number) {
722 CmiAssert(number>=0 && number<max_neighbors());
723 CmiAssert(ProcessorID>=0 && ProcessorID<npes);
725 int nodeId = CmiPhysicalNodeID(ProcessorID);
727 get_node_coordinates(nodeId, TempCo);
729 int index = number/2;
730 int displacement = (number%2)? -1: 1;
732 TempCo[index] = (TempCo[index] + displacement + Cardinality[index]) % Cardinality[index];
733 get_node_id(TempCo, &nodeId);
734 } while (nodeId >= NumOfNodes);
735 neighborId = CmiGetFirstPeOnPhysicalNode(nodeId);
739 LBTopo_torus_nd_smp(int p): LBTopology(p) /*inherited :npes(p) */ {
741 CmiAssert(dimension>=1 && dimension<=32);
744 ppn = CmiNumPesOnPhysicalNode(0);
745 NumOfNodes = CmiNumPhysicalNodes();
747 Cardinality = new int[dimension];
748 TempCo = new int[dimension];
749 double pp = NumOfNodes;
750 for(i=0;i<dimension;i++) {
751 Cardinality[i] = (int)ceil(pow(pp,1.0/(dimension-i))-1e-5);
752 pp = pp / Cardinality[i];
754 VirtualNodeCount = 1;
755 for(i=0;i<dimension;i++) {
756 VirtualNodeCount *= Cardinality[i];
759 CmiPrintf(" ppn=%d, NumOfNodes=%d\n", ppn, NumOfNodes);
762 ~LBTopo_torus_nd_smp() {
763 delete[] Cardinality;
766 virtual int max_neighbors() {
767 return (dimension+ppn)*2;
769 virtual void neighbors(int mype, int* _n, int &nb) {
773 int rank = CmiPhysicalRank(mype);
774 int node = CmiPhysicalNodeID(mype);
775 int _ppn_ = CmiNumPesOnPhysicalNode(node);
776 CmiGetPesOnPhysicalNode(node, &nodePeList, &numpes);
778 CmiPrintf(" PE[%d] ppn=%d, NumOfNodes=%d, rank=%d, node=%d, numpes=%d\n", mype, _ppn_, NumOfNodes, rank, node, numpes);
780 for(int i=0; i<numpes; i++)
782 int _pid = nodePeList[i];
790 /* for inter-node communication */
791 if(mype == CmiGetFirstPeOnPhysicalNode(node))
793 for(int j=0; j<dimension*2; j++)
795 //_n[nb] = (mype+1)%npes;//GetNeighborID(mype, j);
796 _n[nb] = GetNeighborID(mype, j);
797 /* the first processors in other nodes */
798 if (_n[nb]!=mype && (nb==0 || _n[nb-1]!=_n[nb]) ) nb++;
803 CmiPrintf(" [%d] neighbor = %d ppn=%d, NumOfNodes=%d, rank=%d, node=%d, numpes=%d\n", mype, nb, _ppn_, NumOfNodes, rank, node, numpes);
806 virtual int get_dimension() {
809 virtual bool get_node_coordinates(int node_id, int* node_coordinates) {
810 CmiAssert(node_id>=0 && node_id<VirtualNodeCount);
811 CmiAssert( node_coordinates != NULL );
812 for(int i=0;i<dimension;i++) {
813 node_coordinates[i] = node_id % Cardinality[i];
814 node_id = node_id / Cardinality[i];
818 virtual bool get_node_id(const int* node_coordinates, int* node_id) {
820 CmiAssert( node_coordinates != NULL );
821 CmiAssert( node_id != NULL );
822 for(i=dimension-1;i>=0;i--)
823 CmiAssert( 0<=node_coordinates[i] && node_coordinates[i]<Cardinality[i]);
825 for(i=dimension-1;i>=0;i--) {
826 (*node_id) = (*node_id)* Cardinality[i] + node_coordinates[i];
830 //Note: if abs(difference)*2 = cardinality, the difference is set to zero
831 virtual bool coordinate_difference(const int* my_coordinates, const int* target_coordinates, int* difference) {
832 CmiAssert( my_coordinates != NULL);
833 CmiAssert( target_coordinates != NULL);
834 CmiAssert( difference != NULL);
835 for(int i=0;i<dimension;i++) {
836 difference[i] = target_coordinates[i] - my_coordinates[i];
837 if (abs(difference[i])*2 > Cardinality[i]) {
838 difference[i] += (difference[i]>0) ? -Cardinality[i] : Cardinality[i];
839 } else if (abs(difference[i])*2 == Cardinality[i]) {
845 //Note: if abs(difference)*2 = cardinality, the difference is set to zero
846 virtual bool coordinate_difference(int my_processor_id, int target_processor_id, int* difference) {
847 CmiAssert( difference != NULL);
848 int my_coordinates[dimension];
849 int target_coordinates[dimension];
850 get_processor_coordinates(my_processor_id, my_coordinates);
851 get_processor_coordinates(target_processor_id, target_coordinates);
852 coordinate_difference(my_coordinates, target_coordinates, difference);
858 typedef LBTopo_torus_nd_smp<1> LBTopo_torus_nd_smp_1;
859 typedef LBTopo_torus_nd_smp<2> LBTopo_torus_nd_smp_2;
860 typedef LBTopo_torus_nd_smp<3> LBTopo_torus_nd_smp_3;
861 typedef LBTopo_torus_nd_smp<4> LBTopo_torus_nd_smp_4;
862 typedef LBTopo_torus_nd_smp<5> LBTopo_torus_nd_smp_5;
863 typedef LBTopo_torus_nd_smp<6> LBTopo_torus_nd_smp_6;
864 typedef LBTopo_torus_nd_smp<7> LBTopo_torus_nd_smp_7;
865 typedef LBTopo_torus_nd_smp<8> LBTopo_torus_nd_smp_8;
866 typedef LBTopo_torus_nd_smp<9> LBTopo_torus_nd_smp_9;
867 typedef LBTopo_torus_nd_smp<10> LBTopo_torus_nd_smp_10;
869 LBTOPO_MACRO(LBTopo_torus_nd_smp_1)
870 LBTOPO_MACRO(LBTopo_torus_nd_smp_2)
871 LBTOPO_MACRO(LBTopo_torus_nd_smp_3)
872 LBTOPO_MACRO(LBTopo_torus_nd_smp_4)
873 LBTOPO_MACRO(LBTopo_torus_nd_smp_5)
874 LBTOPO_MACRO(LBTopo_torus_nd_smp_6)
875 LBTOPO_MACRO(LBTopo_torus_nd_smp_7)
876 LBTOPO_MACRO(LBTopo_torus_nd_smp_8)
877 LBTOPO_MACRO(LBTopo_torus_nd_smp_9)
878 LBTOPO_MACRO(LBTopo_torus_nd_smp_10)
881 //Torus ND with unequal number of processors in each dimension
882 /***************************************************************/
883 template <int dimension>
884 class LBTopo_itorus_nd: public LBTopology {
890 LBTopo_itorus_nd(int p): LBTopology(p) {
891 CmiPrintf("Irregular torus created\n");
892 dim = new int[dimension];
893 tempCoor = new int[dimension];
896 char *lbcopy = strdup(_lbtopo);
897 char *ptr = strchr(lbcopy, ':');
902 ptr = strtok(ptr+1, ",");
906 ptr = strtok(NULL, ",");
908 CmiAssert(dimension==i);
911 for(i=0;i<dimension;i++)
913 CmiAssert(dimension>=1 && dimension<=16);
919 ~LBTopo_itorus_nd() {
924 virtual int max_neighbors() {
928 virtual void neighbors(int mype, int* _n, int &nb) {
930 for(int i=0;i<dimension*2;i++) {
931 _n[nb] = GetNeighborID(mype, i);
932 if (_n[nb]!=mype && (nb==0 || _n[nb-1]!=_n[nb]) ) nb++;
936 int GetNeighborID(int ProcessorID, int number) {
937 CmiAssert(number>=0 && number<max_neighbors());
938 CmiAssert(ProcessorID>=0 && ProcessorID<npes);
939 get_processor_coordinates(ProcessorID, tempCoor);
941 int index = number/2;
942 int displacement = (number%2)? -1: 1;
944 tempCoor[index] = (tempCoor[index] + displacement + dim[index]) % dim[index];
945 get_processor_id(tempCoor, &ProcessorID);
946 //} while (ProcessorID >= npes);
950 virtual bool get_processor_coordinates(int processor_id, int* processor_coordinates) {
951 CmiAssert(processor_id>=0 && processor_id<npes);
952 CmiAssert(processor_coordinates != NULL );
953 for(int i=0;i<dimension;i++) {
954 processor_coordinates[i] = processor_id % dim[i];
955 processor_id = processor_id / dim[i];
960 virtual bool get_processor_id(const int* processor_coordinates, int* processor_id) {
962 CmiAssert( processor_coordinates != NULL );
963 CmiAssert( processor_id != NULL );
964 for(i=dimension-1;i>=0;i--)
965 CmiAssert( 0<=processor_coordinates[i] && processor_coordinates[i]<dim[i]);
967 for(i=dimension-1;i>=0;i--) {
968 (*processor_id) = (*processor_id)* dim[i] + processor_coordinates[i];
975 typedef LBTopo_itorus_nd<1> LBTopo_itorus_nd_1;
976 typedef LBTopo_itorus_nd<2> LBTopo_itorus_nd_2;
977 typedef LBTopo_itorus_nd<3> LBTopo_itorus_nd_3;
978 typedef LBTopo_itorus_nd<4> LBTopo_itorus_nd_4;
979 typedef LBTopo_itorus_nd<5> LBTopo_itorus_nd_5;
980 typedef LBTopo_itorus_nd<6> LBTopo_itorus_nd_6;
981 typedef LBTopo_itorus_nd<7> LBTopo_itorus_nd_7;
983 LBTOPO_MACRO(LBTopo_itorus_nd_1)
984 LBTOPO_MACRO(LBTopo_itorus_nd_2)
985 LBTOPO_MACRO(LBTopo_itorus_nd_3)
986 LBTOPO_MACRO(LBTopo_itorus_nd_4)
987 LBTOPO_MACRO(LBTopo_itorus_nd_5)
988 LBTOPO_MACRO(LBTopo_itorus_nd_6)
989 LBTOPO_MACRO(LBTopo_itorus_nd_7)
992 /******************************************************************/
993 //Mesh ND with unequal number of processors in each dimension
994 /***************************************************************/
995 template <int dimension>
996 class LBTopo_imesh_nd: public LBTopology {
1002 LBTopo_imesh_nd(int p): LBTopology(p) {
1003 CmiPrintf("Irregular mesh created\n");
1004 dim = new int[dimension];
1005 tempCoor = new int[dimension];
1008 char *lbcopy = strdup(_lbtopo);
1009 char *ptr = strchr(lbcopy, ':');
1017 ptr = strtok(ptr+1, ",");
1021 ptr = strtok(NULL, ",");
1023 CmiAssert(dimension==i);
1025 //char *ptr2=strchr(_lbtopo,':');
1028 for(i=0;i<dimension;i++)
1030 CmiAssert(dimension>=1 && dimension<=16);
1032 CmiAssert(procs==p);
1036 ~LBTopo_imesh_nd() {
1041 virtual int max_neighbors() {
1045 virtual void neighbors(int mype, int* _n, int &nb) {
1047 for(int i=0;i<dimension*2;i++) {
1048 _n[nb] = GetNeighborID(mype, i);
1049 if (_n[nb]!=mype && (nb==0 || _n[nb-1]!=_n[nb]) ) nb++;
1051 /*CmiPrintf("Nhs[%d]:",mype);
1052 for(int i=0;i<nb;i++)
1053 CmiPrintf("%d ",_n[i]);
1057 int GetNeighborID(int ProcessorID, int number) {
1058 CmiAssert(number>=0 && number<max_neighbors());
1059 CmiAssert(ProcessorID>=0 && ProcessorID<npes);
1060 get_processor_coordinates(ProcessorID, tempCoor);
1062 int index = number/2;
1063 int displacement = (number%2)? -1: 1;
1065 if((tempCoor[index]==0 && displacement==-1) || (tempCoor[index]==dim[index]-1 && displacement==1)){
1068 tempCoor[index] = (tempCoor[index] + displacement + dim[index]) % dim[index];
1069 get_processor_id(tempCoor, &ProcessorID);
1071 //} while (ProcessorID >= npes);
1075 virtual bool get_processor_coordinates(int processor_id, int* processor_coordinates) {
1076 CmiAssert(processor_id>=0 && processor_id<npes);
1077 CmiAssert(processor_coordinates != NULL );
1078 for(int i=0;i<dimension;i++) {
1079 processor_coordinates[i] = processor_id % dim[i];
1080 processor_id = processor_id / dim[i];
1085 virtual bool get_processor_id(const int* processor_coordinates, int* processor_id) {
1087 CmiAssert( processor_coordinates != NULL );
1088 CmiAssert( processor_id != NULL );
1089 for(i=dimension-1;i>=0;i--)
1090 CmiAssert( 0<=processor_coordinates[i] && processor_coordinates[i]<dim[i]);
1091 (*processor_id) = 0;
1092 for(i=dimension-1;i>=0;i--) {
1093 (*processor_id) = (*processor_id)* dim[i] + processor_coordinates[i];
1100 typedef LBTopo_imesh_nd<1> LBTopo_imesh_nd_1;
1101 typedef LBTopo_imesh_nd<2> LBTopo_imesh_nd_2;
1102 typedef LBTopo_imesh_nd<3> LBTopo_imesh_nd_3;
1103 typedef LBTopo_imesh_nd<4> LBTopo_imesh_nd_4;
1104 typedef LBTopo_imesh_nd<5> LBTopo_imesh_nd_5;
1105 typedef LBTopo_imesh_nd<6> LBTopo_imesh_nd_6;
1106 typedef LBTopo_imesh_nd<7> LBTopo_imesh_nd_7;
1108 LBTOPO_MACRO(LBTopo_imesh_nd_1)
1109 LBTOPO_MACRO(LBTopo_imesh_nd_2)
1110 LBTOPO_MACRO(LBTopo_imesh_nd_3)
1111 LBTOPO_MACRO(LBTopo_imesh_nd_4)
1112 LBTOPO_MACRO(LBTopo_imesh_nd_5)
1113 LBTOPO_MACRO(LBTopo_imesh_nd_6)
1114 LBTOPO_MACRO(LBTopo_imesh_nd_7)
1117 // dense graph with connectivity of square root processor number
1119 LBTOPO_MACRO(LBTopo_graph)
1121 int LBTopo_graph::max_neighbors()
1123 return (int)(sqrt(1.0*CmiNumPes())+0.5);
1126 extern "C" void gengraph(int, int, int, int *, int *, int);
1128 void LBTopo_graph::neighbors(int mype, int* na, int &nb)
1130 gengraph(CmiNumPes(), (int)(sqrt(1.0*CmiNumPes())+0.5), 234, na, &nb, 0);
1133 /* add by Yanhua Aug-2010*/
1134 template <int dimension>
1135 class LBTopo_graph_nc: public LBTopology {
1138 LBTopo_graph_nc(int p): LBTopology(p) {}
1141 return dimension + 1;
1144 void neighbors(int mype, int* na, int &nb)
1146 #if CMK_NODE_QUEUE_AVAILABLE
1147 gengraph(CmiNumNodes(), dimension, 234, na, &nb, 0);
1149 gengraph(CmiNumPes(), dimension, 234, na, &nb, 0);
1154 typedef LBTopo_graph_nc<2> LBTopo_graph_nc_2;
1155 typedef LBTopo_graph_nc<3> LBTopo_graph_nc_3;
1156 typedef LBTopo_graph_nc<4> LBTopo_graph_nc_4;
1157 typedef LBTopo_graph_nc<5> LBTopo_graph_nc_5;
1158 typedef LBTopo_graph_nc<6> LBTopo_graph_nc_6;
1159 typedef LBTopo_graph_nc<7> LBTopo_graph_nc_7;
1160 typedef LBTopo_graph_nc<8> LBTopo_graph_nc_8;
1161 typedef LBTopo_graph_nc<9> LBTopo_graph_nc_9;
1162 typedef LBTopo_graph_nc<10> LBTopo_graph_nc_10;
1163 typedef LBTopo_graph_nc<20> LBTopo_graph_nc_20;
1165 LBTOPO_MACRO(LBTopo_graph_nc_2)
1166 LBTOPO_MACRO(LBTopo_graph_nc_3)
1167 LBTOPO_MACRO(LBTopo_graph_nc_4)
1168 LBTOPO_MACRO(LBTopo_graph_nc_5)
1169 LBTOPO_MACRO(LBTopo_graph_nc_6)
1170 LBTOPO_MACRO(LBTopo_graph_nc_7)
1171 LBTOPO_MACRO(LBTopo_graph_nc_8)
1172 LBTOPO_MACRO(LBTopo_graph_nc_9)
1173 LBTOPO_MACRO(LBTopo_graph_nc_10)
1174 LBTOPO_MACRO(LBTopo_graph_nc_20)
1177 /* Centralized balancer, one processor has the neighbors of all other processors, while the other ones only have one neighbor, the centralized processor */
1183 class LBTopo_complete: public LBTopology {
1185 LBTopo_complete(int p): LBTopology(p) {}
1186 int max_neighbors() {
1189 void neighbors(int mype, int* _n, int &nb) {
1191 for (int i=0; i<npes; i++) if (mype != i) _n[nb++] = i;
1195 LBTOPO_MACRO(LBTopo_complete)
1200 class LBTopo_karytree: public LBTopology {
1202 LBTopo_karytree(int p): LBTopology(p) {}
1203 virtual int max_neighbors() {
1204 return k+1; // parent + children
1206 virtual void neighbors(int mype, int* _n, int &nb) {
1208 if (mype!=0) _n[nb++] = (mype-1)/k;
1209 int firstchild = mype*k+1;
1210 for (int i=0; i<k; i++)
1211 if (firstchild+i < npes) _n[nb++] = firstchild+i;
1215 typedef LBTopo_karytree<2> LBTopo_2_arytree;
1216 typedef LBTopo_karytree<3> LBTopo_3_arytree;
1217 typedef LBTopo_karytree<4> LBTopo_4_arytree;
1218 typedef LBTopo_karytree<128> LBTopo_128_arytree;
1219 typedef LBTopo_karytree<512> LBTopo_512_arytree;
1221 LBTOPO_MACRO(LBTopo_2_arytree)
1222 LBTOPO_MACRO(LBTopo_3_arytree)
1223 LBTOPO_MACRO(LBTopo_4_arytree)
1224 LBTOPO_MACRO(LBTopo_128_arytree)
1225 LBTOPO_MACRO(LBTopo_512_arytree)
1233 LBTopoMap(const char *s, LBtopoFn f): name(s), fn(f) {}
1234 LBTopoMap(const LBTopoMap &p); // You don't want to copy
1235 void operator=(const LBTopoMap &p); // You don't want to copy
1239 CkVec<LBTopoMap *> lbTopos;
1242 // register all topos
1243 lbTopos.push_back(new LBTopoMap("ring", createLBTopo_ring));
1244 lbTopos.push_back(new LBTopoMap("torus2d", createLBTopo_torus2d));
1245 lbTopos.push_back(new LBTopoMap("torus3d", createLBTopo_torus3d));
1246 lbTopos.push_back(new LBTopoMap("mesh3d", createLBTopo_mesh3d));
1247 lbTopos.push_back(new LBTopoMap("torus_nd_1", createLBTopo_torus_nd_1));
1248 lbTopos.push_back(new LBTopoMap("torus_nd_2", createLBTopo_torus_nd_2));
1249 lbTopos.push_back(new LBTopoMap("torus_nd_3", createLBTopo_torus_nd_3));
1250 lbTopos.push_back(new LBTopoMap("torus_nd_4", createLBTopo_torus_nd_4));
1251 lbTopos.push_back(new LBTopoMap("torus_nd_5", createLBTopo_torus_nd_5));
1252 lbTopos.push_back(new LBTopoMap("torus_nd_6", createLBTopo_torus_nd_6));
1253 lbTopos.push_back(new LBTopoMap("torus_nd_7", createLBTopo_torus_nd_7));
1254 lbTopos.push_back(new LBTopoMap("torus_nd_8", createLBTopo_torus_nd_8));
1255 lbTopos.push_back(new LBTopoMap("torus_nd_9", createLBTopo_torus_nd_9));
1256 lbTopos.push_back(new LBTopoMap("torus_nd_10", createLBTopo_torus_nd_10));
1257 lbTopos.push_back(new LBTopoMap("torus_nd_smp_1", createLBTopo_torus_nd_smp_1));
1258 lbTopos.push_back(new LBTopoMap("torus_nd_smp_2", createLBTopo_torus_nd_smp_2));
1259 lbTopos.push_back(new LBTopoMap("torus_nd_smp_3", createLBTopo_torus_nd_smp_3));
1260 lbTopos.push_back(new LBTopoMap("torus_nd_smp_4", createLBTopo_torus_nd_smp_4));
1261 lbTopos.push_back(new LBTopoMap("torus_nd_smp_5", createLBTopo_torus_nd_smp_5));
1262 lbTopos.push_back(new LBTopoMap("torus_nd_smp_6", createLBTopo_torus_nd_smp_6));
1263 lbTopos.push_back(new LBTopoMap("torus_nd_smp_7", createLBTopo_torus_nd_smp_7));
1264 lbTopos.push_back(new LBTopoMap("torus_nd_smp_8", createLBTopo_torus_nd_smp_8));
1265 lbTopos.push_back(new LBTopoMap("torus_nd_smp_9", createLBTopo_torus_nd_smp_9));
1266 lbTopos.push_back(new LBTopoMap("torus_nd_smp_10", createLBTopo_torus_nd_smp_10));
1267 lbTopos.push_back(new LBTopoMap("itorus_nd_1", createLBTopo_itorus_nd_1));
1268 lbTopos.push_back(new LBTopoMap("itorus_nd_2", createLBTopo_itorus_nd_2));
1269 lbTopos.push_back(new LBTopoMap("itorus_nd_3", createLBTopo_itorus_nd_3));
1270 lbTopos.push_back(new LBTopoMap("itorus_nd_4", createLBTopo_itorus_nd_4));
1271 lbTopos.push_back(new LBTopoMap("itorus_nd_5", createLBTopo_itorus_nd_5));
1272 lbTopos.push_back(new LBTopoMap("itorus_nd_6", createLBTopo_itorus_nd_6));
1273 lbTopos.push_back(new LBTopoMap("itorus_nd_7", createLBTopo_itorus_nd_7));
1274 lbTopos.push_back(new LBTopoMap("imesh_nd_1", createLBTopo_imesh_nd_1));
1275 lbTopos.push_back(new LBTopoMap("imesh_nd_2", createLBTopo_imesh_nd_2));
1276 lbTopos.push_back(new LBTopoMap("imesh_nd_3", createLBTopo_imesh_nd_3));
1277 lbTopos.push_back(new LBTopoMap("imesh_nd_4", createLBTopo_imesh_nd_4));
1278 lbTopos.push_back(new LBTopoMap("imesh_nd_5", createLBTopo_imesh_nd_5));
1279 lbTopos.push_back(new LBTopoMap("imesh_nd_6", createLBTopo_imesh_nd_6));
1280 lbTopos.push_back(new LBTopoMap("imesh_nd_7", createLBTopo_imesh_nd_7));
1281 lbTopos.push_back(new LBTopoMap("graph", createLBTopo_graph));
1282 lbTopos.push_back(new LBTopoMap("graph_nc_2", createLBTopo_graph_nc_2));
1283 lbTopos.push_back(new LBTopoMap("graph_nc_3", createLBTopo_graph_nc_3));
1284 lbTopos.push_back(new LBTopoMap("graph_nc_4", createLBTopo_graph_nc_4));
1285 lbTopos.push_back(new LBTopoMap("graph_nc_5", createLBTopo_graph_nc_5));
1286 lbTopos.push_back(new LBTopoMap("graph_nc_6", createLBTopo_graph_nc_6));
1287 lbTopos.push_back(new LBTopoMap("graph_nc_7", createLBTopo_graph_nc_7));
1288 lbTopos.push_back(new LBTopoMap("graph_nc_8", createLBTopo_graph_nc_8));
1289 lbTopos.push_back(new LBTopoMap("graph_nc_9", createLBTopo_graph_nc_9));
1290 lbTopos.push_back(new LBTopoMap("graph_nc_10", createLBTopo_graph_nc_10));
1291 lbTopos.push_back(new LBTopoMap("graph_nc_20", createLBTopo_graph_nc_20));
1292 lbTopos.push_back(new LBTopoMap("complete", createLBTopo_complete));
1293 lbTopos.push_back(new LBTopoMap("2_arytree", createLBTopo_2_arytree));
1294 lbTopos.push_back(new LBTopoMap("3_arytree", createLBTopo_3_arytree));
1295 lbTopos.push_back(new LBTopoMap("4_arytree", createLBTopo_4_arytree));
1296 lbTopos.push_back(new LBTopoMap("128_arytree", createLBTopo_128_arytree));
1297 lbTopos.push_back(new LBTopoMap("512_arytree", createLBTopo_512_arytree));
1298 lbTopos.push_back(new LBTopoMap("smp_n_1", createLBTopo_smp_n_1));
1299 lbTopos.push_back(new LBTopoMap("smp_n_2", createLBTopo_smp_n_2));
1300 lbTopos.push_back(new LBTopoMap("smp_n_3", createLBTopo_smp_n_3));
1301 lbTopos.push_back(new LBTopoMap("smp_n_4", createLBTopo_smp_n_4));
1302 lbTopos.push_back(new LBTopoMap("smp_n_5", createLBTopo_smp_n_5));
1303 lbTopos.push_back(new LBTopoMap("smp_n_6", createLBTopo_smp_n_6));
1304 lbTopos.push_back(new LBTopoMap("smp_n_7", createLBTopo_smp_n_7));
1305 lbTopos.push_back(new LBTopoMap("smp_n_8", createLBTopo_smp_n_8));
1306 lbTopos.push_back(new LBTopoMap("smp_n_9", createLBTopo_smp_n_9));
1307 lbTopos.push_back(new LBTopoMap("smp_n_10", createLBTopo_smp_n_10));
1310 for (int i=0; i<lbTopos.length(); i++)
1313 void push_back(LBTopoMap *map) { lbTopos.push_back(map); }
1314 int length() { return lbTopos.length(); }
1315 LBTopoMap * operator[](size_t n) { return lbTopos[n]; }
1317 for (int i=0; i<lbTopos.length(); i++) {
1318 CmiPrintf(" %s\n", lbTopos[i]->name);
1323 static LBTopoVec lbTopoMap;
1326 LBtopoFn LBTopoLookup(char *name)
1328 for (int i=0; i<lbTopoMap.length(); i++) {
1329 if (strcmp(name, lbTopoMap[i]->name)==0) return lbTopoMap[i]->fn;
1334 // C wrapper functions
1335 extern "C" void getTopoNeighbors(void *topo, int myid, int* narray, int *n)
1337 ((LBTopology*)topo)->neighbors(myid, narray, *n);
1340 extern "C" int getTopoMaxNeighbors(void *topo)
1342 return ((LBTopology*)topo)->max_neighbors();
1345 extern "C" void printoutTopo()