2 * Written by Esteban Meneses, 2010-11-24
3 * Updated by Abhinav Bhatele, 2010-12-09 to use ckgraph
15 CreateLBFunc_Def(TeamLB, "Use Metis(tm) to partition object graph at two levels: team level and processor level")
17 TeamLB::TeamLB(const CkLBOptions &opt): CBase_TeamLB(opt)
21 CkPrintf("[%d] TeamLB created\n",CkMyPe());
23 // setting number of teams and team size
24 teamSize = _lb_args.teamSize();
25 numberTeams = CkNumPes() / teamSize;
29 * @brief METIS function that performs a balanced k-way partitioning of the
30 * graph, considering the communication volume (hence the "V" in the name of
33 extern "C" void METIS_PartGraphRecursive(int*, int*, int*, int*, int*, int*,
34 int*, int*, int*, int*, int*);
39 * @brief Load balancing function. It uses METIS in a two step approach. The
40 * first step consists in splitting the objects into teams. METIS is able to
41 * minimize the communication volume across the teams while balancing the load
42 * among the different teams. The second step goes deep in each team to balance
43 * the load in the processors belonging to that particular team.
45 void TeamLB::work(LDStats* stats)
47 /** ========================== INITIALIZATION ============================= */
48 ProcArray *parr = new ProcArray(stats);
49 ObjGraph *ogr = new ObjGraph(stats);
51 /** ============================= STRATEGY ================================ */
52 if (_lb_args.debug() >= 2) {
53 CkPrintf("[%d] In TeamLB Strategy...\n", CkMyPe());
56 // convert ObjGraph to the adjacency structure
57 idx_t numVertices = ogr->vertices.size(); // number of vertices
58 int numEdges = 0; // number of edges
61 int maxBytes = 0, i, j, k;
63 /** both object load and number of bytes exchanged are normalized to an
64 * integer between 0 and 256 */
65 for(i = 0; i < numVertices; i++) {
66 if(ogr->vertices[i].getVertexLoad() > maxLoad)
67 maxLoad = ogr->vertices[i].getVertexLoad();
68 numEdges += ogr->vertices[i].sendToList.size();
69 for(j = 0; j < ogr->vertices[i].sendToList.size(); j++) {
70 if(ogr->vertices[i].sendToList[j].getNumBytes() > maxBytes)
71 maxBytes = ogr->vertices[i].sendToList[j].getNumBytes();
76 idx_t *xadj = new idx_t[numVertices + 1];
77 /* id of the neighbors */
78 idx_t *adjncy = new idx_t[numEdges];
79 /* weights of the vertices */
80 idx_t *vwgt = new idx_t[numVertices];
81 /* weights of the edges */
82 idx_t *adjwgt = new idx_t[numEdges];
86 for(i = 0; i < numVertices; i++) {
88 vwgt[i] = (int)( (ogr->vertices[i].getVertexLoad() * 128) /maxLoad );
89 for(j = 0; j < ogr->vertices[i].sendToList.size(); j++) {
90 adjncy[edgeNum] = ogr->vertices[i].sendToList[j].getNeighborId();
91 adjwgt[edgeNum] = (int)( (ogr->vertices[i].sendToList[j].getNumBytes() * 128) / maxBytes );
96 CkAssert(edgeNum == numEdges);
98 idx_t options[METIS_NOPTIONS];
99 METIS_SetDefaultOptions(options);
100 //options[METIS_OPTION_PTYPE] = METIS_PTYPE_RB;
102 options[METIS_OPTION_NUMBERING] = 0;
104 idx_t edgecut; // number of edges cut by the partitioning
105 // mapping of objs to partitions
106 idx_t *pemap = new idx_t[numVertices];
108 // number of constrains
111 // allow 10% imbalance tolerance
114 // Specifies size of vertices for computing the total communication volume
116 // This array of size nparts specifies the desired weight for each partition
117 // and setting it to NULL indicates graph should be equally divided among
119 real_t *tpwgts = NULL;
121 if (_lb_args.debug() >= 1)
122 CkPrintf("[%d] calling METIS_PartGraphRecursive.\n", CkMyPe());
124 METIS_PartGraphRecursive(&numVertices, &ncon, xadj, adjncy, vwgt, vsize,
125 adjwgt, &numberTeams, tpwgts, ubvec, options, &edgecut, pemap);
127 idx_t *global_pemap = new idx_t[numVertices];
129 // partitioning each team
131 idx_t *team_xadj = new idx_t[numVertices + 1];
132 idx_t *team_adjncy = new idx_t[numEdges];
133 idx_t *team_vwgt = new idx_t[numVertices];
134 idx_t *team_adjwgt = new idx_t[numEdges];
135 idx_t *team_pemap = new idx_t[numVertices];
136 idx_t *team_vsize = NULL;
137 real_t *team_tpwgts = NULL;
139 idx_t teamEdgecut, node;
140 int *mapping = new int[numVertices];
141 int *invMapping = new int[numVertices];
143 // traversing the list of teams and load balancing each one
144 for(i=0; i<numberTeams; i++) {
145 idx_t teamMembers = 0; // number of vertices in a team
147 // collecting all the elements of a particular team
148 // mapping stores the association of local to global index
149 // invMapping stores the inverse association
150 for(j = 0; j < numVertices; j++) {
152 mapping[teamMembers] = j;
153 invMapping[j] = teamMembers;
154 team_vwgt[teamMembers] = vwgt[j];
159 // building up the adjacency data structures
161 for(j = 0; j < teamMembers; j++) {
162 team_xadj[j] = teamIndex;
163 for(k = xadj[mapping[j]]; k < xadj[mapping[j]+1]; k++) {
165 if(pemap[node] == i) {
166 team_adjncy[teamIndex] = invMapping[node];
167 team_adjwgt[teamIndex] = adjwgt[k];
172 team_xadj[teamMembers] = teamIndex;
174 // calling METIS library
175 METIS_PartGraphRecursive(&teamMembers, &ncon, team_xadj, team_adjncy,
176 team_vwgt, team_vsize, team_adjwgt, &teamSize, team_tpwgts, ubvec,
177 options, &teamEdgecut, team_pemap);
179 // converting local mapping into global mapping
180 for(j = 0; j < teamMembers; j++) {
181 global_pemap[mapping[j]] = i*teamSize + team_pemap[j];
187 delete[] team_adjncy;
189 delete[] team_adjwgt;
192 delete[] team_tpwgts;
197 delete[] global_pemap;
198 global_pemap = pemap;
209 if (_lb_args.debug() >= 1) {
210 CkPrintf("[%d] TeamLB done! \n", CkMyPe());
213 for(i = 0; i < numVertices; i++) {
214 if(pemap[i] != ogr->vertices[i].getCurrentPe())
215 ogr->vertices[i].setNewPe(pemap[i]);
220 /** ============================== CLEANUP ================================ */
221 ogr->convertDecisions(stats);
224 #include "TeamLB.def.h"