src/ck-ldb/GridHybridLB.C

   1 /**************************************************************************
   2 ** Greg Koenig (koenig@uiuc.edu)
   3 ** June 14, 2007
   4 **
   5 ** This is GridHybridLB.C
   6 **
   7 */
   8
   9 #include "GridHybridLB.decl.h"
  10
  11 #include "GridHybridLB.h"
  12 #include "manager.h"
  13
  14 CreateLBFunc_Def (GridHybridLB, "Grid load balancer that uses hybrid technique to optimize communication graph")
  15
  16
  17
  18 /**************************************************************************
  19 **
  20 */
  21 GridHybridLB::GridHybridLB (const CkLBOptions &opt) : CBase_GridHybridLB (opt)
  22 {
  23   char *value;
  24
  25
  26   lbname = (char *) "GridHybridLB";
  27
  28   if (CkMyPe() == 0) {
  29     CkPrintf ("[%d] GridHybridLB created.\n", CkMyPe());
  30   }
  31
  32   if (value = getenv ("CK_LDB_GRIDHYBRIDLB_MODE")) {
  33     CK_LDB_GridHybridLB_Mode = atoi (value);
  34   } else {
  35     CK_LDB_GridHybridLB_Mode = CK_LDB_GRIDHYBRIDLB_MODE;
  36   }
  37
  38   if (value = getenv ("CK_LDB_GRIDHYBRIDLB_BACKGROUND_LOAD")) {
  39     CK_LDB_GridHybridLB_Background_Load = atoi (value);
  40   } else {
  41     CK_LDB_GridHybridLB_Background_Load = CK_LDB_GRIDHYBRIDLB_BACKGROUND_LOAD;
  42   }
  43
  44   if (value = getenv ("CK_LDB_GRIDHYBRIDLB_LOAD_TOLERANCE")) {
  45     CK_LDB_GridHybridLB_Load_Tolerance = atof (value);
  46   } else {
  47     CK_LDB_GridHybridLB_Load_Tolerance = CK_LDB_GRIDHYBRIDLB_LOAD_TOLERANCE;
  48   }
  49
  50   manager_init ();
  51 }
  52
  53
  54
  55 /**************************************************************************
  56 **
  57 */
  58 GridHybridLB::GridHybridLB (CkMigrateMessage *msg) : CBase_GridHybridLB (msg)
  59 {
  60   char *value;
  61
  62
  63   lbname = (char *) "GridHybridLB";
  64
  65   if (value = getenv ("CK_LDB_GRIDHYBRIDLB_MODE")) {
  66     CK_LDB_GridHybridLB_Mode = atoi (value);
  67   } else {
  68     CK_LDB_GridHybridLB_Mode = CK_LDB_GRIDHYBRIDLB_MODE;
  69   }
  70
  71   if (value = getenv ("CK_LDB_GRIDHYBRIDLB_BACKGROUND_LOAD")) {
  72     CK_LDB_GridHybridLB_Background_Load = atoi (value);
  73   } else {
  74     CK_LDB_GridHybridLB_Background_Load = CK_LDB_GRIDHYBRIDLB_BACKGROUND_LOAD;
  75   }
  76
  77   if (value = getenv ("CK_LDB_GRIDHYBRIDLB_LOAD_TOLERANCE")) {
  78     CK_LDB_GridHybridLB_Load_Tolerance = atof (value);
  79   } else {
  80     CK_LDB_GridHybridLB_Load_Tolerance = CK_LDB_GRIDHYBRIDLB_LOAD_TOLERANCE;
  81   }
  82
  83   manager_init ();
  84 }
  85
  86
  87
  88 /**************************************************************************
  89 ** The Charm++ load balancing framework invokes this method to determine
  90 ** whether load balancing can be performed at a specified time.
  91 */
  92 bool GridHybridLB::QueryBalanceNow (int step)
  93 {
  94   if (_lb_args.debug() > 2) {
  95     CkPrintf ("[%d] GridHybridLB is balancing on step %d.\n", CkMyPe(), step);
  96   }
  97
  98   return (true);
  99 }
 100
 101
 102
 103 /**************************************************************************
 104 ** The vmi-linux machine layer incorporates the idea that PEs are located
 105 ** within identifiable clusters.  This information can be supplied by the
 106 ** user or can be probed automatically by the machine layer.  The exposed
 107 ** API call CmiGetCluster() returns the integer cluster number for a
 108 ** specified PE or -1 if the information is unknown.
 109 **
 110 ** For machine layers other than vmi-linux, simply return the constant 0.
 111 ** GridHybridLB will assume a single-cluster computation and will balance
 112 ** on the scaled processor load and number of LAN messages.
 113 */
 114 int GridHybridLB::Get_Cluster (int pe)
 115 {
 116   return (0);
 117 }
 118
 119
 120
 121 /**************************************************************************
 122 **
 123 */
 124 void GridHybridLB::Initialize_PE_Data (CentralLB::LDStats *stats)
 125 {
 126   int min_speed;
 127   int i;
 128
 129
 130   PE_Data = new PE_Data_T[Num_PEs];
 131
 132   min_speed = MAXINT;
 133   for (i = 0; i < Num_PEs; i++) {
 134     (&PE_Data[i])->available      = stats->procs[i].available;
 135     (&PE_Data[i])->cluster        = Get_Cluster (i);
 136     (&PE_Data[i])->num_objs       = 0;
 137     (&PE_Data[i])->num_lan_objs   = 0;
 138     (&PE_Data[i])->num_lan_msgs   = 0;
 139     (&PE_Data[i])->num_wan_objs   = 0;
 140     (&PE_Data[i])->num_wan_msgs   = 0;
 141     (&PE_Data[i])->relative_speed = 0.0;
 142     (&PE_Data[i])->scaled_load    = 0.0;
 143
 144     if (stats->procs[i].pe_speed < min_speed) {
 145       min_speed = stats->procs[i].pe_speed;
 146     }
 147   }
 148
 149   // Compute the relative PE speeds.
 150   // Also add background CPU time to each PE's scaled load.
 151   for (i = 0; i < Num_PEs; i++) {
 152     (&PE_Data[i])->relative_speed = (double) (stats->procs[i].pe_speed / min_speed);
 153     if (CK_LDB_GridHybridLB_Background_Load) {
 154       (&PE_Data[i])->scaled_load += stats->procs[i].bg_walltime;
 155     }
 156   }
 157 }
 158
 159
 160
 161 /**************************************************************************
 162 **
 163 */
 164 int GridHybridLB::Available_PE_Count ()
 165 {
 166   int available_pe_count;
 167   int i;
 168
 169
 170   available_pe_count = 0;
 171   for (i = 0; i < Num_PEs; i++) {
 172     if ((&PE_Data[i])->available) {
 173       available_pe_count += 1;
 174     }
 175   }
 176   return (available_pe_count);
 177 }
 178
 179
 180
 181 /**************************************************************************
 182 **
 183 */
 184 int GridHybridLB::Compute_Number_Of_Clusters ()
 185 {
 186   int max_cluster;
 187   int i;
 188
 189
 190   max_cluster = 0;
 191   for (i = 0; i < Num_PEs; i++) {
 192     if ((&PE_Data[i])->cluster < 0) {
 193       return (-1);
 194     }
 195
 196     if ((&PE_Data[i])->cluster > max_cluster) {
 197       max_cluster = (&PE_Data[i])->cluster;
 198     }
 199   }
 200   return (max_cluster + 1);
 201 }
 202
 203
 204
 205 /**************************************************************************
 206 **
 207 */
 208 void GridHybridLB::Initialize_Object_Data (CentralLB::LDStats *stats)
 209 {
 210   int i;
 211
 212
 213   Object_Data = new Object_Data_T[Num_Objects];
 214
 215   for (i = 0; i < Num_Objects; i++) {
 216     (&Object_Data[i])->migratable   = (&stats->objData[i])->migratable;
 217     (&Object_Data[i])->from_pe      = stats->from_proc[i];
 218     (&Object_Data[i])->num_lan_msgs = 0;
 219     (&Object_Data[i])->num_wan_msgs = 0;
 220     (&Object_Data[i])->load         = (&stats->objData[i])->wallTime;
 221
 222     if ((&Object_Data[i])->migratable) {
 223       (&Object_Data[i])->to_pe = -1;
 224       (&Object_Data[i])->cluster = -1;
 225     } else {
 226       (&Object_Data[i])->to_pe = (&Object_Data[i])->from_pe;
 227       (&Object_Data[i])->cluster = Get_Cluster ((&Object_Data[i])->from_pe);
 228       if (_lb_args.debug() > 1) {
 229         CkPrintf ("[%d] GridHybridLB identifies object %d as non-migratable.\n", CkMyPe(), i);
 230       }
 231     }
 232   }
 233 }
 234
 235
 236
 237 /**************************************************************************
 238 **
 239 */
 240 void GridHybridLB::Initialize_Cluster_Data ()
 241 {
 242   int cluster;
 243   double min_total_cpu_power;
 244   int i;
 245
 246
 247   Cluster_Data = new Cluster_Data_T[Num_Clusters];
 248
 249   for (i = 0; i < Num_Clusters; i++) {
 250     (&Cluster_Data[i])->num_pes = 0;
 251     (&Cluster_Data[i])->total_cpu_power = 0.0;
 252     (&Cluster_Data[i])->scaled_cpu_power = 0.0;
 253   }
 254
 255   // Compute the relative speed of each cluster.
 256   for (i = 0; i < Num_PEs; i++) {
 257     cluster = (&PE_Data[i])->cluster;
 258
 259     (&Cluster_Data[cluster])->num_pes += 1;
 260     (&Cluster_Data[cluster])->total_cpu_power += (&PE_Data[i])->relative_speed;
 261   }
 262
 263   min_total_cpu_power = MAXDOUBLE;
 264   for (i = 0; i < Num_Clusters; i++) {
 265     if ((&Cluster_Data[i])->total_cpu_power < min_total_cpu_power) {
 266       min_total_cpu_power = (&Cluster_Data[i])->total_cpu_power;
 267     }
 268   }
 269
 270   for (i = 0; i < Num_Clusters; i++) {
 271     (&Cluster_Data[i])->scaled_cpu_power = (double) ((&Cluster_Data[i])->total_cpu_power / min_total_cpu_power);
 272   }
 273 }
 274
 275
 276
 277 /**************************************************************************
 278 ** This takes objects and partitions them into clusters.
 279 */
 280 void GridHybridLB::Partition_Objects_Into_Clusters (CentralLB::LDStats *stats)
 281 {
 282   int num_migratable_objects;
 283   int *migratable_objects;
 284   int index;
 285   int num_partitions;
 286   int *partition_to_cluster_map;
 287   int cluster;
 288   int partition;
 289   int partition_count;
 290   int *vertex_weights;
 291   int vertex;
 292   int **communication_matrix;
 293   LDCommData *com_data;
 294   int send_object;
 295   int recv_object;
 296   int send_index;
 297   int recv_index;
 298   LDObjKey *recv_objects;
 299   int num_objects;
 300   int *xadj;
 301   int num_edges;
 302   int *adjncy;
 303   int *edge_weights;
 304   int count;
 305   int weight_flag;
 306   int numbering_flag;
 307   int options[5];
 308   int edgecut;
 309   int *newmap;
 310   int i;
 311   int j;
 312
 313
 314   if (Num_Clusters == 1) {
 315     for (i = 0; i < Num_Objects; i++) {
 316       (&Object_Data[i])->cluster = 0;
 317     }
 318
 319     return;
 320   }
 321
 322   for (i = 0; i < Num_Objects; i++) {
 323     (&Object_Data[i])->secondary_index = -1;
 324   }
 325
 326   // Count the number of migratable objects, which are the only candidates to give to Metis.
 327   // (The non-migratable objects have been placed onto the correct destination PEs earlier.)
 328   // After getting the count, create a migratable_objects[] array to keep track of them.
 329   num_migratable_objects = 0;
 330   for (i = 0; i < Num_Objects; i++) {
 331     if ((&Object_Data[i])->migratable) {
 332       num_migratable_objects += 1;
 333     }
 334   }
 335
 336   migratable_objects = new int[num_migratable_objects];
 337
 338   index = 0;
 339   for (i = 0; i < Num_Objects; i++) {
 340     if ((&Object_Data[i])->migratable) {
 341       (&Object_Data[i])->secondary_index = index;
 342       migratable_objects[index] = i;
 343       index += 1;
 344     }
 345   }
 346
 347   // Compute the number of partitions for Metis, based on the scaled CPU power for each cluster.
 348   // Also create a partition-to-cluster mapping so the output of Metis can be mapped back to clusters.
 349   num_partitions = 0;
 350   for (i = 0; i < Num_Clusters; i++) {
 351     num_partitions += (int) ceil ((&Cluster_Data[i])->scaled_cpu_power);
 352   }
 353
 354   partition_to_cluster_map = new int[num_partitions];
 355
 356   cluster = 0;
 357   partition = 0;
 358   while (partition < num_partitions) {
 359     partition_count = (int) ceil ((&Cluster_Data[cluster])->scaled_cpu_power);
 360
 361     for (i = partition; i < (partition + partition_count); i++) {
 362       partition_to_cluster_map[i] = cluster;
 363     }
 364
 365     partition += partition_count;
 366     cluster += 1;
 367   }
 368
 369   if ((CK_LDB_GridHybridLB_Mode == 1) || (CK_LDB_GridHybridLB_Mode == 3)) {
 370     vertex_weights = new int[num_migratable_objects];
 371     vertex = 0;
 372     for (i = 0; i < Num_Objects; i++) {
 373       if ((&Object_Data[i])->migratable) {
 374         vertex_weights[vertex] = (int) ceil ((&Object_Data[i])->load * 10000);
 375         vertex += 1;
 376       }
 377     }
 378   }
 379
 380   // Create communication_matrix[] to hold all object-to-object message counts.
 381   communication_matrix = new int *[num_migratable_objects];
 382   for (i = 0; i < num_migratable_objects; i++) {
 383     communication_matrix[i] = new int[num_migratable_objects];
 384     for (j = 0; j < num_migratable_objects; j++) {
 385       communication_matrix[i][j] = 0;
 386     }
 387   }
 388
 389   for (i = 0; i < stats->n_comm; i++) {
 390     com_data = &(stats->commData[i]);
 391     if ((!com_data->from_proc()) && (com_data->recv_type() == LD_OBJ_MSG)) {
 392       send_object = stats->getHash (com_data->sender);
 393       recv_object = stats->getHash (com_data->receiver.get_destObj());
 394
 395       //if ((recv_object == -1) && (stats->complete_flag == 0)) {
 396       if ((send_object < 0) || (send_object > Num_Objects) || (recv_object < 0) || (recv_object > Num_Objects)) {
 397         continue;
 398       }
 399
 400       if ((!(&Object_Data[send_object])->migratable) || (!(&Object_Data[recv_object])->migratable)) {
 401         continue;
 402       }
 403
 404       send_index = (&Object_Data[send_object])->secondary_index;
 405       recv_index = (&Object_Data[recv_object])->secondary_index;
 406
 407       communication_matrix[send_index][recv_index] += com_data->messages;
 408       communication_matrix[recv_index][send_index] += com_data->messages;
 409     } else if (com_data->receiver.get_type() == LD_OBJLIST_MSG) {
 410       send_object = stats->getHash (com_data->sender);
 411
 412       if ((send_object < 0) || (send_object > Num_Objects)) {
 413         continue;
 414       }
 415
 416       if (!(&Object_Data[send_object])->migratable) {
 417         continue;
 418       }
 419
 420       recv_objects = com_data->receiver.get_destObjs (num_objects);   // (num_objects is passed by reference)
 421
 422       for (j = 0; j < num_objects; j++) {
 423         recv_object = stats->getHash (recv_objects[j]);
 424
 425         //if (recv_object == -1) {
 426         if ((recv_object < 0) || (recv_object > Num_Objects)) {
 427           continue;
 428         }
 429
 430         if (!(&Object_Data[recv_object])->migratable) {
 431           continue;
 432         }
 433
 434         send_index = (&Object_Data[send_object])->secondary_index;
 435         recv_index = (&Object_Data[recv_object])->secondary_index;
 436
 437         communication_matrix[send_index][recv_index] += com_data->messages;
 438         communication_matrix[recv_index][send_index] += com_data->messages;
 439       }
 440     }
 441   }
 442
 443   for (i = 0; i < num_migratable_objects; i++) {
 444     communication_matrix[i][i] = 0;
 445   }
 446
 447   // Construct a graph in CSR format for input to Metis.
 448   xadj = new int[num_migratable_objects + 1];
 449   num_edges = 0;
 450   for (i = 0; i < num_migratable_objects; i++) {
 451     for (j = 0; j < num_migratable_objects; j++) {
 452       if (communication_matrix[i][j] > 0) {
 453         num_edges += 1;
 454       }
 455     }
 456   }
 457   adjncy = new int[num_edges];
 458   edge_weights = new int[num_edges];
 459   count = 0;
 460   xadj[0] = 0;
 461   for (i = 0; i < num_migratable_objects; i++) {
 462     for (j = 0; j < num_migratable_objects; j++) {
 463       if (communication_matrix[i][j] > 0) {
 464         adjncy[count] = j;
 465         edge_weights[count] = communication_matrix[i][j];
 466         count += 1;
 467       }
 468     }
 469     xadj[i+1] = count;
 470   }
 471
 472   if ((CK_LDB_GridHybridLB_Mode == 0) || (CK_LDB_GridHybridLB_Mode == 2)) {
 473     // Call Metis to partition the communication graph.
 474     weight_flag = 1;      // weights on edges only
 475     numbering_flag = 0;   // C style numbering (base 0)
 476     options[0] = 0;
 477     newmap = new int[num_migratable_objects];
 478
 479     METIS_PartGraphRecursive (&num_migratable_objects, xadj, adjncy, NULL, edge_weights, &weight_flag, &numbering_flag, &num_partitions, options, &edgecut, newmap);
 480   } else if ((CK_LDB_GridHybridLB_Mode == 1) || (CK_LDB_GridHybridLB_Mode == 3)) {
 481     // Call Metis to partition the communication graph.
 482     weight_flag = 3;      // weights on both vertices and edges
 483     numbering_flag = 0;   // C style numbering (base 0)
 484     options[0] = 0;
 485     newmap = new int[num_migratable_objects];
 486
 487     METIS_PartGraphRecursive (&num_migratable_objects, xadj, adjncy, vertex_weights, edge_weights, &weight_flag, &numbering_flag, &num_partitions, options, &edgecut, newmap);
 488   } else {
 489     if (_lb_args.debug() > 0) {
 490       CkPrintf ("[%d] GridHybridLB was told to use bad mode (%d).\n", CkMyPe(), CK_LDB_GridHybridLB_Mode);
 491     }
 492   }
 493
 494   // Place the partitioned objects into their correct clusters.
 495   for (i = 0; i < num_migratable_objects; i++) {
 496     partition = newmap[i];
 497     cluster = partition_to_cluster_map[partition];
 498
 499     index = migratable_objects[i];
 500
 501     (&Object_Data[index])->cluster = cluster;
 502   }
 503
 504   // Free memory.
 505   delete [] newmap;
 506   delete [] edge_weights;
 507   delete [] adjncy;
 508   delete [] xadj;
 509   for (i = 0; i < num_migratable_objects; i++) {
 510     delete [] communication_matrix[i];
 511   }
 512   delete [] communication_matrix;
 513   if ((CK_LDB_GridHybridLB_Mode == 1) || (CK_LDB_GridHybridLB_Mode == 3)) {
 514     delete [] vertex_weights;
 515   }
 516   delete [] partition_to_cluster_map;
 517   delete [] migratable_objects;
 518 }
 519
 520
 521
 522 /**************************************************************************
 523 **
 524 */
 525 void GridHybridLB::Examine_InterObject_Messages (CentralLB::LDStats *stats)
 526 {
 527   int i;
 528   int j;
 529   LDCommData *com_data;
 530   int send_object;
 531   int send_cluster;
 532   int recv_object;
 533   int recv_cluster;
 534   LDObjKey *recv_objects;
 535   int num_objects;
 536
 537
 538   for (i = 0; i < stats->n_comm; i++) {
 539     com_data = &(stats->commData[i]);
 540     if ((!com_data->from_proc()) && (com_data->recv_type() == LD_OBJ_MSG)) {
 541       send_object = stats->getHash (com_data->sender);
 542       recv_object = stats->getHash (com_data->receiver.get_destObj());
 543
 544       if ((send_object < 0) || (send_object > Num_Objects) || (recv_object < 0) || (recv_object > Num_Objects)) {
 545         continue;
 546       }
 547
 548       send_cluster = (&Object_Data[send_object])->cluster;
 549       recv_cluster = (&Object_Data[recv_object])->cluster;
 550
 551       if (send_cluster == recv_cluster) {
 552         (&Object_Data[send_object])->num_lan_msgs += com_data->messages;
 553       } else {
 554         (&Object_Data[send_object])->num_wan_msgs += com_data->messages;
 555       }
 556     } else if (com_data->receiver.get_type() == LD_OBJLIST_MSG) {
 557       send_object = stats->getHash (com_data->sender);
 558
 559       if ((send_object < 0) || (send_object > Num_Objects)) {
 560         continue;
 561       }
 562
 563       send_cluster = (&Object_Data[send_object])->cluster;
 564
 565       recv_objects = com_data->receiver.get_destObjs (num_objects);   // (num_objects is passed by reference)
 566
 567       for (j = 0; j < num_objects; j++) {
 568         recv_object = stats->getHash (recv_objects[j]);
 569
 570         if ((recv_object < 0) || (recv_object > Num_Objects)) {
 571           continue;
 572         }
 573
 574         recv_cluster = (&Object_Data[recv_object])->cluster;
 575
 576         if (send_cluster == recv_cluster) {
 577           (&Object_Data[send_object])->num_lan_msgs += com_data->messages;
 578         } else {
 579           (&Object_Data[send_object])->num_wan_msgs += com_data->messages;
 580         }
 581       }
 582     }
 583   }
 584 }
 585
 586
 587
 588 /**************************************************************************
 589 **
 590 */
 591 void GridHybridLB::Map_NonMigratable_Objects_To_PEs ()
 592 {
 593   int i;
 594
 595
 596   for (i = 0; i < Num_Objects; i++) {
 597     if (!((&Object_Data[i])->migratable)) {
 598       if (_lb_args.debug() > 1) {
 599         CkPrintf ("[%d] GridHybridLB identifies object %d as non-migratable.\n", CkMyPe(), i);
 600       }
 601
 602       Assign_Object_To_PE (i, (&Object_Data[i])->from_pe);
 603     }
 604   }
 605 }
 606
 607
 608
 609 /**************************************************************************
 610 **
 611 */
 612 void GridHybridLB::Map_Migratable_Objects_To_PEs (int cluster)
 613 {
 614   int target_object;
 615   int target_pe;
 616
 617
 618   while (1) {
 619     target_object = Find_Maximum_Object (cluster);
 620     target_pe = Find_Minimum_PE (cluster);
 621
 622     if ((target_object == -1) || (target_pe == -1)) {
 623       break;
 624     }
 625
 626     Assign_Object_To_PE (target_object, target_pe);
 627   }
 628 }
 629
 630
 631
 632 /**************************************************************************
 633 ** This method locates the maximum WAN object in terms of number of
 634 ** messages that traverse a wide-area connection.  The search is
 635 ** constrained to objects within the specified cluster that have not yet
 636 ** been mapped (balanced) to a PE.
 637 **
 638 ** The method returns -1 if no matching object is found.
 639 */
 640 int GridHybridLB::Find_Maximum_Object (int cluster)
 641 {
 642   int max_index;
 643   int max_load_index;
 644   double max_load;
 645   int max_wan_msgs_index;
 646   int max_wan_msgs;
 647   double load_tolerance;
 648   int i;
 649
 650
 651   max_index = -1;
 652
 653   max_load_index = -1;
 654   max_load = -1.0;
 655
 656   max_wan_msgs_index = -1;
 657   max_wan_msgs = -1;
 658
 659   for (i = 0; i < Num_Objects; i++) {
 660     if (((&Object_Data[i])->cluster == cluster) && ((&Object_Data[i])->to_pe == -1)) {
 661       if ((&Object_Data[i])->load > max_load) {
 662         max_load_index = i;
 663         max_load = (&Object_Data[i])->load;
 664       }
 665       if ((&Object_Data[i])->num_wan_msgs > max_wan_msgs) {
 666         max_wan_msgs_index = i;
 667         max_wan_msgs = (&Object_Data[i])->num_wan_msgs;
 668       }
 669     }
 670   }
 671
 672   if (max_load_index < 0) {
 673     return (max_load_index);
 674   }
 675
 676   if ((&Object_Data[max_load_index])->num_wan_msgs >= (&Object_Data[max_wan_msgs_index])->num_wan_msgs) {
 677     return (max_load_index);
 678   }
 679
 680   load_tolerance = (&Object_Data[max_load_index])->load * CK_LDB_GridHybridLB_Load_Tolerance;
 681
 682   max_index = max_load_index;
 683
 684   for (i = 0; i < Num_Objects; i++) {
 685     if (((&Object_Data[i])->cluster == cluster) && ((&Object_Data[i])->to_pe == -1)) {
 686       if (i != max_load_index) {
 687         if (fabs ((&Object_Data[max_load_index])->load - (&Object_Data[i])->load) <= load_tolerance) {
 688           if ((&Object_Data[i])->num_wan_msgs > (&Object_Data[max_index])->num_wan_msgs) {
 689             max_index = i;
 690           }
 691         }
 692       }
 693     }
 694   }
 695
 696   return (max_index);
 697 }
 698
 699
 700
 701 /**************************************************************************
 702 ** This method locates the minimum WAN PE in terms of number of objects
 703 ** that communicate with objects across a wide-area connection.  The search
 704 ** is constrained to PEs within the specified cluster.
 705 **
 706 ** In the event of a "tie" (i.e., the number of WAN objects on a candidate
 707 ** PE is equal to the minimum number of WAN objects discovered so far) the
 708 ** tie is broken by considering the scaled CPU loads on the PEs.  The PE
 709 ** with the smaller scaled load is the better candidate.  In the event of
 710 ** a secondary tie, the secondary tie is broken by considering the number
 711 ** of LAN objects on the two PEs.
 712 **
 713 ** The method returns -1 if no matching PE is found.
 714 */
 715 int GridHybridLB::Find_Minimum_PE (int cluster)
 716 {
 717   if ((CK_LDB_GridHybridLB_Mode == 0) || (CK_LDB_GridHybridLB_Mode == 1)) {
 718     int min_index;
 719     int min_objs;
 720     int i;
 721
 722
 723     min_index = -1;
 724     min_objs = MAXINT;
 725
 726     for (i = 0; i < Num_PEs; i++) {
 727       if (((&PE_Data[i])->available) && ((&PE_Data[i])->cluster == cluster)) {
 728         if ((&PE_Data[i])->num_objs < min_objs) {
 729           min_index = i;
 730           min_objs = (&PE_Data[i])->num_objs;
 731         } else if (((&PE_Data[i])->num_objs == min_objs) &&
 732                    ((&PE_Data[i])->num_wan_objs < (&PE_Data[min_index])->num_wan_objs)) {
 733           min_index = i;
 734         } else if (((&PE_Data[i])->num_objs == min_objs) &&
 735                    ((&PE_Data[i])->num_wan_objs == (&PE_Data[min_index])->num_wan_objs) &&
 736                    ((&PE_Data[i])->num_wan_msgs < (&PE_Data[min_index])->num_wan_msgs)) {
 737           min_index = i;
 738         } else if (((&PE_Data[i])->num_objs == min_objs) &&
 739                    ((&PE_Data[i])->num_wan_objs == (&PE_Data[min_index])->num_wan_objs) &&
 740                    ((&PE_Data[i])->num_wan_msgs == (&PE_Data[min_index])->num_wan_msgs) &&
 741                    ((&PE_Data[i])->scaled_load < (&PE_Data[min_index])->scaled_load)) {
 742           min_index = i;
 743         }
 744       }
 745     }
 746
 747     return (min_index);
 748   } else if ((CK_LDB_GridHybridLB_Mode == 2) || (CK_LDB_GridHybridLB_Mode == 3)) {
 749     int min_index;
 750     int min_load_index;
 751     double min_scaled_load;
 752     int min_wan_msgs_index;
 753     int min_wan_msgs;
 754     double load_tolerance;
 755     int i;
 756
 757
 758     min_index = -1;
 759
 760     min_load_index = -1;
 761     min_scaled_load = MAXDOUBLE;
 762
 763     min_wan_msgs_index = -1;
 764     min_wan_msgs = MAXINT;
 765
 766     for (i = 0; i < Num_PEs; i++) {
 767       if (((&PE_Data[i])->available) && ((&PE_Data[i])->cluster == cluster)) {
 768         if ((&PE_Data[i])->scaled_load < min_scaled_load) {
 769           min_load_index = i;
 770           min_scaled_load = (&PE_Data[i])->scaled_load;
 771         }
 772         if ((&PE_Data[i])->num_wan_msgs < min_wan_msgs) {
 773           min_wan_msgs_index = i;
 774           min_wan_msgs = (&PE_Data[i])->num_wan_msgs;
 775         }
 776       }
 777     }
 778
 779     // If no PE at all was found, return a -1.
 780     if (min_load_index < 0) {
 781       return (min_load_index);
 782     }
 783
 784     // If the number of WAN messages on the lightest loaded PE happens to match the minimum number
 785     // of WAN messages overall, we win because this target PE is overall the minimum PE in terms
 786     // of both load *and* WAN messages.
 787     if ((&PE_Data[min_load_index])->num_wan_msgs <= (&PE_Data[min_wan_msgs_index])->num_wan_msgs) {
 788       return (min_load_index);
 789     }
 790
 791     // Otherwise, we now search for PEs that have loads +/- our tolerance.  If any PE has a load
 792     // within our tolerance, check its number of WAN messages.  The one of these that has the
 793     // fewest WAN messages is probably the best candidate for placing the next object onto.
 794
 795     load_tolerance = (&PE_Data[min_load_index])->scaled_load * CK_LDB_GridHybridLB_Load_Tolerance;
 796
 797     min_index = min_load_index;
 798
 799     for (i = 0; i < Num_PEs; i++) {
 800       if (((&PE_Data[i])->available) && ((&PE_Data[i])->cluster == cluster)) {
 801         if (i != min_load_index) {
 802           if (fabs ((&PE_Data[i])->scaled_load - (&PE_Data[min_load_index])->scaled_load) <= load_tolerance) {
 803             if ((&PE_Data[i])->num_wan_msgs < (&PE_Data[min_index])->num_wan_msgs) {
 804               min_index = i;
 805             }
 806           }
 807         }
 808       }
 809     }
 810
 811     return (min_index);
 812   } else {
 813     if (_lb_args.debug() > 0) {
 814       CkPrintf ("[%d] GridHybridLB was told to use bad mode (%d).\n", CkMyPe(), CK_LDB_GridHybridLB_Mode);
 815     }
 816     return (-1);
 817   }
 818 }
 819
 820
 821
 822 /**************************************************************************
 823 ** This method assigns target_object to target_pe.  The data structure
 824 ** entry for target_pe is updated appropriately with measurements from
 825 ** target_object.  This updated information is considered when placing
 826 ** successive objects onto PEs.
 827 */
 828 void GridHybridLB::Assign_Object_To_PE (int target_object, int target_pe)
 829 {
 830   (&Object_Data[target_object])->to_pe = target_pe;
 831
 832   (&PE_Data[target_pe])->num_objs += 1;
 833
 834   if ((&Object_Data[target_object])->num_lan_msgs > 0) {
 835     (&PE_Data[target_pe])->num_lan_objs += 1;
 836     (&PE_Data[target_pe])->num_lan_msgs += (&Object_Data[target_object])->num_lan_msgs;
 837   }
 838
 839   if ((&Object_Data[target_object])->num_wan_msgs > 0) {
 840     (&PE_Data[target_pe])->num_wan_objs += 1;
 841     (&PE_Data[target_pe])->num_wan_msgs += (&Object_Data[target_object])->num_wan_msgs;
 842   }
 843
 844   (&PE_Data[target_pe])->scaled_load += (&Object_Data[target_object])->load / (&PE_Data[target_pe])->relative_speed;
 845 }
 846
 847
 848
 849 /**************************************************************************
 850 ** The Charm++ load balancing framework invokes this method to cause the
 851 ** load balancer to migrate objects to "better" PEs.
 852 */
 853 void GridHybridLB::work (LDStats *stats)
 854 {
 855   int i;
 856
 857
 858   if (_lb_args.debug() > 0) {
 859     CkPrintf ("[%d] GridHybridLB is working (mode=%d, background load=%d, load tolerance=%f).\n", CkMyPe(), CK_LDB_GridHybridLB_Mode, CK_LDB_GridHybridLB_Background_Load, CK_LDB_GridHybridLB_Load_Tolerance);
 860   }
 861
 862   // Since this load balancer looks at communications data, it must call stats->makeCommHash().
 863   stats->makeCommHash ();
 864
 865   // Initialize object variables for the number of PEs and number of objects.
 866   Num_PEs = stats->nprocs();
 867   Num_Objects = stats->n_objs;
 868
 869   if (_lb_args.debug() > 0) {
 870     CkPrintf ("[%d] GridHybridLB is examining %d PEs and %d objects.\n", CkMyPe(), Num_PEs, Num_Objects);
 871   }
 872
 873   // Initialize the PE_Data[] data structure.
 874   Initialize_PE_Data (stats);
 875
 876   // If at least one available PE does not exist, return from load balancing.
 877   if (Available_PE_Count() < 1) {
 878     if (_lb_args.debug() > 0) {
 879       CkPrintf ("[%d] GridHybridLB finds no available PEs -- no balancing done.\n", CkMyPe());
 880     }
 881
 882     delete [] PE_Data;
 883
 884     return;
 885   }
 886
 887   // Determine the number of clusters.
 888   // If any PE is not mapped to a cluster, return from load balancing.
 889   Num_Clusters = Compute_Number_Of_Clusters ();
 890   if (Num_Clusters < 1) {
 891     if (_lb_args.debug() > 0) {
 892       CkPrintf ("[%d] GridHybridLB finds incomplete PE cluster map -- no balancing done.\n", CkMyPe());
 893     }
 894
 895     delete [] PE_Data;
 896
 897     return;
 898   }
 899
 900   if (_lb_args.debug() > 0) {
 901     CkPrintf ("[%d] GridHybridLB finds %d clusters.\n", CkMyPe(), Num_Clusters);
 902   }
 903
 904   // Initialize the Object_Data[] data structure.
 905   Initialize_Object_Data (stats);
 906
 907   // Initialize the Cluster_Data[] data structure.
 908   Initialize_Cluster_Data ();
 909
 910   // Partition objects into clusters.
 911   Partition_Objects_Into_Clusters (stats);
 912
 913   // Examine all object-to-object messages for intra-cluster and inter-cluster communications.
 914   Examine_InterObject_Messages (stats);
 915
 916   // Map non-migratable objects to PEs.
 917   Map_NonMigratable_Objects_To_PEs ();
 918
 919   // Map migratable objects to PEs in each cluster.
 920   for (i = 0; i < Num_Clusters; i++) {
 921     Map_Migratable_Objects_To_PEs (i);
 922   }
 923
 924   // Make the assignment of objects to PEs in the load balancer framework.
 925   for (i = 0; i < Num_Objects; i++) {
 926     stats->to_proc[i] = (&Object_Data[i])->to_pe;
 927
 928     if (_lb_args.debug() > 2) {
 929       CkPrintf ("[%d] GridHybridLB migrates object %d from PE %d to PE %d.\n", CkMyPe(), i, stats->from_proc[i], stats->to_proc[i]);
 930     } else if (_lb_args.debug() > 1) {
 931       if (stats->to_proc[i] != stats->from_proc[i]) {
 932         CkPrintf ("[%d] GridHybridLB migrates object %d from PE %d to PE %d.\n", CkMyPe(), i, stats->from_proc[i], stats->to_proc[i]);
 933       }
 934     }
 935   }
 936
 937   // Free memory.
 938   delete [] Cluster_Data;
 939   delete [] Object_Data;
 940   delete [] PE_Data;
 941 }
 942
 943 #include "GridHybridLB.def.h"