8 #include "LBSimulation.h"
// Registers this load balancer with the local LBDatabase branch.
// Stores the sequence number taken from opt, increments the
// numLoadBalancers counter, and aborts when more than one balancer would
// exist once the ones counted in hasNullLB are subtracted.
// NOTE(review): some original lines of this definition, including its
// closing brace, are not visible in this excerpt.
12 void BaseLB::initLB(const CkLBOptions &opt) {
13 seqno = opt.getSeqNo();
14 CkpvAccess(numLoadBalancers) ++;
// anything above one balancer (after discounting hasNullLB) is fatal
16 if (CkpvAccess(numLoadBalancers) - CkpvAccess(hasNullLB) > 1)
17 CmiAbort("Error: try to create more than one load balancer strategies!");
// keep a pointer to the local branch of the LBDatabase for later calls
19 theLbdb = CProxy_LBDatabase(_lbdb).ckLocalBranch();
21 // register this load balancer with the LBDatabase at the sequence number
22 theLbdb->addLoadbalancer(this, seqno);
26 CkpvAccess(numLoadBalancers) --;
// Removes this balancer's local barrier receiver from the LBDatabase and
// decrements the numLoadBalancers counter that initLB incremented.
29 void BaseLB::unregister() {
30 theLbdb->RemoveLocalBarrierReceiver(receiver);
31 CkpvAccess(numLoadBalancers) --;
// Pack/unpack hook for the balancer. The visible part requests a load
// balancer ticket from the LBDatabase object and asserts that it matches
// the stored seqno.
// NOTE(review): the condition guarding these two statements is not visible
// in this excerpt (presumably the unpacking path) — confirm.
34 void BaseLB::pup(PUP::er &p) {
40 int newseq = LBDatabaseObj()->getLoadbalancerTicket();
41 CmiAssert(newseq == seqno);
// Flushes balancer state; the visible part calls ClearLoads() on the
// LBDatabase.
48 void BaseLB::flushStates() {
50 theLbdb->ClearLoads();
// Empty definitions of the same four members defined above.
// NOTE(review): the preprocessor guard selecting between the two sets is
// not visible in this excerpt; presumably these are used when the load
// balancing database is compiled out — confirm.
55 void BaseLB::initLB(const CkLBOptions &) {}
56 void BaseLB::unregister() {}
57 void BaseLB::pup(PUP::er &p) {}
58 void BaseLB::flushStates() {}
// Absolute value that is guaranteed to be non-negative.
// The magnitude is computed in unsigned arithmetic, so negating the most
// negative int no longer overflows (undefined behaviour that typically
// yields a negative result). That one magnitude does not fit in an int and
// is masked to 0 by the non-negative-range mask; every other input yields
// exactly what c>0?c:-c yields.
static inline int i_abs(int c) {
  const unsigned int magnitude = c < 0 ? 0u - static_cast<unsigned int>(c)
                                       : static_cast<unsigned int>(c);
  // ~0u >> 1 keeps every bit except the top one, i.e. the non-negative int range
  return static_cast<int>(magnitude & (~0u >> 1));
}
// Computes the hash-table slot for an object id.
// The low 7 bits of |id[2]| are shifted to bits 24-30, the low 8 bits of
// |id[1]| to bits 16-23, and the whole of |id[0]| is OR-ed in (so it
// overlaps the other fields when it is 2^16 or larger); the result is then
// reduced modulo hashSize.
// The slot is non-negative as long as i_abs returns non-negative values.
63 // assume integer is 32 bits
64 inline static int ObjKey(const LDObjid &oid, const int hashSize) {
65 // make sure all positive
66 return (((i_abs(oid.id[2]) & 0x7F)<<24)
67 |((i_abs(oid.id[1]) & 0xFF)<<16)
68 |i_abs(oid.id[0])) % hashSize;
// Constructs empty statistics: zero objects, zero migratable objects, zero
// comm records, no object hash, and complete_flag taken from `complete`.
// A count of 0 falls back to CkNumPes(); procs is then allocated with
// `count` entries.
// NOTE(review): the statement that derives count from c is not visible in
// this excerpt — confirm.
71 BaseLB::LDStats::LDStats(int c, int complete)
72 : n_objs(0), n_migrateobjs(0), n_comm(0),
73 objHash(NULL), complete_flag(complete)
76 if (count == 0) count = CkNumPes();
77 procs = new ProcStats[count];
// Table of primes scanned by primeLargerThan() to pick a hash-table size.
// NOTE(review): the table entries themselves are not visible in this
// excerpt; the name suggests each entry roughly doubles the previous one —
// confirm against the full source.
80 const static unsigned int doublingPrimes[] = {
112 /* extra primes larger than an unsigned 32-bit integer:
139 3200000000000000059 //This is a 62-bit number
143 //This routine returns an arbitrary prime larger than x
// It advances i past every doublingPrimes entry that is <= x and returns
// the entry it stops on.
// NOTE(review): no bound on i is visible here; an x at or above the largest
// table entry would make the scan read past the end of the array — confirm
// against the full source.
144 static unsigned int primeLargerThan(unsigned int x)
147 while (doublingPrimes[i]<=x) i++;
148 return doublingPrimes[i];
// Builds objHash, the table that getHash() probes to turn an object id
// into an index into objData.
// NOTE(review): the early-return test, the initial hashSize computation,
// the slot initialisation and the final store are not visible in this
// excerpt.
151 void BaseLB::LDStats::makeCommHash() {
152 // hash table is already built
// round the table size up to a prime from the doublingPrimes table
157 hashSize = primeLargerThan(hashSize);
158 objHash = new int[hashSize];
159 for(i=0;i<hashSize;i++)
162 for(i=0;i<n_objs;i++){
163 const LDObjid &oid = objData[i].objID();
164 int hash = ObjKey(oid, hashSize);
165 CmiAssert(hash != -1);
// linear probing: step forward (wrapping) until a slot holding -1 is found
166 while(objHash[hash] != -1)
167 hash = (hash+1)%hashSize;
// Frees the objHash array (if one was allocated) and clears the cached
// hash values on every comm record.
// NOTE(review): whether objHash is reset afterwards is not visible in this
// excerpt.
172 void BaseLB::LDStats::deleteCommHash() {
173 if (objHash) delete [] objHash;
175 for(int i=0; i < n_comm; i++) {
176 commData[i].clearHash();
// Looks up the objData index of the object identified by (oid, mid).
// Probing starts at the ObjKey slot and walks forward with wrap-around for
// at most hashSize steps. A slot holding -1 ends the search with -1; a slot
// whose object matches both the object id and the object-manager id yields
// that slot's stored index.
// NOTE(review): the return statement after the loop is not visible in this
// excerpt.
180 int BaseLB::LDStats::getHash(const LDObjid &oid, const LDOMid &mid)
183 CmiAssert(hashSize > 0);
184 int hash = ObjKey(oid, hashSize);
186 for(int id=0;id<hashSize;id++){
187 int index = (id+hash)%hashSize;
// NOTE(review): index can only be -1 if hash is negative — confirm whether
// the first test is reachable.
188 if (index == -1 || objHash[index] == -1) return -1;
189 if (LDObjIDEqual(objData[objHash[index]].objID(), oid) &&
190 LDOMidEqual(objData[objHash[index]].omID(), mid))
191 return objHash[index];
193 // CkPrintf("not found \n");
// Convenience overload: splits the key into its object id and
// object-manager id and forwards to getHash(oid, mid).
198 int BaseLB::LDStats::getHash(const LDObjKey &objKey)
200 const LDObjid &oid = objKey.objID();
201 const LDOMid &mid = objKey.omID();
202 return getHash(oid, mid);
// Returns the hash lookup result for the record's sender, caching it in
// cData.sendHash. A cached value of -1 triggers a lookup, so a sender that
// getHash() cannot find is looked up again on every call.
205 int BaseLB::LDStats::getSendHash(LDCommData &cData)
207 if (cData.sendHash == -1) {
208 cData.sendHash = getHash(cData.sender);
210 return cData.sendHash;
// Returns the hash lookup result for the record's destination object,
// caching it in cData.recvHash. A cached value of -1 triggers a lookup, so
// a receiver that getHash() cannot find is looked up again on every call.
213 int BaseLB::LDStats::getRecvHash(LDCommData &cData)
215 if (cData.recvHash == -1) {
216 cData.recvHash = getHash(cData.receiver.get_destObj());
218 return cData.recvHash;
// Clears the cached hash values on every comm record; objHash itself is
// left untouched by the visible code.
221 void BaseLB::LDStats::clearCommHash() {
222 for(int i=0; i < n_comm; i++) {
223 commData[i].clearHash();
// Adds to nmsgs / nbytes the message and byte counts of every comm record
// whose sender PE differs from its receiver PE, where object locations are
// taken from to_proc.
// NOTE(review): several original lines (initialisation of the outputs,
// else-branches, closing braces, the declaration of pes/exist/nobjs) are
// not visible in this excerpt, so the exact branch structure should be
// confirmed against the full source.
227 void BaseLB::LDStats::computeNonlocalComm(int &nmsgs, int &nbytes)
236 for (int cidx=0; cidx < n_comm; cidx++) {
237 LDCommData& cdata = commData[cidx];
238 int senderPE, receiverPE;
// sender: either a processor recorded directly, or an object located via to_proc
239 if (cdata.from_proc())
240 senderPE = cdata.src_proc;
242 int idx = getHash(cdata.sender);
243 if (idx == -1) continue; // sender has just migrated?
244 senderPE = to_proc[idx];
245 CmiAssert(senderPE != -1);
247 CmiAssert(senderPE < nprocs() && senderPE >= 0);
249 // find receiver: point-to-point and multicast two cases
250 int receiver_type = cdata.receiver.get_type();
251 if (receiver_type == LD_PROC_MSG || receiver_type == LD_OBJ_MSG) {
252 if (receiver_type == LD_PROC_MSG)
253 receiverPE = cdata.receiver.proc();
255 int idx = getHash(cdata.receiver.get_destObj());
256 if (idx == -1) { // receiver outside this domain
// with complete_flag set the record is skipped; otherwise receiverPE is -1,
// which never equals the (asserted non-negative) senderPE
257 if (complete_flag) continue;
258 else receiverPE = -1;
261 receiverPE = to_proc[idx];
262 CmiAssert(receiverPE < nprocs() && receiverPE >= 0);
265 if(senderPE != receiverPE)
267 nmsgs += cdata.messages;
268 nbytes += cdata.bytes;
271 else if (receiver_type == LD_OBJLIST_MSG) {
273 LDObjKey *objs = cdata.receiver.get_destObjs(nobjs);
276 for (int i=0; i<nobjs; i++) {
277 int idx = getHash(objs[i]);
// NOTE(review): when assertions are enabled this assert fires before the
// idx == -1 check below can take effect.
278 CmiAssert(idx != -1);
279 if (idx == -1) continue; // receiver has just been removed?
280 receiverPE = to_proc[idx];
281 CmiAssert(receiverPE < nprocs() && receiverPE >= 0);
// pes appears to collect the receiver PEs already seen for this record
// (the use of `exist` is not visible here) — confirm
283 for (int p=0; p<pes.size(); p++)
284 if (receiverPE == pes[p]) { exist=1; break; }
286 pes.push_back(receiverPE);
287 if(senderPE != receiverPE)
289 nmsgs += cdata.messages;
290 nbytes += cdata.bytes;
// Scales every processor's pe_speed by the largest pe_speed found, so the
// fastest processor ends up with the value maxspeed/maxspeed.
// NOTE(review): if no pe_speed is greater than 0, maxspeed stays 0.0 and
// the second loop divides by zero; no guard is visible in this excerpt.
298 void BaseLB::LDStats::normalize_speed() {
300 double maxspeed = 0.0;
302 for(int pe=0; pe < nprocs(); pe++) {
303 if (procs[pe].pe_speed > maxspeed) maxspeed = procs[pe].pe_speed;
305 for(int pe=0; pe < nprocs(); pe++)
306 procs[pe].pe_speed /= maxspeed;
// Dumps the statistics with CkPrintf in four sections: per-processor data,
// per-object data, per-comm-record data, and the from_proc entry of every
// object.
// NOTE(review): preprocessor guards, else-branches and closing braces of
// this definition are not visible in this excerpt.
309 void BaseLB::LDStats::print()
313 CkPrintf("------------- Processor Data: %d -------------\n", nprocs());
314 for(int pe=0; pe < nprocs(); pe++) {
315 struct ProcStats &proc = procs[pe];
// NOTE(review): pe_speed is printed with %d; normalize_speed() divides it
// by a double — confirm the field's type matches the format.
317 CkPrintf("Proc %d (%d) Speed %d Total = %f Idle = %f Bg = %f nObjs = %d",
318 pe, proc.pe, proc.pe_speed, proc.total_walltime, proc.idletime,
319 proc.bg_walltime, proc.n_objs);
321 CkPrintf(" CPU Total %f Bg %f", proc.total_cputime, proc.bg_cputime);
326 CkPrintf("------------- Object Data: %d objects -------------\n", n_objs);
327 for(i=0; i < n_objs; i++) {
328 LDObjData &odata = objData[i];
329 CkPrintf("Object %d\n",i);
330 CkPrintf(" id = %d %d %d %d\n",odata.objID().id[0],odata.objID().id[1
331 ], odata.objID().id[2], odata.objID().id[3]);
332 CkPrintf(" OM id = %d\t",odata.omID().id);
333 CkPrintf(" Mig. = %d\n",odata.migratable);
335 CkPrintf(" CPU = %f\t",odata.cpuTime);
337 CkPrintf(" Wall = %f\n",odata.wallTime);
340 CkPrintf("------------- Comm Data: %d records -------------\n", n_comm);
341 CkVec<LDCommData> &cdata = commData;
342 for(i=0; i < n_comm; i++) {
343 CkPrintf("Link %d\n",i);
// sender: printed as a PE when the record came from a processor, otherwise
// as an object-manager id plus four-part object id (presumably — the else
// is not visible)
345 LDObjid &sid = cdata[i].sender.objID();
346 if (cdata[i].from_proc())
347 CkPrintf(" sender PE = %d\t",cdata[i].src_proc);
349 CkPrintf(" sender id = %d:[%d %d %d %d]\t",
350 cdata[i].sender.omID().id,sid.id[0], sid.id[1], sid.id[2], sid.id[3]);
// NOTE(review): the destination object is fetched before the receiver type
// is checked, i.e. also for LD_PROC_MSG records.
352 LDObjid &rid = cdata[i].receiver.get_destObj().objID();
353 if (cdata[i].recv_type() == LD_PROC_MSG)
354 CkPrintf(" receiver PE = %d\n",cdata[i].receiver.proc());
356 CkPrintf(" receiver id = %d:[%d %d %d %d]\n",
357 cdata[i].receiver.get_destObj().omID().id,rid.id[0],rid.id[1],rid.id[2],rid.id[3]);
359 CkPrintf(" messages = %d\t",cdata[i].messages);
360 CkPrintf(" bytes = %d\n",cdata[i].bytes);
362 CkPrintf("------------- Object to PE mapping -------------\n");
363 for (i=0; i<n_objs; i++) CkPrintf(" %d", from_proc[i]);
// Computes the average load: the wall time of every object plus the
// background wall time of every available processor, divided by numAvail.
// NOTE(review): the declaration and update of numAvail are not visible in
// this excerpt (presumably it counts available processors); if it can be 0
// the division below is by zero — confirm.
368 double BaseLB::LDStats::computeAverageLoad()
372 for (i=0; i<n_objs; i++) total += objData[i].wallTime;
374 for (i=0; i<nprocs(); i++)
375 if (procs[i].available == true) {
376 total += procs[i].bg_walltime;
380 double averageLoad = total/numAvail;
384 // remove the obj-th object from the database
// The visible code also drops the object's from_proc entry, adjusts
// n_migrateobjs for migratable objects, and removes every comm record whose
// sender is this object.
// NOTE(review): the removal from objData/to_proc, the updates of n_objs,
// n_comm and `removed`, and any hash invalidation are not visible in this
// excerpt.
385 void BaseLB::LDStats::removeObject(int obj)
387 CmiAssert(obj < objData.size());
// copied by value, so the data stays usable after the entry is removed
388 LDObjData odata = objData[obj];
390 LDObjKey okey; // build a key
391 okey.omID() = odata.omID();
392 okey.objID() = odata.objID();
395 from_proc.remove(obj);
398 if (odata.migratable) n_migrateobjs --;
400 // search for sender, can be multiple senders
// com counts positions in the original list; subtracting `removed`
// presumably compensates for entries already deleted during this scan
402 for (int com=0; com<n_comm; com++) {
403 LDCommData &cdata = commData[com-removed];
404 if(!cdata.from_proc() && cdata.sender == okey) {
405 commData.remove(com-removed);
// Packs or unpacks the statistics through p: processor entries, object
// data, from_proc / to_proc, and comm records.
// NOTE(review): the statements that pup the counts themselves and several
// guards/braces are not visible in this excerpt.
412 void BaseLB::LDStats::pup(PUP::er &p)
// when unpacking, allocate procs and size the arrays before reading into them
419 if (p.isUnpacking()) {
420 // user can specify simulated processors other than the real # of procs.
421 int maxpe = nprocs() > LBSimulation::simProcs ? nprocs() : LBSimulation::simProcs;
422 procs = new ProcStats[maxpe];
423 objData.resize(n_objs);
424 commData.resize(n_comm);
425 from_proc.resize(n_objs);
426 to_proc.resize(n_objs);
429 // ignore the background load when unpacking if the user changed the # of procs
430 // otherwise load everything
431 if (p.isUnpacking() && LBSimulation::procsChanged) {
// read each stored processor entry into a throwaway value
433 for (i=0; i<nprocs(); i++) p|dummy;
436 for (i=0; i<nprocs(); i++) p|procs[i];
437 for (i=0; i<n_objs; i++) p|objData[i];
438 for (i=0; i<n_objs; i++) p|from_proc[i];
439 for (i=0; i<n_objs; i++) p|to_proc[i];
440 // reset to_proc when unpacking
442 for (i=0; i<n_objs; i++) to_proc[i] = from_proc[i];
443 for (i=0; i<n_comm; i++) p|commData[i];
// NOTE(review): the guard for this assignment is not visible here
445 count = LBSimulation::simProcs;
446 if (p.isUnpacking()) {
// for lbversion <= 1 each processor entry's pe is set to its own index
448 if (_lb_args.lbversion() <= 1)
449 for (i=0; i<nprocs(); i++) procs[i].pe = i;
// Estimates the memory used by these statistics, in bytes: the LDStats
// object itself, one ProcStats per processor, one LDObjData plus two ints
// per object (presumably the from_proc and to_proc entries), and one
// LDCommData per comm record. The object hash table is not included.
453 int BaseLB::LDStats::useMem() {
454 // calculate the memory usage of this LB (superclass).
455 return sizeof(LDStats) + sizeof(ProcStats) * nprocs() +
456 (sizeof(LDObjData) + 2 * sizeof(int)) * n_objs +
457 sizeof(LDCommData) * n_comm;
460 #include "BaseLB.def.h"