3 // This file is compiled twice to make a version that is capable of not needing the tracing to be turned on.
5 #include "controlPoints.h"
6 #include "trace-controlPoints.h"
7 #include "LBDatabase.h"
8 #include "controlPoints.h"
10 #include "trace-projections.h"
11 #include <pathHistory.h>
12 #include "cp_effects.h"
17 #if CMK_WITH_CONTROLPOINT
19 #define roundDouble(x) ((long)(x+0.5))
20 #define myAbs(x) (((x)>=0.0)?(x):(-1.0*(x)))
21 #define isInRange(v,a,b) ( ((v)<=(a)&&(v)>=(b)) || ((v)<=(b)&&(v)>=(a)) )
23 inline double closestInRange(double v, double a, double b){
24 return (v<a) ? a : ((v>b)?b:v);
28 // A framework for tuning "control points" exposed by an application. Tuning decisions are based upon observed performance measurements.
31 /** @defgroup ControlPointFramework Automatic Performance Tuning and Steering Framework */
36 #define DEFAULT_CONTROL_POINT_SAMPLE_PERIOD 10000000
40 //#define DEBUGPRINT 4
42 static void periodicProcessControlPoints(void* ptr, double currWallTime);
45 // A pointer to this PE's controlpoint manager Proxy
46 /* readonly */ CProxy_controlPointManager controlPointManagerProxy;
47 /* readonly */ int random_seed;
48 /* readonly */ long controlPointSamplePeriod;
49 /* readonly */ int whichTuningScheme;
50 /* readonly */ bool writeDataFileAtShutdown;
51 /* readonly */ bool shouldFilterOutputData;
52 /* readonly */ bool loadDataFileAtStartup;
53 /* readonly */ bool shouldGatherMemoryUsage;
54 /* readonly */ bool shouldGatherUtilization;
55 /* readonly */ bool shouldGatherAll;
56 /* readonly */ char CPDataFilename[512];
58 extern bool enableCPTracing;
60 /// The control point values to be used for the first few phases if the strategy doesn't choose to do something else.
61 /// These probably come from the command line arguments, so are available only on PE 0
62 std::map<std::string, int> defaultControlPointValues;
66 typedef enum tuningSchemeEnum {RandomSelection, SimulatedAnnealing, ExhaustiveSearch, CriticalPathAutoPrioritization, UseBestKnownTiming, UseSteering, MemoryAware, Simplex, DivideAndConquer, AlwaysDefaults, LDBPeriod, LDBPeriodLinear, LDBPeriodQuadratic, LDBPeriodOptimal} tuningScheme;
70 void printTuningScheme(){
71 switch(whichTuningScheme){
73 CkPrintf("Tuning Scheme: RandomSelection\n");
76 CkPrintf("Tuning Scheme: AlwaysDefaults\n");
78 case SimulatedAnnealing:
79 CkPrintf("Tuning Scheme: SimulatedAnnealing\n");
81 case ExhaustiveSearch:
82 CkPrintf("Tuning Scheme: ExhaustiveSearch\n");
84 case CriticalPathAutoPrioritization:
85 CkPrintf("Tuning Scheme: CriticalPathAutoPrioritization\n");
87 case UseBestKnownTiming:
88 CkPrintf("Tuning Scheme: UseBestKnownTiming\n");
91 CkPrintf("Tuning Scheme: UseSteering\n");
94 CkPrintf("Tuning Scheme: MemoryAware\n");
97 CkPrintf("Tuning Scheme: Simplex Algorithm\n");
99 case DivideAndConquer:
100 CkPrintf("Tuning Scheme: Divide & Conquer Algorithm\n");
103 CkPrintf("Tuning Scheme: Load Balancing Period Steering (Constant Prediction)\n");
105 case LDBPeriodLinear:
106 CkPrintf("Tuning Scheme: Load Balancing Period Steering (Linear Prediction)\n");
108 case LDBPeriodQuadratic:
109 CkPrintf("Tuning Scheme: Load Balancing Period Steering (Quadratic Prediction)\n");
111 case LDBPeriodOptimal:
112 CkPrintf("Tuning Scheme: Load Balancing Period Steering (Optimal Prediction)\n");
115 CkPrintf("Unknown tuning scheme\n");
123 /// A reduction type that combines idle time measurements (min/sum/max etc.)
124 CkReduction::reducerType idleTimeReductionType;
125 /// A reducer that combines idle time measurements (min/sum/max etc.)
126 CkReductionMsg *idleTimeReduction(int nMsg,CkReductionMsg **msgs){
129 CkAssert(msgs[0]->getSize()==3*sizeof(double));
130 double *m=(double *)msgs[0]->getData();
135 for (int i=1;i<nMsg;i++) {
136 CkAssert(msgs[i]->getSize()==3*sizeof(double));
137 double *m=(double *)msgs[i]->getData();
138 ret[0]=min(ret[0],m[0]);
140 ret[2]=max(ret[2],m[2]);
142 return CkReductionMsg::buildNew(3*sizeof(double),ret);
147 /// A reduction type that combines idle, overhead, and memory measurements
148 CkReduction::reducerType allMeasuresReductionType;
149 /// A reducer that combines idle, overhead, and memory measurements
150 #define ALL_REDUCTION_SIZE 12
151 CkReductionMsg *allMeasuresReduction(int nMsg,CkReductionMsg **msgs){
152 double ret[ALL_REDUCTION_SIZE];
154 CkAssert(msgs[0]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
155 double *m=(double *)msgs[0]->getData();
156 memcpy(ret, m, ALL_REDUCTION_SIZE*sizeof(double) );
158 for (int i=1;i<nMsg;i++) {
159 CkAssert(msgs[i]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
160 double *m=(double *)msgs[i]->getData();
161 // idle time (min/sum/max)
162 ret[0]=min(ret[0],m[0]);
164 ret[2]=max(ret[2],m[2]);
165 // overhead time (min/sum/max)
166 ret[3]=min(ret[3],m[3]);
168 ret[5]=max(ret[5],m[5]);
170 ret[6]=max(ret[6],m[6]);
171 // bytes per invocation for two types of entry methods
179 return CkReductionMsg::buildNew(ALL_REDUCTION_SIZE*sizeof(double),ret);
183 /// Registers the control point framework's reduction handlers at startup on each PE
184 /*initproc*/ void registerCPReductions(void) {
185 idleTimeReductionType=CkReduction::addReducer(idleTimeReduction, false, "idleTimeReduction");
186 allMeasuresReductionType=CkReduction::addReducer(allMeasuresReduction, false, "allMeasuresReduction");
194 /// Return an integer between 0 and num-1 inclusive
195 /// If different seed, name, and random_seed values are provided, the returned values are pseudo-random
196 unsigned int randInt(unsigned int num, const char* name, int seed=0){
199 unsigned long hash = 0;
201 unsigned char * str = (unsigned char*)name;
203 while ((c = *str++)){
204 unsigned int c2 = (c+64)%128;
205 unsigned int c3 = (c2*5953)%127;
206 hash = c3 + (hash << 6) + (hash << 16) - hash;
209 unsigned long shuffled1 = (hash*2083)%7907;
210 unsigned long shuffled2 = (seed*4297)%2017;
212 unsigned long shuffled3 = (random_seed*4799)%7919;
214 unsigned int namehash = shuffled3 ^ shuffled1 ^ shuffled2;
216 unsigned int result = ((namehash * 6029) % 1117) % num;
218 CkAssert(result >=0 && result < num);
224 controlPointManager::controlPointManager() {
225 generatedPlanForStep = -1;
227 exitWhenReady = false;
228 alreadyRequestedMemoryUsage = false;
229 alreadyRequestedIdleTime = false;
230 alreadyRequestedAll = false;
232 instrumentedPhase * newPhase = new instrumentedPhase();
233 allData.phases.push_back(newPhase);
235 frameworkShouldAdvancePhase = false;
236 haveControlPointChangeCallback = false;
237 // CkPrintf("[%d] controlPointManager() Constructor Initializing control points, and loading data file\n", CkMyPe());
241 if(loadDataFileAtStartup){
247 CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
250 traceRegisterUserEvent("No currently executing message", 5000);
251 traceRegisterUserEvent("Zero time along critical path", 5010);
252 traceRegisterUserEvent("Positive total time along critical path", 5020);
253 traceRegisterUserEvent("env->setPathHistory()", 6000);
254 traceRegisterUserEvent("Critical Path", 5900);
255 traceRegisterUserEvent("Table Entry", 5901);
258 #if USER_EVENT_FOR_REGISTERTERMINALPATH
259 traceRegisterUserEvent("registerTerminalPath", 100);
265 controlPointManager::~controlPointManager(){
266 // CkPrintf("[%d] controlPointManager() Destructor\n", CkMyPe());
269 void controlPointManager::pup(PUP::er &p)
271 // FIXME: does not work when control point is actually used,
272 // just minimal pup so that it allows exit function to work (exitIfReady).
273 p|generatedPlanForStep;
275 p|alreadyRequestedMemoryUsage;
276 p|alreadyRequestedIdleTime;
277 p|alreadyRequestedAll;
278 p|frameworkShouldAdvancePhase;
279 p|haveControlPointChangeCallback;
284 /// Loads the previous run data file
285 void controlPointManager::loadDataFile(){
286 ifstream infile(CPDataFilename);
287 vector<std::string> names;
290 while(getline(infile,line)){
296 std::istringstream n(line);
299 while(getline(infile,line)){
304 int numControlPointNames = 0;
305 std::istringstream n2(line);
306 n2 >> numControlPointNames;
308 for(int i=0; i<numControlPointNames; i++){
309 getline(infile,line);
310 names.push_back(line);
313 for(int i=0;i<numTimings;i++){
314 getline(infile,line);
315 while(line[0] == '#')
316 getline(infile,line);
318 instrumentedPhase * ips = new instrumentedPhase();
320 std::istringstream iss(line);
322 // Read memory usage for phase
323 iss >> ips->memoryUsageMB;
324 // CkPrintf("Memory usage loaded from file: %d\n", ips.memoryUsageMB);
327 iss >> ips->idleTime.min;
328 iss >> ips->idleTime.avg;
329 iss >> ips->idleTime.max;
331 // Read overhead time
332 iss >> ips->overheadTime.min;
333 iss >> ips->overheadTime.avg;
334 iss >> ips->overheadTime.max;
336 // read bytePerInvoke
337 iss >> ips->bytesPerInvoke;
340 iss >> ips->grainSize;
342 // Read control point values
343 for(int cp=0;cp<numControlPointNames;cp++){
346 ips->controlPoints.insert(make_pair(names[cp],cpvalue));
349 // ignore median time
358 ips->times.push_back(time);
360 CkPrintf("read time %lf from file\n", time);
364 allData.phases.push_back(ips);
373 /// Add the current data to allData and output it to a file
374 void controlPointManager::writeDataFile(){
375 CkPrintf("============= writeDataFile() to %s ============\n", CPDataFilename);
376 ofstream outfile(CPDataFilename);
377 allData.cleanupNames();
379 // string s = allData.toString();
380 // CkPrintf("At end: \n %s\n", s.c_str());
382 if(shouldFilterOutputData){
384 allData.filterOutIncompletePhases();
387 // string s2 = allData.toString();
388 // CkPrintf("After filtering: \n %s\n", s2.c_str());
389 if(allData.toString().length() > 10){
390 outfile << allData.toString();
392 outfile << " No data available to save to disk " << endl;
397 /// User can register a callback that is called when application should advance to next phase
398 void controlPointManager::setCPCallback(CkCallback cb, bool _frameworkShouldAdvancePhase){
399 frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
400 controlPointChangeCallback = cb;
401 haveControlPointChangeCallback = true;
405 /// A user can specify that the framework should advance the phases automatically. Useful for gather performance measurements without modifying a program.
406 void controlPointManager::setFrameworkAdvancePhase(bool _frameworkShouldAdvancePhase){
407 frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
410 /// Called periodically by the runtime to handle the control points
411 /// Currently called on each PE
412 void controlPointManager::processControlPoints(){
415 CkPrintf("[%d] processControlPoints() haveControlPointChangeCallback=%d frameworkShouldAdvancePhase=%d\n", CkMyPe(), (int)haveControlPointChangeCallback, (int)frameworkShouldAdvancePhase);
418 //==========================================================================================
419 // Print the data for each phase
421 const int s = allData.phases.size();
424 CkPrintf("\n\nExamining critical paths and priorities and idle times (num phases=%d)\n", s );
425 for(int p=0;p<s;++p){
426 const instrumentedPhase &phase = allData.phases[p];
427 const idleTimeContainer &idle = phase.idleTime;
428 // vector<MergeablePathHistory> const &criticalPaths = phase.criticalPaths;
429 vector<double> const × = phase.times;
431 CkPrintf("Phase %d:\n", p);
433 // CkPrintf("critical paths: (* affected by control point)\n");
434 // for(int i=0;i<criticalPaths.size(); i++){
435 // If affected by a control point
436 // criticalPaths[i].print();
438 // criticalPaths[i].print();
443 CkPrintf("Timings:\n");
444 for(int i=0;i<times.size(); i++){
445 CkPrintf("%lf ", times[i]);
458 //==========================================================================================
459 // If this is a phase during which we try to adapt control point values based on critical path
463 // Find the most recent phase with valid critical path data and idle time measurements
465 for(int p=0; p<s; ++p){
466 const instrumentedPhase &phase = allData.phases[p];
467 const idleTimeContainer &idle = phase.idleTime;
468 if(idle.isValid() && phase.criticalPaths.size()>0 ){
474 CkPrintf("Examining phase %d which has a valid idle time and critical paths\n", whichPhase);
475 const instrumentedPhase &phase = allData.phases[whichPhase];
476 const idleTimeContainer &idle = phase.idleTime;
479 CkPrintf("Min PE idle is HIGH. %.2lf%% > 10%%\n", idle.min*100.0);
481 // Determine the median critical path for this phase
482 int medianCriticalPathIdx = phase.medianCriticalPathIdx();
483 const PathHistory &path = phase.criticalPaths[medianCriticalPathIdx];
484 CkAssert(phase.criticalPaths.size() > 0);
485 CkAssert(phase.criticalPaths.size() > medianCriticalPathIdx);
487 CkPrintf("Median Critical Path has time %lf\n", path.getTotalTime());
489 if(phase.times[medianCriticalPathIdx] > 1.2 * path.getTotalTime()){
490 CkPrintf("The application step(%lf) is taking significantly longer than the critical path(%lf). BAD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
493 CkPrintf("Finding control points related to the critical path\n");
495 std::set<std::string> controlPointsAffectingCriticalPath;
498 for(int e=0;e<path.getNumUsed();e++){
499 if(path.getUsedCount(e)>0){
500 int ep = path.getUsedEp(e);
502 std::map<std::string, std::set<int> >::iterator iter;
503 for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
504 if(iter->second.count(ep)>0){
505 controlPointsAffectingCriticalPath.insert(iter->first);
506 CkPrintf("Control Point \"%s\" affects the critical path\n", iter->first.c_str());
516 CkPrintf("No control points are known to affect the critical path\n");
518 double beginTime = CmiWallTimer();
520 CkPrintf("Attempting to modify control point values for %d control points that affect the critical path\n", controlPointsAffectingCriticalPath.size());
522 newControlPoints = phase.controlPoints;
524 if(frameworkShouldAdvancePhase){
528 if(haveControlPointChangeCallback){
530 CkPrintf("Calling control point change callback\n");
532 // Create a high priority message and send it to the callback
533 controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg;
534 *((int*)CkPriorityPtr(msg)) = -INT_MAX;
535 CkSetQueueing(msg, CK_QUEUEING_IFIFO);
536 controlPointChangeCallback.send(msg);
541 // adjust the control points that can affect the critical path
543 char textDescription[4096*2];
544 textDescription[0] = '\0';
546 std::map<std::string,int>::iterator newCP;
547 for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
548 if( controlPointsAffectingCriticalPath.count(newCP->first) > 0 ){
549 // decrease the value (increase priority) if within range
550 int lowerbound = controlPointSpace[newCP->first].first;
551 if(newCP->second > lowerbound){
552 newControlPointsAvailable = true;
558 // Create a string for a projections user event
560 std::map<std::string,int>::iterator newCP;
561 for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
562 sprintf(textDescription+strlen(textDescription), "<br>\"%s\"=%d", newCP->first.c_str(), newCP->second);
566 char *userEventString = new char[4096*2];
567 sprintf(userEventString, "Adjusting control points affecting critical path: %s ***", textDescription);
569 static int userEventCounter = 20000;
570 CkPrintf("USER EVENT: %s\n", userEventString);
572 traceRegisterUserEvent(userEventString, userEventCounter);
573 traceUserBracketEvent(userEventCounter, beginTime, CmiWallTimer());
579 CkPrintf("The application step(%lf) is dominated mostly by the critical path(%lf). GOOD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
594 if(frameworkShouldAdvancePhase){
598 if(haveControlPointChangeCallback){
599 // Create a high priority message and send it to the callback
600 controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg;
601 *((int*)CkPriorityPtr(msg)) = -INT_MAX;
602 CkSetQueueing(msg, CK_QUEUEING_IFIFO);
603 controlPointChangeCallback.send(msg);
608 /// Determine if any control point is known to affect an entry method
609 bool controlPointManager::controlPointAffectsThisEP(int ep){
610 std::map<std::string, std::set<int> >::iterator iter;
611 for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
612 if(iter->second.count(ep)>0){
619 /// Determine if any control point is known to affect a chare array
620 bool controlPointManager::controlPointAffectsThisArray(int array){
621 std::map<std::string, std::set<int> >::iterator iter;
622 for(iter=affectsPrioritiesArray.begin(); iter!= affectsPrioritiesArray.end(); ++iter){
623 if(iter->second.count(array)>0){
631 /// The data from the current phase
632 instrumentedPhase * controlPointManager::currentPhaseData(){
633 int s = allData.phases.size();
635 return allData.phases[s-1];
639 /// The data from the previous phase
640 instrumentedPhase * controlPointManager::previousPhaseData(){
641 int s = allData.phases.size();
642 if(s >= 2 && phase_id > 0) {
643 return allData.phases[s-2];
649 /// The data from two phases back
650 instrumentedPhase * controlPointManager::twoAgoPhaseData(){
651 int s = allData.phases.size();
652 if(s >= 3 && phase_id > 0) {
653 return allData.phases[s-3];
660 /// Called by either the application or the Control Point Framework to advance to the next phase
661 void controlPointManager::gotoNextPhase(){
662 CkPrintf("gotoNextPhase shouldGatherAll=%d enableCPTracing=%d\n", (int)shouldGatherAll, (int)enableCPTracing);
666 if(shouldGatherAll && CkMyPe() == 0 && !alreadyRequestedAll){
667 alreadyRequestedAll = true;
668 CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherAll(NULL), 0, thisProxy);
669 CkPrintf("Requesting all measurements\n");
670 thisProxy.requestAll(*cb);
675 if(shouldGatherMemoryUsage && CkMyPe() == 0 && !alreadyRequestedMemoryUsage){
676 alreadyRequestedMemoryUsage = true;
677 CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherMemoryUsage(NULL), 0, thisProxy);
678 thisProxy.requestMemoryUsage(*cb);
682 if(shouldGatherUtilization && CkMyPe() == 0 && !alreadyRequestedIdleTime){
683 alreadyRequestedIdleTime = true;
684 CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherIdleTime(NULL), 0, thisProxy);
685 thisProxy.requestIdleTime(*cb);
694 LBDatabase * myLBdatabase = LBDatabaseObj();
695 LBDB * myLBDB = myLBdatabase->getLBDB(); // LBDB is Defined in LBDBManager.h
696 const CkVec<LBObj*> objs = myLBDB->getObjs();
698 LBRealType maxObjWallTime = -1.0;
700 for(int i=0;i<objs.length();i++){
702 const LDObjData d = o->ObjData();
703 LBRealType cpuTime = d.cpuTime;
704 LBRealType wallTime = d.wallTime;
705 // can also get object handles from the LDObjData struct
706 CkPrintf("[%d] LBDB Object[%d]: cpuTime=%f wallTime=%f\n", CkMyPe(), i, cpuTime, wallTime);
707 if(wallTime > maxObjWallTime){
713 myLBDB->ClearLoads(); // BUG: Probably very dangerous if we are actually using load balancing
719 // increment phase id
723 // Create new entry for the phase we are starting now
724 instrumentedPhase * newPhase = new instrumentedPhase();
725 allData.phases.push_back(newPhase);
727 CkPrintf("Now in phase %d allData.phases.size()=%d\n", phase_id, allData.phases.size());
731 /// An application uses this to register an instrumented timing for this phase
732 void controlPointManager::setTiming(double time){
733 currentPhaseData()->times.push_back(time);
735 #if USE_CRITICAL_PATH_HEADER_ARRAY
737 // First we should register this currently executing message as a path, because it is likely an important one to consider.
738 // registerTerminalEntryMethod();
740 // save the critical path for this phase
741 // thisPhaseData.criticalPaths.push_back(maxTerminalPathHistory);
742 // maxTerminalPathHistory.reset();
745 // Reset the counts for the currently executing message
746 //resetThisEntryPath();
752 /// Entry method called on all PEs to request CPU utilization statistics
753 void controlPointManager::requestIdleTime(CkCallback cb){
754 CkAssert(enableCPTracing);
756 double i = localControlPointTracingInstance()->idleRatio();
762 // CkPrintf("[%d] idleRatio=%f\n", CkMyPe(), i);
764 localControlPointTracingInstance()->resetTimings();
766 contribute(3*sizeof(double),idle,idleTimeReductionType, cb);
769 /// All processors reduce their memory usages in requestIdleTime() to this method
770 void controlPointManager::gatherIdleTime(CkReductionMsg *msg){
771 CkAssert(enableCPTracing);
773 int size=msg->getSize() / sizeof(double);
775 double *r=(double *) msg->getData();
777 instrumentedPhase* prevPhase = previousPhaseData();
778 if(prevPhase != NULL){
779 prevPhase->idleTime.min = r[0];
780 prevPhase->idleTime.avg = r[1]/CkNumPes();
781 prevPhase->idleTime.max = r[2];
782 prevPhase->idleTime.print();
783 CkPrintf("Stored idle time min=%lf avg=%lf max=%lf in prevPhase=%p\n", prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, prevPhase);
785 CkPrintf("There is no previous phase to store the idle time measurements\n");
788 alreadyRequestedIdleTime = false;
798 /// Entry method called on all PEs to request CPU utilization statistics and memory usage
799 void controlPointManager::requestAll(CkCallback cb){
800 CkAssert(enableCPTracing);
802 TraceControlPoints *t = localControlPointTracingInstance();
804 double data[ALL_REDUCTION_SIZE];
807 double *over = data+3;
808 double *mem = data+6;
809 double *msgBytes = data+7;
810 double *grainsize = data+11;
812 const double i = t->idleRatio();
817 const double o = t->overheadRatio();
822 const double m = t->memoryUsageMB();
827 msgBytes[2] = t->b2mlen;
828 msgBytes[3] = t->b3mlen;
830 grainsize[0] = t->grainSize();
832 localControlPointTracingInstance()->resetAll();
834 contribute(ALL_REDUCTION_SIZE*sizeof(double),data,allMeasuresReductionType, cb);
837 /// All processors reduce their memory usages in requestIdleTime() to this method
838 void controlPointManager::gatherAll(CkReductionMsg *msg){
839 CkAssert(enableCPTracing);
841 CkAssert(msg->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
842 int size=msg->getSize() / sizeof(double);
843 double *data=(double *) msg->getData();
846 double *over = data+3;
847 double *mem = data+6;
848 double *msgBytes = data+7;
849 double *granularity = data+11;
852 // std::string b = allData.toString();
854 instrumentedPhase* prevPhase = previousPhaseData();
855 if(prevPhase != NULL){
856 prevPhase->idleTime.min = idle[0];
857 prevPhase->idleTime.avg = idle[1]/CkNumPes();
858 prevPhase->idleTime.max = idle[2];
860 prevPhase->overheadTime.min = over[0];
861 prevPhase->overheadTime.avg = over[1]/CkNumPes();
862 prevPhase->overheadTime.max = over[2];
864 prevPhase->memoryUsageMB = mem[0];
866 CkPrintf("Stored idle time min=%lf avg=%lf max=%lf mem=%lf in prevPhase=%p\n", (double)prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, (double)prevPhase->memoryUsageMB, prevPhase);
868 double bytesPerInvoke2 = msgBytes[2] / msgBytes[0];
869 double bytesPerInvoke3 = msgBytes[3] / msgBytes[1];
871 /** The average of the grain sizes on all PEs in us */
872 prevPhase->grainSize = (granularity[0] / (double)CkNumPes());
874 CkPrintf("Bytes Per Invokation 2 = %f\n", bytesPerInvoke2);
875 CkPrintf("Bytes Per Invokation 3 = %f\n", bytesPerInvoke3);
877 CkPrintf("Bytes Per us of work 2 = %f\n", bytesPerInvoke2/prevPhase->grainSize);
878 CkPrintf("Bytes Per us of work 3 = %f\n", bytesPerInvoke3/prevPhase->grainSize);
880 if(bytesPerInvoke2 > bytesPerInvoke3)
881 prevPhase->bytesPerInvoke = bytesPerInvoke2;
883 prevPhase->bytesPerInvoke = bytesPerInvoke3;
885 // prevPhase->print();
886 // CkPrintf("prevPhase=%p number of timings=%d\n", prevPhase, prevPhase->times.size() );
888 // std::string a = allData.toString();
890 //CkPrintf("Before:\n%s\nAfter:\n%s\n\n", b.c_str(), a.c_str());
893 CkPrintf("There is no previous phase to store measurements\n");
896 alreadyRequestedAll = false;
904 void controlPointManager::checkForShutdown(){
905 if( exitWhenReady && !alreadyRequestedAll && !alreadyRequestedMemoryUsage && !alreadyRequestedIdleTime && CkMyPe()==0){
911 void controlPointManager::exitIfReady(){
912 if( !alreadyRequestedMemoryUsage && !alreadyRequestedAll && !alreadyRequestedIdleTime && CkMyPe()==0){
913 // CkPrintf("controlPointManager::exitIfReady exiting immediately\n");
916 // CkPrintf("controlPointManager::exitIfReady Delaying exiting\n");
917 exitWhenReady = true;
923 void controlPointManager::doExitNow(){
924 _TRACE_BEGIN_EXECUTE_DETAILED(-1, -1, _threadEP,CkMyPe(), 0, NULL, this);
926 // CkPrintf("[%d] Control point manager calling CkContinueExit()\n", CkMyPe());
930 void controlPointManager::writeOutputToDisk(){
931 if(writeDataFileAtShutdown){
932 controlPointManagerProxy.ckLocalBranch()->writeDataFile();
937 /// Entry method called on all PEs to request memory usage
938 void controlPointManager::requestMemoryUsage(CkCallback cb){
939 int m = CmiMaxMemoryUsage() / 1024 / 1024;
941 // CkPrintf("PE %d Memory Usage is %d MB\n",CkMyPe(), m);
942 contribute(sizeof(int),&m,CkReduction::max_int, cb);
945 /// All processors reduce their memory usages to this method
946 void controlPointManager::gatherMemoryUsage(CkReductionMsg *msg){
947 int size=msg->getSize() / sizeof(int);
949 int *m=(int *) msg->getData();
951 CkPrintf("[%d] Max Memory Usage for all processors is %d MB\n", CkMyPe(), m[0]);
953 instrumentedPhase* prevPhase = previousPhaseData();
954 if(prevPhase != NULL){
955 prevPhase->memoryUsageMB = m[0];
957 CkPrintf("No place to store memory usage");
960 alreadyRequestedMemoryUsage = false;
966 // /// Inform the control point framework that a named control point affects the priorities of some array
967 // void controlPointManager::associatePriorityArray(const char *name, int groupIdx){
968 // CkPrintf("Associating control point \"%s\" affects priority of array id=%d\n", name, groupIdx );
970 // if(affectsPrioritiesArray.count(std::string(name)) > 0 ) {
971 // affectsPrioritiesArray[std::string(name)].insert(groupIdx);
974 // s.insert(groupIdx);
975 // affectsPrioritiesArray[std::string(name)] = s;
979 // std::map<std::string, std::set<int> >::iterator f;
980 // for(f=affectsPrioritiesArray.begin(); f!=affectsPrioritiesArray.end();++f){
981 // std::string name = f->first;
982 // std::set<int> &vals = f->second;
983 // cout << "Control point " << name << " affects arrays: ";
984 // std::set<int>::iterator i;
985 // for(i=vals.begin(); i!=vals.end();++i){
986 // cout << *i << " ";
994 // /// Inform the control point framework that a named control point affects the priority of some entry method
995 // void controlPointManager::associatePriorityEntry(const char *name, int idx){
996 // CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
998 // if(affectsPrioritiesEP.count(std::string(name)) > 0 ) {
999 // affectsPrioritiesEP[std::string(name)].insert(idx);
1003 // affectsPrioritiesEP[std::string(name)] = s;
1007 // std::map<std::string, std::set<int> >::iterator f;
1008 // for(f=affectsPrioritiesEP.begin(); f!=affectsPrioritiesEP.end();++f){
1009 // std::string name = f->first;
1010 // std::set<int> &vals = f->second;
1011 // cout << "Control point " << name << " affects EP: ";
1012 // std::set<int>::iterator i;
1013 // for(i=vals.begin(); i!=vals.end();++i){
1014 // cout << *i << " ";
1025 /// An interface callable by the application.
1026 void gotoNextPhase(){
1027 controlPointManagerProxy.ckLocalBranch()->gotoNextPhase();
1030 FLINKAGE void FTN_NAME(GOTONEXTPHASE,gotonextphase)()
1036 /// A mainchare that is used just to create our controlPointManager group at startup
1037 class controlPointMain : public CBase_controlPointMain {
1039 controlPointMain(CkArgMsg* args){
1042 gettimeofday(& tp, NULL);
1043 random_seed = (int)tp.tv_usec ^ (int)tp.tv_sec;
1045 double time = CmiWallTimer();
1046 int sec = (int)time;
1047 int usec = (int)((time - (double)sec)*1000000.0);
1048 random_seed = sec ^ usec;
1052 double period, periodms;
1053 bool haveSamplePeriod = CmiGetArgDoubleDesc(args->argv,"+CPSamplePeriod", &period,"The time between Control Point Framework samples (in seconds)");
1054 bool haveSamplePeriodMs = CmiGetArgDoubleDesc(args->argv,"+CPSamplePeriodMs", &periodms,"The time between Control Point Framework samples (in milliseconds)");
1055 if(haveSamplePeriod){
1056 CkPrintf("controlPointSamplePeriod = %lf sec\n", period);
1057 controlPointSamplePeriod = (int)(period * 1000); /**< A readonly */
1058 } else if(haveSamplePeriodMs){
1059 CkPrintf("controlPointSamplePeriodMs = %lf ms\n", periodms);
1060 controlPointSamplePeriod = periodms; /**< A readonly */
1062 controlPointSamplePeriod = DEFAULT_CONTROL_POINT_SAMPLE_PERIOD;
1067 whichTuningScheme = RandomSelection;
1070 if( CmiGetArgFlagDesc(args->argv,"+CPSchemeRandom","Randomly Select Control Point Values") ){
1071 whichTuningScheme = RandomSelection;
1072 } else if ( CmiGetArgFlagDesc(args->argv,"+CPExhaustiveSearch","Exhaustive Search of Control Point Values") ){
1073 whichTuningScheme = ExhaustiveSearch;
1074 } else if ( CmiGetArgFlagDesc(args->argv,"+CPAlwaysUseDefaults","Always Use The Provided Default Control Point Values") ){
1075 whichTuningScheme = AlwaysDefaults;
1076 } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimulAnneal","Simulated Annealing Search of Control Point Values") ){
1077 whichTuningScheme = SimulatedAnnealing;
1078 } else if ( CmiGetArgFlagDesc(args->argv,"+CPCriticalPathPrio","Use Critical Path to adapt Control Point Values") ){
1079 whichTuningScheme = CriticalPathAutoPrioritization;
1080 } else if ( CmiGetArgFlagDesc(args->argv,"+CPBestKnown","Use BestKnown Timing for Control Point Values") ){
1081 whichTuningScheme = UseBestKnownTiming;
1082 } else if ( CmiGetArgFlagDesc(args->argv,"+CPSteering","Use Steering to adjust Control Point Values") ){
1083 whichTuningScheme = UseSteering;
1084 } else if ( CmiGetArgFlagDesc(args->argv,"+CPMemoryAware", "Adjust control points to approach available memory") ){
1085 whichTuningScheme = MemoryAware;
1086 } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimplex", "Nelder-Mead Simplex Algorithm") ){
1087 whichTuningScheme = Simplex;
1088 } else if ( CmiGetArgFlagDesc(args->argv,"+CPDivideConquer", "A divide and conquer program specific steering scheme") ){
1089 whichTuningScheme = DivideAndConquer;
1090 } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriod", "Adjust the load balancing period (Constant Predictor)") ){
1091 whichTuningScheme = LDBPeriod;
1092 } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodLinear", "Adjust the load balancing period (Linear Predictor)") ){
1093 whichTuningScheme = LDBPeriodLinear;
1094 } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodQuadratic", "Adjust the load balancing period (Quadratic Predictor)") ){
1095 whichTuningScheme = LDBPeriodQuadratic;
1096 } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodOptimal", "Adjust the load balancing period (Optimal Predictor)") ){
1097 whichTuningScheme = LDBPeriodOptimal;
1100 char *defValStr = NULL;
1101 if( CmiGetArgStringDesc(args->argv, "+CPDefaultValues", &defValStr, "Specify the default control point values used for the first couple phases") ){
1102 CkPrintf("You specified default value string: %s\n", defValStr);
1104 // Parse the string, looking for commas
1107 char *tok = strtok(defValStr, ",");
1109 // split on the equal sign
1110 int len = strlen(tok);
1111 char *eqsign = strchr(tok, '=');
1112 if(eqsign != NULL && eqsign != tok){
1115 std::string cpName(tok);
1116 char *cpDefVal = eqsign+1;
1118 if(sscanf(cpDefVal, "%d", &v) == 1){
1119 CkPrintf("Command Line Argument Specifies that Control Point \"%s\" defaults to %d\n", cpname, v);
1120 CkAssert(CkMyPe() == 0); // can only access defaultControlPointValues on PE 0
1121 defaultControlPointValues[cpName] = v;
1124 tok = strtok(NULL, ",");
1129 shouldGatherAll = false;
1130 shouldGatherMemoryUsage = false;
1131 shouldGatherUtilization = false;
1133 if ( CmiGetArgFlagDesc(args->argv,"+CPGatherAll","Gather all types of measurements for each phase") ){
1134 shouldGatherAll = true;
1136 if ( CmiGetArgFlagDesc(args->argv,"+CPGatherMemoryUsage","Gather memory usage after each phase") ){
1137 shouldGatherMemoryUsage = true;
1139 if ( CmiGetArgFlagDesc(args->argv,"+CPGatherUtilization","Gather utilization & Idle time after each phase") ){
1140 shouldGatherUtilization = true;
1144 writeDataFileAtShutdown = false;
1145 if( CmiGetArgFlagDesc(args->argv,"+CPSaveData","Save Control Point timings & configurations at completion") ){
1146 writeDataFileAtShutdown = true;
1149 shouldFilterOutputData = true;
1150 if( CmiGetArgFlagDesc(args->argv,"+CPNoFilterData","Don't filter phases from output data") ){
1151 shouldFilterOutputData = false;
1155 loadDataFileAtStartup = false;
1156 if( CmiGetArgFlagDesc(args->argv,"+CPLoadData","Load Control Point timings & configurations at startup") ){
1157 loadDataFileAtStartup = true;
1161 if( CmiGetArgStringDesc(args->argv, "+CPDataFilename", &cpdatafile, "Specify control point data file to save/load") ){
1162 sprintf(CPDataFilename, "%s", cpdatafile);
1164 sprintf(CPDataFilename, "controlPointData.txt");
1168 controlPointManagerProxy = CProxy_controlPointManager::ckNew();
1172 ~controlPointMain(){}
1175 /// An interface callable by the application.
1176 void registerCPChangeCallback(CkCallback cb, bool frameworkShouldAdvancePhase){
1177 CkAssert(CkMyPe() == 0);
1178 CkPrintf("Application has registered a control point change callback\n");
1179 controlPointManagerProxy.ckLocalBranch()->setCPCallback(cb, frameworkShouldAdvancePhase);
1182 /// An interface callable by the application.
1183 void setFrameworkAdvancePhase(bool frameworkShouldAdvancePhase){
1185 CkPrintf("Application has specified that framework should %sadvance phase\n", frameworkShouldAdvancePhase?"":"not ");
1186 controlPointManagerProxy.ckLocalBranch()->setFrameworkAdvancePhase(frameworkShouldAdvancePhase);
1190 /// An interface callable by the application.
1191 void registerControlPointTiming(double time){
1192 CkAssert(CkMyPe() == 0);
1194 CkPrintf("Program registering its own timing with registerControlPointTiming(time=%lf)\n", time);
1196 controlPointManagerProxy.ckLocalBranch()->setTiming(time);
1199 /// An interface callable by the application.
1200 void controlPointTimingStamp() {
1201 CkAssert(CkMyPe() == 0);
1203 CkPrintf("Program registering its own timing with controlPointTimingStamp()\n", time);
1206 static double prev_time = 0.0;
1207 double t = CmiWallTimer();
1208 double duration = t - prev_time;
1211 controlPointManagerProxy.ckLocalBranch()->setTiming(duration);
1214 FLINKAGE void FTN_NAME(CONTROLPOINTTIMINGSTAMP,controlpointtimingstamp)()
1216 controlPointTimingStamp();
1220 FLINKAGE void FTN_NAME(SETFRAMEWORKADVANCEPHASEF,setframeworkadvancephasef)(CMK_TYPEDEF_INT4 *value)
1222 setFrameworkAdvancePhase(*value);
1228 /// Shutdown the control point framework, writing data to disk if necessary
1229 extern "C" void controlPointShutdown(){
1232 if (!controlPointManagerProxy.ckGetGroupID().isZero()) {
1233 // wait for gathering of idle time & memory usage to complete
1234 controlPointManagerProxy.ckLocalBranch()->exitIfReady();
1241 /// A function called at startup on each node to register controlPointShutdown() to be called at CkExit()
1242 void controlPointInitNode(){
1243 registerExitFn(controlPointShutdown);
1246 /// Called periodically to allow control point framework to do things periodically
1247 static void periodicProcessControlPoints(void* ptr, double currWallTime){
1249 CkPrintf("[%d] periodicProcessControlPoints()\n", CkMyPe());
1251 controlPointManagerProxy.ckLocalBranch()->processControlPoints();
1252 CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
1259 /// Determine a control point value using some optimization scheme (use max known, simmulated annealling,
1260 /// user observed characteristic to adapt specific control point values.
1261 /// This function must return valid values for newControlPoints.
1262 void controlPointManager::generatePlan() {
1263 const double startGenerateTime = CmiWallTimer();
1264 const int phase_id = this->phase_id;
1265 const int effective_phase = allData.phases.size();
1267 // Only generate a plan once per phase
1268 if(generatedPlanForStep == phase_id)
1270 generatedPlanForStep = phase_id;
1272 CkPrintf("Generating Plan for phase %d\n", phase_id);
1273 printTuningScheme();
1275 // By default lets put the previous phase data into newControlPoints
1276 instrumentedPhase *prevPhase = previousPhaseData();
1277 for(std::map<std::string, int >::const_iterator cpsIter=prevPhase->controlPoints.begin(); cpsIter != prevPhase->controlPoints.end(); ++cpsIter){
1278 newControlPoints[cpsIter->first] = cpsIter->second;
1282 if( whichTuningScheme == RandomSelection ){
1283 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1284 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1285 const std::string &name = cpsIter->first;
1286 const std::pair<int,int> &bounds = cpsIter->second;
1287 const int lb = bounds.first;
1288 const int ub = bounds.second;
1289 newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
1291 } else if( whichTuningScheme == CriticalPathAutoPrioritization) {
1292 // -----------------------------------------------------------
1293 // USE CRITICAL PATH TO ADJUST PRIORITIES
1295 // This scheme will return the median value for the range
1296 // early on, and then will transition over to the new control points
1297 // determined by the critical path adapting code
1299 // This won't work until the periodic function is fixed up a bit
1301 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1302 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1303 const std::string &name = cpsIter->first;
1304 const std::pair<int,int> &bounds = cpsIter->second;
1305 const int lb = bounds.first;
1306 const int ub = bounds.second;
1307 newControlPoints[name] = (lb+ub)/2;
1310 } else if ( whichTuningScheme == MemoryAware ) {
1312 // -----------------------------------------------------------
1313 // STEERING BASED ON MEMORY USAGE
1315 instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1316 instrumentedPhase *prevPhase = previousPhaseData();
1318 if(phase_id%2 == 0){
1319 CkPrintf("Steering (memory based) based on 2 phases ago:\n");
1320 twoAgoPhase->print();
1324 // See if memory usage is low:
1325 double memUsage = twoAgoPhase->memoryUsageMB;
1326 CkPrintf("Steering (memory based) encountered memory usage of (%f MB)\n", memUsage);
1328 if(memUsage < 1100.0 && memUsage > 0.0){ // Kraken has about 16GB and 12 cores per node
1329 CkPrintf("Steering (memory based) encountered low memory usage (%f) < 1200 \n", memUsage);
1330 CkPrintf("Steering (memory based) controlPointSpace.size()=\n", controlPointSpace.size());
1332 // Initialize plan to be the values from two phases ago (later we'll adjust this)
1333 newControlPoints = twoAgoPhase->controlPoints;
1336 CkPrintf("Steering (memory based) initialized plan\n");
1339 // look for a possible control point knob to turn
1340 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["MemoryConsumption"];
1342 // FIXME: assume for now that we just have one control point with the effect, and one direction to turn it
1345 std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1346 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1347 for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1348 cpName = iter->first;
1349 info = &iter->second;
1354 // Adapt the control point value
1356 CkPrintf("Steering found knob to turn that should increase memory consumption\n");
1358 const int twoAgoValue = twoAgoPhase->controlPoints[cpName];
1359 const int maxValue = controlPointSpace[cpName].second;
1361 if(twoAgoValue+1 <= maxValue){
1362 newControlPoints[cpName] = twoAgoValue+1; // increase from two phases back
1369 CkPrintf("Steering (memory based) done for this phase\n");
1372 } else if ( whichTuningScheme == UseBestKnownTiming ) {
1374 // -----------------------------------------------------------
1375 // USE BEST KNOWN TIME
1376 static bool firstTime = true;
1379 instrumentedPhase *best = allData.findBest();
1380 CkPrintf("Best known phase is:\n");
1384 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1385 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
1386 const std::string &name = cpsIter->first;
1387 newControlPoints[name] = best->controlPoints[name];
1390 } else if( whichTuningScheme == LDBPeriod) {
1391 // Assume this is used in this manner:
1392 // 1) go to next phase
1393 // 2) request control point
1394 // 3) load balancing
1398 instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1399 instrumentedPhase *prevPhase = previousPhaseData();
1402 const std::vector<double> × = twoAgoPhase->times;
1403 const int oldNumTimings = times.size();
1406 const std::vector<double> ×New = prevPhase->times;
1407 const int newNumTimings = timesNew.size();
1410 if(oldNumTimings > 4 && newNumTimings > 4){
1412 // Build model of execution time based on two phases ago
1413 // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1417 for(int i=2; i<oldNumTimings; i++){
1421 const double oldAvg = oldSum / (oldNumTimings-2);
1426 // Computed as an integral from 0.5 to the number of bins of the same size as two ago phase + 0.5
1427 const double expectedTotalTime = oldAvg * newNumTimings;
1430 // Measure actual time
1431 double newSum = 0.0;
1432 for(int i=2; i<newNumTimings; ++i){
1433 newSum += timesNew[i];
1436 const double newAvg = newSum / (newNumTimings-2);
1437 const double newTotalTimeExcludingLBSteps = newAvg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1439 const double benefit = expectedTotalTime - newTotalTimeExcludingLBSteps;
1441 // Determine load balance cost
1442 const double lbcost = timesNew[0] + timesNew[1] - 2.0*newAvg;
1444 const double benefitAfterLB = benefit - lbcost;
1447 // Determine whether LB cost outweights the estimated benefit
1448 CkPrintf("Constant Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, newTotalTimeExcludingLBSteps);
1451 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1452 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1453 const std::string &name = cpsIter->first;
1454 const std::pair<int,int> &bounds = cpsIter->second;
1455 const int lb = bounds.first;
1456 const int ub = bounds.second;
1458 if(benefitAfterLB > 0){
1459 CkPrintf("Constant Model: Beneficial LB\n");
1460 int newval = newControlPoints[name] / 2;
1462 newControlPoints[name] = newval;
1464 newControlPoints[name] = lb;
1466 CkPrintf("Constant Model: Detrimental LB\n");
1467 int newval = newControlPoints[name] * 2;
1469 newControlPoints[name] = newval;
1471 newControlPoints[name] = ub;
1477 } else if( whichTuningScheme == LDBPeriodLinear) {
1478 // Assume this is used in this manner:
1479 // 1) go to next phase
1480 // 2) request control point
1481 // 3) load balancing
1485 instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1486 instrumentedPhase *prevPhase = previousPhaseData();
1488 const std::vector<double> × = twoAgoPhase->times;
1489 const int oldNumTimings = times.size();
1491 const std::vector<double> ×New = prevPhase->times;
1492 const int newNumTimings = timesNew.size();
1495 if(oldNumTimings > 4 && newNumTimings > 4){
1497 // Build model of execution time based on two phases ago
1498 // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1499 const int b1 = 2 + (oldNumTimings-2)/2;
1503 const double ldbStepsTime = times[0] + times[1];
1505 for(int i=2; i<b1; i++){
1508 for(int i=b1; i<oldNumTimings; i++){
1513 // Compute the estimated time for the last phase's data
1515 const double a1 = s1 / (double)(b1-2);
1516 const double a2 = s2 / (double)(oldNumTimings-b1);
1517 const double aold = (a1+a2)/2.0;
1519 const double expectedTotalTime = newNumTimings*(aold+(oldNumTimings+newNumTimings)*(a2-a1)/oldNumTimings);
1522 // Measure actual time
1524 for(int i=2; i<newNumTimings; ++i){
1528 const double avg = sum / ((double)(newNumTimings-2));
1529 const double actualTotalTime = avg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1531 const double benefit = expectedTotalTime - actualTotalTime;
1533 // Determine load balance cost
1534 const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
1536 const double benefitAfterLB = benefit - lbcost;
1539 // Determine whether LB cost outweights the estimated benefit
1540 CkPrintf("Linear Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, actualTotalTime);
1544 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1545 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1546 const std::string &name = cpsIter->first;
1547 const std::pair<int,int> &bounds = cpsIter->second;
1548 const int lb = bounds.first;
1549 const int ub = bounds.second;
1551 if(benefitAfterLB > 0){
1552 CkPrintf("Linear Model: Beneficial LB\n");
1553 int newval = newControlPoints[name] / 2;
1555 newControlPoints[name] = newval;
1557 newControlPoints[name] = lb;
1559 CkPrintf("Linear Model: Detrimental LB\n");
1560 int newval = newControlPoints[name] * 2;
1562 newControlPoints[name] = newval;
1564 newControlPoints[name] = ub;
1571 else if( whichTuningScheme == LDBPeriodQuadratic) {
1572 // Assume this is used in this manner:
1573 // 1) go to next phase
1574 // 2) request control point
1575 // 3) load balancing
1579 instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1580 instrumentedPhase *prevPhase = previousPhaseData();
1582 const std::vector<double> × = twoAgoPhase->times;
1583 const int oldNumTimings = times.size();
1585 const std::vector<double> ×New = prevPhase->times;
1586 const int newNumTimings = timesNew.size();
1589 if(oldNumTimings > 4 && newNumTimings > 4){
1592 // Build model of execution time based on two phases ago
1593 // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1594 const int b1 = 2 + (oldNumTimings-2)/3;
1595 const int b2 = 2 + (2*(oldNumTimings-2))/3;
1600 const double ldbStepsTime = times[0] + times[1];
1602 for(int i=2; i<b1; i++){
1605 for(int i=b1; i<b2; i++){
1608 for(int i=b2; i<oldNumTimings; i++){
1613 // Compute the estimated time for the last phase's data
1615 const double a1 = s1 / (double)(b1-2);
1616 const double a2 = s2 / (double)(b2-b1);
1617 const double a3 = s3 / (double)(oldNumTimings-b2);
1619 const double a = (a1-2.0*a2+a3)/2.0;
1620 const double b = (a1-4.0*a2+3.0*a3)/2.0;
1621 const double c = a3;
1623 // Computed as an integral from 0.5 to the number of bins of the same size as two ago phase + 0.5
1624 const double x1 = (double)newNumTimings / (double)oldNumTimings * 3.0 + 0.5; // should be 3.5 if ratio is same
1625 const double x2 = 0.5;
1626 const double expectedTotalTime = a*x1*x1*x1/3.0 + b*x1*x1/2.0 + c*x1 - (a*x2*x2*x2/3.0 + b*x2*x2/2.0 + c*x2);
1629 // Measure actual time
1631 for(int i=2; i<newNumTimings; ++i){
1635 const double avg = sum / ((double)(newNumTimings-2));
1636 const double actualTotalTime = avg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1638 const double benefit = expectedTotalTime - actualTotalTime;
1640 // Determine load balance cost
1641 const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
1643 const double benefitAfterLB = benefit - lbcost;
1646 // Determine whether LB cost outweights the estimated benefit
1647 CkPrintf("Quadratic Model: lbcost = %f, expected = %f, actual = %f, x1=%f, a1=%f, a2=%f, a3=%f, b1=%d, b2=%d, a=%f, b=%f, c=%f\n", lbcost, expectedTotalTime, actualTotalTime, x1, a1, a2, a3, b1, b2, a, b, c);
1651 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1652 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1653 const std::string &name = cpsIter->first;
1654 const std::pair<int,int> &bounds = cpsIter->second;
1655 const int lb = bounds.first;
1656 const int ub = bounds.second;
1658 if(benefitAfterLB > 0){
1659 CkPrintf("QuadraticModel: Beneficial LB\n");
1660 int newval = newControlPoints[name] / 2;
1662 newControlPoints[name] = newval;
1664 newControlPoints[name] = lb;
1666 CkPrintf("QuadraticModel: Detrimental LB\n");
1667 int newval = newControlPoints[name] * 2;
1669 newControlPoints[name] = newval;
1671 newControlPoints[name] = ub;
1678 } else if( whichTuningScheme == LDBPeriodOptimal) {
1679 // Assume this is used in this manner:
1680 // 1) go to next phase
1681 // 2) request control point
1682 // 3) load balancing
1687 instrumentedPhase *prevPhase = previousPhaseData();
1689 const std::vector<double> × = prevPhase->times;
1690 const int numTimings = times.size();
1692 if( numTimings > 4){
1694 const int b1 = 2 + (numTimings-2)/2;
1699 for(int i=2; i<b1; i++){
1702 for(int i=b1; i<numTimings; i++){
1707 const double a1 = s1 / (double)(b1-2);
1708 const double a2 = s2 / (double)(numTimings-b1);
1709 const double avg = (a1+a1) / 2.0;
1711 const double m = (a2-a1)/((double)(numTimings-2)/2.0); // An approximation of the slope of the execution times
1713 const double ldbStepsTime = times[0] + times[1];
1714 const double lbcost = ldbStepsTime - 2.0*avg; // An approximation of the
1717 int newval = roundDouble(sqrt(2.0*lbcost/m));
1719 // We don't really know what to do if m<=0, so we'll just double the period
1721 newval = 2*numTimings;
1723 CkPrintf("Optimal Model (double when negative): lbcost = %f, m = %f, new ldbperiod should be %d\n", lbcost, m, newval);
1726 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1727 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1728 // TODO: lookup only control points that are relevant instead of all of them
1729 const std::string &name = cpsIter->first;
1730 const std::pair<int,int> &bounds = cpsIter->second;
1731 const int lb = bounds.first;
1732 const int ub = bounds.second;
1735 newControlPoints[name] = lb;
1736 } else if(newval > ub){
1737 newControlPoints[name] = ub;
1739 newControlPoints[name] = newval;
1749 } else if ( whichTuningScheme == UseSteering ) {
1750 // -----------------------------------------------------------
1751 // STEERING BASED ON KNOWLEDGE
1753 // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1754 // plans are only generated after 3 phases
1756 instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1757 instrumentedPhase *prevPhase = previousPhaseData();
1759 if(phase_id%4 == 0){
1760 CkPrintf("Steering based on 2 phases ago:\n");
1761 twoAgoPhase->print();
1765 std::vector<std::map<std::string,int> > possibleNextStepPlans;
1768 // ========================================= Concurrency =============================================
1769 // See if idle time is high:
1771 double idleTime = twoAgoPhase->idleTime.avg;
1772 CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1774 if(idleTime > 0.10){
1775 CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1776 CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1778 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
1782 std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1783 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1784 for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1785 cpName = iter->first;
1786 info = &iter->second;
1788 // Initialize a new plan based on two phases ago
1789 std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1791 CkPrintf("Steering found knob to turn\n");
1794 if(info->first == ControlPoint::EFF_INC){
1795 const int maxValue = controlPointSpace[cpName].second;
1796 const int twoAgoValue = twoAgoPhase->controlPoints[cpName];
1797 if(twoAgoValue+1 <= maxValue){
1798 aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1801 const int minValue = controlPointSpace[cpName].second;
1802 const int twoAgoValue = twoAgoPhase->controlPoints[cpName];
1803 if(twoAgoValue-1 >= minValue){
1804 aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1808 possibleNextStepPlans.push_back(aNewPlan);
1814 // ========================================= Grain Size =============================================
1815 // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1817 double overheadTime = twoAgoPhase->overheadTime.avg;
1818 CkPrintf("Steering encountered overhead time (%f)\n", overheadTime);
1820 if(overheadTime > 0.10){
1821 CkPrintf("Steering encountered high overhead time(%f) > 10%%\n", overheadTime);
1822 CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1824 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GrainSize"];
1828 std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1829 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1830 for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1831 cpName = iter->first;
1832 info = &iter->second;
1834 // Initialize a new plan based on two phases ago
1835 std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1839 CkPrintf("Steering found knob to turn\n");
1842 if(info->first == ControlPoint::EFF_INC){
1843 const int maxValue = controlPointSpace[cpName].second;
1844 const int twoAgoValue = twoAgoPhase->controlPoints[cpName];
1845 if(twoAgoValue+1 <= maxValue){
1846 aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1849 const int minValue = controlPointSpace[cpName].second;
1850 const int twoAgoValue = twoAgoPhase->controlPoints[cpName];
1851 if(twoAgoValue-1 >= minValue){
1852 aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1856 possibleNextStepPlans.push_back(aNewPlan);
1862 // ========================================= GPU Offload =============================================
1863 // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1865 double idleTime = twoAgoPhase->idleTime.avg;
1866 CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1868 if(idleTime > 0.10){
1869 CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1870 CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1872 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GPUOffloadedWork"];
1876 std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1877 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1878 for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1879 cpName = iter->first;
1880 info = &iter->second;
1882 // Initialize a new plan based on two phases ago
1883 std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1886 CkPrintf("Steering found knob to turn\n");
1889 if(info->first == ControlPoint::EFF_DEC){
1890 const int maxValue = controlPointSpace[cpName].second;
1891 const int twoAgoValue = twoAgoPhase->controlPoints[cpName];
1892 if(twoAgoValue+1 <= maxValue){
1893 aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1896 const int minValue = controlPointSpace[cpName].second;
1897 const int twoAgoValue = twoAgoPhase->controlPoints[cpName];
1898 if(twoAgoValue-1 >= minValue){
1899 aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1903 possibleNextStepPlans.push_back(aNewPlan);
1910 // ========================================= Done =============================================
1913 if(possibleNextStepPlans.size() > 0){
1914 newControlPoints = possibleNextStepPlans[0];
1918 CkPrintf("Steering done for this phase\n");
1922 // This is not a phase to do steering, so stick with previously used values (one phase ago)
1923 CkPrintf("not a phase to do steering, so stick with previously planned values (one phase ago)\n");
1929 } else if( whichTuningScheme == SimulatedAnnealing ) {
1931 // -----------------------------------------------------------
1932 // SIMULATED ANNEALING
1933 // Simulated Annealing style hill climbing method
1935 // Find the best search space configuration, and try something
1936 // nearby it, with a radius decreasing as phases increase
1938 CkPrintf("Finding best phase\n");
1939 instrumentedPhase *bestPhase = allData.findBest();
1940 CkPrintf("best found:\n");
1945 const int convergeByPhase = 100;
1946 // Determine from 0.0 to 1.0 how far along we are
1947 const double progress = (double) min(effective_phase, convergeByPhase) / (double)convergeByPhase;
1949 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1950 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1951 const std::string &name = cpsIter->first;
1952 const std::pair<int,int> &bounds = cpsIter->second;
1953 const int minValue = bounds.first;
1954 const int maxValue = bounds.second;
1956 const int before = bestPhase->controlPoints[name];
1958 const int range = (int)((maxValue-minValue+1)*(1.0-progress));
1960 int high = min(before+range, maxValue);
1961 int low = max(before-range, minValue);
1963 newControlPoints[name] = low;
1965 newControlPoints[name] += randInt(high-low, name.c_str(), phase_id);
1970 } else if ( whichTuningScheme == DivideAndConquer ) {
1972 // -----------------------------------------------------------
1973 // STEERING FOR Divide & Conquer Programs
1974 // This scheme uses no timing information. It just tries to converge to the point where idle time = overhead time.
1975 // For a Fibonacci example, this appears to be a good heurstic for finding the best performing program.
1976 // The scheme can be applied within a single program tree computation, if the tree is being traversed depth first.
1978 // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1979 // plans are only generated after 3 phases
1981 instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1982 instrumentedPhase *prevPhase = previousPhaseData();
1984 if(phase_id%4 == 0){
1985 CkPrintf("Divide & Conquer Steering based on 2 phases ago:\n");
1986 twoAgoPhase->print();
1990 std::vector<std::map<std::string,int> > possibleNextStepPlans;
1993 // ========================================= Concurrency =============================================
1994 // See if idle time is high:
1996 double idleTime = twoAgoPhase->idleTime.avg;
1997 double overheadTime = twoAgoPhase->overheadTime.avg;
2000 CkPrintf("Divide & Conquer Steering encountered overhead time (%f) idle time (%f)\n",overheadTime, idleTime);
2002 if(idleTime+overheadTime > 0.10){
2003 CkPrintf("Steering encountered high idle+overheadTime time(%f) > 10%%\n", idleTime+overheadTime);
2004 CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
2007 if (idleTime>overheadTime){
2008 // We need to decrease the grain size, or increase the available concurrency
2012 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
2016 std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
2017 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
2018 for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
2019 cpName = iter->first;
2020 info = &iter->second;
2022 // Initialize a new plan based on two phases ago
2023 std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
2025 CkPrintf("Divide & Conquer Steering found knob to turn\n");
2028 int adjustByAmount = (int)(myAbs(idleTime-overheadTime)*5.0);
2030 if(info->first == ControlPoint::EFF_INC){
2031 const int minValue = controlPointSpace[cpName].first;
2032 const int maxValue = controlPointSpace[cpName].second;
2033 const int twoAgoValue = twoAgoPhase->controlPoints[cpName];
2034 const int newVal = closestInRange(twoAgoValue+adjustByAmount*direction, minValue, maxValue);
2035 CkAssert(newVal <= maxValue && newVal >= minValue);
2036 aNewPlan[cpName] = newVal;
2038 const int minValue = controlPointSpace[cpName].first;
2039 const int maxValue = controlPointSpace[cpName].second;
2040 const int twoAgoValue = twoAgoPhase->controlPoints[cpName];
2041 const int newVal = closestInRange(twoAgoValue-adjustByAmount*direction, minValue, maxValue);
2042 CkAssert(newVal <= maxValue && newVal >= minValue);
2043 aNewPlan[cpName] = newVal;
2046 possibleNextStepPlans.push_back(aNewPlan);
2051 if(possibleNextStepPlans.size() > 0){
2052 CkPrintf("Divide & Conquer Steering found %d possible next phases, using first one\n", possibleNextStepPlans.size());
2053 newControlPoints = possibleNextStepPlans[0];
2055 CkPrintf("Divide & Conquer Steering found no possible next phases\n");
2059 } else if( whichTuningScheme == Simplex ) {
2061 // -----------------------------------------------------------
2062 // Nelder Mead Simplex Algorithm
2064 // A scheme that takes a simplex (n+1 points) and moves it
2065 // toward the minimum, eventually converging there.
2067 s.adapt(controlPointSpace, newControlPoints, phase_id, allData);
2069 } else if( whichTuningScheme == ExhaustiveSearch ) {
2071 // -----------------------------------------------------------
2072 // EXHAUSTIVE SEARCH
2074 int numDimensions = controlPointSpace.size();
2075 CkAssert(numDimensions > 0);
2077 vector<int> lowerBounds(numDimensions);
2078 vector<int> upperBounds(numDimensions);
2081 std::map<std::string, pair<int,int> >::iterator iter;
2082 for(iter = controlPointSpace.begin(); iter != controlPointSpace.end(); iter++){
2083 // CkPrintf("Examining dimension %d\n", d);
2084 lowerBounds[d] = iter->second.first;
2085 upperBounds[d] = iter->second.second;
2089 // get names for each dimension (control point)
2090 vector<std::string> names(numDimensions);
2092 for(std::map<std::string, pair<int,int> >::iterator niter=controlPointSpace.begin(); niter!=controlPointSpace.end(); niter++){
2093 names[d] = niter->first;
2098 // Create the first possible configuration
2099 vector<int> config = lowerBounds;
2100 config.push_back(0);
2102 // Increment until finding an unused configuration
2103 allData.cleanupNames(); // put -1 values in for any control points missing
2104 std::vector<instrumentedPhase*> &phases = allData.phases;
2108 std::vector<instrumentedPhase*>::iterator piter;
2109 bool testedConfiguration = false;
2110 for(piter = phases.begin(); piter != phases.end(); piter++){
2112 // Test if the configuration matches this phase
2114 for(int j=0;j<numDimensions;j++){
2115 match &= (*piter)->controlPoints[names[j]] == config[j];
2119 testedConfiguration = true;
2124 if(testedConfiguration == false){
2128 // go to next configuration
2130 // Propagate "carrys"
2131 for(int i=0;i<numDimensions;i++){
2132 if(config[i] > upperBounds[i]){
2134 config[i] = lowerBounds[i];
2137 // check if we have exhausted all possible values
2138 if(config[numDimensions] > 0){
2144 if(config[numDimensions] > 0){
2145 for(int i=0;i<numDimensions;i++){
2146 config[i]= (lowerBounds[i]+upperBounds[i]) / 2;
2150 // results are now in config[i]
2152 for(int i=0; i<numDimensions; i++){
2153 newControlPoints[names[i]] = config[i];
2154 CkPrintf("Exhaustive search chose: %s -> %d\n", names[i].c_str(), config[i]);
2159 CkAbort("Some Control Point tuning strategy must be enabled.\n");
2163 const double endGenerateTime = CmiWallTimer();
2165 CkPrintf("Time to generate next control point configuration(s): %f sec\n", (endGenerateTime - startGenerateTime) );
2173 /// Get control point value from range of integers [lb,ub]
2174 int controlPoint(const char *name, int lb, int ub){
2175 instrumentedPhase *thisPhaseData = controlPointManagerProxy.ckLocalBranch()->currentPhaseData();
2176 const int phase_id = controlPointManagerProxy.ckLocalBranch()->phase_id;
2177 std::map<std::string, pair<int,int> > &controlPointSpace = controlPointManagerProxy.ckLocalBranch()->controlPointSpace;
2180 // if we already have control point values for phase, return them
2181 if( thisPhaseData->controlPoints.count(std::string(name))>0 && thisPhaseData->controlPoints[std::string(name)]>=0 ){
2182 CkPrintf("Already have control point values for phase. %s -> %d\n", name, (int)(thisPhaseData->controlPoints[std::string(name)]) );
2183 return thisPhaseData->controlPoints[std::string(name)];
2187 if( phase_id < 4 || whichTuningScheme == AlwaysDefaults){
2188 // For the first few phases, just use the lower bound, or the default if one was provided
2189 // This ensures that the ranges for all the control points are known before we do anything fancy
2192 if(defaultControlPointValues.count(std::string(name)) > 0){
2193 int v = defaultControlPointValues[std::string(name)];
2194 CkPrintf("Startup phase using default value of %d for \"%s\"\n", v, name);
2198 } else if(controlPointSpace.count(std::string(name)) == 0){
2199 // if this is the first time we've seen the CP, then return the lower bound
2203 // Generate a plan one time for each phase
2204 controlPointManagerProxy.ckLocalBranch()->generatePlan();
2206 // Use the planned value:
2207 result = controlPointManagerProxy.ckLocalBranch()->newControlPoints[std::string(name)];
2211 if(!isInRange(result,ub,lb)){
2212 std::cerr << "control point = " << result << " is out of range: " << lb << " " << ub << std::endl;
2216 CkAssert(isInRange(result,ub,lb));
2217 thisPhaseData->controlPoints[std::string(name)] = result; // was insert()
2219 controlPointSpace.insert(std::make_pair(std::string(name),std::make_pair(lb,ub)));
2221 CkPrintf("Control Point \"%s\" for phase %d is: %d\n", name, phase_id, result);
2222 // thisPhaseData->print();
2228 FLINKAGE int FTN_NAME(CONTROLPOINT, controlpoint)(CMK_TYPEDEF_INT4 *lb, CMK_TYPEDEF_INT4 *ub){
2229 CkAssert(CkMyPe() == 0);
2230 return controlPoint("FortranCP", *lb, *ub);
2236 /// Inform the control point framework that a named control point affects the priorities of some array
2237 // void controlPointPriorityArray(const char *name, CProxy_ArrayBase &arraybase){
2238 // CkGroupID aid = arraybase.ckGetArrayID();
2239 // int groupIdx = aid.idx;
2240 // controlPointManagerProxy.ckLocalBranch()->associatePriorityArray(name, groupIdx);
2241 // // CkPrintf("Associating control point \"%s\" with array id=%d\n", name, groupIdx );
2245 // /// Inform the control point framework that a named control point affects the priorities of some entry method
2246 // void controlPointPriorityEntry(const char *name, int idx){
2247 // controlPointManagerProxy.ckLocalBranch()->associatePriorityEntry(name, idx);
2248 // // CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
2255 /** Determine the next configuration to try using the Nelder Mead Simplex Algorithm.
2257 This function decomposes the algorithm into a state machine that allows it to
2258 evaluate one or more configurations through subsequent clls to this function.
2259 The state diagram is pictured in the NelderMeadStateDiagram.pdf diagram.
2261 At one point in the algorithm, n+1 parameter configurations must be evaluated,
2262 so a list of them will be created and they will be evaluated, one per call.
2264 Currently there is no stopping criteria, but the simplex ought to contract down
2265 to a few close configurations, and hence not much change will happen after this
2269 void simplexScheme::adapt(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2272 CkPrintf("Simplex Tuning: Simplex algorithm is done, using best known phase:\n");
2277 if(firstSimplexPhase< 0){
2278 firstSimplexPhase = allData.phases.size()-1;
2279 CkPrintf("First simplex phase is %d\n", firstSimplexPhase);
2282 int n = controlPointSpace.size();
2287 if(simplexState == beginning){
2288 // First we evaluate n+1 random points, then we go to a different state
2289 if(allData.phases.size() < firstSimplexPhase + n+2 ){
2290 CkPrintf("Simplex Tuning: chose random configuration\n");
2292 // Choose random values from the middle third of the range in each dimension
2293 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2294 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2295 const std::string &name = cpsIter->first;
2296 const std::pair<int,int> &bounds = cpsIter->second;
2297 const int lb = bounds.first;
2298 const int ub = bounds.second;
2299 newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
2302 // Set initial simplex:
2303 for(int i=0; i<n+1; i++){
2304 simplexIndices.insert(firstSimplexPhase+i);
2306 CkAssert(simplexIndices.size() == n+1);
2308 // Transition to reflecting state
2309 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2313 } else if (simplexState == reflecting){
2314 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2315 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2317 // Find the highest time from other points in the simplex
2318 double highestTimeForOthersInSimplex = 0.0;
2319 for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2320 double t = allData.phases[*iter]->medianTime();
2321 if(*iter != worstPhase && t > highestTimeForOthersInSimplex) {
2322 highestTimeForOthersInSimplex = t;
2326 CkPrintf("After reflecting, the median time for the phase is %f, previous worst phase %d time was %f\n", recentPhaseTime, worstPhase, previousWorstPhaseTime);
2328 if(recentPhaseTime < highestTimeForOthersInSimplex){
2329 // if y* < yl, transition to "expanding"
2330 doExpansion(controlPointSpace, newControlPoints, phase_id, allData);
2332 } else if (recentPhaseTime <= highestTimeForOthersInSimplex){
2333 // else if y* <= yi replace ph with p* and transition to "evaluatingOne"
2334 CkAssert(simplexIndices.size() == n+1);
2335 simplexIndices.erase(worstPhase);
2336 CkAssert(simplexIndices.size() == n);
2337 simplexIndices.insert(pPhase);
2338 CkAssert(simplexIndices.size() == n+1);
2339 // Transition to reflecting state
2340 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2344 if(recentPhaseTime <= worstTime){
2345 // replace Ph with P*
2346 CkAssert(simplexIndices.size() == n+1);
2347 simplexIndices.erase(worstPhase);
2348 CkAssert(simplexIndices.size() == n);
2349 simplexIndices.insert(pPhase);
2350 CkAssert(simplexIndices.size() == n+1);
2351 // Because we later will possibly replace Ph with P**, and we just replaced it with P*, we need to update our Ph reference
2352 worstPhase = pPhase;
2353 // Just as a sanity check, make sure we don't use the non-updated values.
2357 // Now, form P** and do contracting phase
2358 doContraction(controlPointSpace, newControlPoints, phase_id, allData);
2362 } else if (simplexState == doneExpanding){
2363 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2364 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2365 // A new configuration has been evaluated
2367 // Check to see if y** < y1
2368 if(recentPhaseTime < bestTime){
2369 // replace Ph by P**
2370 CkAssert(simplexIndices.size() == n+1);
2371 simplexIndices.erase(worstPhase);
2372 CkAssert(simplexIndices.size() == n);
2373 simplexIndices.insert(p2Phase);
2374 CkAssert(simplexIndices.size() == n+1);
2377 CkAssert(simplexIndices.size() == n+1);
2378 simplexIndices.erase(worstPhase);
2379 CkAssert(simplexIndices.size() == n);
2380 simplexIndices.insert(pPhase);
2381 CkAssert(simplexIndices.size() == n+1);
2384 // Transition to reflecting state
2385 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2387 } else if (simplexState == contracting){
2388 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2389 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2390 // A new configuration has been evaluated
2392 // Check to see if y** > yh
2393 if(recentPhaseTime <= worstTime){
2394 // replace Ph by P**
2395 CkPrintf("Replacing phase %d with %d\n", worstPhase, p2Phase);
2396 CkAssert(simplexIndices.size() == n+1);
2397 simplexIndices.erase(worstPhase);
2398 CkAssert(simplexIndices.size() == n);
2399 simplexIndices.insert(p2Phase);
2400 CkAssert(simplexIndices.size() == n+1);
2401 // Transition to reflecting state
2402 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2405 // conceptually we will replace all Pi by (Pi+Pl)/2, but there is nothing to store this until after we have tried all of them
2406 simplexState = stillContracting;
2408 // A set of phases for which (P_i+P_l)/2 ought to be evaluated
2409 stillMustContractList = simplexIndices;
2411 CkPrintf("Simplex Tuning: Switched to state: stillContracting\n");
2414 } else if (simplexState == stillContracting){
2415 CkPrintf("Simplex Tuning: stillContracting found %d configurations left to try\n", stillMustContractList.size());
2417 if(stillMustContractList.size()>0){
2418 int c = *stillMustContractList.begin();
2419 stillMustContractList.erase(c);
2420 CkPrintf("Simplex Tuning: stillContracting evaluating configuration derived from phase %d\n", c);
2422 std::vector<double> cPhaseConfig = pointCoords(allData, c);
2424 // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2426 for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2427 const std::string &name = cpsIter->first;
2428 const std::pair<int,int> &bounds = cpsIter->second;
2429 const int lb = bounds.first;
2430 const int ub = bounds.second;
2432 double val = (cPhaseConfig[v] + best[v])/2.0;
2434 newControlPoints[name] = keepInRange((int)val,lb,ub);
2435 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2439 // We have tried all configurations. We should update the simplex to refer to all the newly tried configurations, and start over
2440 CkAssert(stillMustContractList.size() == 0);
2441 simplexIndices.clear();
2442 CkAssert(simplexIndices.size()==0);
2443 for(int i=0; i<n+1; i++){
2444 simplexIndices.insert(allData.phases.size()-2-i);
2446 CkAssert(simplexIndices.size()==n+1);
2448 // Transition to reflecting state
2449 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2454 } else if (simplexState == expanding){
2455 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2456 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2457 // A new configuration has been evaluated
2459 // determine if y** < yl
2460 if(recentPhaseTime < bestTime){
2461 // replace Ph by P**
2462 CkAssert(simplexIndices.size() == n+1);
2463 simplexIndices.erase(worstPhase);
2464 CkAssert(simplexIndices.size() == n);
2465 simplexIndices.insert(p2Phase);
2466 CkAssert(simplexIndices.size() == n+1);
2467 // Transition to reflecting state
2468 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2471 // else, replace ph with p*
2472 CkAssert(simplexIndices.size() == n+1);
2473 simplexIndices.erase(worstPhase);
2474 CkAssert(simplexIndices.size() == n);
2475 simplexIndices.insert(pPhase);
2476 CkAssert(simplexIndices.size() == n+1);
2477 // Transition to reflecting state
2478 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2483 CkAbort("Unknown simplexState");
2490 /** Replace the worst point with its reflection across the centroid. */
2491 void simplexScheme::doReflection(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2493 int n = controlPointSpace.size();
2495 printSimplex(allData);
2497 computeCentroidBestWorst(controlPointSpace, newControlPoints, phase_id, allData);
2500 // Quit if the diameter of our simplex is small
2502 for(int i=0; i<n+1; i++){
2503 // Compute r^2 of this simplex point from the centroid
2505 std::vector<double> p = pointCoords(allData, i);
2506 for(int d=0; d<p.size(); d++){
2507 double r1 = (p[d] * centroid[d]);
2515 // At some point we quit this tuning
2517 useBestKnown = true;
2518 instrumentedPhase *best = allData.findBest();
2519 CkPrintf("Simplex Tuning: Simplex diameter is small, so switching over to best known phase:\n");
2521 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2522 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
2523 const std::string &name = cpsIter->first;
2524 newControlPoints[name] = best->controlPoints[name];
2529 // Compute new point P* =(1+alpha)*centroid - alpha(worstPoint)
2531 pPhase = allData.phases.size()-1;
2533 for(int i=0; i<n; i++){
2534 P[i] = (1.0+alpha) * centroid[i] - alpha * worst[i] ;
2537 for(int i=0; i<P.size(); i++){
2538 CkPrintf("Simplex Tuning: P dimension %d is %f\n", i, P[i]);
2542 // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2544 for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2545 const std::string &name = cpsIter->first;
2546 const std::pair<int,int> &bounds = cpsIter->second;
2547 const int lb = bounds.first;
2548 const int ub = bounds.second;
2549 newControlPoints[name] = keepInRange((int)P[v],lb,ub);
2550 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2555 // Transition to "reflecting" state
2556 simplexState = reflecting;
2557 CkPrintf("Simplex Tuning: Switched to state: reflecting\n");
2564 /** Replace the newly tested reflection with a further expanded version of itself. */
2565 void simplexScheme::doExpansion(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2566 int n = controlPointSpace.size();
2567 printSimplex(allData);
2569 // Note that the original Nelder Mead paper has an error when it displays the equation for P** in figure 1.
2570 // I believe the equation for P** in the text on page 308 is correct.
2572 // Compute new point P2 = (1+gamma)*P - gamma(centroid)
2575 p2Phase = allData.phases.size()-1;
2577 for(int i=0; i<n; i++){
2578 P2[i] = ( (1.0+gamma) * P[i] - gamma * centroid[i] );
2581 for(int i=0; i<P2.size(); i++){
2582 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2586 // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2588 for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2589 const std::string &name = cpsIter->first;
2590 const std::pair<int,int> &bounds = cpsIter->second;
2591 const int lb = bounds.first;
2592 const int ub = bounds.second;
2593 newControlPoints[name] = keepInRange((int)P2[v],lb,ub);
2594 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Expanding %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2599 // Transition to "doneExpanding" state
2600 simplexState = doneExpanding;
2601 CkPrintf("Simplex Tuning: Switched to state: doneExpanding\n");
2608 /** Replace the newly tested reflection with a further expanded version of itself. */
2609 void simplexScheme::doContraction(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2610 int n = controlPointSpace.size();
2611 printSimplex(allData);
2613 // Compute new point P2 = beta*Ph + (1-beta)*centroid
2616 p2Phase = allData.phases.size()-1;
2618 for(int i=0; i<n; i++){
2619 P2[i] = ( beta*worst[i] + (1.0-beta)*centroid[i] );
2622 for(int i=0; i<P2.size(); i++){
2623 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2627 // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2629 for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2630 const std::string &name = cpsIter->first;
2631 const std::pair<int,int> &bounds = cpsIter->second;
2632 const int lb = bounds.first;
2633 const int ub = bounds.second;
2634 newControlPoints[name] = keepInRange((int)P2[v],lb,ub);
2635 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Contracting %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2640 // Transition to "contracting" state
2641 simplexState = contracting;
2642 CkPrintf("Simplex Tuning: Switched to state: contracting\n");
2649 void simplexScheme::computeCentroidBestWorst(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2650 int n = controlPointSpace.size();
2652 // Find worst performing point in the simplex
2655 bestPhase = 10000000;
2656 bestTime = 10000000;
2657 for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2658 double t = allData.phases[*iter]->medianTime();
2668 CkAssert(worstTime != -1.0 && worstPhase != -1 && bestTime != 10000000 && bestPhase != 10000000);
2670 best = pointCoords(allData, bestPhase);
2671 CkAssert(best.size() == n);
2673 worst = pointCoords(allData, worstPhase);
2674 CkAssert(worst.size() == n);
2676 // Calculate centroid of the remaining points in the simplex
2678 for(int i=0; i<n; i++){
2684 for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2685 if(*iter != worstPhase){
2687 // Accumulate into the result vector
2689 for(std::map<std::string,int>::iterator citer = allData.phases[*iter]->controlPoints.begin(); citer != allData.phases[*iter]->controlPoints.end(); ++citer){
2690 centroid[c] += citer->second;
2697 // Now divide the sums by the number of points.
2698 for(int v = 0; v<centroid.size(); v++) {
2699 centroid[v] /= (double)numPts;
2702 CkAssert(centroid.size() == n);
2704 for(int i=0; i<centroid.size(); i++){
2705 CkPrintf("Centroid dimension %d is %f\n", i, centroid[i]);
2713 std::vector<double> simplexScheme::pointCoords(instrumentedData &allData, int i){
2714 std::vector<double> result;
2715 for(std::map<std::string,int>::iterator citer = allData.phases[i]->controlPoints.begin(); citer != allData.phases[i]->controlPoints.end(); ++citer){
2716 result.push_back((double)citer->second);
2724 void ControlPointWriteOutputToDisk(){
2725 CkAssert(CkMyPe() == 0);
2726 controlPointManagerProxy.ckLocalBranch()->writeOutputToDisk();
2733 #include "ControlPoints.def.h"