Fix parallel build of examples/charm++/user-driven-interop
[charm.git] / src / ck-cp / controlPoints.C
blob3e7b885149e70ec398de21202c666a93f5592570
1 #include <charm++.h>
3 // This file is compiled twice to make a version that is capable of not needing the tracing to be turned on. 
5 #include "controlPoints.h"
6 #include "trace-controlPoints.h"
7 #include "LBDatabase.h"
8 #include "controlPoints.h"
9 #include "charm++.h"
10 #include "trace-projections.h"
11 #include <pathHistory.h>
12 #include "cp_effects.h"
13 #include <iostream>
14 #include <math.h>
15 #include <climits>
17 #if CMK_WITH_CONTROLPOINT
19 #define roundDouble(x)        ((long)(x+0.5))
20 #define myAbs(x)   (((x)>=0.0)?(x):(-1.0*(x)))
21 #define isInRange(v,a,b) ( ((v)<=(a)&&(v)>=(b)) || ((v)<=(b)&&(v)>=(a)) )
23 inline double closestInRange(double v, double a, double b){
24   return (v<a) ? a : ((v>b)?b:v);
28 //  A framework for tuning "control points" exposed by an application. Tuning decisions are based upon observed performance measurements.
31 /** @defgroup ControlPointFramework Automatic Performance Tuning and Steering Framework  */
32 /**  @{ */
34 using namespace std;
36 #define DEFAULT_CONTROL_POINT_SAMPLE_PERIOD  10000000
39 //#undef DEBUGPRINT
40 //#define DEBUGPRINT 4
42 static void periodicProcessControlPoints(void* ptr, double currWallTime);
45 // A pointer to this PE's controlpoint manager Proxy
46 /* readonly */ CProxy_controlPointManager controlPointManagerProxy;
47 /* readonly */ int random_seed;
48 /* readonly */ long controlPointSamplePeriod;
49 /* readonly */ int whichTuningScheme;
50 /* readonly */ bool writeDataFileAtShutdown;
51 /* readonly */ bool shouldFilterOutputData;
52 /* readonly */ bool loadDataFileAtStartup;
53 /* readonly */ bool shouldGatherMemoryUsage;
54 /* readonly */ bool shouldGatherUtilization;
55 /* readonly */ bool shouldGatherAll;
56 /* readonly */ char CPDataFilename[512];
58 extern bool enableCPTracing;
60 /// The control point values to be used for the first few phases if the strategy doesn't choose to do something else.
61 /// These probably come from the command line arguments, so are available only on PE 0
62 std::map<std::string, int> defaultControlPointValues;
66 typedef enum tuningSchemeEnum {RandomSelection, SimulatedAnnealing, ExhaustiveSearch, CriticalPathAutoPrioritization, UseBestKnownTiming, UseSteering, MemoryAware, Simplex, DivideAndConquer, AlwaysDefaults, LDBPeriod, LDBPeriodLinear, LDBPeriodQuadratic, LDBPeriodOptimal}  tuningScheme;
70 void printTuningScheme(){
71   switch(whichTuningScheme){
72   case RandomSelection:
73     CkPrintf("Tuning Scheme: RandomSelection\n");
74     break;
75   case AlwaysDefaults:
76     CkPrintf("Tuning Scheme: AlwaysDefaults\n");
77     break;
78   case SimulatedAnnealing:
79     CkPrintf("Tuning Scheme: SimulatedAnnealing\n");
80     break;
81   case ExhaustiveSearch:
82     CkPrintf("Tuning Scheme: ExhaustiveSearch\n");
83     break;
84   case CriticalPathAutoPrioritization:
85     CkPrintf("Tuning Scheme: CriticalPathAutoPrioritization\n");
86     break;
87   case UseBestKnownTiming:
88     CkPrintf("Tuning Scheme: UseBestKnownTiming\n");
89     break;
90   case UseSteering:
91     CkPrintf("Tuning Scheme: UseSteering\n");
92     break;
93   case MemoryAware:
94     CkPrintf("Tuning Scheme: MemoryAware\n");
95     break;
96   case Simplex:
97     CkPrintf("Tuning Scheme: Simplex Algorithm\n");
98     break;
99   case DivideAndConquer:
100     CkPrintf("Tuning Scheme: Divide & Conquer Algorithm\n");
101     break;
102   case LDBPeriod:
103     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Constant Prediction)\n");
104     break;
105   case LDBPeriodLinear:
106     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Linear Prediction)\n");
107     break;
108   case LDBPeriodQuadratic:
109     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Quadratic Prediction)\n");
110     break;
111   case LDBPeriodOptimal:
112     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Optimal Prediction)\n");
113     break;
114   default:
115     CkPrintf("Unknown tuning scheme\n");
116     break;
117   }
118   fflush(stdout);
123 /// A reduction type that combines idle time measurements (min/sum/max etc.)
124 CkReduction::reducerType idleTimeReductionType;
125 /// A reducer that combines idle time measurements (min/sum/max etc.)
126 CkReductionMsg *idleTimeReduction(int nMsg,CkReductionMsg **msgs){
127   double ret[3];
128   if(nMsg > 0){
129     CkAssert(msgs[0]->getSize()==3*sizeof(double));
130     double *m=(double *)msgs[0]->getData();
131     ret[0]=m[0];
132     ret[1]=m[1];
133     ret[2]=m[2];
134   }
135   for (int i=1;i<nMsg;i++) {
136     CkAssert(msgs[i]->getSize()==3*sizeof(double));
137     double *m=(double *)msgs[i]->getData();
138     ret[0]=min(ret[0],m[0]);
139     ret[1]+=m[1];
140     ret[2]=max(ret[2],m[2]);
141   }  
142   return CkReductionMsg::buildNew(3*sizeof(double),ret);   
147 /// A reduction type that combines idle, overhead, and memory measurements
148 CkReduction::reducerType allMeasuresReductionType;
149 /// A reducer that combines idle, overhead, and memory measurements
150 #define ALL_REDUCTION_SIZE 12
151 CkReductionMsg *allMeasuresReduction(int nMsg,CkReductionMsg **msgs){
152   double ret[ALL_REDUCTION_SIZE];
153   if(nMsg > 0){
154     CkAssert(msgs[0]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
155     double *m=(double *)msgs[0]->getData();
156     memcpy(ret, m, ALL_REDUCTION_SIZE*sizeof(double) );
157   }
158   for (int i=1;i<nMsg;i++) {
159     CkAssert(msgs[i]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
160     double *m=(double *)msgs[i]->getData();
161     // idle time (min/sum/max)
162     ret[0]=min(ret[0],m[0]);
163     ret[1]+=m[1];
164     ret[2]=max(ret[2],m[2]);
165     // overhead time (min/sum/max)
166     ret[3]=min(ret[3],m[3]);
167     ret[4]+=m[4];
168     ret[5]=max(ret[5],m[5]);
169     // mem usage (max)
170     ret[6]=max(ret[6],m[6]);
171     // bytes per invocation for two types of entry methods
172     ret[7]+=m[7];
173     ret[8]+=m[8];
174     ret[9]+=m[9];
175     ret[10]+=m[10];
176     // Grain size (avg)
177     ret[11]+=m[11];
178   }  
179   return CkReductionMsg::buildNew(ALL_REDUCTION_SIZE*sizeof(double),ret);   
183 /// Registers the control point framework's reduction handlers at startup on each PE
184 /*initproc*/ void registerCPReductions(void) {
185   idleTimeReductionType=CkReduction::addReducer(idleTimeReduction, false, "idleTimeReduction");
186   allMeasuresReductionType=CkReduction::addReducer(allMeasuresReduction, false, "allMeasuresReduction");
194 /// Return an integer between 0 and num-1 inclusive
195 /// If different seed, name, and random_seed values are provided, the returned values are pseudo-random
196 unsigned int randInt(unsigned int num, const char* name, int seed=0){
197   CkAssert(num > 0);
199   unsigned long hash = 0;
200   unsigned int c;
201   unsigned char * str = (unsigned char*)name;
203   while ((c = *str++)){
204     unsigned int c2 = (c+64)%128;
205     unsigned int c3 = (c2*5953)%127;
206     hash = c3 + (hash << 6) + (hash << 16) - hash;
207   }
209   unsigned long shuffled1 = (hash*2083)%7907;
210   unsigned long shuffled2 = (seed*4297)%2017;
212   unsigned long shuffled3 = (random_seed*4799)%7919;
214   unsigned int namehash = shuffled3 ^ shuffled1 ^ shuffled2;
216   unsigned int result = ((namehash * 6029) % 1117) % num;
218   CkAssert(result >=0 && result < num);
219   return result;
224 controlPointManager::controlPointManager() {
225   generatedPlanForStep = -1;
227     exitWhenReady = false;
228     alreadyRequestedMemoryUsage = false;   
229     alreadyRequestedIdleTime = false;
230     alreadyRequestedAll = false;
231     
232     instrumentedPhase * newPhase = new instrumentedPhase();
233     allData.phases.push_back(newPhase);   
234     
235     frameworkShouldAdvancePhase = false;
236     haveControlPointChangeCallback = false;
237 //    CkPrintf("[%d] controlPointManager() Constructor Initializing control points, and loading data file\n", CkMyPe());
238     
239     phase_id = 0;
241     if(loadDataFileAtStartup){    
242       loadDataFile();
243     }
245     
246     if(CkMyPe() == 0){
247       CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
248     }
250     traceRegisterUserEvent("No currently executing message", 5000);
251     traceRegisterUserEvent("Zero time along critical path", 5010);
252     traceRegisterUserEvent("Positive total time along critical path", 5020);
253     traceRegisterUserEvent("env->setPathHistory()", 6000);
254     traceRegisterUserEvent("Critical Path", 5900);
255     traceRegisterUserEvent("Table Entry", 5901);
258 #if USER_EVENT_FOR_REGISTERTERMINALPATH
259     traceRegisterUserEvent("registerTerminalPath", 100); 
260 #endif
262   }
263   
265  controlPointManager::~controlPointManager(){
266 //    CkPrintf("[%d] controlPointManager() Destructor\n", CkMyPe());
267   }
269   void controlPointManager::pup(PUP::er &p)
270   {
271       // FIXME: does not work when control point is actually used,
272       // just minimal pup so that it allows exit function to work (exitIfReady).
273     p|generatedPlanForStep;
274     p|exitWhenReady;
275     p|alreadyRequestedMemoryUsage;
276     p|alreadyRequestedIdleTime;
277     p|alreadyRequestedAll;
278     p|frameworkShouldAdvancePhase;
279     p|haveControlPointChangeCallback;
280     p|phase_id;
281   }
282   
284   /// Loads the previous run data file
285   void controlPointManager::loadDataFile(){
286     ifstream infile(CPDataFilename);
287     vector<std::string> names;
288     std::string line;
289   
290     while(getline(infile,line)){
291       if(line[0] != '#')
292         break;
293     }
294   
295     int numTimings = 0;
296     std::istringstream n(line);
297     n >> numTimings;
298   
299     while(getline(infile,line)){ 
300       if(line[0] != '#') 
301         break; 
302     }
304     int numControlPointNames = 0;
305     std::istringstream n2(line);
306     n2 >> numControlPointNames;
307   
308     for(int i=0; i<numControlPointNames; i++){
309       getline(infile,line);
310       names.push_back(line);
311     }
313     for(int i=0;i<numTimings;i++){
314       getline(infile,line);
315       while(line[0] == '#')
316         getline(infile,line); 
318       instrumentedPhase * ips = new instrumentedPhase();
320       std::istringstream iss(line);
322       // Read memory usage for phase
323       iss >> ips->memoryUsageMB;
324       //     CkPrintf("Memory usage loaded from file: %d\n", ips.memoryUsageMB);
326       // Read idle time
327       iss >> ips->idleTime.min;
328       iss >> ips->idleTime.avg;
329       iss >> ips->idleTime.max;
331       // Read overhead time
332       iss >> ips->overheadTime.min;
333       iss >> ips->overheadTime.avg;
334       iss >> ips->overheadTime.max;
336       // read bytePerInvoke
337       iss >> ips->bytesPerInvoke;
339       // read grain size
340       iss >> ips->grainSize;
342       // Read control point values
343       for(int cp=0;cp<numControlPointNames;cp++){
344         int cpvalue;
345         iss >> cpvalue;
346         ips->controlPoints.insert(make_pair(names[cp],cpvalue));
347       }
349       // ignore median time
350       double mt;
351       iss >> mt;
353       // Read all times
355       double time;
357       while(iss >> time){
358         ips->times.push_back(time);
359 #if DEBUGPRINT > 5
360         CkPrintf("read time %lf from file\n", time);
361 #endif
362       }
364       allData.phases.push_back(ips);
366     }
368     infile.close();
369   }
373   /// Add the current data to allData and output it to a file
374   void controlPointManager::writeDataFile(){
375     CkPrintf("============= writeDataFile() to %s  ============\n", CPDataFilename);
376     ofstream outfile(CPDataFilename);
377     allData.cleanupNames();
379 //    string s = allData.toString();
380 //    CkPrintf("At end: \n %s\n", s.c_str());
382     if(shouldFilterOutputData){
383       allData.verify();
384       allData.filterOutIncompletePhases();
385     }
387 //    string s2 = allData.toString();
388 //    CkPrintf("After filtering: \n %s\n", s2.c_str());
389     if(allData.toString().length() > 10){
390       outfile << allData.toString();
391     } else {
392       outfile << " No data available to save to disk " << endl;
393     }
394     outfile.close();
395   }
397   /// User can register a callback that is called when application should advance to next phase
398   void controlPointManager::setCPCallback(CkCallback cb, bool _frameworkShouldAdvancePhase){
399     frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
400     controlPointChangeCallback = cb;
401     haveControlPointChangeCallback = true;
402   }
405 /// A user can specify that the framework should advance the phases automatically. Useful for gather performance measurements without modifying a program.
406 void controlPointManager::setFrameworkAdvancePhase(bool _frameworkShouldAdvancePhase){
407   frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
410   /// Called periodically by the runtime to handle the control points
411   /// Currently called on each PE
412   void controlPointManager::processControlPoints(){
414 #if DEBUGPRINT
415     CkPrintf("[%d] processControlPoints() haveControlPointChangeCallback=%d frameworkShouldAdvancePhase=%d\n", CkMyPe(), (int)haveControlPointChangeCallback, (int)frameworkShouldAdvancePhase);
416 #endif
418     //==========================================================================================
419     // Print the data for each phase
421     const int s = allData.phases.size();
423 #if DEBUGPRINT
424     CkPrintf("\n\nExamining critical paths and priorities and idle times (num phases=%d)\n", s );
425     for(int p=0;p<s;++p){
426       const instrumentedPhase &phase = allData.phases[p];
427       const idleTimeContainer &idle = phase.idleTime;
428       //      vector<MergeablePathHistory> const &criticalPaths = phase.criticalPaths;
429       vector<double> const &times = phase.times;
431       CkPrintf("Phase %d:\n", p);
432       idle.print();
433      //   CkPrintf("critical paths: (* affected by control point)\n");
434         //   for(int i=0;i<criticalPaths.size(); i++){
435         // If affected by a control point
436         //      criticalPaths[i].print();
438       //        criticalPaths[i].print();
439       //        CkPrintf("\n");
440         
442       //   }
443       CkPrintf("Timings:\n");
444       for(int i=0;i<times.size(); i++){
445         CkPrintf("%lf ", times[i]);
446       }
447       CkPrintf("\n");
449     }
450     
451     CkPrintf("\n\n");
454 #endif
458     //==========================================================================================
459     // If this is a phase during which we try to adapt control point values based on critical path
460 #if 0
461     if( s%5 == 4) {
463       // Find the most recent phase with valid critical path data and idle time measurements      
464       int whichPhase=-1;
465       for(int p=0; p<s; ++p){
466         const instrumentedPhase &phase = allData.phases[p];
467         const idleTimeContainer &idle = phase.idleTime;
468         if(idle.isValid() && phase.criticalPaths.size()>0 ){
469           whichPhase = p;
470         }
471       }
472       
473       
474       CkPrintf("Examining phase %d which has a valid idle time and critical paths\n", whichPhase);
475       const instrumentedPhase &phase = allData.phases[whichPhase];
476       const idleTimeContainer &idle = phase.idleTime;
477       
478       if(idle.min > 0.1){
479         CkPrintf("Min PE idle is HIGH. %.2lf%% > 10%%\n", idle.min*100.0);
480         
481         // Determine the median critical path for this phase
482         int medianCriticalPathIdx = phase.medianCriticalPathIdx();
483         const PathHistory &path = phase.criticalPaths[medianCriticalPathIdx];
484         CkAssert(phase.criticalPaths.size() > 0);
485         CkAssert(phase.criticalPaths.size() > medianCriticalPathIdx); 
486         
487         CkPrintf("Median Critical Path has time %lf\n", path.getTotalTime());
488         
489         if(phase.times[medianCriticalPathIdx] > 1.2 * path.getTotalTime()){
490           CkPrintf("The application step(%lf) is taking significantly longer than the critical path(%lf). BAD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
493           CkPrintf("Finding control points related to the critical path\n");
494           int cpcount = 0;
495           std::set<std::string> controlPointsAffectingCriticalPath;
497           
498           for(int e=0;e<path.getNumUsed();e++){
499             if(path.getUsedCount(e)>0){
500               int ep = path.getUsedEp(e);
502               std::map<std::string, std::set<int> >::iterator iter;
503               for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
504                 if(iter->second.count(ep)>0){
505                   controlPointsAffectingCriticalPath.insert(iter->first);
506                   CkPrintf("Control Point \"%s\" affects the critical path\n", iter->first.c_str());
507                   cpcount++;
508                 }
509               }
510               
511             }
512           }
513           
515           if(cpcount==0){
516             CkPrintf("No control points are known to affect the critical path\n");
517           } else {
518             double beginTime = CmiWallTimer();
520             CkPrintf("Attempting to modify control point values for %d control points that affect the critical path\n", controlPointsAffectingCriticalPath.size());
521             
522             newControlPoints = phase.controlPoints;
523             
524             if(frameworkShouldAdvancePhase){
525               gotoNextPhase();  
526             }
527             
528             if(haveControlPointChangeCallback){ 
529 #if DEBUGPRINT
530               CkPrintf("Calling control point change callback\n");
531 #endif
532               // Create a high priority message and send it to the callback
533               controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
534               *((int*)CkPriorityPtr(msg)) = -INT_MAX;
535               CkSetQueueing(msg, CK_QUEUEING_IFIFO);
536               controlPointChangeCallback.send(msg);
537               
538             }
539             
540             
541             // adjust the control points that can affect the critical path
543             char textDescription[4096*2];
544             textDescription[0] = '\0';
546             std::map<std::string,int>::iterator newCP;
547             for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
548               if( controlPointsAffectingCriticalPath.count(newCP->first) > 0 ){
549                 // decrease the value (increase priority) if within range
550                 int lowerbound = controlPointSpace[newCP->first].first;
551                 if(newCP->second > lowerbound){
552                   newControlPointsAvailable = true;
553                   newCP->second --;
554                 }
555               }
556             }
557            
558             // Create a string for a projections user event
559             if(1){
560               std::map<std::string,int>::iterator newCP;
561               for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
562                 sprintf(textDescription+strlen(textDescription), "<br>\"%s\"=%d", newCP->first.c_str(), newCP->second);
563               }
564             }
566             char *userEventString = new char[4096*2];
567             sprintf(userEventString, "Adjusting control points affecting critical path: %s ***", textDescription);
568             
569             static int userEventCounter = 20000;
570             CkPrintf("USER EVENT: %s\n", userEventString);
571             
572             traceRegisterUserEvent(userEventString, userEventCounter); 
573             traceUserBracketEvent(userEventCounter, beginTime, CmiWallTimer());
574             userEventCounter++;
575             
576           }
577           
578         } else {
579           CkPrintf("The application step(%lf) is dominated mostly by the critical path(%lf). GOOD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
580         }
581         
582         
583       }
584       
585     } else {
586       
587       
588     }
589    
590 #endif
594     if(frameworkShouldAdvancePhase){
595       gotoNextPhase();  
596     }
597     
598     if(haveControlPointChangeCallback){ 
599       // Create a high priority message and send it to the callback
600       controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
601       *((int*)CkPriorityPtr(msg)) = -INT_MAX;
602       CkSetQueueing(msg, CK_QUEUEING_IFIFO);
603       controlPointChangeCallback.send(msg);
604     }
605     
606   }
607   
608   /// Determine if any control point is known to affect an entry method
609   bool controlPointManager::controlPointAffectsThisEP(int ep){
610     std::map<std::string, std::set<int> >::iterator iter;
611     for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
612       if(iter->second.count(ep)>0){
613         return true;
614       }
615     }
616     return false;    
617   }
618   
619   /// Determine if any control point is known to affect a chare array  
620   bool controlPointManager::controlPointAffectsThisArray(int array){
621     std::map<std::string, std::set<int> >::iterator iter;
622     for(iter=affectsPrioritiesArray.begin(); iter!= affectsPrioritiesArray.end(); ++iter){
623       if(iter->second.count(array)>0){
624         return true;
625       }
626     }
627     return false;   
628   }
629   
631   /// The data from the current phase
632   instrumentedPhase * controlPointManager::currentPhaseData(){
633     int s = allData.phases.size();
634     CkAssert(s>=1);
635     return allData.phases[s-1];
636   }
639   /// The data from the previous phase
640   instrumentedPhase * controlPointManager::previousPhaseData(){
641     int s = allData.phases.size();
642     if(s >= 2 && phase_id > 0) {
643       return allData.phases[s-2];
644     } else {
645       return NULL;
646     }
647   }
649   /// The data from two phases back
650   instrumentedPhase * controlPointManager::twoAgoPhaseData(){
651     int s = allData.phases.size();
652     if(s >= 3 && phase_id > 0) {
653       return allData.phases[s-3];
654     } else {
655       return NULL;
656     }
657   }
658   
660   /// Called by either the application or the Control Point Framework to advance to the next phase  
661   void controlPointManager::gotoNextPhase(){
662     CkPrintf("gotoNextPhase shouldGatherAll=%d enableCPTracing=%d\n", (int)shouldGatherAll, (int)enableCPTracing);
663     fflush(stdout);
664       
665     if(enableCPTracing){
666       if(shouldGatherAll && CkMyPe() == 0 && !alreadyRequestedAll){
667         alreadyRequestedAll = true;
668         CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherAll(NULL), 0, thisProxy);
669         CkPrintf("Requesting all measurements\n");
670         thisProxy.requestAll(*cb);
671         delete cb;
672         
673       } else {
674         
675         if(shouldGatherMemoryUsage && CkMyPe() == 0 && !alreadyRequestedMemoryUsage){
676           alreadyRequestedMemoryUsage = true;
677           CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherMemoryUsage(NULL), 0, thisProxy);
678           thisProxy.requestMemoryUsage(*cb);
679           delete cb;
680         }
681         
682         if(shouldGatherUtilization && CkMyPe() == 0 && !alreadyRequestedIdleTime){
683           alreadyRequestedIdleTime = true;
684           CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherIdleTime(NULL), 0, thisProxy);
685           thisProxy.requestIdleTime(*cb);
686           delete cb;
687         }
688       }
689     }
693 #if CMK_LBDB_ON && 0
694     LBDatabase * myLBdatabase = LBDatabaseObj();
695     LBDB * myLBDB = myLBdatabase->getLBDB();       // LBDB is Defined in LBDBManager.h
696     const CkVec<LBObj*> objs = myLBDB->getObjs();
697     
698     LBRealType maxObjWallTime = -1.0;
699     
700     for(int i=0;i<objs.length();i++){
701       LBObj* o = objs[i];
702       const LDObjData d = o->ObjData();
703       LBRealType cpuTime = d.cpuTime;
704       LBRealType wallTime = d.wallTime;
705       // can also get object handles from the LDObjData struct
706       CkPrintf("[%d] LBDB Object[%d]: cpuTime=%f wallTime=%f\n", CkMyPe(), i, cpuTime, wallTime);
707       if(wallTime > maxObjWallTime){
709       }
710       
711     }
713     myLBDB->ClearLoads(); // BUG: Probably very dangerous if we are actually using load balancing
714     
715 #endif    
718     
719     // increment phase id
720     phase_id++;
721     
723     // Create new entry for the phase we are starting now
724     instrumentedPhase * newPhase = new instrumentedPhase();
725     allData.phases.push_back(newPhase);
726     
727     CkPrintf("Now in phase %d allData.phases.size()=%d\n", phase_id, allData.phases.size());
729   }
731   /// An application uses this to register an instrumented timing for this phase
732   void controlPointManager::setTiming(double time){
733     currentPhaseData()->times.push_back(time);
735 #if USE_CRITICAL_PATH_HEADER_ARRAY
736        
737     // First we should register this currently executing message as a path, because it is likely an important one to consider.
738     //    registerTerminalEntryMethod();
739     
740     // save the critical path for this phase
741     //   thisPhaseData.criticalPaths.push_back(maxTerminalPathHistory);
742     //    maxTerminalPathHistory.reset();
743     
744     
745     // Reset the counts for the currently executing message
746     //resetThisEntryPath();
747         
748 #endif
749     
750   }
751   
752   /// Entry method called on all PEs to request CPU utilization statistics
753   void controlPointManager::requestIdleTime(CkCallback cb){
754     CkAssert(enableCPTracing);
755    
756     double i = localControlPointTracingInstance()->idleRatio();
757     double idle[3];
758     idle[0] = i;
759     idle[1] = i;
760     idle[2] = i;
761     
762     //    CkPrintf("[%d] idleRatio=%f\n", CkMyPe(), i);
763     
764     localControlPointTracingInstance()->resetTimings();
765     
766     contribute(3*sizeof(double),idle,idleTimeReductionType, cb);
767   }
768   
769   /// All processors reduce their memory usages in requestIdleTime() to this method
770   void controlPointManager::gatherIdleTime(CkReductionMsg *msg){
771     CkAssert(enableCPTracing);
773     int size=msg->getSize() / sizeof(double);
774     CkAssert(size==3);
775     double *r=(double *) msg->getData();
776         
777     instrumentedPhase* prevPhase = previousPhaseData();
778     if(prevPhase != NULL){
779       prevPhase->idleTime.min = r[0];
780       prevPhase->idleTime.avg = r[1]/CkNumPes();
781       prevPhase->idleTime.max = r[2];
782       prevPhase->idleTime.print();
783       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf in prevPhase=%p\n", prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, prevPhase);
784     } else {
785       CkPrintf("There is no previous phase to store the idle time measurements\n");
786     }
787     
788     alreadyRequestedIdleTime = false;
789     checkForShutdown();
790     delete msg;
791   }
798   /// Entry method called on all PEs to request CPU utilization statistics and memory usage
799   void controlPointManager::requestAll(CkCallback cb){
800     CkAssert(enableCPTracing);
802     TraceControlPoints *t = localControlPointTracingInstance();
804     double data[ALL_REDUCTION_SIZE];
806     double *idle = data;
807     double *over = data+3;
808     double *mem = data+6;
809     double *msgBytes = data+7;  
810     double *grainsize = data+11;  
812     const double i = t->idleRatio();
813     idle[0] = i;
814     idle[1] = i;
815     idle[2] = i;
817     const double o = t->overheadRatio();
818     over[0] = o;
819     over[1] = o;
820     over[2] = o;
822     const double m = t->memoryUsageMB();
823     mem[0] = m;
825     msgBytes[0] = t->b2;
826     msgBytes[1] = t->b3;
827     msgBytes[2] = t->b2mlen;
828     msgBytes[3] = t->b3mlen;
830     grainsize[0] = t->grainSize();
831     
832     localControlPointTracingInstance()->resetAll();
834     contribute(ALL_REDUCTION_SIZE*sizeof(double),data,allMeasuresReductionType, cb);
835   }
836   
837   /// All processors reduce their memory usages in requestIdleTime() to this method
838   void controlPointManager::gatherAll(CkReductionMsg *msg){
839     CkAssert(enableCPTracing);
841     CkAssert(msg->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
842     int size=msg->getSize() / sizeof(double);
843     double *data=(double *) msg->getData();
844         
845     double *idle = data;
846     double *over = data+3;
847     double *mem = data+6;
848     double *msgBytes = data+7;
849     double *granularity = data+11;
852     //    std::string b = allData.toString();
854     instrumentedPhase* prevPhase = previousPhaseData();
855     if(prevPhase != NULL){
856       prevPhase->idleTime.min = idle[0];
857       prevPhase->idleTime.avg = idle[1]/CkNumPes();
858       prevPhase->idleTime.max = idle[2];
859       
860       prevPhase->overheadTime.min = over[0];
861       prevPhase->overheadTime.avg = over[1]/CkNumPes();
862       prevPhase->overheadTime.max = over[2];
863       
864       prevPhase->memoryUsageMB = mem[0];
865       
866       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf  mem=%lf in prevPhase=%p\n", (double)prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, (double)prevPhase->memoryUsageMB, prevPhase);
868       double bytesPerInvoke2 = msgBytes[2] / msgBytes[0];
869       double bytesPerInvoke3 = msgBytes[3] / msgBytes[1];
871       /** The average of the grain sizes on all PEs in us */
872       prevPhase->grainSize = (granularity[0] / (double)CkNumPes());
874       CkPrintf("Bytes Per Invokation 2 = %f\n", bytesPerInvoke2);
875       CkPrintf("Bytes Per Invokation 3 = %f\n", bytesPerInvoke3);
877       CkPrintf("Bytes Per us of work 2 = %f\n", bytesPerInvoke2/prevPhase->grainSize);
878       CkPrintf("Bytes Per us of work 3 = %f\n", bytesPerInvoke3/prevPhase->grainSize);
880       if(bytesPerInvoke2 > bytesPerInvoke3)
881         prevPhase->bytesPerInvoke = bytesPerInvoke2;
882       else
883         prevPhase->bytesPerInvoke = bytesPerInvoke3;
885       //      prevPhase->print();
886       // CkPrintf("prevPhase=%p number of timings=%d\n", prevPhase, prevPhase->times.size() );
888       //      std::string a = allData.toString();
890       //CkPrintf("Before:\n%s\nAfter:\n%s\n\n", b.c_str(), a.c_str());
891       
892     } else {
893       CkPrintf("There is no previous phase to store measurements\n");
894     }
895     
896     alreadyRequestedAll = false;
897     checkForShutdown();
898     delete msg;
899   }
904   void controlPointManager::checkForShutdown(){
905     if( exitWhenReady && !alreadyRequestedAll && !alreadyRequestedMemoryUsage && !alreadyRequestedIdleTime && CkMyPe()==0){
906       doExitNow();
907     }
908   }
911   void controlPointManager::exitIfReady(){
912      if( !alreadyRequestedMemoryUsage && !alreadyRequestedAll && !alreadyRequestedIdleTime && CkMyPe()==0){
913        //  CkPrintf("controlPointManager::exitIfReady exiting immediately\n");
914        doExitNow();
915      } else {
916        // CkPrintf("controlPointManager::exitIfReady Delaying exiting\n");
917        exitWhenReady = true;
918      }
919   }
923   void controlPointManager::doExitNow(){
924           _TRACE_BEGIN_EXECUTE_DETAILED(-1, -1, _threadEP,CkMyPe(), 0, NULL, this);
925           writeOutputToDisk();
926     // CkPrintf("[%d] Control point manager calling CkContinueExit()\n", CkMyPe());
927     CkContinueExit();
928   }
930   void controlPointManager::writeOutputToDisk(){
931           if(writeDataFileAtShutdown){
932                   controlPointManagerProxy.ckLocalBranch()->writeDataFile();
933           }
934   }
937   /// Entry method called on all PEs to request memory usage
938   void controlPointManager::requestMemoryUsage(CkCallback cb){
939     int m = CmiMaxMemoryUsage() / 1024 / 1024;
940     CmiResetMaxMemory();
941     //    CkPrintf("PE %d Memory Usage is %d MB\n",CkMyPe(), m);
942     contribute(sizeof(int),&m,CkReduction::max_int, cb);
943   }
945   /// All processors reduce their memory usages to this method
946   void controlPointManager::gatherMemoryUsage(CkReductionMsg *msg){
947     int size=msg->getSize() / sizeof(int);
948     CkAssert(size==1);
949     int *m=(int *) msg->getData();
951     CkPrintf("[%d] Max Memory Usage for all processors is %d MB\n", CkMyPe(), m[0]);
952     
953     instrumentedPhase* prevPhase = previousPhaseData();
954     if(prevPhase != NULL){
955       prevPhase->memoryUsageMB = m[0];
956     } else {
957       CkPrintf("No place to store memory usage");
958     }
960     alreadyRequestedMemoryUsage = false;
961     checkForShutdown();
962     delete msg;
963   }
966 //   /// Inform the control point framework that a named control point affects the priorities of some array  
967 //   void controlPointManager::associatePriorityArray(const char *name, int groupIdx){
968 //     CkPrintf("Associating control point \"%s\" affects priority of array id=%d\n", name, groupIdx );
969     
970 //     if(affectsPrioritiesArray.count(std::string(name)) > 0 ) {
971 //       affectsPrioritiesArray[std::string(name)].insert(groupIdx);
972 //     } else {
973 //       std::set<int> s;
974 //       s.insert(groupIdx);
975 //       affectsPrioritiesArray[std::string(name)] = s;
976 //     }
977     
978 // #if DEBUGPRINT   
979 //     std::map<std::string, std::set<int> >::iterator f;
980 //     for(f=affectsPrioritiesArray.begin(); f!=affectsPrioritiesArray.end();++f){
981 //       std::string name = f->first;
982 //       std::set<int> &vals = f->second;
983 //       cout << "Control point " << name << " affects arrays: ";
984 //       std::set<int>::iterator i;
985 //       for(i=vals.begin(); i!=vals.end();++i){
986 //      cout << *i << " ";
987 //       }
988 //       cout << endl;
989 //     }
990 // #endif
991     
992 //   }
993   
994 //   /// Inform the control point framework that a named control point affects the priority of some entry method
995 //   void controlPointManager::associatePriorityEntry(const char *name, int idx){
996 //     CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
998 //       if(affectsPrioritiesEP.count(std::string(name)) > 0 ) {
999 //       affectsPrioritiesEP[std::string(name)].insert(idx);
1000 //     } else {
1001 //       std::set<int> s;
1002 //       s.insert(idx);
1003 //       affectsPrioritiesEP[std::string(name)] = s;
1004 //     }
1005     
1006 // #if DEBUGPRINT
1007 //     std::map<std::string, std::set<int> >::iterator f;
1008 //     for(f=affectsPrioritiesEP.begin(); f!=affectsPrioritiesEP.end();++f){
1009 //       std::string name = f->first;
1010 //       std::set<int> &vals = f->second;
1011 //       cout << "Control point " << name << " affects EP: ";
1012 //       std::set<int>::iterator i;
1013 //       for(i=vals.begin(); i!=vals.end();++i){
1014 //      cout << *i << " ";
1015 //       }
1016 //       cout << endl;
1017 //     }
1018 // #endif
1021 //   }
1022   
1025 /// An interface callable by the application.
1026 void gotoNextPhase(){
1027   controlPointManagerProxy.ckLocalBranch()->gotoNextPhase();
1030 FLINKAGE void FTN_NAME(GOTONEXTPHASE,gotonextphase)()
1032   gotoNextPhase();
1036 /// A mainchare that is used just to create our controlPointManager group at startup
1037 class controlPointMain : public CBase_controlPointMain {
1038 public:
1039   controlPointMain(CkArgMsg* args){
1040 #if OLDRANDSEED
1041     struct timeval tp;
1042     gettimeofday(& tp, NULL);
1043     random_seed = (int)tp.tv_usec ^ (int)tp.tv_sec;
1044 #else
1045     double time = CmiWallTimer();
1046     int sec = (int)time;
1047     int usec = (int)((time - (double)sec)*1000000.0);
1048     random_seed =  sec ^ usec;
1049 #endif
1050     
1051     
1052     double period, periodms;
1053     bool haveSamplePeriod = CmiGetArgDoubleDesc(args->argv,"+CPSamplePeriod", &period,"The time between Control Point Framework samples (in seconds)");
1054     bool haveSamplePeriodMs = CmiGetArgDoubleDesc(args->argv,"+CPSamplePeriodMs", &periodms,"The time between Control Point Framework samples (in milliseconds)");
1055     if(haveSamplePeriod){
1056       CkPrintf("controlPointSamplePeriod = %lf sec\n", period);
1057       controlPointSamplePeriod =  (int)(period * 1000); /**< A readonly */
1058     } else if(haveSamplePeriodMs){
1059       CkPrintf("controlPointSamplePeriodMs = %lf ms\n", periodms);
1060       controlPointSamplePeriod = periodms; /**< A readonly */
1061     } else {
1062       controlPointSamplePeriod =  DEFAULT_CONTROL_POINT_SAMPLE_PERIOD;
1063     }
1065   
1066     
1067     whichTuningScheme = RandomSelection;
1070     if( CmiGetArgFlagDesc(args->argv,"+CPSchemeRandom","Randomly Select Control Point Values") ){
1071       whichTuningScheme = RandomSelection;
1072     } else if ( CmiGetArgFlagDesc(args->argv,"+CPExhaustiveSearch","Exhaustive Search of Control Point Values") ){
1073       whichTuningScheme = ExhaustiveSearch;
1074     } else if ( CmiGetArgFlagDesc(args->argv,"+CPAlwaysUseDefaults","Always Use The Provided Default Control Point Values") ){
1075       whichTuningScheme = AlwaysDefaults;
1076     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimulAnneal","Simulated Annealing Search of Control Point Values") ){
1077       whichTuningScheme = SimulatedAnnealing;
1078     } else if ( CmiGetArgFlagDesc(args->argv,"+CPCriticalPathPrio","Use Critical Path to adapt Control Point Values") ){
1079       whichTuningScheme = CriticalPathAutoPrioritization;
1080     } else if ( CmiGetArgFlagDesc(args->argv,"+CPBestKnown","Use BestKnown Timing for Control Point Values") ){
1081       whichTuningScheme = UseBestKnownTiming;
1082     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSteering","Use Steering to adjust Control Point Values") ){
1083       whichTuningScheme = UseSteering;
1084     } else if ( CmiGetArgFlagDesc(args->argv,"+CPMemoryAware", "Adjust control points to approach available memory") ){
1085       whichTuningScheme = MemoryAware;
1086     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimplex", "Nelder-Mead Simplex Algorithm") ){
1087       whichTuningScheme = Simplex;
1088     } else if ( CmiGetArgFlagDesc(args->argv,"+CPDivideConquer", "A divide and conquer program specific steering scheme") ){
1089       whichTuningScheme = DivideAndConquer;
1090     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriod", "Adjust the load balancing period (Constant Predictor)") ){
1091       whichTuningScheme = LDBPeriod;
1092     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodLinear", "Adjust the load balancing period (Linear Predictor)") ){
1093       whichTuningScheme = LDBPeriodLinear;
1094     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodQuadratic", "Adjust the load balancing period (Quadratic Predictor)") ){
1095       whichTuningScheme = LDBPeriodQuadratic;
1096     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodOptimal", "Adjust the load balancing period (Optimal Predictor)") ){
1097       whichTuningScheme = LDBPeriodOptimal;
1098     }
1100     char *defValStr = NULL;
1101     if( CmiGetArgStringDesc(args->argv, "+CPDefaultValues", &defValStr, "Specify the default control point values used for the first couple phases") ){
1102       CkPrintf("You specified default value string: %s\n", defValStr);
1103       
1104       // Parse the string, looking for commas
1105      
1107       char *tok = strtok(defValStr, ",");
1108       while (tok) {
1109         // split on the equal sign
1110         int len = strlen(tok);
1111         char *eqsign = strchr(tok, '=');
1112         if(eqsign != NULL && eqsign != tok){
1113           *eqsign = '\0';
1114           char *cpname = tok;
1115           std::string cpName(tok);
1116           char *cpDefVal = eqsign+1;      
1117           int v=-1;
1118           if(sscanf(cpDefVal, "%d", &v) == 1){
1119             CkPrintf("Command Line Argument Specifies that Control Point \"%s\" defaults to %d\n", cpname, v);
1120             CkAssert(CkMyPe() == 0); // can only access defaultControlPointValues on PE 0
1121             defaultControlPointValues[cpName] = v;
1122           }
1123         }
1124         tok = strtok(NULL, ",");
1125       }
1127     }
1129     shouldGatherAll = false;
1130     shouldGatherMemoryUsage = false;
1131     shouldGatherUtilization = false;
1132     
1133     if ( CmiGetArgFlagDesc(args->argv,"+CPGatherAll","Gather all types of measurements for each phase") ){
1134       shouldGatherAll = true;
1135     } else {
1136       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherMemoryUsage","Gather memory usage after each phase") ){
1137         shouldGatherMemoryUsage = true;
1138       }
1139       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherUtilization","Gather utilization & Idle time after each phase") ){
1140         shouldGatherUtilization = true;
1141       }
1142     }
1143     
1144     writeDataFileAtShutdown = false;   
1145     if( CmiGetArgFlagDesc(args->argv,"+CPSaveData","Save Control Point timings & configurations at completion") ){
1146       writeDataFileAtShutdown = true;
1147     }
1149     shouldFilterOutputData = true;
1150     if( CmiGetArgFlagDesc(args->argv,"+CPNoFilterData","Don't filter phases from output data") ){
1151       shouldFilterOutputData = false;
1152     }
1155    loadDataFileAtStartup = false;   
1156     if( CmiGetArgFlagDesc(args->argv,"+CPLoadData","Load Control Point timings & configurations at startup") ){
1157       loadDataFileAtStartup = true;
1158     }
1160     char *cpdatafile;
1161     if( CmiGetArgStringDesc(args->argv, "+CPDataFilename", &cpdatafile, "Specify control point data file to save/load") ){
1162       sprintf(CPDataFilename, "%s", cpdatafile);
1163     } else {
1164       sprintf(CPDataFilename, "controlPointData.txt");
1165     }
1168     controlPointManagerProxy = CProxy_controlPointManager::ckNew();
1170     delete args;
1171   }
1172   ~controlPointMain(){}
1175 /// An interface callable by the application.
1176 void registerCPChangeCallback(CkCallback cb, bool frameworkShouldAdvancePhase){
1177   CkAssert(CkMyPe() == 0);
1178   CkPrintf("Application has registered a control point change callback\n");
1179   controlPointManagerProxy.ckLocalBranch()->setCPCallback(cb, frameworkShouldAdvancePhase);
1182 /// An interface callable by the application.
1183 void setFrameworkAdvancePhase(bool frameworkShouldAdvancePhase){
1184   if(CkMyPe() == 0){
1185     CkPrintf("Application has specified that framework should %sadvance phase\n", frameworkShouldAdvancePhase?"":"not ");
1186     controlPointManagerProxy.ckLocalBranch()->setFrameworkAdvancePhase(frameworkShouldAdvancePhase);
1187   }
1190 /// An interface callable by the application.
1191 void registerControlPointTiming(double time){
1192   CkAssert(CkMyPe() == 0);
1193 #if DEBUGPRINT>0
1194   CkPrintf("Program registering its own timing with registerControlPointTiming(time=%lf)\n", time);
1195 #endif
1196   controlPointManagerProxy.ckLocalBranch()->setTiming(time);
1199 /// An interface callable by the application.
1200 void controlPointTimingStamp() {
1201   CkAssert(CkMyPe() == 0);
1202 #if DEBUGPRINT>0
1203   CkPrintf("Program registering its own timing with controlPointTimingStamp()\n", time);
1204 #endif
1205   
1206   static double prev_time = 0.0;
1207   double t = CmiWallTimer();
1208   double duration = t - prev_time;
1209   prev_time = t;
1210     
1211   controlPointManagerProxy.ckLocalBranch()->setTiming(duration);
1214 FLINKAGE void FTN_NAME(CONTROLPOINTTIMINGSTAMP,controlpointtimingstamp)()
1216   controlPointTimingStamp();
1220 FLINKAGE void FTN_NAME(SETFRAMEWORKADVANCEPHASEF,setframeworkadvancephasef)(CMK_TYPEDEF_INT4 *value)
1222   setFrameworkAdvancePhase(*value);
1228 /// Shutdown the control point framework, writing data to disk if necessary
1229 extern "C" void controlPointShutdown(){
1230   if(CkMyPe() == 0){
1232     if (!controlPointManagerProxy.ckGetGroupID().isZero()) {
1233       // wait for gathering of idle time & memory usage to complete
1234       controlPointManagerProxy.ckLocalBranch()->exitIfReady();
1235     } else {
1236       CkContinueExit();
1237     }
1238   }
1241 /// A function called at startup on each node to register controlPointShutdown() to be called at CkExit()
1242 void controlPointInitNode(){
1243   registerExitFn(controlPointShutdown);
1246 /// Called periodically to allow control point framework to do things periodically
1247 static void periodicProcessControlPoints(void* ptr, double currWallTime){
1248 #ifdef DEBUGPRINT
1249   CkPrintf("[%d] periodicProcessControlPoints()\n", CkMyPe());
1250 #endif
1251   controlPointManagerProxy.ckLocalBranch()->processControlPoints();
1252   CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
1259 /// Determine a control point value using some optimization scheme (use max known, simmulated annealling, 
1260 /// user observed characteristic to adapt specific control point values.
1261 /// This function must return valid values for newControlPoints.
1262 void controlPointManager::generatePlan() {
1263   const double startGenerateTime = CmiWallTimer();
1264   const int phase_id = this->phase_id;
1265   const int effective_phase = allData.phases.size();
1267   // Only generate a plan once per phase
1268   if(generatedPlanForStep == phase_id) 
1269     return;
1270   generatedPlanForStep = phase_id;
1272   CkPrintf("Generating Plan for phase %d\n", phase_id); 
1273   printTuningScheme();
1275   // By default lets put the previous phase data into newControlPoints
1276   instrumentedPhase *prevPhase = previousPhaseData();
1277   for(std::map<std::string, int >::const_iterator cpsIter=prevPhase->controlPoints.begin(); cpsIter != prevPhase->controlPoints.end(); ++cpsIter){
1278           newControlPoints[cpsIter->first] = cpsIter->second;
1279   }
1282   if( whichTuningScheme == RandomSelection ){
1283     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1284     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1285       const std::string &name = cpsIter->first;
1286       const std::pair<int,int> &bounds = cpsIter->second;
1287       const int lb = bounds.first;
1288       const int ub = bounds.second;
1289       newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
1290     }
1291   } else if( whichTuningScheme == CriticalPathAutoPrioritization) {
1292     // -----------------------------------------------------------
1293     //  USE CRITICAL PATH TO ADJUST PRIORITIES
1294     //   
1295     // This scheme will return the median value for the range 
1296     // early on, and then will transition over to the new control points
1297     // determined by the critical path adapting code
1299     // This won't work until the periodic function is fixed up a bit
1301     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1302     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1303       const std::string &name = cpsIter->first;
1304       const std::pair<int,int> &bounds = cpsIter->second;
1305       const int lb = bounds.first;
1306       const int ub = bounds.second;
1307       newControlPoints[name] =  (lb+ub)/2;
1308     }
1310   } else if ( whichTuningScheme == MemoryAware ) {
1312     // -----------------------------------------------------------
1313     //  STEERING BASED ON MEMORY USAGE
1315     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1316     instrumentedPhase *prevPhase = previousPhaseData();
1318     if(phase_id%2 == 0){
1319       CkPrintf("Steering (memory based) based on 2 phases ago:\n");
1320       twoAgoPhase->print();
1321       CkPrintf("\n");
1322       fflush(stdout);
1323       
1324       // See if memory usage is low:
1325       double memUsage = twoAgoPhase->memoryUsageMB;
1326       CkPrintf("Steering (memory based) encountered memory usage of (%f MB)\n", memUsage);
1327       fflush(stdout);
1328       if(memUsage < 1100.0 && memUsage > 0.0){ // Kraken has about 16GB and 12 cores per node
1329         CkPrintf("Steering (memory based) encountered low memory usage (%f) < 1200 \n", memUsage);
1330         CkPrintf("Steering (memory based) controlPointSpace.size()=\n", controlPointSpace.size());
1331         
1332         // Initialize plan to be the values from two phases ago (later we'll adjust this)
1333         newControlPoints = twoAgoPhase->controlPoints;
1336         CkPrintf("Steering (memory based) initialized plan\n");
1337         fflush(stdout);
1339         // look for a possible control point knob to turn
1340         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["MemoryConsumption"];
1341         
1342         // FIXME: assume for now that we just have one control point with the effect, and one direction to turn it
1343         bool found = false;
1344         std::string cpName;
1345         std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1346         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1347         for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1348           cpName = iter->first;
1349           info = &iter->second;
1350           found = true;
1351           break;
1352         }
1354         // Adapt the control point value
1355         if(found){
1356           CkPrintf("Steering found knob to turn that should increase memory consumption\n");
1357           fflush(stdout);
1358           const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1359           const int maxValue = controlPointSpace[cpName].second;
1360           
1361           if(twoAgoValue+1 <= maxValue){
1362             newControlPoints[cpName] = twoAgoValue+1; // increase from two phases back
1363           }
1364         }
1365         
1366       }
1367     }
1369     CkPrintf("Steering (memory based) done for this phase\n");
1370     fflush(stdout);
1372   } else if ( whichTuningScheme == UseBestKnownTiming ) {
1374     // -----------------------------------------------------------
1375     //  USE BEST KNOWN TIME  
1376     static bool firstTime = true;
1377     if(firstTime){
1378       firstTime = false;
1379       instrumentedPhase *best = allData.findBest(); 
1380       CkPrintf("Best known phase is:\n");
1381       best->print();
1382       CkPrintf("\n");
1383       
1384       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1385       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
1386         const std::string &name = cpsIter->first;
1387         newControlPoints[name] =  best->controlPoints[name];
1388       }
1389     }
1390   } else if( whichTuningScheme == LDBPeriod) {
1391     // Assume this is used in this manner:
1392     //  1) go to next phase
1393     //  2) request control point
1394     //  3) load balancing
1395     //  4) computation
1396     
1397     
1398     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1399     instrumentedPhase *prevPhase = previousPhaseData();
1400     
1401     
1402     const std::vector<double> &times = twoAgoPhase->times;
1403     const int oldNumTimings = times.size();
1406     const std::vector<double> &timesNew = prevPhase->times;
1407     const int newNumTimings = timesNew.size();
1410     if(oldNumTimings > 4 && newNumTimings > 4){
1411       
1412       // Build model of execution time based on two phases ago
1413       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1414       
1415       double oldSum = 0;
1416       
1417       for(int i=2; i<oldNumTimings; i++){
1418         oldSum += times[i];
1419       }
1420       
1421       const double oldAvg = oldSum / (oldNumTimings-2);
1422       
1423       
1424       
1425       
1426       // Computed as an integral from 0.5 to the number of bins of the same size as two ago phase + 0.5
1427       const double expectedTotalTime = oldAvg * newNumTimings;
1428       
1429       
1430       // Measure actual time
1431       double newSum = 0.0;
1432       for(int i=2; i<newNumTimings; ++i){
1433         newSum += timesNew[i];
1434       }
1435       
1436       const double newAvg = newSum / (newNumTimings-2);
1437       const double newTotalTimeExcludingLBSteps = newAvg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1438       
1439       const double benefit = expectedTotalTime - newTotalTimeExcludingLBSteps;
1440       
1441       // Determine load balance cost
1442       const double lbcost = timesNew[0] + timesNew[1] - 2.0*newAvg;
1443       
1444       const double benefitAfterLB = benefit - lbcost;
1445     
1446     
1447       // Determine whether LB cost outweights the estimated benefit
1448       CkPrintf("Constant Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, newTotalTimeExcludingLBSteps);
1449     
1450     
1451       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1452       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1453         const std::string &name = cpsIter->first;
1454         const std::pair<int,int> &bounds = cpsIter->second;
1455         const int lb = bounds.first;
1456         const int ub = bounds.second;
1457       
1458         if(benefitAfterLB > 0){
1459           CkPrintf("Constant Model: Beneficial LB\n");
1460           int newval = newControlPoints[name] / 2;
1461           if(newval > lb)
1462             newControlPoints[name] = newval;
1463           else 
1464             newControlPoints[name] = lb;
1465         } else {
1466           CkPrintf("Constant Model: Detrimental LB\n");
1467           int newval = newControlPoints[name] * 2;
1468           if(newval < ub)
1469             newControlPoints[name] = newval;
1470           else
1471             newControlPoints[name] = ub;
1472         }
1473       }
1474     }
1475     
1476     
1477   }  else if( whichTuningScheme == LDBPeriodLinear) {
1478     // Assume this is used in this manner:
1479     //  1) go to next phase
1480     //  2) request control point
1481     //  3) load balancing
1482     //  4) computation
1485     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1486     instrumentedPhase *prevPhase = previousPhaseData();
1487     
1488     const std::vector<double> &times = twoAgoPhase->times;
1489     const int oldNumTimings = times.size();
1491     const std::vector<double> &timesNew = prevPhase->times;
1492     const int newNumTimings = timesNew.size();
1493     
1495     if(oldNumTimings > 4 && newNumTimings > 4){
1497       // Build model of execution time based on two phases ago
1498       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1499       const int b1 = 2 + (oldNumTimings-2)/2;
1500       double s1 = 0;
1501       double s2 = 0;
1502     
1503       const double ldbStepsTime = times[0] + times[1];
1504     
1505       for(int i=2; i<b1; i++){
1506         s1 += times[i];
1507       }
1508       for(int i=b1; i<oldNumTimings; i++){
1509         s2 += times[i];
1510       }
1511       
1512       
1513       // Compute the estimated time for the last phase's data
1514     
1515       const double a1 = s1 / (double)(b1-2);
1516       const double a2 = s2 / (double)(oldNumTimings-b1);
1517       const double aold = (a1+a2)/2.0;
1519       const double expectedTotalTime = newNumTimings*(aold+(oldNumTimings+newNumTimings)*(a2-a1)/oldNumTimings);
1520         
1521     
1522       // Measure actual time
1523       double sum = 0.0;
1524       for(int i=2; i<newNumTimings; ++i){
1525         sum += timesNew[i];
1526       }
1528       const double avg = sum / ((double)(newNumTimings-2));
1529       const double actualTotalTime = avg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1531       const double benefit = expectedTotalTime - actualTotalTime;
1533       // Determine load balance cost
1534       const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
1536       const double benefitAfterLB = benefit - lbcost;
1538     
1539       // Determine whether LB cost outweights the estimated benefit
1540       CkPrintf("Linear Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, actualTotalTime);
1541     
1542     
1543     
1544       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1545       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1546         const std::string &name = cpsIter->first;
1547         const std::pair<int,int> &bounds = cpsIter->second;
1548         const int lb = bounds.first;
1549         const int ub = bounds.second;
1550       
1551         if(benefitAfterLB > 0){
1552           CkPrintf("Linear Model: Beneficial LB\n");
1553           int newval = newControlPoints[name] / 2;
1554           if(newval > lb)
1555             newControlPoints[name] = newval;
1556           else 
1557             newControlPoints[name] = lb;
1558         } else {
1559           CkPrintf("Linear Model: Detrimental LB\n");
1560           int newval = newControlPoints[name] * 2;
1561           if(newval < ub)
1562             newControlPoints[name] = newval;
1563           else 
1564             newControlPoints[name] = ub;
1565         }
1566       }
1567     }
1569   }
1571   else if( whichTuningScheme == LDBPeriodQuadratic) {
1572     // Assume this is used in this manner:
1573     //  1) go to next phase
1574     //  2) request control point
1575     //  3) load balancing
1576     //  4) computation
1579     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1580     instrumentedPhase *prevPhase = previousPhaseData();
1581         
1582     const std::vector<double> &times = twoAgoPhase->times;
1583     const int oldNumTimings = times.size();
1585     const std::vector<double> &timesNew = prevPhase->times;
1586     const int newNumTimings = timesNew.size();
1588     
1589     if(oldNumTimings > 4 && newNumTimings > 4){
1592       // Build model of execution time based on two phases ago
1593       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1594       const int b1 = 2 + (oldNumTimings-2)/3;
1595       const int b2 = 2 + (2*(oldNumTimings-2))/3;
1596       double s1 = 0;
1597       double s2 = 0;
1598       double s3 = 0;
1600       const double ldbStepsTime = times[0] + times[1];
1601     
1602       for(int i=2; i<b1; i++){
1603         s1 += times[i];
1604       }
1605       for(int i=b1; i<b2; i++){
1606         s2 += times[i];
1607       }
1608       for(int i=b2; i<oldNumTimings; i++){
1609         s3 += times[i];
1610       }
1612     
1613       // Compute the estimated time for the last phase's data
1614     
1615       const double a1 = s1 / (double)(b1-2);
1616       const double a2 = s2 / (double)(b2-b1);
1617       const double a3 = s3 / (double)(oldNumTimings-b2);
1618     
1619       const double a = (a1-2.0*a2+a3)/2.0;
1620       const double b = (a1-4.0*a2+3.0*a3)/2.0;
1621       const double c = a3;
1622     
1623       // Computed as an integral from 0.5 to the number of bins of the same size as two ago phase + 0.5
1624       const double x1 = (double)newNumTimings / (double)oldNumTimings * 3.0 + 0.5;  // should be 3.5 if ratio is same
1625       const double x2 = 0.5;
1626       const double expectedTotalTime = a*x1*x1*x1/3.0 + b*x1*x1/2.0 + c*x1 - (a*x2*x2*x2/3.0 + b*x2*x2/2.0 + c*x2);
1627    
1628     
1629       // Measure actual time
1630       double sum = 0.0;
1631       for(int i=2; i<newNumTimings; ++i){
1632         sum += timesNew[i];
1633       }
1635       const double avg = sum / ((double)(newNumTimings-2));
1636       const double actualTotalTime = avg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1638       const double benefit = expectedTotalTime - actualTotalTime;
1640       // Determine load balance cost
1641       const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
1643       const double benefitAfterLB = benefit - lbcost;
1645     
1646       // Determine whether LB cost outweights the estimated benefit
1647       CkPrintf("Quadratic Model: lbcost = %f, expected = %f, actual = %f, x1=%f, a1=%f, a2=%f, a3=%f, b1=%d, b2=%d, a=%f, b=%f, c=%f\n", lbcost, expectedTotalTime, actualTotalTime, x1, a1, a2, a3, b1, b2, a, b, c);
1648     
1649     
1650     
1651       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1652       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1653         const std::string &name = cpsIter->first;
1654         const std::pair<int,int> &bounds = cpsIter->second;
1655         const int lb = bounds.first;
1656         const int ub = bounds.second;
1657       
1658         if(benefitAfterLB > 0){
1659           CkPrintf("QuadraticModel: Beneficial LB\n");
1660           int newval = newControlPoints[name] / 2;
1661           if(newval > lb)
1662             newControlPoints[name] = newval;
1663           else 
1664             newControlPoints[name] = lb;
1665         } else {
1666           CkPrintf("QuadraticModel: Detrimental LB\n");
1667           int newval = newControlPoints[name] * 2;
1668           if(newval < ub)
1669             newControlPoints[name] = newval;
1670           else 
1671             newControlPoints[name] = ub;
1672         }
1673       
1674       }
1675     }
1676     
1678   }  else if( whichTuningScheme == LDBPeriodOptimal) {
1679     // Assume this is used in this manner:
1680     //  1) go to next phase
1681     //  2) request control point
1682     //  3) load balancing
1683     //  4) computation
1687     instrumentedPhase *prevPhase = previousPhaseData();
1688     
1689     const std::vector<double> &times = prevPhase->times;
1690     const int numTimings = times.size();
1691     
1692     if( numTimings > 4){
1694       const int b1 = 2 + (numTimings-2)/2;
1695       double s1 = 0;
1696       double s2 = 0;
1697     
1698     
1699       for(int i=2; i<b1; i++){
1700         s1 += times[i];
1701       }
1702       for(int i=b1; i<numTimings; i++){
1703         s2 += times[i];
1704       }
1705       
1706     
1707       const double a1 = s1 / (double)(b1-2);
1708       const double a2 = s2 / (double)(numTimings-b1);
1709       const double avg = (a1+a1) / 2.0;
1711       const double m = (a2-a1)/((double)(numTimings-2)/2.0); // An approximation of the slope of the execution times    
1713       const double ldbStepsTime = times[0] + times[1];
1714       const double lbcost = ldbStepsTime - 2.0*avg; // An approximation of the 
1715       
1717       int newval = roundDouble(sqrt(2.0*lbcost/m));
1718       
1719       // We don't really know what to do if m<=0, so we'll just double the period
1720       if(m<=0)
1721         newval = 2*numTimings;     
1722       
1723       CkPrintf("Optimal Model (double when negative): lbcost = %f, m = %f, new ldbperiod should be %d\n", lbcost, m, newval);    
1724     
1725     
1726       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1727       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1728         // TODO: lookup only control points that are relevant instead of all of them
1729         const std::string &name = cpsIter->first;
1730         const std::pair<int,int> &bounds = cpsIter->second;
1731         const int lb = bounds.first;
1732         const int ub = bounds.second;
1733         
1734         if(newval < lb){
1735           newControlPoints[name] = lb;
1736         } else if(newval > ub){
1737           newControlPoints[name] = ub;
1738         } else {
1739           newControlPoints[name] = newval;
1740         } 
1741         
1742       }
1743       
1744       
1745     }
1746     
1749   } else if ( whichTuningScheme == UseSteering ) {
1750           // -----------------------------------------------------------
1751           //  STEERING BASED ON KNOWLEDGE
1753           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1754           // plans are only generated after 3 phases
1756           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1757           instrumentedPhase *prevPhase = previousPhaseData();
1759           if(phase_id%4 == 0){
1760                   CkPrintf("Steering based on 2 phases ago:\n");
1761                   twoAgoPhase->print();
1762                   CkPrintf("\n");
1763                   fflush(stdout);
1765                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1768                   // ========================================= Concurrency =============================================
1769                   // See if idle time is high:
1770                   {
1771                           double idleTime = twoAgoPhase->idleTime.avg;
1772                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1773                           fflush(stdout);
1774                           if(idleTime > 0.10){
1775                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1776                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1778                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
1780                                   bool found = false;
1781                                   std::string cpName;
1782                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1783                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1784                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1785                                           cpName = iter->first;
1786                                           info = &iter->second;
1788                                           // Initialize a new plan based on two phases ago
1789                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1791                                           CkPrintf("Steering found knob to turn\n");
1792                                           fflush(stdout);
1794                                           if(info->first == ControlPoint::EFF_INC){
1795                                                   const int maxValue = controlPointSpace[cpName].second;
1796                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1797                                                   if(twoAgoValue+1 <= maxValue){
1798                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1799                                                   }
1800                                           } else {
1801                                                   const int minValue = controlPointSpace[cpName].second;
1802                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1803                                                   if(twoAgoValue-1 >= minValue){
1804                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1805                                                   }
1806                                           }
1808                                           possibleNextStepPlans.push_back(aNewPlan);
1810                                   }
1811                           }
1812                   }
1814                   // ========================================= Grain Size =============================================
1815                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1816                   {
1817                           double overheadTime = twoAgoPhase->overheadTime.avg;
1818                           CkPrintf("Steering encountered overhead time (%f)\n", overheadTime);
1819                           fflush(stdout);
1820                           if(overheadTime > 0.10){
1821                                   CkPrintf("Steering encountered high overhead time(%f) > 10%%\n", overheadTime);
1822                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1824                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GrainSize"];
1826                                   bool found = false;
1827                                   std::string cpName;
1828                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1829                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1830                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1831                                           cpName = iter->first;
1832                                           info = &iter->second;
1834                                           // Initialize a new plan based on two phases ago
1835                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1839                                           CkPrintf("Steering found knob to turn\n");
1840                                           fflush(stdout);
1842                                           if(info->first == ControlPoint::EFF_INC){
1843                                                   const int maxValue = controlPointSpace[cpName].second;
1844                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1845                                                   if(twoAgoValue+1 <= maxValue){
1846                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1847                                                   }
1848                                           } else {
1849                                                   const int minValue = controlPointSpace[cpName].second;
1850                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1851                                                   if(twoAgoValue-1 >= minValue){
1852                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1853                                                   }
1854                                           }
1856                                           possibleNextStepPlans.push_back(aNewPlan);
1858                                   }
1860                           }
1861                   }
1862                   // ========================================= GPU Offload =============================================
1863                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1864                   {
1865                           double idleTime = twoAgoPhase->idleTime.avg;
1866                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1867                           fflush(stdout);
1868                           if(idleTime > 0.10){
1869                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1870                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1872                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GPUOffloadedWork"];
1874                                   bool found = false;
1875                                   std::string cpName;
1876                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1877                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1878                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1879                                           cpName = iter->first;
1880                                           info = &iter->second;
1882                                           // Initialize a new plan based on two phases ago
1883                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1886                                           CkPrintf("Steering found knob to turn\n");
1887                                           fflush(stdout);
1889                                           if(info->first == ControlPoint::EFF_DEC){
1890                                                   const int maxValue = controlPointSpace[cpName].second;
1891                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1892                                                   if(twoAgoValue+1 <= maxValue){
1893                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1894                                                   }
1895                                           } else {
1896                                                   const int minValue = controlPointSpace[cpName].second;
1897                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1898                                                   if(twoAgoValue-1 >= minValue){
1899                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1900                                                   }
1901                                           }
1903                                           possibleNextStepPlans.push_back(aNewPlan);
1905                                   }
1907                           }
1908                   }
1910                   // ========================================= Done =============================================
1913                   if(possibleNextStepPlans.size() > 0){
1914                           newControlPoints = possibleNextStepPlans[0];
1915                   }
1918                   CkPrintf("Steering done for this phase\n");
1919                   fflush(stdout);
1921           }  else {
1922                   // This is not a phase to do steering, so stick with previously used values (one phase ago)
1923                   CkPrintf("not a phase to do steering, so stick with previously planned values (one phase ago)\n");
1924                   fflush(stdout);
1925           }
1929   } else if( whichTuningScheme == SimulatedAnnealing ) {
1931     // -----------------------------------------------------------
1932     //  SIMULATED ANNEALING
1933     //  Simulated Annealing style hill climbing method
1934     //
1935     //  Find the best search space configuration, and try something
1936     //  nearby it, with a radius decreasing as phases increase
1938     CkPrintf("Finding best phase\n");
1939     instrumentedPhase *bestPhase = allData.findBest();  
1940     CkPrintf("best found:\n"); 
1941     bestPhase->print(); 
1942     CkPrintf("\n"); 
1943     
1945     const int convergeByPhase = 100;
1946     // Determine from 0.0 to 1.0 how far along we are
1947     const double progress = (double) min(effective_phase, convergeByPhase) / (double)convergeByPhase;
1949     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1950     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1951       const std::string &name = cpsIter->first;
1952       const std::pair<int,int> &bounds = cpsIter->second;
1953       const int minValue = bounds.first;
1954       const int maxValue = bounds.second;
1955       
1956       const int before = bestPhase->controlPoints[name];   
1957   
1958       const int range = (int)((maxValue-minValue+1)*(1.0-progress));
1960       int high = min(before+range, maxValue);
1961       int low = max(before-range, minValue);
1962       
1963       newControlPoints[name] = low;
1964       if(high-low > 0){
1965         newControlPoints[name] += randInt(high-low, name.c_str(), phase_id); 
1966       } 
1967       
1968     }
1970   } else if ( whichTuningScheme == DivideAndConquer ) {
1972           // -----------------------------------------------------------
1973           //  STEERING FOR Divide & Conquer Programs
1974           //  This scheme uses no timing information. It just tries to converge to the point where idle time = overhead time.
1975           //  For a Fibonacci example, this appears to be a good heurstic for finding the best performing program.
1976           //  The scheme can be applied within a single program tree computation, if the tree is being traversed depth first.
1978           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1979           // plans are only generated after 3 phases
1981           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1982           instrumentedPhase *prevPhase = previousPhaseData();
1984           if(phase_id%4 == 0){
1985                   CkPrintf("Divide & Conquer Steering based on 2 phases ago:\n");
1986                   twoAgoPhase->print();
1987                   CkPrintf("\n");
1988                   fflush(stdout);
1990                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1993                   // ========================================= Concurrency =============================================
1994                   // See if idle time is high:
1995                   {
1996                           double idleTime = twoAgoPhase->idleTime.avg;
1997                           double overheadTime = twoAgoPhase->overheadTime.avg;
2000                           CkPrintf("Divide & Conquer Steering encountered overhead time (%f) idle time (%f)\n",overheadTime, idleTime);
2001                           fflush(stdout);
2002                           if(idleTime+overheadTime > 0.10){
2003                                   CkPrintf("Steering encountered high idle+overheadTime time(%f) > 10%%\n", idleTime+overheadTime);
2004                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
2006                                   int direction = -1;
2007                                   if (idleTime>overheadTime){
2008                                           // We need to decrease the grain size, or increase the available concurrency
2009                                           direction = 1;
2010                                   }
2012                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
2014                                   bool found = false;
2015                                   std::string cpName;
2016                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
2017                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
2018                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
2019                                           cpName = iter->first;
2020                                           info = &iter->second;
2021                                           
2022                                           // Initialize a new plan based on two phases ago
2023                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
2025                                           CkPrintf("Divide & Conquer Steering found knob to turn\n");
2026                                           fflush(stdout);
2028                                           int adjustByAmount = (int)(myAbs(idleTime-overheadTime)*5.0);
2029                                           
2030                                           if(info->first == ControlPoint::EFF_INC){
2031                                             const int minValue = controlPointSpace[cpName].first;
2032                                             const int maxValue = controlPointSpace[cpName].second;
2033                                             const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
2034                                             const int newVal = closestInRange(twoAgoValue+adjustByAmount*direction, minValue, maxValue);                                          
2035                                             CkAssert(newVal <= maxValue && newVal >= minValue);
2036                                             aNewPlan[cpName] = newVal;
2037                                           } else {
2038                                             const int minValue = controlPointSpace[cpName].first;
2039                                             const int maxValue = controlPointSpace[cpName].second;
2040                                             const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
2041                                             const int newVal = closestInRange(twoAgoValue-adjustByAmount*direction, minValue, maxValue);
2042                                             CkAssert(newVal <= maxValue && newVal >= minValue);
2043                                             aNewPlan[cpName] = newVal;
2044                                           }
2045                                           
2046                                           possibleNextStepPlans.push_back(aNewPlan);
2047                                   }
2048                           }
2049                   }
2051                   if(possibleNextStepPlans.size() > 0){
2052                     CkPrintf("Divide & Conquer Steering found %d possible next phases, using first one\n", possibleNextStepPlans.size());
2053                     newControlPoints = possibleNextStepPlans[0];
2054                   } else {
2055                     CkPrintf("Divide & Conquer Steering found no possible next phases\n");
2056                   }
2057           }
2059   } else if( whichTuningScheme == Simplex ) {
2061           // -----------------------------------------------------------
2062           //  Nelder Mead Simplex Algorithm
2063           //
2064           //  A scheme that takes a simplex (n+1 points) and moves it
2065           //  toward the minimum, eventually converging there.
2067           s.adapt(controlPointSpace, newControlPoints, phase_id, allData);
2069   } else if( whichTuningScheme == ExhaustiveSearch ) {
2070     
2071     // -----------------------------------------------------------
2072     // EXHAUSTIVE SEARCH
2073    
2074     int numDimensions = controlPointSpace.size();
2075     CkAssert(numDimensions > 0);
2076   
2077     vector<int> lowerBounds(numDimensions);
2078     vector<int> upperBounds(numDimensions); 
2079   
2080     int d=0;
2081     std::map<std::string, pair<int,int> >::iterator iter;
2082     for(iter = controlPointSpace.begin(); iter != controlPointSpace.end(); iter++){
2083       //    CkPrintf("Examining dimension %d\n", d);
2084       lowerBounds[d] = iter->second.first;
2085       upperBounds[d] = iter->second.second;
2086       d++;
2087     }
2088    
2089     // get names for each dimension (control point)
2090     vector<std::string> names(numDimensions);
2091     d=0;
2092     for(std::map<std::string, pair<int,int> >::iterator niter=controlPointSpace.begin(); niter!=controlPointSpace.end(); niter++){
2093       names[d] = niter->first;
2094       d++;
2095     }
2096   
2097   
2098     // Create the first possible configuration
2099     vector<int> config = lowerBounds;
2100     config.push_back(0);
2101   
2102     // Increment until finding an unused configuration
2103     allData.cleanupNames(); // put -1 values in for any control points missing
2104     std::vector<instrumentedPhase*> &phases = allData.phases;     
2106     while(true){
2107     
2108       std::vector<instrumentedPhase*>::iterator piter; 
2109       bool testedConfiguration = false; 
2110       for(piter = phases.begin(); piter != phases.end(); piter++){ 
2111       
2112         // Test if the configuration matches this phase
2113         bool match = true;
2114         for(int j=0;j<numDimensions;j++){
2115           match &= (*piter)->controlPoints[names[j]] == config[j];
2116         }
2117       
2118         if(match){
2119           testedConfiguration = true; 
2120           break;
2121         } 
2122       } 
2123     
2124       if(testedConfiguration == false){ 
2125         break;
2126       } 
2127     
2128       // go to next configuration
2129       config[0] ++;
2130       // Propagate "carrys"
2131       for(int i=0;i<numDimensions;i++){
2132         if(config[i] > upperBounds[i]){
2133           config[i+1]++;
2134           config[i] = lowerBounds[i];
2135         }
2136       }
2137       // check if we have exhausted all possible values
2138       if(config[numDimensions] > 0){
2139         break;
2140       }
2141        
2142     }
2143   
2144     if(config[numDimensions] > 0){
2145       for(int i=0;i<numDimensions;i++){ 
2146         config[i]= (lowerBounds[i]+upperBounds[i]) / 2;
2147       }
2148     }
2150     // results are now in config[i]
2151     
2152     for(int i=0; i<numDimensions; i++){
2153       newControlPoints[names[i]] = config[i];
2154       CkPrintf("Exhaustive search chose:   %s   -> %d\n", names[i].c_str(), config[i]);
2155     }
2158   } else {
2159     CkAbort("Some Control Point tuning strategy must be enabled.\n");
2160   }
2163   const double endGenerateTime = CmiWallTimer();
2164   
2165   CkPrintf("Time to generate next control point configuration(s): %f sec\n", (endGenerateTime - startGenerateTime) );
2166   
2173 /// Get control point value from range of integers [lb,ub]
2174 int controlPoint(const char *name, int lb, int ub){
2175   instrumentedPhase *thisPhaseData = controlPointManagerProxy.ckLocalBranch()->currentPhaseData();
2176   const int phase_id = controlPointManagerProxy.ckLocalBranch()->phase_id;
2177   std::map<std::string, pair<int,int> > &controlPointSpace = controlPointManagerProxy.ckLocalBranch()->controlPointSpace;
2178   int result;
2180   // if we already have control point values for phase, return them
2181   if( thisPhaseData->controlPoints.count(std::string(name))>0 && thisPhaseData->controlPoints[std::string(name)]>=0 ){
2182     CkPrintf("Already have control point values for phase. %s -> %d\n", name, (int)(thisPhaseData->controlPoints[std::string(name)]) );
2183     return thisPhaseData->controlPoints[std::string(name)];
2184   }
2185   
2187   if( phase_id < 4 || whichTuningScheme == AlwaysDefaults){
2188     // For the first few phases, just use the lower bound, or the default if one was provided 
2189     // This ensures that the ranges for all the control points are known before we do anything fancy
2190     result = lb;
2192     if(defaultControlPointValues.count(std::string(name)) > 0){
2193       int v = defaultControlPointValues[std::string(name)];
2194       CkPrintf("Startup phase using default value of %d for  \"%s\"\n", v, name);   
2195       result = v;
2196     }
2198   } else if(controlPointSpace.count(std::string(name)) == 0){
2199     // if this is the first time we've seen the CP, then return the lower bound
2200     result = lb;
2201     
2202   }  else {
2203     // Generate a plan one time for each phase
2204     controlPointManagerProxy.ckLocalBranch()->generatePlan();
2205     
2206     // Use the planned value:
2207     result = controlPointManagerProxy.ckLocalBranch()->newControlPoints[std::string(name)];
2208     
2209   }
2211   if(!isInRange(result,ub,lb)){
2212     std::cerr << "control point = " << result << " is out of range: " << lb << " " << ub << std::endl;
2213     fflush(stdout);
2214     fflush(stderr);
2215   }
2216   CkAssert(isInRange(result,ub,lb));
2217   thisPhaseData->controlPoints[std::string(name)] = result; // was insert() 
2219   controlPointSpace.insert(std::make_pair(std::string(name),std::make_pair(lb,ub))); 
2221   CkPrintf("Control Point \"%s\" for phase %d is: %d\n", name, phase_id, result);
2222   //  thisPhaseData->print();
2223   
2224   return result;
2228 FLINKAGE int FTN_NAME(CONTROLPOINT, controlpoint)(CMK_TYPEDEF_INT4 *lb, CMK_TYPEDEF_INT4 *ub){
2229   CkAssert(CkMyPe() == 0);
2230   return controlPoint("FortranCP", *lb, *ub);
2236 /// Inform the control point framework that a named control point affects the priorities of some array  
2237 // void controlPointPriorityArray(const char *name, CProxy_ArrayBase &arraybase){
2238 //   CkGroupID aid = arraybase.ckGetArrayID();
2239 //   int groupIdx = aid.idx;
2240 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityArray(name, groupIdx);
2241 //   //  CkPrintf("Associating control point \"%s\" with array id=%d\n", name, groupIdx );
2242 // }
2245 // /// Inform the control point framework that a named control point affects the priorities of some entry method  
2246 // void controlPointPriorityEntry(const char *name, int idx){
2247 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityEntry(name, idx);
2248 //   //  CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
2249 // }
2255 /** Determine the next configuration to try using the Nelder Mead Simplex Algorithm.
2257     This function decomposes the algorithm into a state machine that allows it to
2258     evaluate one or more configurations through subsequent clls to this function.
2259     The state diagram is pictured in the NelderMeadStateDiagram.pdf diagram.
2261     At one point in the algorithm, n+1 parameter configurations must be evaluated,
2262     so a list of them will be created and they will be evaluated, one per call.
2264     Currently there is no stopping criteria, but the simplex ought to contract down
2265     to a few close configurations, and hence not much change will happen after this 
2266     point.
2268  */
2269 void simplexScheme::adapt(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2271         if(useBestKnown){
2272                 CkPrintf("Simplex Tuning: Simplex algorithm is done, using best known phase:\n");
2273                 return;
2274         }
2277         if(firstSimplexPhase< 0){
2278                 firstSimplexPhase = allData.phases.size()-1;
2279                 CkPrintf("First simplex phase is %d\n", firstSimplexPhase);
2280         }
2282         int n = controlPointSpace.size();
2284         CkAssert(n>=2);
2287         if(simplexState == beginning){
2288                 // First we evaluate n+1 random points, then we go to a different state
2289                 if(allData.phases.size() < firstSimplexPhase + n+2      ){
2290                         CkPrintf("Simplex Tuning: chose random configuration\n");
2292                         // Choose random values from the middle third of the range in each dimension
2293                         std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2294                         for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2295                                 const std::string &name = cpsIter->first;
2296                                 const std::pair<int,int> &bounds = cpsIter->second;
2297                                 const int lb = bounds.first;
2298                                 const int ub = bounds.second;
2299                                 newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
2300                         }
2301                 } else {
2302                         // Set initial simplex:
2303                         for(int i=0; i<n+1; i++){
2304                                 simplexIndices.insert(firstSimplexPhase+i);
2305                         }
2306                         CkAssert(simplexIndices.size() == n+1);
2308                         // Transition to reflecting state
2309                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2311                 }
2313         } else if (simplexState == reflecting){
2314                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2315                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2317                 // Find the highest time from other points in the simplex
2318                 double highestTimeForOthersInSimplex = 0.0;
2319                 for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2320                         double t = allData.phases[*iter]->medianTime();
2321                         if(*iter != worstPhase && t > highestTimeForOthersInSimplex) {
2322                                 highestTimeForOthersInSimplex = t;
2323                         }
2324                 }
2326                 CkPrintf("After reflecting, the median time for the phase is %f, previous worst phase %d time was %f\n", recentPhaseTime, worstPhase, previousWorstPhaseTime);
2328                 if(recentPhaseTime < highestTimeForOthersInSimplex){
2329                         // if y* < yl,  transition to "expanding"
2330                         doExpansion(controlPointSpace, newControlPoints, phase_id, allData);
2332                 } else if (recentPhaseTime <= highestTimeForOthersInSimplex){
2333                         // else if y* <= yi replace ph with p* and transition to "evaluatingOne"
2334                         CkAssert(simplexIndices.size() == n+1);
2335                         simplexIndices.erase(worstPhase);
2336                         CkAssert(simplexIndices.size() == n);
2337                         simplexIndices.insert(pPhase);
2338                         CkAssert(simplexIndices.size() == n+1);
2339                         // Transition to reflecting state
2340                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2342                 } else {
2343                         // if y* > yh
2344                         if(recentPhaseTime <= worstTime){
2345                                 // replace Ph with P*
2346                                 CkAssert(simplexIndices.size() == n+1);
2347                                 simplexIndices.erase(worstPhase);
2348                                 CkAssert(simplexIndices.size() == n);
2349                                 simplexIndices.insert(pPhase);
2350                                 CkAssert(simplexIndices.size() == n+1);
2351                                 // Because we later will possibly replace Ph with P**, and we just replaced it with P*, we need to update our Ph reference
2352                                 worstPhase = pPhase;
2353                                 // Just as a sanity check, make sure we don't use the non-updated values.
2354                                 worst.clear();
2355                         }
2357                         // Now, form P** and do contracting phase
2358                         doContraction(controlPointSpace, newControlPoints, phase_id, allData);
2360                 }
2362         } else if (simplexState == doneExpanding){
2363                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2364                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2365                 // A new configuration has been evaluated
2367                 // Check to see if y** < y1
2368                 if(recentPhaseTime < bestTime){
2369                         // replace Ph by P**
2370                         CkAssert(simplexIndices.size() == n+1);
2371                         simplexIndices.erase(worstPhase);
2372                         CkAssert(simplexIndices.size() == n);
2373                         simplexIndices.insert(p2Phase);
2374                         CkAssert(simplexIndices.size() == n+1);
2375                 } else {
2376                         //      replace Ph by P*
2377                         CkAssert(simplexIndices.size() == n+1);
2378                         simplexIndices.erase(worstPhase);
2379                         CkAssert(simplexIndices.size() == n);
2380                         simplexIndices.insert(pPhase);
2381                         CkAssert(simplexIndices.size() == n+1);
2382                 }
2384                 // Transition to reflecting state
2385                 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2387         }  else if (simplexState == contracting){
2388                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2389                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2390                 // A new configuration has been evaluated
2392                 // Check to see if y** > yh
2393                 if(recentPhaseTime <= worstTime){
2394                         // replace Ph by P**
2395                         CkPrintf("Replacing phase %d with %d\n", worstPhase, p2Phase);
2396                         CkAssert(simplexIndices.size() == n+1);
2397                         simplexIndices.erase(worstPhase);
2398                         CkAssert(simplexIndices.size() == n);
2399                         simplexIndices.insert(p2Phase);
2400                         CkAssert(simplexIndices.size() == n+1);
2401                         // Transition to reflecting state
2402                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2404                 } else {
2405                         //      conceptually we will replace all Pi by (Pi+Pl)/2, but there is nothing to store this until after we have tried all of them
2406                         simplexState = stillContracting;
2408                         // A set of phases for which (P_i+P_l)/2 ought to be evaluated
2409                         stillMustContractList = simplexIndices;
2411                         CkPrintf("Simplex Tuning: Switched to state: stillContracting\n");
2412                 }
2414         } else if (simplexState == stillContracting){
2415                 CkPrintf("Simplex Tuning: stillContracting found %d configurations left to try\n", stillMustContractList.size());
2417                 if(stillMustContractList.size()>0){
2418                         int c = *stillMustContractList.begin();
2419                         stillMustContractList.erase(c);
2420                         CkPrintf("Simplex Tuning: stillContracting evaluating configuration derived from phase %d\n", c);
2422                         std::vector<double> cPhaseConfig = pointCoords(allData, c);
2424                         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2425                         int v = 0;
2426                         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2427                                 const std::string &name = cpsIter->first;
2428                                 const std::pair<int,int> &bounds = cpsIter->second;
2429                                 const int lb = bounds.first;
2430                                 const int ub = bounds.second;
2432                                 double val = (cPhaseConfig[v] + best[v])/2.0;
2434                                 newControlPoints[name] = keepInRange((int)val,lb,ub);
2435                                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2436                                 v++;
2437                         }
2438                 } else {
2439                         // We have tried all configurations. We should update the simplex to refer to all the newly tried configurations, and start over
2440                         CkAssert(stillMustContractList.size() == 0);
2441                         simplexIndices.clear();
2442                         CkAssert(simplexIndices.size()==0);
2443                         for(int i=0; i<n+1; i++){
2444                                 simplexIndices.insert(allData.phases.size()-2-i);
2445                         }
2446                         CkAssert(simplexIndices.size()==n+1);
2448                         // Transition to reflecting state
2449                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2451                 }
2454         } else if (simplexState == expanding){
2455                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2456                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2457                 // A new configuration has been evaluated
2459                 // determine if y** < yl
2460                 if(recentPhaseTime < bestTime){
2461                         // replace Ph by P**
2462                         CkAssert(simplexIndices.size() == n+1);
2463                         simplexIndices.erase(worstPhase);
2464                         CkAssert(simplexIndices.size() == n);
2465                         simplexIndices.insert(p2Phase);
2466                         CkAssert(simplexIndices.size() == n+1);
2467                         // Transition to reflecting state
2468                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2470                 } else {
2471                         // else, replace ph with p*
2472                         CkAssert(simplexIndices.size() == n+1);
2473                         simplexIndices.erase(worstPhase);
2474                         CkAssert(simplexIndices.size() == n);
2475                         simplexIndices.insert(pPhase);
2476                         CkAssert(simplexIndices.size() == n+1);
2477                         // Transition to reflecting state
2478                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2479                 }
2482         } else {
2483                 CkAbort("Unknown simplexState");
2484         }
2490 /** Replace the worst point with its reflection across the centroid. */
2491 void simplexScheme::doReflection(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2493         int n = controlPointSpace.size();
2495         printSimplex(allData);
2497         computeCentroidBestWorst(controlPointSpace, newControlPoints, phase_id, allData);
2500         // Quit if the diameter of our simplex is small
2501         double maxr = 0.0;
2502         for(int i=0; i<n+1; i++){
2503                 //              Compute r^2 of this simplex point from the centroid
2504                 double r2 = 0.0;
2505                 std::vector<double> p = pointCoords(allData, i);
2506                 for(int d=0; d<p.size(); d++){
2507                         double r1 = (p[d] * centroid[d]);
2508                         r2 += r1*r1;
2509                 }
2510                 if(r2 > maxr)
2511                         maxr = r2;
2512         }
2514 #if 0
2515         // At some point we quit this tuning
2516         if(maxr < 10){
2517                 useBestKnown = true;
2518                 instrumentedPhase *best = allData.findBest();
2519                 CkPrintf("Simplex Tuning: Simplex diameter is small, so switching over to best known phase:\n");
2521                 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2522                 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
2523                         const std::string &name = cpsIter->first;
2524                         newControlPoints[name] =  best->controlPoints[name];
2525                 }
2526         }
2527 #endif
2529         // Compute new point P* =(1+alpha)*centroid - alpha(worstPoint)
2531         pPhase = allData.phases.size()-1;
2532         P.resize(n);
2533         for(int i=0; i<n; i++){
2534                 P[i] = (1.0+alpha) * centroid[i] - alpha * worst[i] ;
2535         }
2537         for(int i=0; i<P.size(); i++){
2538                 CkPrintf("Simplex Tuning: P dimension %d is %f\n", i, P[i]);
2539         }
2542         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2543         int v = 0;
2544         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2545                 const std::string &name = cpsIter->first;
2546                 const std::pair<int,int> &bounds = cpsIter->second;
2547                 const int lb = bounds.first;
2548                 const int ub = bounds.second;
2549                 newControlPoints[name] = keepInRange((int)P[v],lb,ub);
2550                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2551                 v++;
2552         }
2555         // Transition to "reflecting" state
2556         simplexState = reflecting;
2557         CkPrintf("Simplex Tuning: Switched to state: reflecting\n");
2564 /** Replace the newly tested reflection with a further expanded version of itself. */
2565 void simplexScheme::doExpansion(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2566         int n = controlPointSpace.size();
2567         printSimplex(allData);
2569         // Note that the original Nelder Mead paper has an error when it displays the equation for P** in figure 1.
2570         // I believe the equation for P** in the text on page 308 is correct.
2572         // Compute new point P2 = (1+gamma)*P - gamma(centroid)
2575         p2Phase = allData.phases.size()-1;
2576         P2.resize(n);
2577         for(int i=0; i<n; i++){
2578                 P2[i] = ( (1.0+gamma) * P[i] - gamma * centroid[i] );
2579         }
2581         for(int i=0; i<P2.size(); i++){
2582                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2583         }
2586         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2587         int v = 0;
2588         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2589                 const std::string &name = cpsIter->first;
2590                 const std::pair<int,int> &bounds = cpsIter->second;
2591                 const int lb = bounds.first;
2592                 const int ub = bounds.second;
2593                 newControlPoints[name] = keepInRange((int)P2[v],lb,ub);
2594                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Expanding %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2595                 v++;
2596         }
2599         // Transition to "doneExpanding" state
2600         simplexState = doneExpanding;
2601         CkPrintf("Simplex Tuning: Switched to state: doneExpanding\n");
2608 /** Replace the newly tested reflection with a further expanded version of itself. */
2609 void simplexScheme::doContraction(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2610         int n = controlPointSpace.size();
2611         printSimplex(allData);
2613         // Compute new point P2 = beta*Ph + (1-beta)*centroid
2616         p2Phase = allData.phases.size()-1;
2617         P2.resize(n);
2618         for(int i=0; i<n; i++){
2619                 P2[i] = ( beta*worst[i] + (1.0-beta)*centroid[i] );
2620         }
2622         for(int i=0; i<P2.size(); i++){
2623                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2624         }
2627         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2628         int v = 0;
2629         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2630                 const std::string &name = cpsIter->first;
2631                 const std::pair<int,int> &bounds = cpsIter->second;
2632                 const int lb = bounds.first;
2633                 const int ub = bounds.second;
2634                 newControlPoints[name] = keepInRange((int)P2[v],lb,ub);
2635                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Contracting %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2636                 v++;
2637         }
2638         
2639         
2640         // Transition to "contracting" state
2641         simplexState = contracting;
2642         CkPrintf("Simplex Tuning: Switched to state: contracting\n");
2649 void simplexScheme::computeCentroidBestWorst(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2650         int n = controlPointSpace.size();
2652         // Find worst performing point in the simplex
2653         worstPhase = -1;
2654         worstTime = -1.0;
2655         bestPhase = 10000000;
2656         bestTime = 10000000;
2657         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2658                 double t = allData.phases[*iter]->medianTime();
2659                 if(t > worstTime){
2660                         worstTime = t;
2661                         worstPhase = *iter;
2662                 }
2663                 if(t < bestTime){
2664                         bestTime = t;
2665                         bestPhase = *iter;
2666                 }
2667         }
2668         CkAssert(worstTime != -1.0 && worstPhase != -1 && bestTime != 10000000 && bestPhase != 10000000);
2670         best = pointCoords(allData, bestPhase);
2671         CkAssert(best.size() == n);
2673         worst = pointCoords(allData, worstPhase);
2674         CkAssert(worst.size() == n);
2676         // Calculate centroid of the remaining points in the simplex
2677         centroid.resize(n);
2678         for(int i=0; i<n; i++){
2679                 centroid[i] = 0.0;
2680         }
2682         int numPts = 0;
2684         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2685                 if(*iter != worstPhase){
2686                         numPts ++;
2687                         // Accumulate into the result vector
2688                         int c = 0;
2689                         for(std::map<std::string,int>::iterator citer = allData.phases[*iter]->controlPoints.begin(); citer != allData.phases[*iter]->controlPoints.end(); ++citer){
2690                                 centroid[c] += citer->second;
2691                                 c++;
2692                         }
2694                 }
2695         }
2697         // Now divide the sums by the number of points.
2698         for(int v = 0; v<centroid.size(); v++) {
2699                 centroid[v] /= (double)numPts;
2700         }
2702         CkAssert(centroid.size() == n);
2704         for(int i=0; i<centroid.size(); i++){
2705                 CkPrintf("Centroid dimension %d is %f\n", i, centroid[i]);
2706         }
2713 std::vector<double> simplexScheme::pointCoords(instrumentedData &allData, int i){
2714         std::vector<double> result;
2715         for(std::map<std::string,int>::iterator citer = allData.phases[i]->controlPoints.begin(); citer != allData.phases[i]->controlPoints.end(); ++citer){
2716                 result.push_back((double)citer->second);
2717         }
2718         return result;
2724 void ControlPointWriteOutputToDisk(){
2725         CkAssert(CkMyPe() == 0);
2726         controlPointManagerProxy.ckLocalBranch()->writeOutputToDisk();
2731 /*! @} */
2733 #include "ControlPoints.def.h"
2735 #endif