6 // http://www.scl.ameslab.gov/Projects/Rabbit/
7 // http://www.support.compaq.com/nttools
12 #include "trace-counter.h"
17 #define DEBUGF(x) // CmiPrintf x
20 // for performance monitoring
21 extern "C" int start_counters(int e0, int e1);
22 extern "C" int read_counters(int e0, long long *c0, int e1, long long *c1);
24 // a message will be composed of 2*numCounters + NUM_EXTRA_PERF
25 // reduction[0-(numStats-1)] correspond to counters 0-(numStats-1)
26 // the next is phase (only write if CmiMyPe() == 0)
27 // the next is the idle time (if any calculated)
28 // the next four are (respectively) flops/s, l1 miss%, l2 miss%, tlb miss%
29 // write -1 if not able to calculate
30 static const int NUM_EXTRA_PERF = 6;
32 CkpvStaticDeclare(Trace*, _trace);
33 CpvStaticDeclare(CountLogPool*, _logPool);
34 CpvStaticDeclare(char*, _logName);
35 CpvStaticDeclare(char**, _counterNames);
36 CpvStaticDeclare(char**, _counterDesc);
37 CpvStaticDeclare(int, _numCounters);
38 CpvStaticDeclare(int, _reductionID);
39 static int _numEvents = 0;
40 CpvDeclare(double, version);
42 // a rudimentary reduction to print out the performance results across the run
43 CmiHandler StatTableReduction(char* msg)
45 DEBUGF(("StatTableReduction called\n", CmiMyPe(), CmiNumPes()));
46 static double* reduce = NULL;
47 static int numReduce = 0;
48 int numCounters = CpvAccess(_numCounters);
49 int size = 2*CpvAccess(_numCounters)+NUM_EXTRA_PERF;
53 reduce = new double[size];
54 for (i=0; i<size; i++) { reduce[i] = 0.0; }
55 DEBUGF((" allocated reduce numCounters %d size %d\n",
59 // see above for the feilds of this message
60 double* msgResults = (double *)(msg+ALIGN8(CmiMsgHeaderSizeBytes));
61 for (i=0; i<size; i++) {
62 reduce[i] += msgResults[i];
65 char** counterNames = CpvAccess(_counterNames);
67 DEBUGF((" numReduce %d numPes %d\n", numReduce, CmiNumPes()));
68 int phase = reduce[2*numCounters];
69 if (numReduce >= CmiNumPes()) {
70 // finished with reduction, print out results
72 for (i=0; i<numCounters; i++) {
73 if (reduce[2*i+1]>0.0) { // is time > 0?
75 CmiPrintf("PHASE %d %s totalCount %f totalTime (us) %f\n"
76 "PHASE %d %s count/proc %f avgTime (us)/phase %f\n",
77 phase, counterNames[i], reduce[2*i], reduce[2*i+1]*1e6,
78 phase, counterNames[i], reduce[2*i]/CmiNumPes(),
79 reduce[2*i+1]*1e6/CmiNumPes());
82 CmiPrintf("%s totalCount %f totalTime (us) %f\n"
83 "%s count/proc %f avgTime (us)/phase %f\n",
84 counterNames[i], reduce[2*i], reduce[2*i+1]*1e6,
85 counterNames[i], reduce[2*i]/CmiNumPes(),
86 reduce[2*i+1]*1e6/CmiNumPes());
91 CmiPrintf("PHASE %d totalIdleTime (us) %f avgIdleTime (us)/phase %f\n",
92 phase, reduce[2*numCounters+1]*1e6,
93 reduce[2*numCounters+1]*1e6/CmiNumPes());
96 CmiPrintf("totalIdleTime (us) %f avgIdleTime (us)/phase %f\n",
97 reduce[2*numCounters+1]*1e6,
98 reduce[2*numCounters+1]*1e6/CmiNumPes());
100 if (reduce[2*numCounters+2] > 0.0) {
103 CmiPrintf("PHASE %d flops/s %f flops/s/PE %f\n",
104 phase, reduce[2*numCounters+2],
105 reduce[2*numCounters+2]/CmiNumPes());
108 CmiPrintf("flops/s %f flops/s/PE %f\n",
109 reduce[2*numCounters+2],
110 reduce[2*numCounters+2]/CmiNumPes());
113 char* missRate = NULL;
114 for (i=0; i<3; i++) {
116 case 0: missRate = "l1 avg miss rate (%)"; break;
117 case 1: missRate = "l2 avg miss rate (%)"; break;
118 case 2: missRate = "tlb avg miss rate (%)"; break;
120 if (reduce[2*numCounters+3+i] >= 0.0) {
122 CmiPrintf("PHASE %d %s %f\n",
123 phase, missRate, reduce[2*numCounters+3+i]/CmiNumPes()*100);
127 missRate, reduce[2*numCounters+3+i]/CmiNumPes()*100);
139 //! The following is the list of arguments that can be passed to
140 //! the +counter{1|2} command line arguments.
141 //! To add or change, change NUM_COUNTER_ARGS and follow the examples
142 //! Use three constructor arguments:
143 //! 1) Code (for SGI libperfex) associated with counter.
144 //! 2) String to be entered on the command line.
145 //! 3) String that is the description of the counter.
146 //! All NUM_COUNTER_ARGS are automatically registered via
147 //! TraceCounter::TraceCounter() definition.
148 static const int NUM_COUNTER_ARGS = 32;
149 static TraceCounter::CounterArg COUNTER_ARG[NUM_COUNTER_ARGS] =
150 { TraceCounter::CounterArg( 0, "CYCLES0", "Cycles (also see code 16)"),
151 TraceCounter::CounterArg( 1, "INSTR", "Issued instructions"),
152 TraceCounter::CounterArg( 2, "LOAD", "Issued loads"),
153 TraceCounter::CounterArg( 3, "STORE", "Issued stores"),
154 TraceCounter::CounterArg( 4, "STORE_COND", "Issued store conditionals"),
155 TraceCounter::CounterArg( 5, "FAIL_COND", "Failed store conditionals"),
156 TraceCounter::CounterArg( 6, "DECODE_BR", "Decoded branches. (This changes meaning in 3.x versions of R10000. It becomes resolved branches)"),
157 TraceCounter::CounterArg( 7, "QUADWORDS2", "Quadwords written back from secondary cache"),
158 TraceCounter::CounterArg( 8, "CACHE_ER2", "Correctable secondary cache data array ECC errors"),
159 TraceCounter::CounterArg( 9, "L1_IMISS", "Primary (L1) instruction cache misses"),
160 TraceCounter::CounterArg(10, "L2_IMISS", "Secondary (L2) instruction cache misses"),
161 TraceCounter::CounterArg(11, "INSTRMISPR", "Instruction misprediction from secondary cache way prediction table"),
162 TraceCounter::CounterArg(12, "EXT_INTERV", "External interventions"),
163 TraceCounter::CounterArg(13, "EXT_INVAL", "External invalidations"),
164 TraceCounter::CounterArg(14, "VIRT_COHER", "Virtual coherency conditions. (This changes meaning in 3.x versions of R10000. It becomes ALU/FPU forward progress cycles. On the R12000, this counter is always 0)."),
165 TraceCounter::CounterArg(15, "GR_INSTR15", "Graduated instructions (also see code 17)"),
166 TraceCounter::CounterArg(16, "CYCLES16", "Cycles (also see code 0)"),
167 TraceCounter::CounterArg(17, "GR_INSTR17", "Graduated instructions (also see code 15)"),
168 TraceCounter::CounterArg(18, "GR_LOAD" , "Graduated loads"),
169 TraceCounter::CounterArg(19, "GR_STORE", "Graduated stores"),
170 TraceCounter::CounterArg(20, "GR_ST_COND", "Graduated store conditionals"),
171 TraceCounter::CounterArg(21, "GR_FLOPS", "Graduated floating point instructions"),
172 TraceCounter::CounterArg(22, "QUADWORDS1", "Quadwords written back from primary data cache"),
173 TraceCounter::CounterArg(23, "TLB_MISS", "TLB misses"),
174 TraceCounter::CounterArg(24, "MIS_BR", "Mispredicted branches"),
175 TraceCounter::CounterArg(25, "L1_DMISS", "Primary (L1) data cache misses"),
176 TraceCounter::CounterArg(26, "L2_DMISS", "Primary (L2) data cache misses"),
177 TraceCounter::CounterArg(27, "DATA_MIS", "Data misprediction from secondary cache way predicition table"),
178 TraceCounter::CounterArg(28, "EXT_INTERV2", "External intervention hits in secondary cache (L2)"),
179 TraceCounter::CounterArg(29, "EXT_INVAL2", "External invalidation hits in secondary cache"),
180 TraceCounter::CounterArg(30, "CLEAN_ST_PRE", "Store/prefetch exclusive to clean block in secondary cache"),
181 TraceCounter::CounterArg(31, "SHARE_ST_PRE", "Store/prefetch exclusive to shared block in secondary cache")
184 // this is called by the Charm++ runtime system
185 void _createTracecounter(char **argv)
187 DEBUGF(("%d/%d DEBUG: createTraceCounter\n", CmiMyPe(), CmiNumPes()));
188 CkpvInitialize(Trace*, _trace);
189 TraceCounter* tc = new TraceCounter(); _MEMCHECK(tc);
191 CkpvAccess(_trace) = tc;
192 CkpvAccess(_traces)->addTrace(CkpvAccess(_trace));
196 StatTable::StatTable():
197 stats_(NULL), numStats_(0)
199 DEBUGF(("%d/%d DEBUG: StatTable::StatTable %08x size %d\n",
200 CmiMyPe(), CmiNumPes(), this));
202 DEBUGF(("%d/%d registering reductionID\n", CmiMyPe(), CmiNumPes()));
203 CpvAccess(_reductionID) = CmiRegisterHandler((CmiHandler)StatTableReduction);
207 StatTable::~StatTable() { delete [] stats_; }
209 // initialize the stat table internals
210 void StatTable::init(int argc)
212 char** counterNames = CpvAccess(_counterNames);
213 char** counterDesc = CpvAccess(_counterDesc);
215 if (argc > numStats_) {
217 stats_ = new Statistics[argc]; _MEMCHECK(stats_);
220 for (int i=0; i<argc; i++) {
221 DEBUGF(("%d/%d DEBUG: %d name %s\n desc %s\n",
222 CmiMyPe(), CmiNumPes(), i, name[i], desc[i]));
223 stats_[i].name = counterNames[i];
224 stats_[i].desc = counterDesc[i];
229 //! one entry is called for 'time' seconds, value is counter reading
230 void StatTable::setEp(int epidx, int stat, long long value, double time)
232 // CmiPrintf("StatTable::setEp %08x %d %d %d %f\n",
233 // this, epidx, stat, value, time);
235 CkAssert(epidx<MAX_ENTRIES);
236 CkAssert(stat<numStats_ && stat>=0);
238 int numCalled = stats_[stat].numCalled[epidx];
239 double avg = stats_[stat].avgCount[epidx];
240 double stdDev = stats_[stat].stdDevCount[epidx];
241 stats_[stat].numCalled[epidx]++;
242 stats_[stat].avgCount[epidx] = (avg * numCalled + value) / (numCalled + 1);
243 // note that the stddev calculation is not quite correct, but it's
244 // almost correct and over time will converge with true value
245 avg = stats_[stat].avgCount[epidx];
246 stats_[stat].stdDevCount[epidx] =
247 (stdDev * numCalled + (value-avg)*(value-avg)) / (numCalled+1);
248 // CmiPrintf("variance %f avg %f value %ld calcVariance %f\n",
249 // stdDev, avg, value, stats_[stat].stdDevCount[epidx]);
250 stats_[stat].totTime[epidx] += time;
251 if (stats_[stat].maxCount[epidx] < value) {
252 stats_[stat].maxCount[epidx] = value;
254 if (stats_[stat].minCount[epidx] > value) {
255 stats_[stat].minCount[epidx] = value;
259 //! write three lines for each stat:
260 //! 1. number of calls for each entry
261 //! 2. average count for each entry
262 //! 3. total time in us spent for each entry
263 void StatTable::write(FILE* fp)
265 DEBUGF(("%d/%d DEBUG: Writing StatTable\n", CmiMyPe(), CmiNumPes()));
267 int _numEntries=_entryTable.size();
268 for (i=0; i<numStats_; i++) {
269 // FAKE OUT AND WRITE AN OVERVIEW AS LAST ENTRY
270 // write description of the entry
271 fprintf(fp, "[%s {%s}]\n", stats_[i].name, stats_[i].desc);
272 // write number of calls for each entry
273 fprintf(fp, "[%s num_called] ", stats_[i].name);
274 for (j=0; j<_numEntries+1; j++) {
275 fprintf(fp, "%d ", stats_[i].numCalled[j]);
278 // write average count for each
279 fprintf(fp, "[%s avg_count] ", stats_[i].name);
280 for (j=0; j<_numEntries+1; j++) {
281 fprintf(fp, "%f ", stats_[i].avgCount[j]);
284 // write standard deviation of count for each
285 fprintf(fp, "[%s std_dev_count] ", stats_[i].name);
286 for (j=0; j<_numEntries+1; j++) {
287 fprintf(fp, "%f ", sqrt(stats_[i].stdDevCount[j]));
290 // write total time in us spent for each entry
291 fprintf(fp, "[%s total_time(us)] ", stats_[i].name);
292 for (j=0; j<_numEntries+1; j++) {
293 fprintf(fp, "%f ", stats_[i].totTime[j]*1e6);
296 // write max count for each entry
297 fprintf(fp, "[%s max_count] ", stats_[i].name);
298 for (j=0; j<_numEntries+1; j++) {
299 fprintf(fp, "%ld ", stats_[i].maxCount[j]);
302 // write min count for each entry
303 fprintf(fp, "[%s min_count] ", stats_[i].name);
304 for (j=0; j<_numEntries+1; j++) {
305 if (stats_[i].minCount[j] == MAXLONGLONG) { stats_[i].minCount[j] = 0; }
306 fprintf(fp, "%ld ", stats_[i].minCount[j]);
310 DEBUGF(("%d/%d DEBUG: Finished writing StatTable\n", CmiMyPe(), CmiNumPes()));
313 //! set all of internals to null
314 void StatTable::clear()
316 for (int i=0; i<numStats_; i++) {
317 for (int j=0; j<MAX_ENTRIES; j++) {
318 stats_[i].numCalled[j] = 0;
319 stats_[i].avgCount[j] = 0.0;
320 stats_[i].stdDevCount[j] = 0.0;
321 stats_[i].totTime[j] = 0.0;
322 stats_[i].maxCount[j] = 0.0;
323 stats_[i].minCount[j] = MAXLONGLONG;
328 //! do a reduction across processors to calculate the total count for
329 //! each count, and if the count has flops, etc, then calc the
330 //! the flops/s, etc...
331 void StatTable::doReduction(int phase, double idleTime) {
332 DEBUGF(("%d/%d DEBUG: StatTable::doReduction()\n",
333 CmiMyPe(), CmiNumPes(), this));
334 // see above (NUM_EXTRA_PERF) for the fields in the message
336 ALIGN8(CmiMsgHeaderSizeBytes)+
337 sizeof(double)*(2*numStats_+NUM_EXTRA_PERF);
338 char *msg = (char *)CmiAlloc(msgSize);
339 double* reduction = (double*)(msg+ALIGN8(CmiMsgHeaderSizeBytes));
340 // calculate flops/s, l1%, l2%, tlb% if it's there
341 char** counterNames = CpvAccess(_counterNames);
342 int GR_FLOPS = -1; double flopsRate = -1.0;
343 int LOAD = -1; double loadRate = -1.0;
344 int STORE = -1; double storeRate = -1.0;
345 int L1_DMISS = -1; double l1Rate = -1.0;
346 int L2_DMISS = -1; double l2Rate = -1.0;
347 int TLB_MISS = -1; double tlbRate = -1.0;
349 for (i=0; i<2*numStats_+NUM_EXTRA_PERF; i++) { reduction[i] = 0.0; }
350 for (i=0; i<numStats_; i++) {
351 for (int j=0; j<MAX_ENTRIES; j++) {
352 reduction[2*i] += stats_[i].numCalled[j]*stats_[i].avgCount[j];
353 reduction[2*i+1] += stats_[i].totTime[j];
355 if (strcmp(counterNames[i], "GR_FLOPS")==0) { GR_FLOPS = i; }
356 else if (strcmp(counterNames[i], "LOAD")==0) { LOAD = i; }
357 else if (strcmp(counterNames[i], "STORE")==0) { STORE = i; }
358 else if (strcmp(counterNames[i], "L1_DMISS")==0) { L1_DMISS = i; }
359 else if (strcmp(counterNames[i], "L2_DMISS")==0) { L2_DMISS = i; }
360 else if (strcmp(counterNames[i], "TLB_MISS")==0) { TLB_MISS = i; }
362 if (CmiMyPe()==0) { reduction[2*numStats_] = phase; }
363 reduction[2*numStats_+1] = idleTime;
364 // -1 for the rest of the calc values
365 reduction[2*numStats_+2] = -1.0;
366 reduction[2*numStats_+3] = -1.0;
367 reduction[2*numStats_+4] = -1.0;
368 reduction[2*numStats_+5] = -1.0;
369 // calculate flops/s, l1%, l2%, tlb% if it's there
372 for (i=0; i<6; i++) {
374 case 0: rate = &flopsRate; index = GR_FLOPS; break;
375 case 1: rate = &loadRate; index = LOAD; break;
376 case 2: rate = &storeRate; index = STORE; break;
377 case 3: rate = &l1Rate; index = L1_DMISS; break;
378 case 4: rate = &l2Rate; index = L2_DMISS; break;
379 case 5: rate = &tlbRate; index = TLB_MISS; break;
381 if (index >= 0 && reduction[2*index+1] > 0.0) {
382 // if we have the counter AND it's times were non-zero
383 *rate = reduction[2*index]/reduction[2*index+1];
386 // store rates if there
387 if (GR_FLOPS >= 0) { reduction[2*numStats_+2] = flopsRate; }
388 if (LOAD >= 0 && STORE >= 0) {
389 double memRate = loadRate + storeRate;
390 if (L1_DMISS >= 0 & memRate > 0) {
391 reduction[2*numStats_+3] = l1Rate / memRate;
393 if (L2_DMISS >= 0 & memRate > 0) {
394 reduction[2*numStats_+4] = l2Rate / memRate;
396 if (TLB_MISS >= 0 & memRate > 0) {
397 reduction[2*numStats_+5] = tlbRate / memRate;
402 CmiSetHandler(msg, (int)CpvAccess(_reductionID));
403 int handlerID = CmiGetHandler(msg);
404 DEBUGF(("%d/%d handlerID %d reductionID %d\n",
405 CmiMyPe(), CmiNumPes(), handlerID, CpvAccess(_reductionID)));
406 CmiSyncSendAndFree(0, msgSize, msg);
409 CountLogPool::CountLogPool():
413 DEBUGF(("%d/%d DEBUG: CountLogPool::CountLogPool() %08x\n",
414 CmiMyPe(), CmiNumPes(), this));
417 // open file, if phase is -1, don't add the phase string
418 FILE* CountLogPool::openFile(int phase) {
419 // technically, the sprintf into pestr & phasestr are unnecessary,
420 // can just make a limit and check for that
422 DEBUGF(("%d CountLogPool::openFile(%d)\n", CmiMyPe(), phase));
423 const static int strSize = 10;
424 char pestr[strSize+1];
425 char phasestr[strSize+1];
426 snprintf(pestr, strSize, "%d", CmiMyPe());
427 pestr[strSize] = '\0';
428 int len = strlen(CpvAccess(_logName)) + strlen("phase.count.") + 2*strSize + 1;
429 char* fname = new char[len+1]; _MEMCHECK(fname);
431 snprintf(phasestr, strSize, "%d", phase);
432 phasestr[strSize] = '\0';
433 sprintf(fname, "%s.phase%s.%s.count", CpvAccess(_logName), phasestr, pestr);
435 else { sprintf(fname, "%s.%s.count", CpvAccess(_logName), pestr); }
437 DEBUGF(("%d/%d DEBUG: TRACE: %s:%d\n", CmiMyPe(), CmiNumPes(), fname, errno));
439 fp = fopen(fname, "w+");
440 } while (!fp && errno == EINTR);
443 CmiAbort("Cannot open Summary Trace File for writing...\n");
448 void CountLogPool::write(int phase)
450 if (phase >= 0) { lastPhase_ = phase; }
451 if (phase < 0 && lastPhase_ >= 0) { lastPhase_++; phase = lastPhase_; }
453 int _numEntries=_entryTable.size();
454 FILE* fp = (phase==-1) ? openFile() : openFile(phase);
455 fprintf(fp, "ver:%3.1f %d/%d ep:%d counters:%d\n",
456 CpvAccess(version), CmiMyPe(), CmiNumPes(), _numEntries+1,
462 void CountLogPool::writeSts(int phase)
464 // technically, the sprintf into phasestr is unnecessary,
465 // can just make a limit and check for that
467 if (phase >= 0) { lastPhase_ = phase; }
468 DEBUGF(("%d CountLogPool::writeSts(%d)\n", CmiMyPe(), phase));
470 const static int strSize = 10;
471 char phasestr[strSize+1];
472 int _numEntries=_entryTable.size();
473 // add strSize for phase number
475 new char[strlen(CpvAccess(_logName))+strlen(".count.sts")+strSize];
477 if (phase < 0 && lastPhase_ >= 0) { phase = lastPhase_; }
479 snprintf(phasestr, strSize, "%d", phase);
480 phasestr[strSize] = '\0';
481 sprintf(fname, "%s.phase%s.count.sts", CpvAccess(_logName), phasestr);
483 else { sprintf(fname, "%s.count.sts", CpvAccess(_logName)); }
484 FILE *sts = fopen(fname, "w+");
485 // DEBUGF(("%d/%d DEBUG: File: %s \n", CmiMyPe(), CmiNumPes(), fname));
487 CmiAbort("Cannot open summary sts file for writing.\n");
488 CmiPrintf("WRITING FILE=%s\n", fname);
490 fprintf(sts, "MACHINE %s\n",CMK_MACHINE_NAME);
491 fprintf(sts, "PROCESSORS %d\n", CmiNumPes());
492 fprintf(sts, "TOTAL_CHARES %d\n", _chareTable.size());
493 fprintf(sts, "TOTAL_EPS %d\n", _numEntries+1); // make extra overview
494 fprintf(sts, "TOTAL_MSGS %d\n", _msgTable.size());
495 fprintf(sts, "TOTAL_PSEUDOS %d\n", 0);
496 fprintf(sts, "TOTAL_EVENTS %d\n", _numEvents);
498 for(i=0;i<_chareTable.size();i++)
499 fprintf(sts, "CHARE %d %s\n", i, _chareTable[i]->name);
500 for(i=0;i<_numEntries;i++)
501 fprintf(sts, "ENTRY CHARE %d %s %d %d\n", i, _entryTable[i]->name,
502 _entryTable[i]->chareIdx, _entryTable[i]->msgIdx);
503 // fake out, make OVERVIEW
504 fprintf(sts, "ENTRY CHARE %d OVERVIEW -1 -1\n", _numEntries);
505 for(i=0;i<_msgTable.size();i++)
506 fprintf(sts, "MESSAGE %d %ld\n", i, _msgTable[i]->size);
507 for(i=0;i<_numEvents;i++)
508 fprintf(sts, "EVENT %d Event%d\n", i, i);
509 fprintf(sts, "END\n");
513 void CountLogPool::setEp(int epidx,
514 int index1, long long count1,
515 int index2, long long count2,
518 // DEBUGF(("%d/%d DEBUG: CountLogPool::setEp %08x %d %d %d %f\n",
519 // CmiMyPe(), CmiNumPes(), this, epidx, count1, count2, time));
521 if (epidx >= MAX_ENTRIES) {
522 CmiAbort("CountLogPool::setEp too many entry points!\n");
524 stats_.setEp(epidx, index1, count1, time);
525 stats_.setEp(epidx, index2, count2, time);
529 TraceCounter::TraceCounter() :
530 // comand line processing
540 // result of different command line opts
542 switchRandom_ (false),
543 switchByPhase_ (false),
545 writeByPhase_ (false),
546 // store between start/stop of counter read
558 for (int i=0; i<NUM_COUNTER_ARGS; i++) { registerArg(&COUNTER_ARG[i]); }
562 TraceCounter::~TraceCounter() {
563 delete [] commandLine_;
567 //! process command line arguments!
568 void TraceCounter::traceInit(char **argv)
570 CpvInitialize(CountLogPool*, _logPool);
571 CpvInitialize(char*, _logName);
572 CpvInitialize(double, version);
573 CpvInitialize(char**, _counterNames);
574 CpvInitialize(char**, _counterDesc);
575 CpvInitialize(int, _numCounters);
576 CpvInitialize(int, _reductionID);
578 CpvAccess(_logName) = (char *) malloc(strlen(argv[0])+1);
579 _MEMCHECK(CpvAccess(_logName));
580 strcpy(CpvAccess(_logName), argv[0]);
581 CpvAccess(version) = VER;
584 // parse command line args
585 char* counters = NULL;
589 if (CmiGetArgStringDesc(argv, "+counters", &counters, "Measure these performance counters")) {
590 if (CmiMyPe()==0) { CmiPrintf("Counters: %s\n", counters); }
592 int limit = strlen(counters);
593 char* ptr = counters;
594 while (offset < limit &&
595 (ptr = strtok(&counters[offset], ",")) != NULL)
597 offset += strlen(ptr)+1;
598 ptr = &ptr[strlen(ptr)+1];
602 CmiPrintf("There are %d counters\n", numCounters);
604 commandLine_ = new CounterArg[numCounters];
606 for (i=0; i<numCounters; i++) {
607 commandLine_[i].arg = ptr;
608 if (!matchArg(&commandLine_[i])) {
609 if (CmiMyPe()==0) { CmiPrintf("Bad arg: [%s]\n", ptr); }
612 ptr = &ptr[strlen(ptr)+1];
615 commandLineSz_ = numCounters;
617 // check to see if args are valid, output if not
618 if (badArg || CmiGetArgFlagDesc(argv, "+count-help", "List available performance counters")) {
619 if (CmiMyPe() == 0) { printHelp(); }
620 ConverseExit(); return;
622 else if (counters == NULL) {
623 if (CmiMyPe() == 0) { usage(); }
624 ConverseExit(); return;
627 // get optional command line args
628 overview_ = CmiGetArgFlag(argv, "+count-overview");
629 switchRandom_ = CmiGetArgFlag(argv, "+count-switchrandom");
630 switchByPhase_ = CmiGetArgFlag(argv, "+count-switchbyphase");
631 noLog_ = CmiGetArgFlag(argv, "+count-nolog");
632 writeByPhase_ = CmiGetArgFlag(argv, "+count-writebyphase");
633 char* logName = NULL;
634 if (CmiGetArgString(argv, "+count-logname", &logName)) {
635 CpvAccess(_logName) = logName;
638 CmiPrintf("+count-logname and +count-nolog are MUTUALLY EXCLUSIVE\n");
644 if (switchByPhase_ && overview_) {
647 "+count-switchbyphase and +count-overview are MUTUALLY EXCLUSIVE\n"
648 "+count-overview automatically switches by phase.\n");
653 if (writeByPhase_ && noLog_) {
655 CmiPrintf("+count-writebyphase and +count-nolog are MUTUALLY EXCLUSIVE\n");
661 // parse through commandLine_, figure out which belongs on which list (1 vs 2)
662 CounterArg* last1 = NULL;
663 CounterArg* last2 = NULL;
664 CounterArg* tmp = NULL;
665 counter1Sz_ = counter2Sz_ = 0;
666 for (i=0; i<commandLineSz_; i++) {
667 tmp = &commandLine_[i];
668 if (tmp->code < NUM_COUNTER_ARGS/2) {
669 if (counter1_ == NULL) { counter1_ = tmp; last1 = counter1_; }
670 else { last1->next = tmp; last1 = tmp; }
674 if (counter2_ == NULL) { counter2_ = tmp; last2 = counter2_; }
675 else { last2->next = tmp; last2 = tmp; }
679 if (counter1_ == NULL) {
682 CmiPrintf("\nMust specify some counters with code < %d\n",
687 if (counter2_ == NULL) {
690 CmiPrintf("\nMust specify some counters with code >= %d\n",
695 last1->next = counter1_;
696 last2->next = counter2_;
698 // all args valid, now set up logging
699 if (CmiMyPe() == 0) {
700 CmiPrintf("Running with tracemode=counter and args:\n");
701 // print out counter1 set
705 CmiPrintf(" <counter1-%d>=%d %s %s\n", i, tmp->code, tmp->arg, tmp->desc);
708 } while (tmp != counter1_);
709 // print out counter2 set
713 CmiPrintf(" <counter2-%d>=%d %s %s\n", i, tmp->code, tmp->arg, tmp->desc);
716 } while (tmp != counter2_);
719 "+count-overview %d\n+count-switchrandom %d\n"
720 "+count-switchbyphase %d\n+count-nolog %d\n"
721 "+count-logname %s\n+count-writebyphase %d\n",
722 overview_, switchRandom_, switchByPhase_, noLog_,
723 logName, writeByPhase_);
726 // DEBUGF((" DEBUG: Counter1=%d Counter2=%d\n", counter1_, counter2_));
727 CpvAccess(_logPool) = new CountLogPool();
729 // allocate names so can do reduction/analysis on the fly
730 char** counterNames = new char*[counter1Sz_+counter2Sz_];
731 char** counterDesc = new char*[counter1Sz_+counter2Sz_];
733 for (i=0; i<counter1Sz_; i++) {
735 counterNames[i] = tmp->arg;
736 counterDesc[i] = tmp->desc;
740 for (i=0; i<counter2Sz_; i++) {
741 tmp->index = counter1Sz_+i;
742 counterNames[counter1Sz_+i] = tmp->arg;
743 counterDesc[counter1Sz_+i] = tmp->desc;
746 CpvAccess(_counterNames) = counterNames;
747 CpvAccess(_counterDesc) = counterDesc;
748 CpvAccess(_numCounters) = numCounters;
749 // don't erase counterNames or counterDesc,
750 // the reduction client will do it on the final reduction
752 _MEMCHECK(CpvAccess(_logPool));
753 CpvAccess(_logPool)->init(numCounters);
754 DEBUGF(("%d/%d DEBUG: Created _logPool at %08x\n",
755 CmiMyPe(), CmiNumPes(), CpvAccess(_logPool)));
758 //! turn trace on/off, note that charm will automatically call traceBegin()
759 //! at the beginning of every run unless the command line option "+traceoff"
761 void TraceCounter::traceBegin() {
762 DEBUGF(("%d/%d traceBegin called\n", CmiMyPe(), CmiNumPes()));
764 static bool print = true;
768 CmiPrintf("%d/%d WARN: traceBegin called but trace already on!\n"
769 " Sure you didn't mean to use +traceoff?\n",
770 CmiMyPe(), CmiNumPes());
775 if (overview_) { beginOverview(); }
777 if (writeByPhase_) { phase_++; }
782 //! turn trace on/off, note that charm will automatically call traceBegin()
783 //! at the beginning of every run unless the command line option "+traceoff"
785 void TraceCounter::traceEnd() {
786 DEBUGF(("%d/%d traceEnd called\n", CmiMyPe(), CmiNumPes()));
788 static bool print = true;
792 CmiPrintf("%d/%d WARN: traceEnd called but trace not on!\n"
793 " Sure you didn't mean to use +traceoff?\n",
794 CmiMyPe(), CmiNumPes());
801 if (overview_) { endOverview(); } // overview switches counters automatic
802 else if (switchByPhase_) { switchCounters(); };
804 if (!noLog_ && writeByPhase_) {
805 if (CmiMyPe()==0) { CpvAccess(_logPool)->writeSts(phase_); }
806 CpvAccess(_logPool)->write(phase_);
809 CpvAccess(_logPool)->doReduction(reductionPhase_, idleTime_);
812 CpvAccess(_logPool)->clearEps();
814 // setTrace must go after the writes otherwise the writes won't go through
815 DEBUGF(("%d/%d DEBUG: Created _logPool at %08x\n",
816 CmiMyPe(), CmiNumPes(), CpvAccess(_logPool)));
820 //! begin/end execution of a Charm++ entry point
821 //! NOTE: begin/endPack and begin/endUnpack can be called in between
822 //! a beginExecute and its corresponding endExecute.
823 void TraceCounter::beginExecute(envelope *e)
825 // no message means thread execution
826 if (e==NULL) { beginExecute(-1,-1,_threadEP,-1); }
827 else { beginExecute(-1,-1,e->getEpIdx(),-1); }
830 //! begin/end execution of a Charm++ entry point
831 //! NOTE: begin/endPack and begin/endUnpack can be called in between
832 //! a beginExecute and its corresponding endExecute.
833 void TraceCounter::beginExecute
843 DEBUGF(("%d/%d DEBUG: beginExecute EP %d\n", CmiMyPe(), CmiNumPes(), ep));
845 if (!traceOn_ || overview_) { return; }
849 if (status_!= IDLE) {
850 static bool print = true;
854 CmiPrintf("WARN: %d beginExecute called when status not IDLE!\n",
860 else { status_=WORKING; }
862 if ((genStart_=start_counters(counter1_->code, counter2_->code))<0)
864 CmiPrintf("genStart=%d counter1=%d counter2=%d\n",
865 genStart_, counter1_->code, counter2_->code);
866 CmiAbort("ERROR: start_counters() in beginExecute\n");
869 startEP_=TraceTimer();
870 DEBUGF(("%d/%d DEBUG: beginExecute EP %d genStart %d\n",
871 CmiMyPe(), CmiNumPes(), ep, genStart_));
874 //! begin/end execution of a Charm++ entry point
875 //! NOTE: begin/endPack and begin/endUnpack can be called in between
876 //! a beginExecute and its corresponding endExecute.
877 void TraceCounter::endExecute()
879 DEBUGF(("%d/%d DEBUG: endExecute EP %d genStart_ %d\n",
880 CmiMyPe(), CmiNumPes(), execEP_, genStart_));
882 if (!traceOn_ || overview_) { return; }
884 if (status_!=WORKING) {
885 static bool print = true;
889 CmiPrintf("WARN: %d endExecute called when status not WORKING!\n",
895 else { status_=IDLE; }
897 double t = TraceTimer();
899 long long value1 = 0, value2 = 0;
901 if ((genRead=read_counters(counter1_->code, &value1, counter2_->code, &value2)) < 0 ||
902 genRead != genStart_)
904 CmiPrintf("genRead %d genStart_ %d counter1 %ld counter2 %ld\n",
905 genRead, genStart_, value1, value2);
907 CmiAbort("ERROR: read_counters() in endExecute\n");
911 "%d/%d DEBUG: endExecute genRead %d Time %f counter1 %d counter2 %ld\n",
912 CmiMyPe(), CmiNumPes(), genRead, t-startEP_, value1, value2));
915 CpvAccess(_logPool)->setEp(execEP_, counter1_->index, value1,
916 counter2_->index, value2, t-startEP_);
917 if (!switchByPhase_) { switchCounters(); }
921 //! begin/end idle time for this pe
922 void TraceCounter::beginIdle(double curWallTime) {
923 if (traceOn_) { startIdle_ = TraceTimer(curWallTime); }
926 //! begin/end idle time for this pe
927 void TraceCounter::endIdle(double curWallTime) {
928 if (traceOn_) { idleTime_ += TraceTimer(curWallTime)-startIdle_; }
931 //! begin/end the process of packing a message (to send)
932 void TraceCounter::beginPack()
934 DEBUGF(("%d/%d DEBUG: beginPack\n", CmiMyPe(), CmiNumPes()));
936 // beginPack/endPack can get called between beginExecute/endExecute
937 // and can't have nested counter reads on certain architectures and on
938 // on those architectures the time to call stop/start_counters can be
942 //! begin/end the process of packing a message (to send)
943 void TraceCounter::endPack() {
944 DEBUGF(("%d/%d DEBUG: endPack\n", CmiMyPe(), CmiNumPes()));
946 // beginPack/endPack can get called between beginExecute/endExecute
947 // and can't have nested counter reads on certain architectures and on
948 // on those architectures the time to call stop/start_counters can be
952 //! begin/end the process of unpacking a message (can occur before calling
953 //! a entry point or during an entry point when
954 void TraceCounter::beginUnpack() {
955 DEBUGF(("%d/%d DEBUG: beginUnpack\n", CmiMyPe(), CmiNumPes()));
957 // beginUnpack/endUnpack can get called between beginExecute/endExecute
958 // and can't have nested counter reads on certain architectures and on
959 // on those architectures the time to call stop/start_counters can be
963 //! begin/end the process of unpacking a message (can occur before calling
964 //! a entry point or during an entry point when
965 void TraceCounter::endUnpack() {
966 DEBUGF(("%d/%d DEBUG: endUnpack\n", CmiMyPe(), CmiNumPes()));
968 // beginUnpack/endUnpack can get called between beginExecute/endExecute
969 // and can't have nested counter reads on certain architectures and on
970 // on those architectures the time to call stop/start_counters can be
974 //! begin/end of execution
975 void TraceCounter::beginComputation()
979 //! end computation, do a reduction here in hopes that it finishes before
980 //! traceClose called and the program exits
981 void TraceCounter::endComputation() {
982 CpvAccess(_logPool)->doReduction(-1, idleTime_);
985 //! clear all data collected for entry points
986 void TraceCounter::traceClearEps() {
987 CpvAccess(_logPool)->clearEps();
990 //! write the summary sts file for this trace
991 void TraceCounter::traceWriteSts() {
992 if (traceOn_ && !noLog_) {
993 if (CmiMyPe()==0) { CpvAccess(_logPool)->writeSts(); }
997 //! do any clean-up necessary for tracing
998 void TraceCounter::traceClose()
1001 if (overview_) { endOverview(); }
1004 CpvAccess(_logPool)->write();
1006 CpvAccess(_logPool)->writeSts();
1007 CmiPrintf("TraceCounter dirty, writing results\n");
1013 if (CpvAccess(_logPool)!=NULL) {
1014 CkpvAccess(_trace)->endComputation();
1015 delete CpvAccess(_logPool);
1016 CpvAccess(_logPool) = NULL;
1020 //! start/stop the overall counting ov eps (don't write to logCount,
1021 //! just print to screen
1022 void TraceCounter::beginOverview()
1024 DEBUGF(("%d/%d DEBUG: beginOverview\n",
1025 CmiMyPe(), CmiNumPes()));
1026 if ((genStart_=start_counters(counter1_->code, counter2_->code))<0)
1028 CmiPrintf("genStart=%d counter1=%d counter2=%d\n",
1029 genStart_, counter1_->code, counter2_->code);
1030 CmiAbort("ERROR: start_counters() in beginOverview\n");
1032 startEP_=TraceTimer();
1033 DEBUGF(("%d/%d DEBUG: beginOverview\n", CmiMyPe(), CmiNumPes()));
1037 void TraceCounter::endOverview()
1039 DEBUGF(("%d/%d DEBUG: endOverview\n", CmiMyPe(), CmiNumPes()));
1041 double t = TraceTimer();
1042 int _numEntries=_entryTable.size();
1044 long long value1 = 0, value2 = 0;
1046 if ((genRead=read_counters(counter1_->code, &value1, counter2_->code, &value2)) < 0 ||
1047 genRead != genStart_)
1049 CmiPrintf("genRead %d genStart_ %d counter1 %ld counter2 %ld\n",
1050 genRead, genStart_, value1, value2);
1052 CmiAbort("ERROR: read_counters() in endOverview\n");
1056 "%d/%d DEBUG: endOverview genRead %d Time %f counter1 %ld counter2 %ld\n",
1057 CmiMyPe(), CmiNumPes(), genRead, t-startEP_, value1, value2));
1060 CpvAccess(_logPool)->setEp(_numEntries,
1061 counter1_->index, value1,
1062 counter2_->index, value2,
1065 "%d/%d OVERVIEW phase%d Time(us) %f %s %ld %s %ld Idle(us) %f"
1066 " (overflow? MAX=%ld)\n",
1067 CmiMyPe(), CmiNumPes(), phase_, (t-startEP_)*1e6, counter1_->arg, value1,
1068 counter2_->arg, value2, idleTime_*1e6, MAXLONGLONG));
1069 // this is phase boundary anyway, so switch counters
1073 //! switch counters by whatever switching strategy
1074 void TraceCounter::switchCounters()
1076 static bool first = true;
1077 if (switchRandom_) {
1079 if (first) { first = false; srand(TraceTimer()*1e6); }
1080 int counter1Change =
1081 (int) (rand() / (double) 0x7FFF * counter1Sz_ + 0.5); // 2^15-1
1082 int counter2Change =
1083 (int) (rand() / (double) 0x7FFF * counter2Sz_ + 0.5); // 2^15-1
1085 DEBUGF(("change %d/%d %d/%d\n",
1086 counter1Change, counter1Sz_,
1087 counter2Change, counter2Sz_));
1089 if (counter1Change > counter1Sz_) { counter1Change = counter1Sz_; }
1090 if (counter2Change > counter2Sz_) { counter2Change = counter2Sz_; }
1091 for (i=0; i<counter1Change; i++) { counter1_ = counter1_->next; }
1092 for (i=0; i<counter2Change; i++) { counter2_ = counter2_->next; }
1095 counter1_ = counter1_->next;
1096 counter2_ = counter2_->next;
1099 DEBUGF(("%d/%d New counters are %s %s\n",
1100 CmiMyPe(), CmiNumPes(), counter1_->arg, counter2_->arg));
1104 //! add the argument parameters to the linked list of args choices
1105 void TraceCounter::registerArg(CounterArg* arg)
1107 if (firstArg_ == NULL) {
1108 firstArg_ = lastArg_ = arg;
1109 argStrSize_ = strlen(arg->arg);
1112 // check to see if any redundancy
1113 CounterArg* check = firstArg_;
1114 while (check != NULL) {
1115 if (strcmp(check->arg, arg->arg)==0 || check->code == arg->code) {
1117 CmiPrintf("Two args with same name %s or code %d\n",
1118 arg->arg, arg->code);
1120 CmiAbort("TraceCounter::registerArg()\n");
1122 check = check->next;
1125 lastArg_->next = arg;
1127 int len = strlen(arg->arg);
1128 if (len > argStrSize_) { argStrSize_ = len; }
1132 //! see if the arg (str or code) matches any in the linked list of choices
1133 //! and sets arg->code to the SGI code
1134 //! return true if arg matches, false otherwise
1135 bool TraceCounter::matchArg(CounterArg* arg)
1137 bool match = false; // will be set to true if arg matches
1138 CounterArg* matchArg = firstArg_; // traverse linked list
1139 int matchCode = atoi(arg->arg); // in case user specs num on commline
1140 if (matchCode == 0) {
1141 if (arg->arg[0] != '0' || arg->arg[1] != '\0') { matchCode = -1; }
1143 // DEBUGF(("%d/%d DEBUG: Matching %s or %d\n", CmiMyPe(), CmiNumPes(), arg->arg, matchCode));
1144 while (matchArg != NULL && !match) {
1145 // DEBUGF(("%d/%d DEBUG: Examining %d %s\n", CmiMyPe(), CmiNumPes(), matchArg->code, matchArg->arg));
1146 if (strcmp(matchArg->arg, arg->arg)==0) {
1148 arg->code = matchArg->code;
1149 arg->desc = matchArg->desc;
1151 else if (matchArg->code == matchCode) {
1153 arg->code = matchArg->code;
1154 arg->arg = matchArg->arg;
1155 arg->desc = matchArg->desc;
1157 matchArg = matchArg->next;
1159 // DEBUGF(("%d/%d DEBUG: Match = %d\n", CmiMyPe(), CmiNumPes(), match));
1163 //! print out usage argument
1164 void TraceCounter::usage() {
1166 "ERROR: You've linked with '-tracemode counter', so you must specify\n"
1167 " the +counters <counters> option followed by any of the \n"
1168 " following optional command line options.\n"
1170 "REQUIRED: +counters <counter>\n"
1172 " +counters <counters>: Where <counters> is comma delimited list\n"
1173 " of valid counters. Type '+count-help' to\n"
1174 " get a list of valid counters.\n"
1176 "OPTIONAL: [+count-overview] [+count-switchrandom] [+switchbyphase]\n"
1178 " +count-overview: Collect counter values between start/stop\n"
1179 " of the program (or traceBegin/traceEnd if\n"
1180 " user marked events are on [see Performance\n"
1181 " Counter section of the Charm++ manual]).\n"
1182 " Normal operation collects counter values\n"
1183 " between the stop/start of Charm++ entry\n"
1185 " +count-switchrandom: Counters will switch randomly between\n"
1186 " each event instead of in the order\n"
1187 " specified by the <counters> arg.\n"
1188 " +count-switchbyphase: Counters will switch not every EP call,\n"
1189 " but only in between phases (between each\n"
1190 " traceBegin/traceEnd call).\n"
1191 " +count-nolog: Don't write any log files.\n"
1193 "See the Performance Counter section of the Charm++ manual for\n"
1194 "examples of different options.\n"
1198 //! print out all arguments in the linked-list of choices
1199 void TraceCounter::printHelp()
1202 "Specify one of the following (code or str) after +counters:\n\n"
1206 // create a format so that all the str line up
1208 snprintf(format, 64, " %%2d %%-%ds %%s\n", argStrSize_);
1210 CounterArg* help = firstArg_;
1211 while (help != NULL) {
1212 CmiPrintf(format, help->code, help->arg, help->desc);
1217 #else /* not implemented */
1218 void _createTracecounter(char **argv)
1221 #endif // CMK_ORIGIN2000