2 /*--------------------------------------------------------------------*/
3 /*--- Callgrind ---*/
4 /*--- main.c ---*/
5 /*--------------------------------------------------------------------*/
7 /*
8 This file is part of Callgrind, a Valgrind tool for call graph
9 profiling programs.
11 Copyright (C) 2002-2017, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
13 This tool is derived from and contains code from Cachegrind
14 Copyright (C) 2002-2017 Nicholas Nethercote (njn@valgrind.org)
16 This program is free software; you can redistribute it and/or
17 modify it under the terms of the GNU General Public License as
18 published by the Free Software Foundation; either version 2 of the
19 License, or (at your option) any later version.
21 This program is distributed in the hope that it will be useful, but
22 WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 General Public License for more details.
26 You should have received a copy of the GNU General Public License
27 along with this program; if not, see <http://www.gnu.org/licenses/>.
29 The GNU General Public License is contained in the file COPYING. */
32 #include "config.h"
33 #include "callgrind.h"
34 #include "global.h"
36 #include "pub_tool_threadstate.h"
37 #include "pub_tool_gdbserver.h"
38 #include "pub_tool_transtab.h" // VG_(discard_translations_safely)
40 #include "cg_branchpred.c"
42 /*------------------------------------------------------------*/
43 /*--- Global variables ---*/
44 /*------------------------------------------------------------*/
46 /* for all threads */
47 CommandLineOptions CLG_(clo);
48 Statistics CLG_(stat);
49 Bool CLG_(instrument_state) = True; /* Instrumentation on ? */
51 /* thread and signal handler specific */
52 exec_state CLG_(current_state);
54 /* min of L1 and LL cache line sizes. This only gets set to a
55 non-zero value if we are doing cache simulation. */
56 Int CLG_(min_line_size) = 0;
59 /*------------------------------------------------------------*/
60 /*--- Statistics ---*/
61 /*------------------------------------------------------------*/
63 static void CLG_(init_statistics)(Statistics* s)
65 s->call_counter = 0;
66 s->jcnd_counter = 0;
67 s->jump_counter = 0;
68 s->rec_call_counter = 0;
69 s->ret_counter = 0;
70 s->bb_executions = 0;
72 s->context_counter = 0;
73 s->bb_retranslations = 0;
75 s->distinct_objs = 0;
76 s->distinct_files = 0;
77 s->distinct_fns = 0;
78 s->distinct_contexts = 0;
79 s->distinct_bbs = 0;
80 s->distinct_bbccs = 0;
81 s->distinct_instrs = 0;
82 s->distinct_skips = 0;
84 s->bb_hash_resizes = 0;
85 s->bbcc_hash_resizes = 0;
86 s->jcc_hash_resizes = 0;
87 s->cxt_hash_resizes = 0;
88 s->fn_array_resizes = 0;
89 s->call_stack_resizes = 0;
90 s->fn_stack_resizes = 0;
92 s->full_debug_BBs = 0;
93 s->file_line_debug_BBs = 0;
94 s->fn_name_debug_BBs = 0;
95 s->no_debug_BBs = 0;
96 s->bbcc_lru_misses = 0;
97 s->jcc_lru_misses = 0;
98 s->cxt_lru_misses = 0;
99 s->bbcc_clones = 0;
103 /*------------------------------------------------------------*/
104 /*--- Simple callbacks (not cache simulator) ---*/
105 /*------------------------------------------------------------*/
107 VG_REGPARM(1)
108 static void log_global_event(InstrInfo* ii)
110 ULong* cost_Bus;
112 CLG_DEBUG(6, "log_global_event: Ir %#lx/%u\n",
113 CLG_(bb_base) + ii->instr_offset, ii->instr_size);
115 if (!CLG_(current_state).collect) return;
117 CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BUS))>0 );
119 CLG_(current_state).cost[ fullOffset(EG_BUS) ]++;
121 if (CLG_(current_state).nonskipped)
122 cost_Bus = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BUS);
123 else
124 cost_Bus = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BUS];
125 cost_Bus[0]++;
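/* Illustrative sketch (hypothetical ex_* names, not used by Callgrind): the
   attribution pattern shared by log_global_event above and the two branch
   handlers below. Each event is booked twice: once into the current
   execution state, and once either onto the enclosing non-skipped
   function's cost array (when inside skipped functions) or onto the
   per-instruction slot of the current BB. Note the real code uses
   fullOffset() for the function-level array but the per-eventset offset
   for the instruction-level slot; this sketch simplifies that. */
static ULong* ex_pick_slot(ULong* fn_level_costs,     /* NULL if not skipped */
                           ULong* bb_cost_base,
                           UInt   instr_cost_offset,
                           UInt   group_offset)
{
   if (fn_level_costs)
      return fn_level_costs + group_offset;           /* function level    */
   return bb_cost_base + instr_cost_offset + group_offset; /* instr level  */
}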
129 /* For branches, we consult two different predictors, one which
130 predicts taken/untaken for conditional branches, and the other
131 which predicts the branch target address for indirect branches
132 (jump-to-register style ones). */
134 static VG_REGPARM(2)
135 void log_cond_branch(InstrInfo* ii, Word taken)
137 Bool miss;
138 Int fullOffset_Bc;
139 ULong* cost_Bc;
141 CLG_DEBUG(6, "log_cond_branch: Ir %#lx, taken %ld\n",
142 CLG_(bb_base) + ii->instr_offset, taken);
144 miss = 1 & do_cond_branch_predict(CLG_(bb_base) + ii->instr_offset, taken);
146 if (!CLG_(current_state).collect) return;
148 CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BC))>0 );
150 if (CLG_(current_state).nonskipped)
151 cost_Bc = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BC);
152 else
153 cost_Bc = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BC];
155 fullOffset_Bc = fullOffset(EG_BC);
156 CLG_(current_state).cost[ fullOffset_Bc ]++;
157 cost_Bc[0]++;
158 if (miss) {
159 CLG_(current_state).cost[ fullOffset_Bc+1 ]++;
160 cost_Bc[1]++;
164 static VG_REGPARM(2)
165 void log_ind_branch(InstrInfo* ii, UWord actual_dst)
167 Bool miss;
168 Int fullOffset_Bi;
169 ULong* cost_Bi;
171 CLG_DEBUG(6, "log_ind_branch: Ir %#lx, dst %#lx\n",
172 CLG_(bb_base) + ii->instr_offset, actual_dst);
174 miss = 1 & do_ind_branch_predict(CLG_(bb_base) + ii->instr_offset, actual_dst);
176 if (!CLG_(current_state).collect) return;
178 CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BI))>0 );
180 if (CLG_(current_state).nonskipped)
181 cost_Bi = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BI);
182 else
183 cost_Bi = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BI];
185 fullOffset_Bi = fullOffset(EG_BI);
186 CLG_(current_state).cost[ fullOffset_Bi ]++;
187 cost_Bi[0]++;
188 if (miss) {
189 CLG_(current_state).cost[ fullOffset_Bi+1 ]++;
190 cost_Bi[1]++;
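/* Illustrative stand-in (hypothetical; the real predictors live in
   cg_branchpred.c, included above, and are more elaborate): the two
   handlers above only need a predict-and-update primitive that returns 1
   on a mispredict. A minimal last-outcome predictor for conditional
   branches, indexed by a hash of the branch address, could look like: */
static UWord ex_last_outcome[256];     /* 1 bit of history per table entry */

static UWord ex_cond_predict(Addr instr_addr, Word taken)
{
   UWord idx  = (instr_addr >> 2) & 0xFF;      /* hash the branch address */
   UWord miss = (ex_last_outcome[idx] != (UWord)(taken & 1));
   ex_last_outcome[idx] = (UWord)(taken & 1);  /* remember last outcome */
   return miss;                                /* 1 iff mispredicted */
}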
194 /*------------------------------------------------------------*/
195 /*--- Instrumentation structures and event queue handling ---*/
196 /*------------------------------------------------------------*/
198 /* Maintain an ordered list of memory events which are outstanding, in
199 the sense that no IR has yet been generated to do the relevant
200 helper calls. The BB is scanned top to bottom and memory events
201 are added to the end of the list, merging with the most recent
202 notified event where possible (Dw immediately following Dr and
203 having the same size and EA can be merged).
205 This merging is done so that for architectures which have
206 load-op-store instructions (x86, amd64), the insn is treated as if
207 it makes just one memory reference (a modify), rather than two (a
208 read followed by a write at the same address).
210 At various points the list will need to be flushed, that is, IR
211 generated from it. That must happen before any possible exit from
212 the block (the end, or an IRStmt_Exit). Flushing also takes place
213 when there is no space to add a new event.
215 If we require the simulation statistics to be up to date with
216 respect to possible memory exceptions, then the list would have to
217 be flushed before each memory reference. That would however lose
218 performance by inhibiting event-merging during flushing.
220 Flushing the list consists of walking it start to end and emitting
221 instrumentation IR for each event, in the order in which they
222 appear. It may be possible to emit a single call for two adjacent
223 events in order to reduce the number of helper function calls made.
224 For example, it could well be profitable to handle two adjacent Ir
225 events with a single helper call. */
227 typedef
228 IRExpr
229 IRAtom;
231 typedef
232 enum {
233 Ev_Ir, // Instruction read
234 Ev_Dr, // Data read
235 Ev_Dw, // Data write
236 Ev_Dm, // Data modify (read then write)
237 Ev_Bc, // branch conditional
238 Ev_Bi, // branch indirect (to unknown destination)
239 Ev_G // Global bus event
241 EventTag;
243 typedef
244 struct {
245 EventTag tag;
246 InstrInfo* inode;
247 union {
248 struct {
249 } Ir;
250 struct {
251 IRAtom* ea;
252 Int szB;
253 } Dr;
254 struct {
255 IRAtom* ea;
256 Int szB;
257 } Dw;
258 struct {
259 IRAtom* ea;
260 Int szB;
261 } Dm;
262 struct {
263 IRAtom* taken; /* :: Ity_I1 */
264 } Bc;
265 struct {
266 IRAtom* dst;
267 } Bi;
268 struct {
269 } G;
270 } Ev;
272 Event;
274 static void init_Event ( Event* ev ) {
275 VG_(memset)(ev, 0, sizeof(Event));
278 static IRAtom* get_Event_dea ( Event* ev ) {
279 switch (ev->tag) {
280 case Ev_Dr: return ev->Ev.Dr.ea;
281 case Ev_Dw: return ev->Ev.Dw.ea;
282 case Ev_Dm: return ev->Ev.Dm.ea;
283 default: tl_assert(0);
287 static Int get_Event_dszB ( Event* ev ) {
288 switch (ev->tag) {
289 case Ev_Dr: return ev->Ev.Dr.szB;
290 case Ev_Dw: return ev->Ev.Dw.szB;
291 case Ev_Dm: return ev->Ev.Dm.szB;
292 default: tl_assert(0);
297 /* Up to this many unnotified events are allowed. Number is
298 arbitrary. Larger numbers allow more event merging to occur, but
299 potentially induce more spilling due to extending live ranges of
300 address temporaries. */
301 #define N_EVENTS 16
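/* Illustrative sketch (hypothetical ex_* names; Callgrind's real queue
   stores IRAtom* addresses and lives in ClgState below, and the merge
   check also requires the same instruction, i.e. the same inode): the
   merging rule from the comment above. A write immediately following a
   read of the same size and address becomes a single modify event
   instead of a second queue entry. */
typedef struct { EventTag tag; Int szB; Addr ea; } ExEvent;
static ExEvent ex_queue[N_EVENTS];
static Int     ex_used = 0;

static void ex_add_write(Int szB, Addr ea)
{
   if (ex_used > 0) {
      ExEvent* last = &ex_queue[ex_used-1];
      if (last->tag == Ev_Dr && last->szB == szB && last->ea == ea) {
         last->tag = Ev_Dm;       /* Dr + Dw at same EA/size -> one Dm */
         return;
      }
   }
   if (ex_used == N_EVENTS)       /* queue full: real code flushes here */
      ex_used = 0;
   ex_queue[ex_used].tag = Ev_Dw;
   ex_queue[ex_used].szB = szB;
   ex_queue[ex_used].ea  = ea;
   ex_used++;
}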
304 /* A struct which holds all the running state during instrumentation.
305 Mostly to avoid passing loads of parameters everywhere. */
306 typedef struct {
307 /* The current outstanding-memory-event list. */
308 Event events[N_EVENTS];
309 Int events_used;
311 /* The array of InstrInfo's is part of BB struct. */
312 BB* bb;
314 /* BB seen before (i.e. re-instrumentation) */
315 Bool seen_before;
317 /* Number of InstrInfo bins 'used' so far. */
318 UInt ii_index;
320 // current offset of guest instructions from BB start
321 UInt instr_offset;
323 /* The output SB being constructed. */
324 IRSB* sbOut;
325 } ClgState;
328 static void showEvent ( Event* ev )
330 switch (ev->tag) {
331 case Ev_Ir:
332 VG_(printf)("Ir (InstrInfo %p) at +%u\n",
333 ev->inode, ev->inode->instr_offset);
334 break;
335 case Ev_Dr:
336 VG_(printf)("Dr (InstrInfo %p) at +%u %d EA=",
337 ev->inode, ev->inode->instr_offset, ev->Ev.Dr.szB);
338 ppIRExpr(ev->Ev.Dr.ea);
339 VG_(printf)("\n");
340 break;
341 case Ev_Dw:
342 VG_(printf)("Dw (InstrInfo %p) at +%u %d EA=",
343 ev->inode, ev->inode->instr_offset, ev->Ev.Dw.szB);
344 ppIRExpr(ev->Ev.Dw.ea);
345 VG_(printf)("\n");
346 break;
347 case Ev_Dm:
348 VG_(printf)("Dm (InstrInfo %p) at +%u %d EA=",
349 ev->inode, ev->inode->instr_offset, ev->Ev.Dm.szB);
350 ppIRExpr(ev->Ev.Dm.ea);
351 VG_(printf)("\n");
352 break;
353 case Ev_Bc:
354 VG_(printf)("Bc %p GA=", ev->inode);
355 ppIRExpr(ev->Ev.Bc.taken);
356 VG_(printf)("\n");
357 break;
358 case Ev_Bi:
359 VG_(printf)("Bi %p DST=", ev->inode);
360 ppIRExpr(ev->Ev.Bi.dst);
361 VG_(printf)("\n");
362 break;
363 case Ev_G:
364 VG_(printf)("G %p\n", ev->inode);
365 break;
366 default:
367 tl_assert(0);
368 break;
372 /* Generate code for all outstanding memory events, and mark the queue
373 empty. Code is generated into clgs->sbOut, and this activity
374 'consumes' slots in clgs->bb. */
376 static void flushEvents ( ClgState* clgs )
378 Int i, regparms, inew;
379 const HChar* helperName;
380 void* helperAddr;
381 IRExpr** argv;
382 IRExpr* i_node_expr;
383 IRDirty* di;
384 Event* ev;
385 Event* ev2;
386 Event* ev3;
388 if (!clgs->seen_before) {
389 // extend event sets as needed
390 // available sets: D0 Dr
391 for(i=0; i<clgs->events_used; i++) {
392 ev = &clgs->events[i];
393 switch(ev->tag) {
394 case Ev_Ir:
395 // The Ir event is always first for a guest instruction
396 CLG_ASSERT(ev->inode->eventset == 0);
397 ev->inode->eventset = CLG_(sets).base;
398 break;
399 case Ev_Dr:
400 // extend event set by Dr counters
401 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
402 EG_DR);
403 break;
404 case Ev_Dw:
405 case Ev_Dm:
406 // extend event set by Dw counters
407 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
408 EG_DW);
409 break;
410 case Ev_Bc:
411 // extend event set by Bc counters
412 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
413 EG_BC);
414 break;
415 case Ev_Bi:
416 // extend event set by Bi counters
417 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
418 EG_BI);
419 break;
420 case Ev_G:
421 // extend event set by Bus counter
422 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
423 EG_BUS);
424 break;
425 default:
426 tl_assert(0);
431 for(i = 0; i < clgs->events_used; i = inew) {
433 helperName = NULL;
434 helperAddr = NULL;
435 argv = NULL;
436 regparms = 0;
438 /* generate IR to notify event i and possibly the ones
439 immediately following it. */
440 tl_assert(i >= 0 && i < clgs->events_used);
442 ev = &clgs->events[i];
443 ev2 = ( i < clgs->events_used-1 ? &clgs->events[i+1] : NULL );
444 ev3 = ( i < clgs->events_used-2 ? &clgs->events[i+2] : NULL );
446 CLG_DEBUGIF(5) {
447 VG_(printf)(" flush ");
448 showEvent( ev );
451 i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );
453 /* Decide on helper fn to call and args to pass it, and advance
454 i appropriately.
455 Dm events have the same effect as Dw events */
456 switch (ev->tag) {
457 case Ev_Ir:
458 /* Merge an Ir with a following Dr. */
459 if (ev2 && ev2->tag == Ev_Dr) {
460 /* Why is this true? It's because we're merging an Ir
461 with a following Dr. The Ir derives from the
462 instruction's IMark and the Dr from data
463 references which follow it. In short it holds
464 because each insn starts with an IMark, hence an
465 Ev_Ir, and so these Dr must pertain to the
466 immediately preceding Ir. Same applies to analogous
467 assertions in the subsequent cases. */
468 tl_assert(ev2->inode == ev->inode);
469 helperName = CLG_(cachesim).log_1I1Dr_name;
470 helperAddr = CLG_(cachesim).log_1I1Dr;
471 argv = mkIRExprVec_3( i_node_expr,
472 get_Event_dea(ev2),
473 mkIRExpr_HWord( get_Event_dszB(ev2) ) );
474 regparms = 3;
475 inew = i+2;
477 /* Merge an Ir with a following Dw/Dm. */
478 else
479 if (ev2 && (ev2->tag == Ev_Dw || ev2->tag == Ev_Dm)) {
480 tl_assert(ev2->inode == ev->inode);
481 helperName = CLG_(cachesim).log_1I1Dw_name;
482 helperAddr = CLG_(cachesim).log_1I1Dw;
483 argv = mkIRExprVec_3( i_node_expr,
484 get_Event_dea(ev2),
485 mkIRExpr_HWord( get_Event_dszB(ev2) ) );
486 regparms = 3;
487 inew = i+2;
489 /* Merge an Ir with two following Irs. */
490 else
491 if (ev2 && ev3 && ev2->tag == Ev_Ir && ev3->tag == Ev_Ir) {
492 helperName = CLG_(cachesim).log_3I0D_name;
493 helperAddr = CLG_(cachesim).log_3I0D;
494 argv = mkIRExprVec_3( i_node_expr,
495 mkIRExpr_HWord( (HWord)ev2->inode ),
496 mkIRExpr_HWord( (HWord)ev3->inode ) );
497 regparms = 3;
498 inew = i+3;
500 /* Merge an Ir with one following Ir. */
501 else
502 if (ev2 && ev2->tag == Ev_Ir) {
503 helperName = CLG_(cachesim).log_2I0D_name;
504 helperAddr = CLG_(cachesim).log_2I0D;
505 argv = mkIRExprVec_2( i_node_expr,
506 mkIRExpr_HWord( (HWord)ev2->inode ) );
507 regparms = 2;
508 inew = i+2;
510 /* No merging possible; emit as-is. */
511 else {
512 helperName = CLG_(cachesim).log_1I0D_name;
513 helperAddr = CLG_(cachesim).log_1I0D;
514 argv = mkIRExprVec_1( i_node_expr );
515 regparms = 1;
516 inew = i+1;
518 break;
519 case Ev_Dr:
520 /* Data read or modify */
521 helperName = CLG_(cachesim).log_0I1Dr_name;
522 helperAddr = CLG_(cachesim).log_0I1Dr;
523 argv = mkIRExprVec_3( i_node_expr,
524 get_Event_dea(ev),
525 mkIRExpr_HWord( get_Event_dszB(ev) ) );
526 regparms = 3;
527 inew = i+1;
528 break;
529 case Ev_Dw:
530 case Ev_Dm:
531 /* Data write */
532 helperName = CLG_(cachesim).log_0I1Dw_name;
533 helperAddr = CLG_(cachesim).log_0I1Dw;
534 argv = mkIRExprVec_3( i_node_expr,
535 get_Event_dea(ev),
536 mkIRExpr_HWord( get_Event_dszB(ev) ) );
537 regparms = 3;
538 inew = i+1;
539 break;
540 case Ev_Bc:
541 /* Conditional branch */
542 helperName = "log_cond_branch";
543 helperAddr = &log_cond_branch;
544 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
545 regparms = 2;
546 inew = i+1;
547 break;
548 case Ev_Bi:
549 /* Branch to an unknown destination */
550 helperName = "log_ind_branch";
551 helperAddr = &log_ind_branch;
552 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
553 regparms = 2;
554 inew = i+1;
555 break;
556 case Ev_G:
557 /* Global bus event (CAS, LOCK-prefix, LL-SC, etc) */
558 helperName = "log_global_event";
559 helperAddr = &log_global_event;
560 argv = mkIRExprVec_1( i_node_expr );
561 regparms = 1;
562 inew = i+1;
563 break;
564 default:
565 tl_assert(0);
568 CLG_DEBUGIF(5) {
569 if (inew > i+1) {
570 VG_(printf)(" merge ");
571 showEvent( ev2 );
573 if (inew > i+2) {
574 VG_(printf)(" merge ");
575 showEvent( ev3 );
577 if (helperAddr)
578 VG_(printf)(" call %s (%p)\n",
579 helperName, helperAddr);
582 /* helper could be unset depending on the simulator used */
583 if (helperAddr == 0) continue;
585 /* Add the helper. */
586 tl_assert(helperName);
587 tl_assert(helperAddr);
588 tl_assert(argv);
589 di = unsafeIRDirty_0_N( regparms,
590 helperName, VG_(fnptr_to_fnentry)( helperAddr ),
591 argv );
592 addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
595 clgs->events_used = 0;
598 static void addEvent_Ir ( ClgState* clgs, InstrInfo* inode )
600 Event* evt;
601 tl_assert(clgs->seen_before || (inode->eventset == 0));
602 if (!CLG_(clo).simulate_cache) return;
604 if (clgs->events_used == N_EVENTS)
605 flushEvents(clgs);
606 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
607 evt = &clgs->events[clgs->events_used];
608 init_Event(evt);
609 evt->tag = Ev_Ir;
610 evt->inode = inode;
611 clgs->events_used++;
614 static
615 void addEvent_Dr ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
617 Event* evt;
618 tl_assert(isIRAtom(ea));
619 tl_assert(datasize >= 1);
620 if (!CLG_(clo).simulate_cache) return;
621 tl_assert(datasize <= CLG_(min_line_size));
623 if (clgs->events_used == N_EVENTS)
624 flushEvents(clgs);
625 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
626 evt = &clgs->events[clgs->events_used];
627 init_Event(evt);
628 evt->tag = Ev_Dr;
629 evt->inode = inode;
630 evt->Ev.Dr.szB = datasize;
631 evt->Ev.Dr.ea = ea;
632 clgs->events_used++;
635 static
636 void addEvent_Dw ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
638 Event* evt;
639 tl_assert(isIRAtom(ea));
640 tl_assert(datasize >= 1);
641 if (!CLG_(clo).simulate_cache) return;
642 tl_assert(datasize <= CLG_(min_line_size));
644 /* Is it possible to merge this write with the preceding read? */
645 if (clgs->events_used > 0) {
646 Event* lastEvt = &clgs->events[clgs->events_used-1];
647 if ( lastEvt->tag == Ev_Dr
648 && lastEvt->Ev.Dr.szB == datasize
649 && lastEvt->inode == inode
650 && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
652 lastEvt->tag = Ev_Dm;
653 return;
657 /* No. Add as normal. */
658 if (clgs->events_used == N_EVENTS)
659 flushEvents(clgs);
660 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
661 evt = &clgs->events[clgs->events_used];
662 init_Event(evt);
663 evt->tag = Ev_Dw;
664 evt->inode = inode;
665 evt->Ev.Dw.szB = datasize;
666 evt->Ev.Dw.ea = ea;
667 clgs->events_used++;
670 static
671 void addEvent_D_guarded ( ClgState* clgs, InstrInfo* inode,
672 Int datasize, IRAtom* ea, IRAtom* guard,
673 Bool isWrite )
675 tl_assert(isIRAtom(ea));
676 tl_assert(guard);
677 tl_assert(isIRAtom(guard));
678 tl_assert(datasize >= 1);
679 if (!CLG_(clo).simulate_cache) return;
680 tl_assert(datasize <= CLG_(min_line_size));
682 /* Adding guarded memory actions and merging them with the existing
683 queue is too complex. Simply flush the queue and add this
684 action immediately. Since guarded loads and stores are pretty
685 rare, this is not thought likely to cause any noticeable
686 performance loss as a result of the loss of event-merging
687 opportunities. */
688 tl_assert(clgs->events_used >= 0);
689 flushEvents(clgs);
690 tl_assert(clgs->events_used == 0);
691 /* Same as case Ev_Dw / case Ev_Dr in flushEvents, except with guard */
692 IRExpr* i_node_expr;
693 const HChar* helperName;
694 void* helperAddr;
695 IRExpr** argv;
696 Int regparms;
697 IRDirty* di;
698 i_node_expr = mkIRExpr_HWord( (HWord)inode );
699 helperName = isWrite ? CLG_(cachesim).log_0I1Dw_name
700 : CLG_(cachesim).log_0I1Dr_name;
701 helperAddr = isWrite ? CLG_(cachesim).log_0I1Dw
702 : CLG_(cachesim).log_0I1Dr;
703 argv = mkIRExprVec_3( i_node_expr,
704 ea, mkIRExpr_HWord( datasize ) );
705 regparms = 3;
706 di = unsafeIRDirty_0_N(
707 regparms,
708 helperName, VG_(fnptr_to_fnentry)( helperAddr ),
709 argv );
710 di->guard = guard;
711 addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
714 static
715 void addEvent_Bc ( ClgState* clgs, InstrInfo* inode, IRAtom* guard )
717 Event* evt;
718 tl_assert(isIRAtom(guard));
719 tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, guard)
720 == (sizeof(RegWord)==4 ? Ity_I32 : Ity_I64));
721 if (!CLG_(clo).simulate_branch) return;
723 if (clgs->events_used == N_EVENTS)
724 flushEvents(clgs);
725 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
726 evt = &clgs->events[clgs->events_used];
727 init_Event(evt);
728 evt->tag = Ev_Bc;
729 evt->inode = inode;
730 evt->Ev.Bc.taken = guard;
731 clgs->events_used++;
734 static
735 void addEvent_Bi ( ClgState* clgs, InstrInfo* inode, IRAtom* whereTo )
737 Event* evt;
738 tl_assert(isIRAtom(whereTo));
739 tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, whereTo)
740 == (sizeof(RegWord)==4 ? Ity_I32 : Ity_I64));
741 if (!CLG_(clo).simulate_branch) return;
743 if (clgs->events_used == N_EVENTS)
744 flushEvents(clgs);
745 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
746 evt = &clgs->events[clgs->events_used];
747 init_Event(evt);
748 evt->tag = Ev_Bi;
749 evt->inode = inode;
750 evt->Ev.Bi.dst = whereTo;
751 clgs->events_used++;
754 static
755 void addEvent_G ( ClgState* clgs, InstrInfo* inode )
757 Event* evt;
758 if (!CLG_(clo).collect_bus) return;
760 if (clgs->events_used == N_EVENTS)
761 flushEvents(clgs);
762 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
763 evt = &clgs->events[clgs->events_used];
764 init_Event(evt);
765 evt->tag = Ev_G;
766 evt->inode = inode;
767 clgs->events_used++;
770 /* Initialise or check (if already seen before) an InstrInfo for next insn.
771 We can only set instr_offset/instr_size here. The required event set and
772 resulting cost offset depend on events (Ir/Dr/Dw/Dm) in guest
773 instructions. The event set is extended as required on flush of the event
774 queue (when Dm events were determined), cost offsets are determined at
775 end of BB instrumentation. */
776 static
777 InstrInfo* next_InstrInfo ( ClgState* clgs, UInt instr_size )
779 InstrInfo* ii;
780 tl_assert(clgs->ii_index >= 0);
781 tl_assert(clgs->ii_index < clgs->bb->instr_count);
782 ii = &clgs->bb->instr[ clgs->ii_index ];
784 if (clgs->seen_before) {
785 CLG_ASSERT(ii->instr_offset == clgs->instr_offset);
786 CLG_ASSERT(ii->instr_size == instr_size);
788 else {
789 ii->instr_offset = clgs->instr_offset;
790 ii->instr_size = instr_size;
791 ii->cost_offset = 0;
792 ii->eventset = 0;
795 clgs->ii_index++;
796 clgs->instr_offset += instr_size;
797 CLG_(stat).distinct_instrs++;
799 return ii;
802 // return total number of cost values needed for this BB
803 static
804 UInt update_cost_offsets( ClgState* clgs )
806 Int i;
807 InstrInfo* ii;
808 UInt cost_offset = 0;
810 CLG_ASSERT(clgs->bb->instr_count == clgs->ii_index);
811 for(i=0; i<clgs->ii_index; i++) {
812 ii = &clgs->bb->instr[i];
813 if (clgs->seen_before) {
814 CLG_ASSERT(ii->cost_offset == cost_offset);
815 } else
816 ii->cost_offset = cost_offset;
817 cost_offset += ii->eventset ? ii->eventset->size : 0;
820 return cost_offset;
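/* Worked example (hypothetical sizes): for a BB of three instructions
   whose event sets need 1, 0 and 3 counters, the loop above assigns
   cost_offset 0, 1 and 1 and returns 4: the BB's cost array packs
   [insn0.c0 | insn2.c0 insn2.c1 insn2.c2], and the middle instruction
   owns no slots at all. The same logic in isolation: */
static UInt ex_assign_offsets(const UInt* sizes, UInt* offsets, UInt n)
{
   UInt i, off = 0;
   for (i = 0; i < n; i++) {
      offsets[i] = off;       /* first cost slot of instruction i */
      off += sizes[i];        /* event-less instructions add nothing */
   }
   return off;                /* total slots needed for this BB */
}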
823 /*------------------------------------------------------------*/
824 /*--- Instrumentation ---*/
825 /*------------------------------------------------------------*/
827 #if defined(VG_BIGENDIAN)
828 # define CLGEndness Iend_BE
829 #elif defined(VG_LITTLEENDIAN)
830 # define CLGEndness Iend_LE
831 #else
832 # error "Unknown endianness"
833 #endif
835 static
836 Addr IRConst2Addr(IRConst* con)
838 Addr addr;
840 if (sizeof(RegWord) == 4) {
841 CLG_ASSERT( con->tag == Ico_U32 );
842 addr = con->Ico.U32;
844 else if (sizeof(RegWord) == 8) {
845 CLG_ASSERT( con->tag == Ico_U64 );
846 addr = con->Ico.U64;
848 else
849 VG_(tool_panic)("Callgrind: invalid Addr type");
851 return addr;
854 /* First pass over a BB to instrument, counting instructions and jumps.
855 * This is needed to know the size of the BB struct to allocate.
857 * Called from CLG_(get_bb) */
859 void CLG_(collectBlockInfo)(IRSB* sbIn,
860 /*INOUT*/ UInt* instrs,
861 /*INOUT*/ UInt* cjmps,
862 /*INOUT*/ Bool* cjmp_inverted)
864 Int i;
865 IRStmt* st;
866 Addr instrAddr =0, jumpDst;
867 UInt instrLen = 0;
868 Bool toNextInstr = False;
870 // Ist_Exit has to be ignored in preamble code, before first IMark:
871 // preamble code is added by VEX for self-modifying code, and has
872 // nothing to do with client code
873 Bool inPreamble = True;
875 if (!sbIn) return;
877 for (i = 0; i < sbIn->stmts_used; i++) {
878 st = sbIn->stmts[i];
879 if (Ist_IMark == st->tag) {
880 inPreamble = False;
882 instrAddr = st->Ist.IMark.addr;
883 instrLen = st->Ist.IMark.len;
885 (*instrs)++;
886 toNextInstr = False;
888 if (inPreamble) continue;
889 if (Ist_Exit == st->tag) {
890 jumpDst = IRConst2Addr(st->Ist.Exit.dst);
891 toNextInstr = (jumpDst == instrAddr + instrLen);
893 (*cjmps)++;
897 /* If the last instruction of the BB conditionally jumps to the next
898 * instruction (= first instruction of the next BB in memory), the jump was inverted by VEX. */
900 *cjmp_inverted = toNextInstr;
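/* Worked example (hypothetical addresses): assume the BB ends with a
   2-byte conditional jump at 0x1000 whose Ist_Exit target is 0x1002.
   Then jumpDst == instrAddr + instrLen, i.e. the exit "jumps" to the
   fall-through address, so VEX must have inverted the original
   condition, and *cjmp_inverted is set above. The test in isolation: */
static Bool ex_exit_is_inverted(Addr instr_addr, UInt instr_len, Addr jump_dst)
{
   return jump_dst == instr_addr + (Addr)instr_len;
}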
903 static
904 void addConstMemStoreStmt( IRSB* bbOut, UWord addr, UInt val, IRType hWordTy)
906 addStmtToIRSB( bbOut,
907 IRStmt_Store(CLGEndness,
908 IRExpr_Const(hWordTy == Ity_I32 ?
909 IRConst_U32( addr ) :
910 IRConst_U64( addr )),
911 IRExpr_Const(IRConst_U32(val)) ));
915 /* add helper call to setup_bbcc, with pointer to BB struct as argument
917 * precondition for setup_bbcc:
918 * - jmps_passed has number of cond.jumps passed in last executed BB
919 * - current_bbcc has a pointer to the BBCC of the last executed BB
920 * Thus, if bbcc_jmpkind is != -1 (JmpNone),
921 * current_bbcc->bb->jmp_addr
922 * gives the address of the jump source.
924 * the setup does 2 things:
925 * - trace call:
926 * Unwind own call stack, i.e. sync our ESP with real ESP
927 * This is for ESP manipulation (longjmps, C++ exception handling) and RET
928 * For CALLs or JMPs crossing objects, record the call and
929 * push an entry onto our own call stack
931 * - prepare for cache log functions:
932 * set current_bbcc to BBCC that gets the costs for this BB execution
933 * attached */
935 static
936 void addBBSetupCall(ClgState* clgs)
938 IRDirty* di;
939 IRExpr *arg1, **argv;
941 arg1 = mkIRExpr_HWord( (HWord)clgs->bb );
942 argv = mkIRExprVec_1(arg1);
943 di = unsafeIRDirty_0_N( 1, "setup_bbcc",
944 VG_(fnptr_to_fnentry)( & CLG_(setup_bbcc) ),
945 argv);
946 addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
950 static
951 IRSB* CLG_(instrument)( VgCallbackClosure* closure,
952 IRSB* sbIn,
953 const VexGuestLayout* layout,
954 const VexGuestExtents* vge,
955 const VexArchInfo* archinfo_host,
956 IRType gWordTy, IRType hWordTy )
958 Int i;
959 IRStmt* st;
960 Addr origAddr;
961 InstrInfo* curr_inode = NULL;
962 ClgState clgs;
963 UInt cJumps = 0;
964 IRTypeEnv* tyenv = sbIn->tyenv;
966 if (gWordTy != hWordTy) {
967 /* We don't currently support this case. */
968 VG_(tool_panic)("host/guest word size mismatch");
971 // No instrumentation if it is switched off
972 if (! CLG_(instrument_state)) {
973 CLG_DEBUG(5, "instrument(BB %#lx) [Instrumentation OFF]\n",
974 (Addr)closure->readdr);
975 return sbIn;
978 CLG_DEBUG(3, "+ instrument(BB %#lx)\n", (Addr)closure->readdr);
980 /* Set up SB for instrumented IR */
981 clgs.sbOut = deepCopyIRSBExceptStmts(sbIn);
983 // Copy verbatim any IR preamble preceding the first IMark
984 i = 0;
985 while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
986 addStmtToIRSB( clgs.sbOut, sbIn->stmts[i] );
987 i++;
990 // Get the first statement, and origAddr from it
991 CLG_ASSERT(sbIn->stmts_used >0);
992 CLG_ASSERT(i < sbIn->stmts_used);
993 st = sbIn->stmts[i];
994 CLG_ASSERT(Ist_IMark == st->tag);
996 origAddr = st->Ist.IMark.addr + st->Ist.IMark.delta;
997 CLG_ASSERT(origAddr == st->Ist.IMark.addr
998 + st->Ist.IMark.delta); // XXX: check no overflow
1000 /* Get BB struct (creating if necessary).
1001 * JS: The hash table is keyed with orig_addr_noredir -- important!
1002 * JW: Why? If it is because of different chasing of the redirection,
1003 * this is not needed, as chasing is switched off in callgrind */
1005 clgs.bb = CLG_(get_bb)(origAddr, sbIn, &(clgs.seen_before));
1007 addBBSetupCall(&clgs);
1009 // Set up running state
1010 clgs.events_used = 0;
1011 clgs.ii_index = 0;
1012 clgs.instr_offset = 0;
1014 for (/*use current i*/; i < sbIn->stmts_used; i++) {
1016 st = sbIn->stmts[i];
1017 CLG_ASSERT(isFlatIRStmt(st));
1019 switch (st->tag) {
1020 case Ist_NoOp:
1021 case Ist_AbiHint:
1022 case Ist_Put:
1023 case Ist_PutI:
1024 case Ist_MBE:
1025 break;
1027 case Ist_IMark: {
1028 Addr cia = st->Ist.IMark.addr + st->Ist.IMark.delta;
1029 UInt isize = st->Ist.IMark.len;
1030 CLG_ASSERT(clgs.instr_offset == cia - origAddr);
1031 // If Vex fails to decode an instruction, the size will be zero.
1032 // Pretend otherwise.
1033 if (isize == 0) isize = VG_MIN_INSTR_SZB;
1035 // Sanity-check size.
1036 tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
1037 || VG_CLREQ_SZB == isize );
1039 // Init the inode, record it as the current one.
1040 // Subsequent Dr/Dw/Dm events from the same instruction will
1041 // also use it.
1042 curr_inode = next_InstrInfo (&clgs, isize);
1044 addEvent_Ir( &clgs, curr_inode );
1045 break;
1048 case Ist_WrTmp: {
1049 IRExpr* data = st->Ist.WrTmp.data;
1050 if (data->tag == Iex_Load) {
1051 IRExpr* aexpr = data->Iex.Load.addr;
1052 // Note also, endianness info is ignored. I guess
1053 // that's not interesting.
1054 addEvent_Dr( &clgs, curr_inode,
1055 sizeofIRType(data->Iex.Load.ty), aexpr );
1057 break;
1060 case Ist_Store: {
1061 IRExpr* data = st->Ist.Store.data;
1062 IRExpr* aexpr = st->Ist.Store.addr;
1063 addEvent_Dw( &clgs, curr_inode,
1064 sizeofIRType(typeOfIRExpr(sbIn->tyenv, data)), aexpr );
1065 break;
1068 case Ist_StoreG: {
1069 IRStoreG* sg = st->Ist.StoreG.details;
1070 IRExpr* data = sg->data;
1071 IRExpr* addr = sg->addr;
1072 IRType type = typeOfIRExpr(tyenv, data);
1073 tl_assert(type != Ity_INVALID);
1074 addEvent_D_guarded( &clgs, curr_inode,
1075 sizeofIRType(type), addr, sg->guard,
1076 True/*isWrite*/ );
1077 break;
1080 case Ist_LoadG: {
1081 IRLoadG* lg = st->Ist.LoadG.details;
1082 IRType type = Ity_INVALID; /* loaded type */
1083 IRType typeWide = Ity_INVALID; /* after implicit widening */
1084 IRExpr* addr = lg->addr;
1085 typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
1086 tl_assert(type != Ity_INVALID);
1087 addEvent_D_guarded( &clgs, curr_inode,
1088 sizeofIRType(type), addr, lg->guard,
1089 False/*!isWrite*/ );
1090 break;
1093 case Ist_Dirty: {
1094 Int dataSize;
1095 IRDirty* d = st->Ist.Dirty.details;
1096 if (d->mFx != Ifx_None) {
1097 /* This dirty helper accesses memory. Collect the details. */
1098 tl_assert(d->mAddr != NULL);
1099 tl_assert(d->mSize != 0);
1100 dataSize = d->mSize;
1101 // Large (eg. 28B, 108B, 512B on x86) data-sized
1102 // instructions will be done inaccurately, but they're
1103 // very rare and this avoids errors from hitting more
1104 // than two cache lines in the simulation.
1105 if (CLG_(clo).simulate_cache && dataSize > CLG_(min_line_size))
1106 dataSize = CLG_(min_line_size);
1107 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
1108 addEvent_Dr( &clgs, curr_inode, dataSize, d->mAddr );
1109 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
1110 addEvent_Dw( &clgs, curr_inode, dataSize, d->mAddr );
1111 } else {
1112 tl_assert(d->mAddr == NULL);
1113 tl_assert(d->mSize == 0);
1115 break;
1118 case Ist_CAS: {
1119 /* We treat it as a read and a write of the location. I
1120 think that is the same behaviour as it was before IRCAS
1121 was introduced, since prior to that point, the Vex
1122 front ends would translate a lock-prefixed instruction
1123 into a (normal) read followed by a (normal) write. */
1124 Int dataSize;
1125 IRCAS* cas = st->Ist.CAS.details;
1126 CLG_ASSERT(cas->addr && isIRAtom(cas->addr));
1127 CLG_ASSERT(cas->dataLo);
1128 dataSize = sizeofIRType(typeOfIRExpr(sbIn->tyenv, cas->dataLo));
1129 if (cas->dataHi != NULL)
1130 dataSize *= 2; /* since this is a doubleword-cas */
1131 addEvent_Dr( &clgs, curr_inode, dataSize, cas->addr );
1132 addEvent_Dw( &clgs, curr_inode, dataSize, cas->addr );
1133 addEvent_G( &clgs, curr_inode );
1134 break;
1137 case Ist_LLSC: {
1138 IRType dataTy;
1139 if (st->Ist.LLSC.storedata == NULL) {
1140 /* LL */
1141 dataTy = typeOfIRTemp(sbIn->tyenv, st->Ist.LLSC.result);
1142 addEvent_Dr( &clgs, curr_inode,
1143 sizeofIRType(dataTy), st->Ist.LLSC.addr );
1144 /* flush events before LL, should help SC to succeed */
1145 flushEvents( &clgs );
1146 } else {
1147 /* SC */
1148 dataTy = typeOfIRExpr(sbIn->tyenv, st->Ist.LLSC.storedata);
1149 addEvent_Dw( &clgs, curr_inode,
1150 sizeofIRType(dataTy), st->Ist.LLSC.addr );
1151 /* I don't know whether the global-bus-lock cost should
1152 be attributed to the LL or the SC, but it doesn't
1153 really matter since they always have to be used in
1154 pairs anyway. Hence put it (quite arbitrarily) on
1155 the SC. */
1156 addEvent_G( &clgs, curr_inode );
1158 break;
1161 case Ist_Exit: {
1162 Bool guest_exit, inverted;
1164 /* VEX code generation sometimes inverts conditional branches.
1165 * As Callgrind counts (conditional) jumps, it has to correct
1166 * inversions. The heuristic is the following:
1167 * (1) Callgrind switches off SB chasing and unrolling, and
1168 * therefore it assumes that the only candidate for inversion is
1169 * the last conditional branch in an SB.
1170 * (2) inversion is assumed if the branch jumps to the address of
1171 * the next guest instruction in memory.
1172 * This heuristic is precalculated in CLG_(collectBlockInfo)().
1174 * Branching behavior is also used for branch prediction. Note that
1175 * the above heuristic is different from what Cachegrind does.
1176 * Cachegrind uses (2) for all branches. */
1178 if (cJumps+1 == clgs.bb->cjmp_count)
1179 inverted = clgs.bb->cjmp_inverted;
1180 else
1181 inverted = False;
1183 // call branch predictor only if this is a branch in guest code
1184 guest_exit = (st->Ist.Exit.jk == Ijk_Boring) ||
1185 (st->Ist.Exit.jk == Ijk_Call) ||
1186 (st->Ist.Exit.jk == Ijk_Ret);
1188 if (guest_exit) {
1189 /* Stuff to widen the guard expression to a host word, so
1190 we can pass it to the branch predictor simulation
1191 functions easily. */
1192 IRType tyW = hWordTy;
1193 IROp widen = tyW==Ity_I32 ? Iop_1Uto32 : Iop_1Uto64;
1194 IROp opXOR = tyW==Ity_I32 ? Iop_Xor32 : Iop_Xor64;
1195 IRTemp guard1 = newIRTemp(clgs.sbOut->tyenv, Ity_I1);
1196 IRTemp guardW = newIRTemp(clgs.sbOut->tyenv, tyW);
1197 IRTemp guard = newIRTemp(clgs.sbOut->tyenv, tyW);
1198 IRExpr* one = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
1199 : IRExpr_Const(IRConst_U64(1));
1201 /* Widen the guard expression. */
1202 addStmtToIRSB( clgs.sbOut,
1203 IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
1204 addStmtToIRSB( clgs.sbOut,
1205 IRStmt_WrTmp( guardW,
1206 IRExpr_Unop(widen,
1207 IRExpr_RdTmp(guard1))) );
1208 /* If the exit is inverted, invert the sense of the guard. */
1209 addStmtToIRSB(
1210 clgs.sbOut,
1211 IRStmt_WrTmp(
1212 guard,
1213 inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
1214 : IRExpr_RdTmp(guardW)
1216 /* And post the event. */
1217 addEvent_Bc( &clgs, curr_inode, IRExpr_RdTmp(guard) );
1220 /* We may never reach the next statement, so need to flush
1221 all outstanding transactions now. */
1222 flushEvents( &clgs );
1224 CLG_ASSERT(clgs.ii_index>0);
1225 if (!clgs.seen_before) {
1226 ClgJumpKind jk;
1228 if (st->Ist.Exit.jk == Ijk_Call) jk = jk_Call;
1229 else if (st->Ist.Exit.jk == Ijk_Ret) jk = jk_Return;
1230 else {
1231 if (IRConst2Addr(st->Ist.Exit.dst) ==
1232 origAddr + curr_inode->instr_offset + curr_inode->instr_size)
1233 jk = jk_None;
1234 else
1235 jk = jk_Jump;
1238 clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;
1239 clgs.bb->jmp[cJumps].jmpkind = jk;
1242 /* Update global variable jmps_passed before the jump
1243 * A correction is needed if VEX inverted the last jump condition */
1245 UInt val = inverted ? cJumps+1 : cJumps;
1246 addConstMemStoreStmt( clgs.sbOut,
1247 (UWord) &CLG_(current_state).jmps_passed,
1248 val, hWordTy);
1249 cJumps++;
1251 break;
1254 default:
1255 tl_assert(0);
1256 break;
1259 /* Copy the original statement */
1260 addStmtToIRSB( clgs.sbOut, st );
1262 CLG_DEBUGIF(5) {
1263 VG_(printf)(" pass ");
1264 ppIRStmt(st);
1265 VG_(printf)("\n");
1269 /* Deal with branches to unknown destinations. Except ignore ones
1270 which are function returns as we assume the return stack
1271 predictor never mispredicts. */
1272 if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
1273 if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
1274 switch (sbIn->next->tag) {
1275 case Iex_Const:
1276 break; /* boring - branch to known address */
1277 case Iex_RdTmp:
1278 /* looks like an indirect branch (branch to unknown) */
1279 addEvent_Bi( &clgs, curr_inode, sbIn->next );
1280 break;
1281 default:
1282 /* shouldn't happen - if the incoming IR is properly
1283 flattened, should only have tmp and const cases to
1284 consider. */
1285 tl_assert(0);
1289 /* At the end of the BB. Flush outstanding events. */
1290 flushEvents( &clgs );
1292 /* Update global variable jmps_passed at end of SB.
1293 * As CLG_(current_state).jmps_passed is reset to 0 in setup_bbcc,
1294 * this can be omitted if there is no conditional jump in this SB.
1295 * A correction is needed if VEX inverted the last jump condition */
1297 if (cJumps>0) {
1298 UInt jmps_passed = cJumps;
1299 if (clgs.bb->cjmp_inverted) jmps_passed--;
1300 addConstMemStoreStmt( clgs.sbOut,
1301 (UWord) &CLG_(current_state).jmps_passed,
1302 jmps_passed, hWordTy);
1304 CLG_ASSERT(clgs.bb->cjmp_count == cJumps);
1305 CLG_ASSERT(clgs.bb->instr_count == clgs.ii_index);
1307 /* Info for final exit from BB */
1309 ClgJumpKind jk;
1311 if (sbIn->jumpkind == Ijk_Call) jk = jk_Call;
1312 else if (sbIn->jumpkind == Ijk_Ret) jk = jk_Return;
1313 else {
1314 jk = jk_Jump;
1315 if ((sbIn->next->tag == Iex_Const) &&
1316 (IRConst2Addr(sbIn->next->Iex.Const.con) ==
1317 origAddr + clgs.instr_offset))
1318 jk = jk_None;
1320 clgs.bb->jmp[cJumps].jmpkind = jk;
1321 /* Instruction index of the call/ret at BB end
1322 * (it is wrong for fall-through, but does not matter) */
1323 clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;
1326 /* swap information of last exit with final exit if inverted */
1327 if (clgs.bb->cjmp_inverted) {
1328 ClgJumpKind jk;
1329 UInt instr;
1331 jk = clgs.bb->jmp[cJumps].jmpkind;
1332 clgs.bb->jmp[cJumps].jmpkind = clgs.bb->jmp[cJumps-1].jmpkind;
1333 clgs.bb->jmp[cJumps-1].jmpkind = jk;
1334 instr = clgs.bb->jmp[cJumps].instr;
1335 clgs.bb->jmp[cJumps].instr = clgs.bb->jmp[cJumps-1].instr;
1336 clgs.bb->jmp[cJumps-1].instr = instr;
1339 if (clgs.seen_before) {
1340 CLG_ASSERT(clgs.bb->cost_count == update_cost_offsets(&clgs));
1341 CLG_ASSERT(clgs.bb->instr_len == clgs.instr_offset);
1343 else {
1344 clgs.bb->cost_count = update_cost_offsets(&clgs);
1345 clgs.bb->instr_len = clgs.instr_offset;
1348 CLG_DEBUG(3, "- instrument(BB %#lx): byteLen %u, CJumps %u, CostLen %u\n",
1349 origAddr, clgs.bb->instr_len,
1350 clgs.bb->cjmp_count, clgs.bb->cost_count);
1351 if (cJumps>0) {
1352 CLG_DEBUG(3, " [ ");
1353 for (i=0;i<cJumps;i++)
1354 CLG_DEBUG(3, "%u ", clgs.bb->jmp[i].instr);
1355 CLG_DEBUG(3, "], last inverted: %s \n",
1356 clgs.bb->cjmp_inverted ? "yes":"no");
1359 return clgs.sbOut;
1362 /*--------------------------------------------------------------------*/
1363 /*--- Discarding BB info ---*/
1364 /*--------------------------------------------------------------------*/
1366 // Called when a translation is removed from the translation cache for
1367 // any reason at all: to free up space, because the guest code was
1368 // unmapped or modified, or for any arbitrary reason.
1369 static
1370 void clg_discard_superblock_info ( Addr orig_addr, VexGuestExtents vge )
1372 tl_assert(vge.n_used > 0);
1374 if (0)
1375 VG_(printf)( "discard_superblock_info: %p, %p, %llu\n",
1376 (void*)orig_addr,
1377 (void*)vge.base[0], (ULong)vge.len[0]);
1379 // Get BB info, remove from table, free BB info. Simple!
1380 // When created, the BB is keyed by the first instruction address,
1381 // (not orig_addr, but the possibly redirected address). Thus, we
1382 // use the first instruction address in vge.
1383 CLG_(delete_bb)(vge.base[0]);
1387 /*------------------------------------------------------------*/
1388 /*--- CLG_(fini)() and related function ---*/
1389 /*------------------------------------------------------------*/
1393 static void zero_thread_cost(thread_info* t)
1395 Int i;
1397 for(i = 0; i < CLG_(current_call_stack).sp; i++) {
1398 if (!CLG_(current_call_stack).entry[i].jcc) continue;
1400 /* reset call counters to current for active calls */
1401 CLG_(copy_cost)( CLG_(sets).full,
1402 CLG_(current_call_stack).entry[i].enter_cost,
1403 CLG_(current_state).cost );
1404 CLG_(current_call_stack).entry[i].jcc->call_counter = 0;
1407 CLG_(forall_bbccs)(CLG_(zero_bbcc));
1409 /* set counter for last dump */
1410 CLG_(copy_cost)( CLG_(sets).full,
1411 t->lastdump_cost, CLG_(current_state).cost );
1414 void CLG_(zero_all_cost)(Bool only_current_thread)
1416 if (VG_(clo_verbosity) > 1)
1417 VG_(message)(Vg_DebugMsg, " Zeroing costs...\n");
1419 if (only_current_thread)
1420 zero_thread_cost(CLG_(get_current_thread)());
1421 else
1422 CLG_(forall_threads)(zero_thread_cost);
1424 if (VG_(clo_verbosity) > 1)
1425 VG_(message)(Vg_DebugMsg, " ...done\n");
1428 static
1429 void unwind_thread(thread_info* t)
1431 /* unwind signal handlers */
1432 while(CLG_(current_state).sig !=0)
1433 CLG_(post_signal)(CLG_(current_tid),CLG_(current_state).sig);
1435 /* unwind regular call stack */
1436 while(CLG_(current_call_stack).sp>0)
1437 CLG_(pop_call_stack)();
1439 /* reset context and function stack for context generation */
1440 CLG_(init_exec_state)( &CLG_(current_state) );
1441 CLG_(current_fn_stack).top = CLG_(current_fn_stack).bottom;
1444 static
1445 void zero_state_cost(thread_info* t)
1447 CLG_(zero_cost)( CLG_(sets).full, CLG_(current_state).cost );
1450 void CLG_(set_instrument_state)(const HChar* reason, Bool state)
1452 if (CLG_(instrument_state) == state) {
1453 CLG_DEBUG(2, "%s: instrumentation already %s\n",
1454 reason, state ? "ON" : "OFF");
1455 return;
1457 CLG_(instrument_state) = state;
1458 CLG_DEBUG(2, "%s: Switching instrumentation %s ...\n",
1459 reason, state ? "ON" : "OFF");
1461 VG_(discard_translations_safely)( (Addr)0x1000, ~(SizeT)0xfff, "callgrind");
1463 /* reset internal state: call stacks, simulator */
1464 CLG_(forall_threads)(unwind_thread);
1465 CLG_(forall_threads)(zero_state_cost);
1466 (*CLG_(cachesim).clear)();
1468 if (VG_(clo_verbosity) > 1)
1469 VG_(message)(Vg_DebugMsg, "%s: instrumentation switched %s\n",
1470 reason, state ? "ON" : "OFF");
1473 /* helper for dump_state_togdb */
1474 static void dump_state_of_thread_togdb(thread_info* ti)
1476 static FullCost sum = 0, tmp = 0;
1477 Int t, i;
1478 BBCC *from, *to;
1479 call_entry* ce;
1480 HChar *mcost;
1482 t = CLG_(current_tid);
1483 CLG_(init_cost_lz)( CLG_(sets).full, &sum );
1484 CLG_(copy_cost_lz)( CLG_(sets).full, &tmp, ti->lastdump_cost );
1485 CLG_(add_diff_cost)( CLG_(sets).full, sum, ti->lastdump_cost,
1486 ti->states.entry[0]->cost);
1487 CLG_(copy_cost)( CLG_(sets).full, ti->lastdump_cost, tmp );
1488 mcost = CLG_(mappingcost_as_string)(CLG_(dumpmap), sum);
1489 VG_(gdb_printf)("events-%d: %s\n", t, mcost);
1490 VG_(free)(mcost);
1491 VG_(gdb_printf)("frames-%d: %d\n", t, CLG_(current_call_stack).sp);
1493 ce = 0;
1494 for(i = 0; i < CLG_(current_call_stack).sp; i++) {
1495 ce = CLG_(get_call_entry)(i);
1496 /* if this frame is skipped, we don't have counters */
1497 if (!ce->jcc) continue;
1499 from = ce->jcc->from;
1500 VG_(gdb_printf)("function-%d-%d: %s\n",t, i, from->cxt->fn[0]->name);
1501 VG_(gdb_printf)("calls-%d-%d: %llu\n",t, i, ce->jcc->call_counter);
1503 /* FIXME: EventSets! */
1504 CLG_(copy_cost)( CLG_(sets).full, sum, ce->jcc->cost );
1505 CLG_(copy_cost)( CLG_(sets).full, tmp, ce->enter_cost );
1506 CLG_(add_diff_cost)( CLG_(sets).full, sum,
1507 ce->enter_cost, CLG_(current_state).cost );
1508 CLG_(copy_cost)( CLG_(sets).full, ce->enter_cost, tmp );
1510 mcost = CLG_(mappingcost_as_string)(CLG_(dumpmap), sum);
1511 VG_(gdb_printf)("events-%d-%d: %s\n",t, i, mcost);
1512 VG_(free)(mcost);
1514 if (ce && ce->jcc) {
1515 to = ce->jcc->to;
1516 VG_(gdb_printf)("function-%d-%d: %s\n",t, i, to->cxt->fn[0]->name );
1520 /* Dump current state */
1521 static void dump_state_togdb(void)
1523 thread_info** th;
1524 int t;
1525 Int orig_tid = CLG_(current_tid);
1527 VG_(gdb_printf)("instrumentation: %s\n",
1528 CLG_(instrument_state) ? "on":"off");
1529 if (!CLG_(instrument_state)) return;
1531 VG_(gdb_printf)("executed-bbs: %llu\n", CLG_(stat).bb_executions);
1532 VG_(gdb_printf)("executed-calls: %llu\n", CLG_(stat).call_counter);
1533 VG_(gdb_printf)("distinct-bbs: %d\n", CLG_(stat).distinct_bbs);
1534 VG_(gdb_printf)("distinct-calls: %d\n", CLG_(stat).distinct_jccs);
1535 VG_(gdb_printf)("distinct-functions: %d\n", CLG_(stat).distinct_fns);
1536 VG_(gdb_printf)("distinct-contexts: %d\n", CLG_(stat).distinct_contexts);
1538 /* "events:" line. Given here because it will be dynamic in the future */
1539 HChar *evmap = CLG_(eventmapping_as_string)(CLG_(dumpmap));
1540 VG_(gdb_printf)("events: %s\n", evmap);
1541 VG_(free)(evmap);
1542 /* "part:" line (number of last part. Is 0 at start */
1543 VG_(gdb_printf)("part: %d\n", CLG_(get_dump_counter)());
1545 /* threads */
1546 th = CLG_(get_threads)();
1547 VG_(gdb_printf)("threads:");
1548 for(t=1;t<VG_N_THREADS;t++) {
1549 if (!th[t]) continue;
1550 VG_(gdb_printf)(" %d", t);
1552 VG_(gdb_printf)("\n");
1553 VG_(gdb_printf)("current-tid: %d\n", orig_tid);
1554 CLG_(forall_threads)(dump_state_of_thread_togdb);
1558 static void print_monitor_help ( void )
1560 VG_(gdb_printf) ("\n");
1561 VG_(gdb_printf) ("callgrind monitor commands:\n");
1562 VG_(gdb_printf) (" dump [<dump_hint>]\n");
1563 VG_(gdb_printf) (" dump counters\n");
1564 VG_(gdb_printf) (" zero\n");
1565 VG_(gdb_printf) (" zero counters\n");
1566 VG_(gdb_printf) (" status\n");
1567 VG_(gdb_printf) (" print status\n");
1568 VG_(gdb_printf) (" instrumentation [on|off]\n");
1569 VG_(gdb_printf) (" get/set (if on/off given) instrumentation state\n");
1570 VG_(gdb_printf) ("\n");
1573 /* return True if request recognised, False otherwise */
1574 static Bool handle_gdb_monitor_command (ThreadId tid, const HChar *req)
1576 HChar* wcmd;
1577 HChar s[VG_(strlen)(req) + 1]; /* copy for strtok_r */
1578 HChar *ssaveptr;
1580 VG_(strcpy) (s, req);
1582 wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
1583 switch (VG_(keyword_id) ("help dump zero status instrumentation",
1584 wcmd, kwd_report_duplicated_matches)) {
1585 case -2: /* multiple matches */
1586 return True;
1587 case -1: /* not found */
1588 return False;
1589 case 0: /* help */
1590 print_monitor_help();
1591 return True;
1592 case 1: { /* dump */
1593 CLG_(dump_profile)(req, False);
1594 return True;
1596 case 2: { /* zero */
1597 CLG_(zero_all_cost)(False);
1598 return True;
1601 case 3: { /* status */
1602 HChar* arg = VG_(strtok_r) (0, " ", &ssaveptr);
1603 if (arg && (VG_(strcmp)(arg, "internal") == 0)) {
1604 /* internal interface to callgrind_control */
1605 dump_state_togdb();
1606 return True;
1609 if (!CLG_(instrument_state)) {
1610 VG_(gdb_printf)("No status available as instrumentation is switched off\n");
1611 } else {
1612 // Status information to be improved ...
1613 thread_info** th = CLG_(get_threads)();
1614 Int t, tcount = 0;
1615 for(t=1;t<VG_N_THREADS;t++)
1616 if (th[t]) tcount++;
1617 VG_(gdb_printf)("%d thread(s) running.\n", tcount);
1619 return True;
1622 case 4: { /* instrumentation */
1623 HChar* arg = VG_(strtok_r) (0, " ", &ssaveptr);
1624 if (!arg) {
1625 VG_(gdb_printf)("instrumentation: %s\n",
1626 CLG_(instrument_state) ? "on":"off");
1628 else
1629 CLG_(set_instrument_state)("Command", VG_(strcmp)(arg,"off")!=0);
1630 return True;
1633 default:
1634 tl_assert(0);
1635 return False;
1639 static
1640 Bool CLG_(handle_client_request)(ThreadId tid, UWord *args, UWord *ret)
1642 if (!VG_IS_TOOL_USERREQ('C','T',args[0])
1643 && VG_USERREQ__GDB_MONITOR_COMMAND != args[0])
1644 return False;
1646 switch(args[0]) {
1647 case VG_USERREQ__DUMP_STATS:
1648 CLG_(dump_profile)("Client Request", True);
1649 *ret = 0; /* meaningless */
1650 break;
1652 case VG_USERREQ__DUMP_STATS_AT:
1654 const HChar *arg = (HChar*)args[1];
1655 HChar buf[30 + VG_(strlen)(arg)]; // large enough
1656 VG_(sprintf)(buf,"Client Request: %s", arg);
1657 CLG_(dump_profile)(buf, True);
1658 *ret = 0; /* meaningless */
1660 break;
1662 case VG_USERREQ__ZERO_STATS:
1663 CLG_(zero_all_cost)(True);
1664 *ret = 0; /* meaningless */
1665 break;
1667 case VG_USERREQ__TOGGLE_COLLECT:
1668 CLG_(current_state).collect = !CLG_(current_state).collect;
1669 CLG_DEBUG(2, "Client Request: toggled collection state to %s\n",
1670 CLG_(current_state).collect ? "ON" : "OFF");
1671 *ret = 0; /* meaningless */
1672 break;
1674 case VG_USERREQ__START_INSTRUMENTATION:
1675 CLG_(set_instrument_state)("Client Request", True);
1676 *ret = 0; /* meaningless */
1677 break;
1679 case VG_USERREQ__STOP_INSTRUMENTATION:
1680 CLG_(set_instrument_state)("Client Request", False);
1681 *ret = 0; /* meaningless */
1682 break;
1684 case VG_USERREQ__GDB_MONITOR_COMMAND: {
1685 Bool handled = handle_gdb_monitor_command (tid, (HChar*)args[1]);
1686 if (handled)
1687 *ret = 1;
1688 else
1689 *ret = 0;
1690 return handled;
1692 default:
1693 return False;
1696 return True;
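/* Client-side view (for reference; these macros come from callgrind.h,
   included above): the requests handled here are typically emitted as

      CALLGRIND_ZERO_STATS;        // -> VG_USERREQ__ZERO_STATS
      CALLGRIND_TOGGLE_COLLECT;    // -> VG_USERREQ__TOGGLE_COLLECT
      run_hot_code();              // hypothetical client function
      CALLGRIND_TOGGLE_COLLECT;
      CALLGRIND_DUMP_STATS;        // -> VG_USERREQ__DUMP_STATS

   so that only run_hot_code() is collected and then dumped. */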
1700 /* Syscall Timing. syscalltime[tid] is the time at which thread tid last
1701 started a syscall. */
1703 /* struct vki_timespec syscalltime[VG_N_THREADS];
1704 Whichever syscall we use to measure the syscall time, we convert to
1705 seconds and nanoseconds. */
1706 struct vki_timespec *syscalltime;
1707 struct vki_timespec *syscallcputime;
1710 static
1711 void collect_time (struct vki_timespec *systime, struct vki_timespec *syscputime)
1713 switch (CLG_(clo).collect_systime) {
1714 case systime_no: tl_assert (0);
1715 case systime_msec: {
1716 UInt ms_timer = VG_(read_millisecond_timer)();
1717 systime->tv_sec = ms_timer / 1000;
1718 systime->tv_nsec = (ms_timer % 1000) * 1000000L;
1719 break;
1721 case systime_usec: {
1722 struct vki_timeval tv_now;
1723 VG_(gettimeofday)(&tv_now, NULL);
1724 systime->tv_sec = tv_now.tv_sec;
1725 systime->tv_nsec = tv_now.tv_usec * 1000;
1726 break;
1728 case systime_nsec:
1729 # if defined(VGO_linux) || defined(VGO_solaris) || defined(VGO_freebsd)
1730 VG_(clock_gettime)(systime, VKI_CLOCK_MONOTONIC);
1731 VG_(clock_gettime)(syscputime, VKI_CLOCK_THREAD_CPUTIME_ID);
1733 # elif defined(VGO_darwin)
1734 tl_assert(0);
1735 # else
1736 # error "Unknown OS"
1737 # endif
1738 break;
1742 static
1743 void CLG_(pre_syscalltime)(ThreadId tid, UInt syscallno,
1744 UWord* args, UInt nArgs)
1746 collect_time(&syscalltime[tid],
1747 CLG_(clo).collect_systime == systime_nsec ? &syscallcputime[tid] : NULL);
1750 /* Returns "after - before" in the unit as specified by --collect-systime.
1751 after is supposed to be >= before, and tv_nsec must be >= 0 and < One_Second_In_Nsec. */
1752 static
1753 ULong vki_timespec_diff (struct vki_timespec after, struct vki_timespec before)
1755 vki_time_t diff_sec = after.tv_sec - before.tv_sec;
1756 long diff_nsec = after.tv_nsec - before.tv_nsec;
1757 ULong nsec_factor; // factor to convert the desired unit into nsec.
1759 if (diff_nsec < 0) {
1760 diff_sec--;
1761 diff_nsec += 1000000000ULL;
1763 switch (CLG_(clo).collect_systime) {
1764 case systime_no: tl_assert (0);
1765 case systime_msec: nsec_factor = 1000000ULL; break;
1766 case systime_usec: nsec_factor = 1000ULL; break;
1767 case systime_nsec: nsec_factor = 1ULL; break;
1768 default: tl_assert(0);
1770 return ((ULong) diff_sec * 1000000000ULL + diff_nsec) / nsec_factor;
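/* Worked example (hypothetical helper, never called): the borrow handling
   above. For after = {2, 100000000} and before = {1, 900000000}, the raw
   nsec difference is negative, so one second is borrowed:
   diff = 1s - 800000000ns = 200000000ns. Assuming --collect-systime=nsec
   (nsec_factor == 1), vki_timespec_diff returns 200000000. */
static Bool ex_timespec_diff_check(void)
{
   struct vki_timespec before = { 1, 900000000 };
   struct vki_timespec after  = { 2, 100000000 };
   return vki_timespec_diff(after, before) == 200000000ULL;
}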
1773 static
1774 void CLG_(post_syscalltime)(ThreadId tid, UInt syscallno,
1775 UWord* args, UInt nArgs, SysRes res)
1777 if (CLG_(current_state).bbcc) {
1778 Int o;
1779 struct vki_timespec ts_now;
1780 struct vki_timespec ts_cpunow;
1781 ULong diff;
1783 collect_time(&ts_now,
1784 CLG_(clo).collect_systime == systime_nsec ? &ts_cpunow : NULL);
1786 diff = vki_timespec_diff (ts_now, syscalltime[tid]);
1788 /* offset o is for "SysCount", o+1 for "SysTime",
1789 o+2 is (optionally) "SysCpuTime". */
1790 o = fullOffset(EG_SYS);
1791 CLG_ASSERT(o>=0);
1792 CLG_DEBUG(0," Time (Off %d) for Syscall %u: %llu\n", o, syscallno,
1793 diff);
1795 if (!CLG_(current_state).bbcc->skipped)
1796 CLG_(init_cost_lz)(CLG_(sets).full,
1797 &(CLG_(current_state).bbcc->skipped));
1798 CLG_(current_state).cost[o] ++;
1799 CLG_(current_state).cost[o+1] += diff;
1800 CLG_(current_state).bbcc->skipped[o] ++;
1801 CLG_(current_state).bbcc->skipped[o+1] += diff;
1802 if (CLG_(clo).collect_systime == systime_nsec) {
1803 diff = vki_timespec_diff (ts_cpunow, syscallcputime[tid]);
1804 CLG_DEBUG(0," SysCpuTime (Off %d) for Syscall %u: %llu\n", o+2, syscallno,
1805 diff);
1806 CLG_(current_state).cost[o+2] += diff;
1807 CLG_(current_state).bbcc->skipped[o+2] += diff;
1812 static UInt ULong_width(ULong n)
1814 UInt w = 0;
1815 while (n > 0) {
1816 n = n / 10;
1817 w++;
1819 if (w == 0) w = 1;
1820 return w + (w-1)/3; // add space for commas
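/* Illustrative check (hypothetical helper, never called): ULong_width
   returns the printed width of n including thousands separators, e.g.
   1234567 -> 7 digits + 2 commas = 9, matching "1,234,567". */
static Bool ex_ULong_width_check(void)
{
   return ULong_width(0)       == 1   /* "0"         */
       && ULong_width(999)     == 3   /* "999"       */
       && ULong_width(1000)    == 5   /* "1,000"     */
       && ULong_width(1234567) == 9;  /* "1,234,567" */
}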
1823 static
1824 void branchsim_printstat(int l1, int l2, int l3)
1826 static HChar fmt[128]; // large enough
1827 FullCost total;
1828 ULong Bc_total_b, Bc_total_mp, Bi_total_b, Bi_total_mp;
1829 ULong B_total_b, B_total_mp;
1831 total = CLG_(total_cost);
1832 Bc_total_b = total[ fullOffset(EG_BC) ];
1833 Bc_total_mp = total[ fullOffset(EG_BC)+1 ];
1834 Bi_total_b = total[ fullOffset(EG_BI) ];
1835 Bi_total_mp = total[ fullOffset(EG_BI)+1 ];
1837 /* Make format string, getting width right for numbers */
1838 VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu cond + %%,%dllu ind)\n",
1839 l1, l2, l3);
1841 if (0 == Bc_total_b) Bc_total_b = 1;
1842 if (0 == Bi_total_b) Bi_total_b = 1;
1843 B_total_b = Bc_total_b + Bi_total_b;
1844 B_total_mp = Bc_total_mp + Bi_total_mp;
1846 VG_(umsg)("\n");
1847 VG_(umsg)(fmt, "Branches: ",
1848 B_total_b, Bc_total_b, Bi_total_b);
1850 VG_(umsg)(fmt, "Mispredicts: ",
1851 B_total_mp, Bc_total_mp, Bi_total_mp);
1853 VG_(umsg)("Mispred rate: %*.1f%% (%*.1f%% + %*.1f%% )\n",
1854 l1, B_total_mp * 100.0 / B_total_b,
1855 l2, Bc_total_mp * 100.0 / Bc_total_b,
1856 l3, Bi_total_mp * 100.0 / Bi_total_b);
static
void clg_print_stats(void)
{
   int BB_lookups =
      CLG_(stat).full_debug_BBs +
      CLG_(stat).fn_name_debug_BBs +
      CLG_(stat).file_line_debug_BBs +
      CLG_(stat).no_debug_BBs;

   /* Hash table stats */
   VG_(message)(Vg_DebugMsg, "Distinct objects: %d\n",
                CLG_(stat).distinct_objs);
   VG_(message)(Vg_DebugMsg, "Distinct files:   %d\n",
                CLG_(stat).distinct_files);
   VG_(message)(Vg_DebugMsg, "Distinct fns:     %d\n",
                CLG_(stat).distinct_fns);
   VG_(message)(Vg_DebugMsg, "Distinct contexts:%d\n",
                CLG_(stat).distinct_contexts);
   VG_(message)(Vg_DebugMsg, "Distinct BBs:     %d\n",
                CLG_(stat).distinct_bbs);
   VG_(message)(Vg_DebugMsg, "Cost entries:     %u (Chunks %u)\n",
                CLG_(costarray_entries), CLG_(costarray_chunks));
   VG_(message)(Vg_DebugMsg, "Distinct BBCCs:   %d\n",
                CLG_(stat).distinct_bbccs);
   VG_(message)(Vg_DebugMsg, "Distinct JCCs:    %d\n",
                CLG_(stat).distinct_jccs);
   VG_(message)(Vg_DebugMsg, "Distinct skips:   %d\n",
                CLG_(stat).distinct_skips);
   VG_(message)(Vg_DebugMsg, "BB lookups:       %d\n",
                BB_lookups);
   if (BB_lookups>0) {
      VG_(message)(Vg_DebugMsg, "With full      debug info:%3d%% (%d)\n",
                   CLG_(stat).full_debug_BBs * 100 / BB_lookups,
                   CLG_(stat).full_debug_BBs);
      VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)\n",
                   CLG_(stat).file_line_debug_BBs * 100 / BB_lookups,
                   CLG_(stat).file_line_debug_BBs);
      VG_(message)(Vg_DebugMsg, "With fn name   debug info:%3d%% (%d)\n",
                   CLG_(stat).fn_name_debug_BBs * 100 / BB_lookups,
                   CLG_(stat).fn_name_debug_BBs);
      VG_(message)(Vg_DebugMsg, "With no        debug info:%3d%% (%d)\n",
                   CLG_(stat).no_debug_BBs * 100 / BB_lookups,
                   CLG_(stat).no_debug_BBs);
   }
   VG_(message)(Vg_DebugMsg, "BBCC Clones:       %d\n",
                CLG_(stat).bbcc_clones);
   VG_(message)(Vg_DebugMsg, "BBs Retranslated:  %d\n",
                CLG_(stat).bb_retranslations);
   VG_(message)(Vg_DebugMsg, "Distinct instrs:   %d\n",
                CLG_(stat).distinct_instrs);

   VG_(message)(Vg_DebugMsg, "LRU Contxt Misses: %d\n",
                CLG_(stat).cxt_lru_misses);
   VG_(message)(Vg_DebugMsg, "LRU BBCC Misses:   %d\n",
                CLG_(stat).bbcc_lru_misses);
   VG_(message)(Vg_DebugMsg, "LRU JCC Misses:    %d\n",
                CLG_(stat).jcc_lru_misses);
   VG_(message)(Vg_DebugMsg, "BBs Executed:      %llu\n",
                CLG_(stat).bb_executions);
   VG_(message)(Vg_DebugMsg, "Calls:             %llu\n",
                CLG_(stat).call_counter);
   VG_(message)(Vg_DebugMsg, "CondJMP followed:  %llu\n",
                CLG_(stat).jcnd_counter);
   VG_(message)(Vg_DebugMsg, "Boring JMPs:       %llu\n",
                CLG_(stat).jump_counter);
   VG_(message)(Vg_DebugMsg, "Recursive calls:   %llu\n",
                CLG_(stat).rec_call_counter);
   VG_(message)(Vg_DebugMsg, "Returns:           %llu\n",
                CLG_(stat).ret_counter);
}
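/* Run once at process exit: flush the cache simulator, unwind all
   thread call stacks, write the final profile dump, and (unless
   running with -q) print the collected event totals. */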
static
void finish(void)
{
   HChar fmt[128];   // large enough
   Int l1, l2, l3;
   FullCost total;

   CLG_DEBUG(0, "finish()\n");

   (*CLG_(cachesim).finish)();

   /* Pop all remaining items from the call stacks so that the cost
      totals are summed up correctly. */
   CLG_(forall_threads)(unwind_thread);

   CLG_(dump_profile)(0, False);

   if (VG_(clo_verbosity) == 0) return;

   if (VG_(clo_stats)) {
      VG_(message)(Vg_DebugMsg, "\n");
      clg_print_stats();
      VG_(message)(Vg_DebugMsg, "\n");
   }

   HChar *evmap = CLG_(eventmapping_as_string)(CLG_(dumpmap));
   VG_(message)(Vg_UserMsg, "Events    : %s\n", evmap);
   VG_(free)(evmap);
   HChar *mcost = CLG_(mappingcost_as_string)(CLG_(dumpmap), CLG_(total_cost));
   VG_(message)(Vg_UserMsg, "Collected : %s\n", mcost);
   VG_(free)(mcost);
   VG_(message)(Vg_UserMsg, "\n");

   /* determine value widths for statistics */
   total = CLG_(total_cost);
   l1 = ULong_width( total[fullOffset(EG_IR)] );
   l2 = l3 = 0;
   if (CLG_(clo).simulate_cache) {
      l2 = ULong_width( total[fullOffset(EG_DR)] );
      l3 = ULong_width( total[fullOffset(EG_DW)] );
   }
   if (CLG_(clo).simulate_branch) {
      int l2b = ULong_width( total[fullOffset(EG_BC)] );
      int l3b = ULong_width( total[fullOffset(EG_BI)] );
      if (l2b > l2) l2 = l2b;
      if (l3b > l3) l3 = l3b;
   }

   /* Make format string, getting width right for numbers */
   VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);

   /* Always print instruction fetches; cache and branch statistics
      only when the respective simulation was enabled. */
   VG_(umsg)(fmt, "I   refs:     ", total[fullOffset(EG_IR)] );

   if (CLG_(clo).simulate_cache)
      (*CLG_(cachesim).printstat)(l1, l2, l3);

   if (CLG_(clo).simulate_branch)
      branchsim_printstat(l1, l2, l3);
}
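/* Tool exit hook, registered with the core via VG_(basic_tool_funcs)
   below.  The client's exit code is not used by Callgrind. */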
void CLG_(fini)(Int exitcode)
{
   finish();
}
/*--------------------------------------------------------------------*/
/*--- Setup                                                        ---*/
/*--------------------------------------------------------------------*/
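/* Invoked by the core whenever client code is about to run;
   blocks_done counts the translated basic blocks executed so far.
   Gives Callgrind regular control for housekeeping (e.g. handling
   dump requests from callgrind_control). */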
static void clg_start_client_code_callback ( ThreadId tid, ULong blocks_done )
{
   static ULong last_blocks_done = 0;

   if (0)
      VG_(printf)("%d R %llu\n", (Int)tid, blocks_done);

   /* throttle calls to CLG_(run_thread) by number of BBs executed */
   if (blocks_done - last_blocks_done < 5000) return;
   last_blocks_done = blocks_done;

   CLG_(run_thread)( tid );
}
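/* Called after command line processing: set up syscall-time collection
   if requested, sanity-check VEX options that Callgrind depends on, and
   initialise the simulator and all internal data structures. */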
static
void CLG_(post_clo_init)(void)
{
   if (VG_(clo_vex_control).iropt_register_updates_default
       != VexRegUpdSpAtMemAccess) {
      CLG_DEBUG(1, " Using user specified value for "
                "--vex-iropt-register-updates\n");
   } else {
      CLG_DEBUG(1,
                " Using default --vex-iropt-register-updates="
                "sp-at-mem-access\n");
   }

   if (CLG_(clo).collect_systime != systime_no) {
      VG_(needs_syscall_wrapper)(CLG_(pre_syscalltime),
                                 CLG_(post_syscalltime));
      syscalltime = CLG_MALLOC("cl.main.pci.1",
                               VG_N_THREADS * sizeof syscalltime[0]);
      for (UInt i = 0; i < VG_N_THREADS; ++i) {
         syscalltime[i].tv_sec = 0;
         syscalltime[i].tv_nsec = 0;
      }
      if (CLG_(clo).collect_systime == systime_nsec) {
         syscallcputime = CLG_MALLOC("cl.main.pci.2",
                                     VG_N_THREADS * sizeof syscallcputime[0]);
         for (UInt i = 0; i < VG_N_THREADS; ++i) {
            syscallcputime[i].tv_sec = 0;
            syscallcputime[i].tv_nsec = 0;
         }
      }
   }

   if (VG_(clo_px_file_backed) != VexRegUpdSpAtMemAccess) {
      CLG_DEBUG(1, " Using user specified value for "
                "--px-file-backed\n");
   } else {
      CLG_DEBUG(1,
                " Using default --px-file-backed="
                "sp-at-mem-access\n");
   }

   if (VG_(clo_vex_control).iropt_unroll_thresh != 0) {
      VG_(message)(Vg_UserMsg,
                   "callgrind only works with --vex-iropt-unroll-thresh=0\n"
                   "=> resetting it back to 0\n");
      VG_(clo_vex_control).iropt_unroll_thresh = 0;   // cannot be overridden.
   }
   if (VG_(clo_vex_control).guest_chase) {
      VG_(message)(Vg_UserMsg,
                   "callgrind only works with --vex-guest-chase=no\n"
                   "=> resetting it back to 'no'\n");
      VG_(clo_vex_control).guest_chase = False;       // cannot be overridden.
   }

   CLG_DEBUG(1, " dump threads: %s\n", CLG_(clo).separate_threads ? "Yes":"No");
   CLG_DEBUG(1, " call sep. : %d\n", CLG_(clo).separate_callers);
   CLG_DEBUG(1, " rec. sep. : %d\n", CLG_(clo).separate_recursions);

   if (!CLG_(clo).dump_line && !CLG_(clo).dump_instr && !CLG_(clo).dump_bb) {
      VG_(message)(Vg_UserMsg, "Using source line as position.\n");
      CLG_(clo).dump_line = True;
   }

   CLG_(init_dumps)();

   (*CLG_(cachesim).post_clo_init)();

   CLG_(init_eventsets)();
   CLG_(init_statistics)(& CLG_(stat));
   CLG_(init_cost_lz)( CLG_(sets).full, &CLG_(total_cost) );

   /* initialize hash tables */
   CLG_(init_obj_table)();
   CLG_(init_cxt_table)();
   CLG_(init_bb_hash)();

   CLG_(init_threads)();
   CLG_(run_thread)(1);

   CLG_(instrument_state) = CLG_(clo).instrument_atstart;

   if (VG_(clo_verbosity) > 0) {
      VG_(message)(Vg_UserMsg,
                   "For interactive control, run 'callgrind_control%s%s -h'.\n",
                   (VG_(arg_vgdb_prefix) ? " " : ""),
                   (VG_(arg_vgdb_prefix) ? VG_(arg_vgdb_prefix) : ""));
   }
}
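/* Tool registration, run before command line processing: describe the
   tool to the core, set VEX option defaults and constraints, and
   register the instrumentation, finalisation and event callbacks. */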
static
void CLG_(pre_clo_init)(void)
{
   VG_(details_name)            ("Callgrind");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("a call-graph generating cache profiler");
   VG_(details_copyright_author)("Copyright (C) 2002-2017, and GNU GPL'd, "
                                 "by Josef Weidendorfer et al.");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);
   VG_(details_avg_translation_sizeB) ( 500 );

   VG_(clo_vex_control).iropt_register_updates_default
      = VG_(clo_px_file_backed)
      = VexRegUpdSpAtMemAccess; // overridable by the user.

   VG_(clo_vex_control).iropt_unroll_thresh = 0;   // cannot be overridden.
   VG_(clo_vex_control).guest_chase = False;       // cannot be overridden.

   VG_(basic_tool_funcs)        (CLG_(post_clo_init),
                                 CLG_(instrument),
                                 CLG_(fini));

   VG_(needs_superblock_discards)(clg_discard_superblock_info);

   VG_(needs_command_line_options)(CLG_(process_cmd_line_option),
                                   CLG_(print_usage),
                                   CLG_(print_debug_usage));

   VG_(needs_client_requests)(CLG_(handle_client_request));
   VG_(needs_print_stats)    (clg_print_stats);

   VG_(track_start_client_code)  ( & clg_start_client_code_callback );
   VG_(track_pre_deliver_signal) ( & CLG_(pre_signal) );
   VG_(track_post_deliver_signal)( & CLG_(post_signal) );

   CLG_(set_clo_defaults)();
}
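/* Exports CLG_(pre_clo_init) as the tool's entry point and records the
   core/tool interface version this tool was compiled against. */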
VG_DETERMINE_INTERFACE_VERSION(CLG_(pre_clo_init))

/*--------------------------------------------------------------------*/
/*--- end                                                   main.c ---*/
/*--------------------------------------------------------------------*/