Add some initialisations that seemed to be required as a result of an LTO build with...
[valgrind.git] / callgrind / main.c
blob46968539cad294c174565a7884dc6d329fd6dae8
2 /*--------------------------------------------------------------------*/
3 /*--- Callgrind ---*/
4 /*--- main.c ---*/
5 /*--------------------------------------------------------------------*/
7 /*
8 This file is part of Callgrind, a Valgrind tool for call graph
9 profiling programs.
11 Copyright (C) 2002-2017, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
13 This tool is derived from and contains code from Cachegrind
14 Copyright (C) 2002-2017 Nicholas Nethercote (njn@valgrind.org)
16 This program is free software; you can redistribute it and/or
17 modify it under the terms of the GNU General Public License as
18 published by the Free Software Foundation; either version 2 of the
19 License, or (at your option) any later version.
21 This program is distributed in the hope that it will be useful, but
22 WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 General Public License for more details.
26 You should have received a copy of the GNU General Public License
27 along with this program; if not, write to the Free Software
28 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29 02111-1307, USA.
31 The GNU General Public License is contained in the file COPYING.
34 #include "config.h"
35 #include "callgrind.h"
36 #include "global.h"
38 #include "pub_tool_threadstate.h"
39 #include "pub_tool_gdbserver.h"
40 #include "pub_tool_transtab.h" // VG_(discard_translations_safely)
42 #include "cg_branchpred.c"
44 /*------------------------------------------------------------*/
45 /*--- Global variables ---*/
46 /*------------------------------------------------------------*/
48 /* for all threads */
49 CommandLineOptions CLG_(clo);
50 Statistics CLG_(stat);
51 Bool CLG_(instrument_state) = True; /* Instrumentation on ? */
53 /* thread and signal handler specific */
54 exec_state CLG_(current_state);
56 /* min of L1 and LL cache line sizes. This only gets set to a
57 non-zero value if we are doing cache simulation. */
58 Int CLG_(min_line_size) = 0;
61 /*------------------------------------------------------------*/
62 /*--- Statistics ---*/
63 /*------------------------------------------------------------*/
65 static void CLG_(init_statistics)(Statistics* s)
67 s->call_counter = 0;
68 s->jcnd_counter = 0;
69 s->jump_counter = 0;
70 s->rec_call_counter = 0;
71 s->ret_counter = 0;
72 s->bb_executions = 0;
74 s->context_counter = 0;
75 s->bb_retranslations = 0;
77 s->distinct_objs = 0;
78 s->distinct_files = 0;
79 s->distinct_fns = 0;
80 s->distinct_contexts = 0;
81 s->distinct_bbs = 0;
82 s->distinct_bbccs = 0;
83 s->distinct_instrs = 0;
84 s->distinct_skips = 0;
86 s->bb_hash_resizes = 0;
87 s->bbcc_hash_resizes = 0;
88 s->jcc_hash_resizes = 0;
89 s->cxt_hash_resizes = 0;
90 s->fn_array_resizes = 0;
91 s->call_stack_resizes = 0;
92 s->fn_stack_resizes = 0;
94 s->full_debug_BBs = 0;
95 s->file_line_debug_BBs = 0;
96 s->fn_name_debug_BBs = 0;
97 s->no_debug_BBs = 0;
98 s->bbcc_lru_misses = 0;
99 s->jcc_lru_misses = 0;
100 s->cxt_lru_misses = 0;
101 s->bbcc_clones = 0;
105 /*------------------------------------------------------------*/
106 /*--- Simple callbacks (not cache simulator) ---*/
107 /*------------------------------------------------------------*/
109 VG_REGPARM(1)
110 static void log_global_event(InstrInfo* ii)
112 ULong* cost_Bus;
114 CLG_DEBUG(6, "log_global_event: Ir %#lx/%u\n",
115 CLG_(bb_base) + ii->instr_offset, ii->instr_size);
117 if (!CLG_(current_state).collect) return;
119 CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BUS))>0 );
121 CLG_(current_state).cost[ fullOffset(EG_BUS) ]++;
123 if (CLG_(current_state).nonskipped)
124 cost_Bus = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BUS);
125 else
126 cost_Bus = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BUS];
127 cost_Bus[0]++;
131 /* For branches, we consult two different predictors, one which
132 predicts taken/untaken for conditional branches, and the other
133 which predicts the branch target address for indirect branches
134 (jump-to-register style ones). */
136 static VG_REGPARM(2)
137 void log_cond_branch(InstrInfo* ii, Word taken)
139 Bool miss;
140 Int fullOffset_Bc;
141 ULong* cost_Bc;
143 CLG_DEBUG(6, "log_cond_branch: Ir %#lx, taken %ld\n",
144 CLG_(bb_base) + ii->instr_offset, taken);
146 miss = 1 & do_cond_branch_predict(CLG_(bb_base) + ii->instr_offset, taken);
148 if (!CLG_(current_state).collect) return;
150 CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BC))>0 );
152 if (CLG_(current_state).nonskipped)
153 cost_Bc = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BC);
154 else
155 cost_Bc = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BC];
157 fullOffset_Bc = fullOffset(EG_BC);
158 CLG_(current_state).cost[ fullOffset_Bc ]++;
159 cost_Bc[0]++;
160 if (miss) {
161 CLG_(current_state).cost[ fullOffset_Bc+1 ]++;
162 cost_Bc[1]++;
166 static VG_REGPARM(2)
167 void log_ind_branch(InstrInfo* ii, UWord actual_dst)
169 Bool miss;
170 Int fullOffset_Bi;
171 ULong* cost_Bi;
173 CLG_DEBUG(6, "log_ind_branch: Ir %#lx, dst %#lx\n",
174 CLG_(bb_base) + ii->instr_offset, actual_dst);
176 miss = 1 & do_ind_branch_predict(CLG_(bb_base) + ii->instr_offset, actual_dst);
178 if (!CLG_(current_state).collect) return;
180 CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BI))>0 );
182 if (CLG_(current_state).nonskipped)
183 cost_Bi = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BI);
184 else
185 cost_Bi = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BI];
187 fullOffset_Bi = fullOffset(EG_BI);
188 CLG_(current_state).cost[ fullOffset_Bi ]++;
189 cost_Bi[0]++;
190 if (miss) {
191 CLG_(current_state).cost[ fullOffset_Bi+1 ]++;
192 cost_Bi[1]++;
196 /*------------------------------------------------------------*/
197 /*--- Instrumentation structures and event queue handling ---*/
198 /*------------------------------------------------------------*/
200 /* Maintain an ordered list of memory events which are outstanding, in
201 the sense that no IR has yet been generated to do the relevant
202 helper calls. The BB is scanned top to bottom and memory events
203 are added to the end of the list, merging with the most recent
204 notified event where possible (Dw immediately following Dr and
205 having the same size and EA can be merged).
207 This merging is done so that for architectures which have
208 load-op-store instructions (x86, amd64), the insn is treated as if
209 it makes just one memory reference (a modify), rather than two (a
210 read followed by a write at the same address).
212 At various points the list will need to be flushed, that is, IR
213 generated from it. That must happen before any possible exit from
214 the block (the end, or an IRStmt_Exit). Flushing also takes place
215 when there is no space to add a new event.
217 If we require the simulation statistics to be up to date with
218 respect to possible memory exceptions, then the list would have to
219 be flushed before each memory reference. That would however lose
220 performance by inhibiting event-merging during flushing.
222 Flushing the list consists of walking it start to end and emitting
223 instrumentation IR for each event, in the order in which they
224 appear. It may be possible to emit a single call for two adjacent
225 events in order to reduce the number of helper function calls made.
226 For example, it could well be profitable to handle two adjacent Ir
227 events with a single helper call. */
229 typedef
230 IRExpr
231 IRAtom;
/* Kinds of events that can sit in the outstanding-event queue. */
typedef enum {
   Ev_Ir,   /* instruction read */
   Ev_Dr,   /* data read */
   Ev_Dw,   /* data write */
   Ev_Dm,   /* data modify (read then write) */
   Ev_Bc,   /* conditional branch */
   Ev_Bi,   /* indirect branch (to unknown destination) */
   Ev_G     /* global bus event */
} EventTag;
245 typedef
246 struct {
247 EventTag tag;
248 InstrInfo* inode;
249 union {
250 struct {
251 } Ir;
252 struct {
253 IRAtom* ea;
254 Int szB;
255 } Dr;
256 struct {
257 IRAtom* ea;
258 Int szB;
259 } Dw;
260 struct {
261 IRAtom* ea;
262 Int szB;
263 } Dm;
264 struct {
265 IRAtom* taken; /* :: Ity_I1 */
266 } Bc;
267 struct {
268 IRAtom* dst;
269 } Bi;
270 struct {
271 } G;
272 } Ev;
274 Event;
276 static void init_Event ( Event* ev ) {
277 VG_(memset)(ev, 0, sizeof(Event));
280 static IRAtom* get_Event_dea ( Event* ev ) {
281 switch (ev->tag) {
282 case Ev_Dr: return ev->Ev.Dr.ea;
283 case Ev_Dw: return ev->Ev.Dw.ea;
284 case Ev_Dm: return ev->Ev.Dm.ea;
285 default: tl_assert(0);
289 static Int get_Event_dszB ( Event* ev ) {
290 switch (ev->tag) {
291 case Ev_Dr: return ev->Ev.Dr.szB;
292 case Ev_Dw: return ev->Ev.Dw.szB;
293 case Ev_Dm: return ev->Ev.Dm.szB;
294 default: tl_assert(0);
/* Capacity of the outstanding-event queue.  The value is arbitrary:
   a larger queue allows more event merging, but may induce more
   spilling because the live ranges of address temporaries get
   longer. */
#define N_EVENTS 16
306 /* A struct which holds all the running state during instrumentation.
307 Mostly to avoid passing loads of parameters everywhere. */
308 typedef struct {
309 /* The current outstanding-memory-event list. */
310 Event events[N_EVENTS];
311 Int events_used;
313 /* The array of InstrInfo's is part of BB struct. */
314 BB* bb;
316 /* BB seen before (ie. re-instrumentation) */
317 Bool seen_before;
319 /* Number InstrInfo bins 'used' so far. */
320 UInt ii_index;
322 // current offset of guest instructions from BB start
323 UInt instr_offset;
325 /* The output SB being constructed. */
326 IRSB* sbOut;
327 } ClgState;
330 static void showEvent ( Event* ev )
332 switch (ev->tag) {
333 case Ev_Ir:
334 VG_(printf)("Ir (InstrInfo %p) at +%u\n",
335 ev->inode, ev->inode->instr_offset);
336 break;
337 case Ev_Dr:
338 VG_(printf)("Dr (InstrInfo %p) at +%u %d EA=",
339 ev->inode, ev->inode->instr_offset, ev->Ev.Dr.szB);
340 ppIRExpr(ev->Ev.Dr.ea);
341 VG_(printf)("\n");
342 break;
343 case Ev_Dw:
344 VG_(printf)("Dw (InstrInfo %p) at +%u %d EA=",
345 ev->inode, ev->inode->instr_offset, ev->Ev.Dw.szB);
346 ppIRExpr(ev->Ev.Dw.ea);
347 VG_(printf)("\n");
348 break;
349 case Ev_Dm:
350 VG_(printf)("Dm (InstrInfo %p) at +%u %d EA=",
351 ev->inode, ev->inode->instr_offset, ev->Ev.Dm.szB);
352 ppIRExpr(ev->Ev.Dm.ea);
353 VG_(printf)("\n");
354 break;
355 case Ev_Bc:
356 VG_(printf)("Bc %p GA=", ev->inode);
357 ppIRExpr(ev->Ev.Bc.taken);
358 VG_(printf)("\n");
359 break;
360 case Ev_Bi:
361 VG_(printf)("Bi %p DST=", ev->inode);
362 ppIRExpr(ev->Ev.Bi.dst);
363 VG_(printf)("\n");
364 break;
365 case Ev_G:
366 VG_(printf)("G %p\n", ev->inode);
367 break;
368 default:
369 tl_assert(0);
370 break;
374 /* Generate code for all outstanding memory events, and mark the queue
375 empty. Code is generated into cgs->sbOut, and this activity
376 'consumes' slots in cgs->bb. */
378 static void flushEvents ( ClgState* clgs )
380 Int i, regparms, inew;
381 const HChar* helperName;
382 void* helperAddr;
383 IRExpr** argv;
384 IRExpr* i_node_expr;
385 IRDirty* di;
386 Event* ev;
387 Event* ev2;
388 Event* ev3;
390 if (!clgs->seen_before) {
391 // extend event sets as needed
392 // available sets: D0 Dr
393 for(i=0; i<clgs->events_used; i++) {
394 ev = &clgs->events[i];
395 switch(ev->tag) {
396 case Ev_Ir:
397 // Ir event always is first for a guest instruction
398 CLG_ASSERT(ev->inode->eventset == 0);
399 ev->inode->eventset = CLG_(sets).base;
400 break;
401 case Ev_Dr:
402 // extend event set by Dr counters
403 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
404 EG_DR);
405 break;
406 case Ev_Dw:
407 case Ev_Dm:
408 // extend event set by Dw counters
409 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
410 EG_DW);
411 break;
412 case Ev_Bc:
413 // extend event set by Bc counters
414 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
415 EG_BC);
416 break;
417 case Ev_Bi:
418 // extend event set by Bi counters
419 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
420 EG_BI);
421 break;
422 case Ev_G:
423 // extend event set by Bus counter
424 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
425 EG_BUS);
426 break;
427 default:
428 tl_assert(0);
433 for(i = 0; i < clgs->events_used; i = inew) {
435 helperName = NULL;
436 helperAddr = NULL;
437 argv = NULL;
438 regparms = 0;
440 /* generate IR to notify event i and possibly the ones
441 immediately following it. */
442 tl_assert(i >= 0 && i < clgs->events_used);
444 ev = &clgs->events[i];
445 ev2 = ( i < clgs->events_used-1 ? &clgs->events[i+1] : NULL );
446 ev3 = ( i < clgs->events_used-2 ? &clgs->events[i+2] : NULL );
448 CLG_DEBUGIF(5) {
449 VG_(printf)(" flush ");
450 showEvent( ev );
453 i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );
455 /* Decide on helper fn to call and args to pass it, and advance
456 i appropriately.
457 Dm events have same effect as Dw events */
458 switch (ev->tag) {
459 case Ev_Ir:
460 /* Merge an Ir with a following Dr. */
461 if (ev2 && ev2->tag == Ev_Dr) {
462 /* Why is this true? It's because we're merging an Ir
463 with a following Dr. The Ir derives from the
464 instruction's IMark and the Dr from data
465 references which follow it. In short it holds
466 because each insn starts with an IMark, hence an
467 Ev_Ir, and so these Dr must pertain to the
468 immediately preceding Ir. Same applies to analogous
469 assertions in the subsequent cases. */
470 tl_assert(ev2->inode == ev->inode);
471 helperName = CLG_(cachesim).log_1I1Dr_name;
472 helperAddr = CLG_(cachesim).log_1I1Dr;
473 argv = mkIRExprVec_3( i_node_expr,
474 get_Event_dea(ev2),
475 mkIRExpr_HWord( get_Event_dszB(ev2) ) );
476 regparms = 3;
477 inew = i+2;
479 /* Merge an Ir with a following Dw/Dm. */
480 else
481 if (ev2 && (ev2->tag == Ev_Dw || ev2->tag == Ev_Dm)) {
482 tl_assert(ev2->inode == ev->inode);
483 helperName = CLG_(cachesim).log_1I1Dw_name;
484 helperAddr = CLG_(cachesim).log_1I1Dw;
485 argv = mkIRExprVec_3( i_node_expr,
486 get_Event_dea(ev2),
487 mkIRExpr_HWord( get_Event_dszB(ev2) ) );
488 regparms = 3;
489 inew = i+2;
491 /* Merge an Ir with two following Irs. */
492 else
493 if (ev2 && ev3 && ev2->tag == Ev_Ir && ev3->tag == Ev_Ir) {
494 helperName = CLG_(cachesim).log_3I0D_name;
495 helperAddr = CLG_(cachesim).log_3I0D;
496 argv = mkIRExprVec_3( i_node_expr,
497 mkIRExpr_HWord( (HWord)ev2->inode ),
498 mkIRExpr_HWord( (HWord)ev3->inode ) );
499 regparms = 3;
500 inew = i+3;
502 /* Merge an Ir with one following Ir. */
503 else
504 if (ev2 && ev2->tag == Ev_Ir) {
505 helperName = CLG_(cachesim).log_2I0D_name;
506 helperAddr = CLG_(cachesim).log_2I0D;
507 argv = mkIRExprVec_2( i_node_expr,
508 mkIRExpr_HWord( (HWord)ev2->inode ) );
509 regparms = 2;
510 inew = i+2;
512 /* No merging possible; emit as-is. */
513 else {
514 helperName = CLG_(cachesim).log_1I0D_name;
515 helperAddr = CLG_(cachesim).log_1I0D;
516 argv = mkIRExprVec_1( i_node_expr );
517 regparms = 1;
518 inew = i+1;
520 break;
521 case Ev_Dr:
522 /* Data read or modify */
523 helperName = CLG_(cachesim).log_0I1Dr_name;
524 helperAddr = CLG_(cachesim).log_0I1Dr;
525 argv = mkIRExprVec_3( i_node_expr,
526 get_Event_dea(ev),
527 mkIRExpr_HWord( get_Event_dszB(ev) ) );
528 regparms = 3;
529 inew = i+1;
530 break;
531 case Ev_Dw:
532 case Ev_Dm:
533 /* Data write */
534 helperName = CLG_(cachesim).log_0I1Dw_name;
535 helperAddr = CLG_(cachesim).log_0I1Dw;
536 argv = mkIRExprVec_3( i_node_expr,
537 get_Event_dea(ev),
538 mkIRExpr_HWord( get_Event_dszB(ev) ) );
539 regparms = 3;
540 inew = i+1;
541 break;
542 case Ev_Bc:
543 /* Conditional branch */
544 helperName = "log_cond_branch";
545 helperAddr = &log_cond_branch;
546 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
547 regparms = 2;
548 inew = i+1;
549 break;
550 case Ev_Bi:
551 /* Branch to an unknown destination */
552 helperName = "log_ind_branch";
553 helperAddr = &log_ind_branch;
554 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
555 regparms = 2;
556 inew = i+1;
557 break;
558 case Ev_G:
559 /* Global bus event (CAS, LOCK-prefix, LL-SC, etc) */
560 helperName = "log_global_event";
561 helperAddr = &log_global_event;
562 argv = mkIRExprVec_1( i_node_expr );
563 regparms = 1;
564 inew = i+1;
565 break;
566 default:
567 tl_assert(0);
570 CLG_DEBUGIF(5) {
571 if (inew > i+1) {
572 VG_(printf)(" merge ");
573 showEvent( ev2 );
575 if (inew > i+2) {
576 VG_(printf)(" merge ");
577 showEvent( ev3 );
579 if (helperAddr)
580 VG_(printf)(" call %s (%p)\n",
581 helperName, helperAddr);
584 /* helper could be unset depending on the simulator used */
585 if (helperAddr == 0) continue;
587 /* Add the helper. */
588 tl_assert(helperName);
589 tl_assert(helperAddr);
590 tl_assert(argv);
591 di = unsafeIRDirty_0_N( regparms,
592 helperName, VG_(fnptr_to_fnentry)( helperAddr ),
593 argv );
594 addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
597 clgs->events_used = 0;
600 static void addEvent_Ir ( ClgState* clgs, InstrInfo* inode )
602 Event* evt;
603 tl_assert(clgs->seen_before || (inode->eventset == 0));
604 if (!CLG_(clo).simulate_cache) return;
606 if (clgs->events_used == N_EVENTS)
607 flushEvents(clgs);
608 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
609 evt = &clgs->events[clgs->events_used];
610 init_Event(evt);
611 evt->tag = Ev_Ir;
612 evt->inode = inode;
613 clgs->events_used++;
616 static
617 void addEvent_Dr ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
619 Event* evt;
620 tl_assert(isIRAtom(ea));
621 tl_assert(datasize >= 1);
622 if (!CLG_(clo).simulate_cache) return;
623 tl_assert(datasize <= CLG_(min_line_size));
625 if (clgs->events_used == N_EVENTS)
626 flushEvents(clgs);
627 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
628 evt = &clgs->events[clgs->events_used];
629 init_Event(evt);
630 evt->tag = Ev_Dr;
631 evt->inode = inode;
632 evt->Ev.Dr.szB = datasize;
633 evt->Ev.Dr.ea = ea;
634 clgs->events_used++;
637 static
638 void addEvent_Dw ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
640 Event* evt;
641 tl_assert(isIRAtom(ea));
642 tl_assert(datasize >= 1);
643 if (!CLG_(clo).simulate_cache) return;
644 tl_assert(datasize <= CLG_(min_line_size));
646 /* Is it possible to merge this write with the preceding read? */
647 if (clgs->events_used > 0) {
648 Event* lastEvt = &clgs->events[clgs->events_used-1];
649 if ( lastEvt->tag == Ev_Dr
650 && lastEvt->Ev.Dr.szB == datasize
651 && lastEvt->inode == inode
652 && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
654 lastEvt->tag = Ev_Dm;
655 return;
659 /* No. Add as normal. */
660 if (clgs->events_used == N_EVENTS)
661 flushEvents(clgs);
662 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
663 evt = &clgs->events[clgs->events_used];
664 init_Event(evt);
665 evt->tag = Ev_Dw;
666 evt->inode = inode;
667 evt->Ev.Dw.szB = datasize;
668 evt->Ev.Dw.ea = ea;
669 clgs->events_used++;
672 static
673 void addEvent_D_guarded ( ClgState* clgs, InstrInfo* inode,
674 Int datasize, IRAtom* ea, IRAtom* guard,
675 Bool isWrite )
677 tl_assert(isIRAtom(ea));
678 tl_assert(guard);
679 tl_assert(isIRAtom(guard));
680 tl_assert(datasize >= 1);
681 if (!CLG_(clo).simulate_cache) return;
682 tl_assert(datasize <= CLG_(min_line_size));
684 /* Adding guarded memory actions and merging them with the existing
685 queue is too complex. Simply flush the queue and add this
686 action immediately. Since guarded loads and stores are pretty
687 rare, this is not thought likely to cause any noticeable
688 performance loss as a result of the loss of event-merging
689 opportunities. */
690 tl_assert(clgs->events_used >= 0);
691 flushEvents(clgs);
692 tl_assert(clgs->events_used == 0);
693 /* Same as case Ev_Dw / case Ev_Dr in flushEvents, except with guard */
694 IRExpr* i_node_expr;
695 const HChar* helperName;
696 void* helperAddr;
697 IRExpr** argv;
698 Int regparms;
699 IRDirty* di;
700 i_node_expr = mkIRExpr_HWord( (HWord)inode );
701 helperName = isWrite ? CLG_(cachesim).log_0I1Dw_name
702 : CLG_(cachesim).log_0I1Dr_name;
703 helperAddr = isWrite ? CLG_(cachesim).log_0I1Dw
704 : CLG_(cachesim).log_0I1Dr;
705 argv = mkIRExprVec_3( i_node_expr,
706 ea, mkIRExpr_HWord( datasize ) );
707 regparms = 3;
708 di = unsafeIRDirty_0_N(
709 regparms,
710 helperName, VG_(fnptr_to_fnentry)( helperAddr ),
711 argv );
712 di->guard = guard;
713 addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
716 static
717 void addEvent_Bc ( ClgState* clgs, InstrInfo* inode, IRAtom* guard )
719 Event* evt;
720 tl_assert(isIRAtom(guard));
721 tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, guard)
722 == (sizeof(RegWord)==4 ? Ity_I32 : Ity_I64));
723 if (!CLG_(clo).simulate_branch) return;
725 if (clgs->events_used == N_EVENTS)
726 flushEvents(clgs);
727 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
728 evt = &clgs->events[clgs->events_used];
729 init_Event(evt);
730 evt->tag = Ev_Bc;
731 evt->inode = inode;
732 evt->Ev.Bc.taken = guard;
733 clgs->events_used++;
736 static
737 void addEvent_Bi ( ClgState* clgs, InstrInfo* inode, IRAtom* whereTo )
739 Event* evt;
740 tl_assert(isIRAtom(whereTo));
741 tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, whereTo)
742 == (sizeof(RegWord)==4 ? Ity_I32 : Ity_I64));
743 if (!CLG_(clo).simulate_branch) return;
745 if (clgs->events_used == N_EVENTS)
746 flushEvents(clgs);
747 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
748 evt = &clgs->events[clgs->events_used];
749 init_Event(evt);
750 evt->tag = Ev_Bi;
751 evt->inode = inode;
752 evt->Ev.Bi.dst = whereTo;
753 clgs->events_used++;
756 static
757 void addEvent_G ( ClgState* clgs, InstrInfo* inode )
759 Event* evt;
760 if (!CLG_(clo).collect_bus) return;
762 if (clgs->events_used == N_EVENTS)
763 flushEvents(clgs);
764 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
765 evt = &clgs->events[clgs->events_used];
766 init_Event(evt);
767 evt->tag = Ev_G;
768 evt->inode = inode;
769 clgs->events_used++;
772 /* Initialise or check (if already seen before) an InstrInfo for next insn.
773 We only can set instr_offset/instr_size here. The required event set and
774 resulting cost offset depend on events (Ir/Dr/Dw/Dm) in guest
775 instructions. The event set is extended as required on flush of the event
776 queue (when Dm events were determined), cost offsets are determined at
777 end of BB instrumentation. */
778 static
779 InstrInfo* next_InstrInfo ( ClgState* clgs, UInt instr_size )
781 InstrInfo* ii;
782 tl_assert(clgs->ii_index >= 0);
783 tl_assert(clgs->ii_index < clgs->bb->instr_count);
784 ii = &clgs->bb->instr[ clgs->ii_index ];
786 if (clgs->seen_before) {
787 CLG_ASSERT(ii->instr_offset == clgs->instr_offset);
788 CLG_ASSERT(ii->instr_size == instr_size);
790 else {
791 ii->instr_offset = clgs->instr_offset;
792 ii->instr_size = instr_size;
793 ii->cost_offset = 0;
794 ii->eventset = 0;
797 clgs->ii_index++;
798 clgs->instr_offset += instr_size;
799 CLG_(stat).distinct_instrs++;
801 return ii;
804 // return total number of cost values needed for this BB
805 static
806 UInt update_cost_offsets( ClgState* clgs )
808 Int i;
809 InstrInfo* ii;
810 UInt cost_offset = 0;
812 CLG_ASSERT(clgs->bb->instr_count == clgs->ii_index);
813 for(i=0; i<clgs->ii_index; i++) {
814 ii = &clgs->bb->instr[i];
815 if (clgs->seen_before) {
816 CLG_ASSERT(ii->cost_offset == cost_offset);
817 } else
818 ii->cost_offset = cost_offset;
819 cost_offset += ii->eventset ? ii->eventset->size : 0;
822 return cost_offset;
825 /*------------------------------------------------------------*/
826 /*--- Instrumentation ---*/
827 /*------------------------------------------------------------*/
829 #if defined(VG_BIGENDIAN)
830 # define CLGEndness Iend_BE
831 #elif defined(VG_LITTLEENDIAN)
832 # define CLGEndness Iend_LE
833 #else
834 # error "Unknown endianness"
835 #endif
837 static
838 Addr IRConst2Addr(IRConst* con)
840 Addr addr;
842 if (sizeof(RegWord) == 4) {
843 CLG_ASSERT( con->tag == Ico_U32 );
844 addr = con->Ico.U32;
846 else if (sizeof(RegWord) == 8) {
847 CLG_ASSERT( con->tag == Ico_U64 );
848 addr = con->Ico.U64;
850 else
851 VG_(tool_panic)("Callgrind: invalid Addr type");
853 return addr;
856 /* First pass over a BB to instrument, counting instructions and jumps
857 * This is needed for the size of the BB struct to allocate
859 * Called from CLG_(get_bb)
861 void CLG_(collectBlockInfo)(IRSB* sbIn,
862 /*INOUT*/ UInt* instrs,
863 /*INOUT*/ UInt* cjmps,
864 /*INOUT*/ Bool* cjmp_inverted)
866 Int i;
867 IRStmt* st;
868 Addr instrAddr =0, jumpDst;
869 UInt instrLen = 0;
870 Bool toNextInstr = False;
872 // Ist_Exit has to be ignored in preamble code, before first IMark:
873 // preamble code is added by VEX for self modifying code, and has
874 // nothing to do with client code
875 Bool inPreamble = True;
877 if (!sbIn) return;
879 for (i = 0; i < sbIn->stmts_used; i++) {
880 st = sbIn->stmts[i];
881 if (Ist_IMark == st->tag) {
882 inPreamble = False;
884 instrAddr = st->Ist.IMark.addr;
885 instrLen = st->Ist.IMark.len;
887 (*instrs)++;
888 toNextInstr = False;
890 if (inPreamble) continue;
891 if (Ist_Exit == st->tag) {
892 jumpDst = IRConst2Addr(st->Ist.Exit.dst);
893 toNextInstr = (jumpDst == instrAddr + instrLen);
895 (*cjmps)++;
899 /* if the last instructions of BB conditionally jumps to next instruction
900 * (= first instruction of next BB in memory), this is a inverted by VEX.
902 *cjmp_inverted = toNextInstr;
905 static
906 void addConstMemStoreStmt( IRSB* bbOut, UWord addr, UInt val, IRType hWordTy)
908 addStmtToIRSB( bbOut,
909 IRStmt_Store(CLGEndness,
910 IRExpr_Const(hWordTy == Ity_I32 ?
911 IRConst_U32( addr ) :
912 IRConst_U64( addr )),
913 IRExpr_Const(IRConst_U32(val)) ));
917 /* add helper call to setup_bbcc, with pointer to BB struct as argument
919 * precondition for setup_bbcc:
920 * - jmps_passed has number of cond.jumps passed in last executed BB
921 * - current_bbcc has a pointer to the BBCC of the last executed BB
922 * Thus, if bbcc_jmpkind is != -1 (JmpNone),
923 * current_bbcc->bb->jmp_addr
924 * gives the address of the jump source.
926 * the setup does 2 things:
927 * - trace call:
928 * * Unwind own call stack, i.e sync our ESP with real ESP
929 * This is for ESP manipulation (longjmps, C++ exec handling) and RET
930 * * For CALLs or JMPs crossing objects, record call arg +
931 * push are on own call stack
933 * - prepare for cache log functions:
934 * set current_bbcc to BBCC that gets the costs for this BB execution
935 * attached
937 static
938 void addBBSetupCall(ClgState* clgs)
940 IRDirty* di;
941 IRExpr *arg1, **argv;
943 arg1 = mkIRExpr_HWord( (HWord)clgs->bb );
944 argv = mkIRExprVec_1(arg1);
945 di = unsafeIRDirty_0_N( 1, "setup_bbcc",
946 VG_(fnptr_to_fnentry)( & CLG_(setup_bbcc) ),
947 argv);
948 addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
952 static
953 IRSB* CLG_(instrument)( VgCallbackClosure* closure,
954 IRSB* sbIn,
955 const VexGuestLayout* layout,
956 const VexGuestExtents* vge,
957 const VexArchInfo* archinfo_host,
958 IRType gWordTy, IRType hWordTy )
960 Int i;
961 IRStmt* st;
962 Addr origAddr;
963 InstrInfo* curr_inode = NULL;
964 ClgState clgs;
965 UInt cJumps = 0;
966 IRTypeEnv* tyenv = sbIn->tyenv;
968 if (gWordTy != hWordTy) {
969 /* We don't currently support this case. */
970 VG_(tool_panic)("host/guest word size mismatch");
973 // No instrumentation if it is switched off
974 if (! CLG_(instrument_state)) {
975 CLG_DEBUG(5, "instrument(BB %#lx) [Instrumentation OFF]\n",
976 (Addr)closure->readdr);
977 return sbIn;
980 CLG_DEBUG(3, "+ instrument(BB %#lx)\n", (Addr)closure->readdr);
982 /* Set up SB for instrumented IR */
983 clgs.sbOut = deepCopyIRSBExceptStmts(sbIn);
985 // Copy verbatim any IR preamble preceding the first IMark
986 i = 0;
987 while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
988 addStmtToIRSB( clgs.sbOut, sbIn->stmts[i] );
989 i++;
992 // Get the first statement, and origAddr from it
993 CLG_ASSERT(sbIn->stmts_used >0);
994 CLG_ASSERT(i < sbIn->stmts_used);
995 st = sbIn->stmts[i];
996 CLG_ASSERT(Ist_IMark == st->tag);
998 origAddr = st->Ist.IMark.addr + st->Ist.IMark.delta;
999 CLG_ASSERT(origAddr == st->Ist.IMark.addr
1000 + st->Ist.IMark.delta); // XXX: check no overflow
1002 /* Get BB struct (creating if necessary).
1003 * JS: The hash table is keyed with orig_addr_noredir -- important!
1004 * JW: Why? If it is because of different chasing of the redirection,
1005 * this is not needed, as chasing is switched off in callgrind
1007 clgs.bb = CLG_(get_bb)(origAddr, sbIn, &(clgs.seen_before));
1009 addBBSetupCall(&clgs);
1011 // Set up running state
1012 clgs.events_used = 0;
1013 clgs.ii_index = 0;
1014 clgs.instr_offset = 0;
1016 for (/*use current i*/; i < sbIn->stmts_used; i++) {
1018 st = sbIn->stmts[i];
1019 CLG_ASSERT(isFlatIRStmt(st));
1021 switch (st->tag) {
1022 case Ist_NoOp:
1023 case Ist_AbiHint:
1024 case Ist_Put:
1025 case Ist_PutI:
1026 case Ist_MBE:
1027 break;
1029 case Ist_IMark: {
1030 Addr cia = st->Ist.IMark.addr + st->Ist.IMark.delta;
1031 UInt isize = st->Ist.IMark.len;
1032 CLG_ASSERT(clgs.instr_offset == cia - origAddr);
1033 // If Vex fails to decode an instruction, the size will be zero.
1034 // Pretend otherwise.
1035 if (isize == 0) isize = VG_MIN_INSTR_SZB;
1037 // Sanity-check size.
1038 tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
1039 || VG_CLREQ_SZB == isize );
1041 // Init the inode, record it as the current one.
1042 // Subsequent Dr/Dw/Dm events from the same instruction will
1043 // also use it.
1044 curr_inode = next_InstrInfo (&clgs, isize);
1046 addEvent_Ir( &clgs, curr_inode );
1047 break;
1050 case Ist_WrTmp: {
1051 IRExpr* data = st->Ist.WrTmp.data;
1052 if (data->tag == Iex_Load) {
1053 IRExpr* aexpr = data->Iex.Load.addr;
1054 // Note also, endianness info is ignored. I guess
1055 // that's not interesting.
1056 addEvent_Dr( &clgs, curr_inode,
1057 sizeofIRType(data->Iex.Load.ty), aexpr );
1059 break;
1062 case Ist_Store: {
1063 IRExpr* data = st->Ist.Store.data;
1064 IRExpr* aexpr = st->Ist.Store.addr;
1065 addEvent_Dw( &clgs, curr_inode,
1066 sizeofIRType(typeOfIRExpr(sbIn->tyenv, data)), aexpr );
1067 break;
1070 case Ist_StoreG: {
1071 IRStoreG* sg = st->Ist.StoreG.details;
1072 IRExpr* data = sg->data;
1073 IRExpr* addr = sg->addr;
1074 IRType type = typeOfIRExpr(tyenv, data);
1075 tl_assert(type != Ity_INVALID);
1076 addEvent_D_guarded( &clgs, curr_inode,
1077 sizeofIRType(type), addr, sg->guard,
1078 True/*isWrite*/ );
1079 break;
1082 case Ist_LoadG: {
1083 IRLoadG* lg = st->Ist.LoadG.details;
1084 IRType type = Ity_INVALID; /* loaded type */
1085 IRType typeWide = Ity_INVALID; /* after implicit widening */
1086 IRExpr* addr = lg->addr;
1087 typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
1088 tl_assert(type != Ity_INVALID);
1089 addEvent_D_guarded( &clgs, curr_inode,
1090 sizeofIRType(type), addr, lg->guard,
1091 False/*!isWrite*/ );
1092 break;
1095 case Ist_Dirty: {
1096 Int dataSize;
1097 IRDirty* d = st->Ist.Dirty.details;
1098 if (d->mFx != Ifx_None) {
1099 /* This dirty helper accesses memory. Collect the details. */
1100 tl_assert(d->mAddr != NULL);
1101 tl_assert(d->mSize != 0);
1102 dataSize = d->mSize;
1103 // Large (eg. 28B, 108B, 512B on x86) data-sized
1104 // instructions will be done inaccurately, but they're
1105 // very rare and this avoids errors from hitting more
1106 // than two cache lines in the simulation.
1107 if (CLG_(clo).simulate_cache && dataSize > CLG_(min_line_size))
1108 dataSize = CLG_(min_line_size);
1109 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
1110 addEvent_Dr( &clgs, curr_inode, dataSize, d->mAddr );
1111 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
1112 addEvent_Dw( &clgs, curr_inode, dataSize, d->mAddr );
1113 } else {
1114 tl_assert(d->mAddr == NULL);
1115 tl_assert(d->mSize == 0);
1117 break;
1120 case Ist_CAS: {
1121 /* We treat it as a read and a write of the location. I
1122 think that is the same behaviour as it was before IRCAS
1123 was introduced, since prior to that point, the Vex
1124 front ends would translate a lock-prefixed instruction
1125 into a (normal) read followed by a (normal) write. */
1126 Int dataSize;
1127 IRCAS* cas = st->Ist.CAS.details;
1128 CLG_ASSERT(cas->addr && isIRAtom(cas->addr));
1129 CLG_ASSERT(cas->dataLo);
1130 dataSize = sizeofIRType(typeOfIRExpr(sbIn->tyenv, cas->dataLo));
1131 if (cas->dataHi != NULL)
1132 dataSize *= 2; /* since this is a doubleword-cas */
1133 addEvent_Dr( &clgs, curr_inode, dataSize, cas->addr );
1134 addEvent_Dw( &clgs, curr_inode, dataSize, cas->addr );
1135 addEvent_G( &clgs, curr_inode );
1136 break;
1139 case Ist_LLSC: {
1140 IRType dataTy;
1141 if (st->Ist.LLSC.storedata == NULL) {
1142 /* LL */
1143 dataTy = typeOfIRTemp(sbIn->tyenv, st->Ist.LLSC.result);
1144 addEvent_Dr( &clgs, curr_inode,
1145 sizeofIRType(dataTy), st->Ist.LLSC.addr );
1146 /* flush events before LL, should help SC to succeed */
1147 flushEvents( &clgs );
1148 } else {
1149 /* SC */
1150 dataTy = typeOfIRExpr(sbIn->tyenv, st->Ist.LLSC.storedata);
1151 addEvent_Dw( &clgs, curr_inode,
1152 sizeofIRType(dataTy), st->Ist.LLSC.addr );
1153 /* I don't know whether the global-bus-lock cost should
1154 be attributed to the LL or the SC, but it doesn't
1155 really matter since they always have to be used in
1156 pairs anyway. Hence put it (quite arbitrarily) on
1157 the SC. */
1158 addEvent_G( &clgs, curr_inode );
1160 break;
1163 case Ist_Exit: {
1164 Bool guest_exit, inverted;
1166 /* VEX code generation sometimes inverts conditional branches.
1167 * As Callgrind counts (conditional) jumps, it has to correct
1168 * inversions. The heuristic is the following:
1169 * (1) Callgrind switches off SB chasing and unrolling, and
1170 * therefore it assumes that a candidate for inversion only is
1171 * the last conditional branch in an SB.
1172 * (2) inversion is assumed if the branch jumps to the address of
1173 * the next guest instruction in memory.
1174 * This heuristic is precalculated in CLG_(collectBlockInfo)().
1176 * Branching behavior is also used for branch prediction. Note that
1177 * above heuristic is different from what Cachegrind does.
1178 * Cachegrind uses (2) for all branches.
1180 if (cJumps+1 == clgs.bb->cjmp_count)
1181 inverted = clgs.bb->cjmp_inverted;
1182 else
1183 inverted = False;
1185 // call branch predictor only if this is a branch in guest code
1186 guest_exit = (st->Ist.Exit.jk == Ijk_Boring) ||
1187 (st->Ist.Exit.jk == Ijk_Call) ||
1188 (st->Ist.Exit.jk == Ijk_Ret);
1190 if (guest_exit) {
1191 /* Stuff to widen the guard expression to a host word, so
1192 we can pass it to the branch predictor simulation
1193 functions easily. */
1194 IRType tyW = hWordTy;
1195 IROp widen = tyW==Ity_I32 ? Iop_1Uto32 : Iop_1Uto64;
1196 IROp opXOR = tyW==Ity_I32 ? Iop_Xor32 : Iop_Xor64;
1197 IRTemp guard1 = newIRTemp(clgs.sbOut->tyenv, Ity_I1);
1198 IRTemp guardW = newIRTemp(clgs.sbOut->tyenv, tyW);
1199 IRTemp guard = newIRTemp(clgs.sbOut->tyenv, tyW);
1200 IRExpr* one = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
1201 : IRExpr_Const(IRConst_U64(1));
1203 /* Widen the guard expression. */
1204 addStmtToIRSB( clgs.sbOut,
1205 IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
1206 addStmtToIRSB( clgs.sbOut,
1207 IRStmt_WrTmp( guardW,
1208 IRExpr_Unop(widen,
1209 IRExpr_RdTmp(guard1))) );
1210 /* If the exit is inverted, invert the sense of the guard. */
1211 addStmtToIRSB(
1212 clgs.sbOut,
1213 IRStmt_WrTmp(
1214 guard,
1215 inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
1216 : IRExpr_RdTmp(guardW)
1218 /* And post the event. */
1219 addEvent_Bc( &clgs, curr_inode, IRExpr_RdTmp(guard) );
1222 /* We may never reach the next statement, so need to flush
1223 all outstanding transactions now. */
1224 flushEvents( &clgs );
1226 CLG_ASSERT(clgs.ii_index>0);
1227 if (!clgs.seen_before) {
1228 ClgJumpKind jk;
1230 if (st->Ist.Exit.jk == Ijk_Call) jk = jk_Call;
1231 else if (st->Ist.Exit.jk == Ijk_Ret) jk = jk_Return;
1232 else {
1233 if (IRConst2Addr(st->Ist.Exit.dst) ==
1234 origAddr + curr_inode->instr_offset + curr_inode->instr_size)
1235 jk = jk_None;
1236 else
1237 jk = jk_Jump;
1240 clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;
1241 clgs.bb->jmp[cJumps].jmpkind = jk;
1244 /* Update global variable jmps_passed before the jump
1245 * A correction is needed if VEX inverted the last jump condition
1247 UInt val = inverted ? cJumps+1 : cJumps;
1248 addConstMemStoreStmt( clgs.sbOut,
1249 (UWord) &CLG_(current_state).jmps_passed,
1250 val, hWordTy);
1251 cJumps++;
1253 break;
1256 default:
1257 tl_assert(0);
1258 break;
1261 /* Copy the original statement */
1262 addStmtToIRSB( clgs.sbOut, st );
1264 CLG_DEBUGIF(5) {
1265 VG_(printf)(" pass ");
1266 ppIRStmt(st);
1267 VG_(printf)("\n");
1271 /* Deal with branches to unknown destinations. Except ignore ones
1272 which are function returns as we assume the return stack
1273 predictor never mispredicts. */
1274 if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
1275 if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
1276 switch (sbIn->next->tag) {
1277 case Iex_Const:
1278 break; /* boring - branch to known address */
1279 case Iex_RdTmp:
1280 /* looks like an indirect branch (branch to unknown) */
1281 addEvent_Bi( &clgs, curr_inode, sbIn->next );
1282 break;
1283 default:
1284 /* shouldn't happen - if the incoming IR is properly
1285 flattened, should only have tmp and const cases to
1286 consider. */
1287 tl_assert(0);
1291 /* At the end of the bb. Flush outstandings. */
1292 flushEvents( &clgs );
1294 /* Update global variable jmps_passed at end of SB.
1295 * As CLG_(current_state).jmps_passed is reset to 0 in setup_bbcc,
1296 * this can be omitted if there is no conditional jump in this SB.
1297 * A correction is needed if VEX inverted the last jump condition
1299 if (cJumps>0) {
1300 UInt jmps_passed = cJumps;
1301 if (clgs.bb->cjmp_inverted) jmps_passed--;
1302 addConstMemStoreStmt( clgs.sbOut,
1303 (UWord) &CLG_(current_state).jmps_passed,
1304 jmps_passed, hWordTy);
1306 CLG_ASSERT(clgs.bb->cjmp_count == cJumps);
1307 CLG_ASSERT(clgs.bb->instr_count == clgs.ii_index);
1309 /* Info for final exit from BB */
1311 ClgJumpKind jk;
1313 if (sbIn->jumpkind == Ijk_Call) jk = jk_Call;
1314 else if (sbIn->jumpkind == Ijk_Ret) jk = jk_Return;
1315 else {
1316 jk = jk_Jump;
1317 if ((sbIn->next->tag == Iex_Const) &&
1318 (IRConst2Addr(sbIn->next->Iex.Const.con) ==
1319 origAddr + clgs.instr_offset))
1320 jk = jk_None;
1322 clgs.bb->jmp[cJumps].jmpkind = jk;
1323 /* Instruction index of the call/ret at BB end
1324 * (it is wrong for fall-through, but does not matter) */
1325 clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;
1328 /* swap information of last exit with final exit if inverted */
1329 if (clgs.bb->cjmp_inverted) {
1330 ClgJumpKind jk;
1331 UInt instr;
1333 jk = clgs.bb->jmp[cJumps].jmpkind;
1334 clgs.bb->jmp[cJumps].jmpkind = clgs.bb->jmp[cJumps-1].jmpkind;
1335 clgs.bb->jmp[cJumps-1].jmpkind = jk;
1336 instr = clgs.bb->jmp[cJumps].instr;
1337 clgs.bb->jmp[cJumps].instr = clgs.bb->jmp[cJumps-1].instr;
1338 clgs.bb->jmp[cJumps-1].instr = instr;
1341 if (clgs.seen_before) {
1342 CLG_ASSERT(clgs.bb->cost_count == update_cost_offsets(&clgs));
1343 CLG_ASSERT(clgs.bb->instr_len == clgs.instr_offset);
1345 else {
1346 clgs.bb->cost_count = update_cost_offsets(&clgs);
1347 clgs.bb->instr_len = clgs.instr_offset;
1350 CLG_DEBUG(3, "- instrument(BB %#lx): byteLen %u, CJumps %u, CostLen %u\n",
1351 origAddr, clgs.bb->instr_len,
1352 clgs.bb->cjmp_count, clgs.bb->cost_count);
1353 if (cJumps>0) {
1354 CLG_DEBUG(3, " [ ");
1355 for (i=0;i<cJumps;i++)
1356 CLG_DEBUG(3, "%u ", clgs.bb->jmp[i].instr);
1357 CLG_DEBUG(3, "], last inverted: %s \n",
1358 clgs.bb->cjmp_inverted ? "yes":"no");
1361 return clgs.sbOut;
1364 /*--------------------------------------------------------------------*/
1365 /*--- Discarding BB info ---*/
1366 /*--------------------------------------------------------------------*/
1368 // Called when a translation is removed from the translation cache for
1369 // any reason at all: to free up space, because the guest code was
1370 // unmapped or modified, or for any arbitrary reason.
1371 static
1372 void clg_discard_superblock_info ( Addr orig_addr, VexGuestExtents vge )
1374 tl_assert(vge.n_used > 0);
1376 if (0)
1377 VG_(printf)( "discard_superblock_info: %p, %p, %llu\n",
1378 (void*)orig_addr,
1379 (void*)vge.base[0], (ULong)vge.len[0]);
1381 // Get BB info, remove from table, free BB info. Simple!
1382 // When created, the BB is keyed by the first instruction address,
1383 // (not orig_addr, but eventually redirected address). Thus, we
1384 // use the first instruction address in vge.
1385 CLG_(delete_bb)(vge.base[0]);
1389 /*------------------------------------------------------------*/
1390 /*--- CLG_(fini)() and related function ---*/
1391 /*------------------------------------------------------------*/
1395 static void zero_thread_cost(thread_info* t)
1397 Int i;
1399 for(i = 0; i < CLG_(current_call_stack).sp; i++) {
1400 if (!CLG_(current_call_stack).entry[i].jcc) continue;
1402 /* reset call counters to current for active calls */
1403 CLG_(copy_cost)( CLG_(sets).full,
1404 CLG_(current_call_stack).entry[i].enter_cost,
1405 CLG_(current_state).cost );
1406 CLG_(current_call_stack).entry[i].jcc->call_counter = 0;
1409 CLG_(forall_bbccs)(CLG_(zero_bbcc));
1411 /* set counter for last dump */
1412 CLG_(copy_cost)( CLG_(sets).full,
1413 t->lastdump_cost, CLG_(current_state).cost );
1416 void CLG_(zero_all_cost)(Bool only_current_thread)
1418 if (VG_(clo_verbosity) > 1)
1419 VG_(message)(Vg_DebugMsg, " Zeroing costs...\n");
1421 if (only_current_thread)
1422 zero_thread_cost(CLG_(get_current_thread)());
1423 else
1424 CLG_(forall_threads)(zero_thread_cost);
1426 if (VG_(clo_verbosity) > 1)
1427 VG_(message)(Vg_DebugMsg, " ...done\n");
1430 static
1431 void unwind_thread(thread_info* t)
1433 /* unwind signal handlers */
1434 while(CLG_(current_state).sig !=0)
1435 CLG_(post_signal)(CLG_(current_tid),CLG_(current_state).sig);
1437 /* unwind regular call stack */
1438 while(CLG_(current_call_stack).sp>0)
1439 CLG_(pop_call_stack)();
1441 /* reset context and function stack for context generation */
1442 CLG_(init_exec_state)( &CLG_(current_state) );
1443 CLG_(current_fn_stack).top = CLG_(current_fn_stack).bottom;
1446 static
1447 void zero_state_cost(thread_info* t)
1449 CLG_(zero_cost)( CLG_(sets).full, CLG_(current_state).cost );
1452 void CLG_(set_instrument_state)(const HChar* reason, Bool state)
1454 if (CLG_(instrument_state) == state) {
1455 CLG_DEBUG(2, "%s: instrumentation already %s\n",
1456 reason, state ? "ON" : "OFF");
1457 return;
1459 CLG_(instrument_state) = state;
1460 CLG_DEBUG(2, "%s: Switching instrumentation %s ...\n",
1461 reason, state ? "ON" : "OFF");
1463 VG_(discard_translations_safely)( (Addr)0x1000, ~(SizeT)0xfff, "callgrind");
1465 /* reset internal state: call stacks, simulator */
1466 CLG_(forall_threads)(unwind_thread);
1467 CLG_(forall_threads)(zero_state_cost);
1468 (*CLG_(cachesim).clear)();
1470 if (VG_(clo_verbosity) > 1)
1471 VG_(message)(Vg_DebugMsg, "%s: instrumentation switched %s\n",
1472 reason, state ? "ON" : "OFF");
1475 /* helper for dump_state_togdb */
1476 static void dump_state_of_thread_togdb(thread_info* ti)
1478 static FullCost sum = 0, tmp = 0;
1479 Int t, i;
1480 BBCC *from, *to;
1481 call_entry* ce;
1482 HChar *mcost;
1484 t = CLG_(current_tid);
1485 CLG_(init_cost_lz)( CLG_(sets).full, &sum );
1486 CLG_(copy_cost_lz)( CLG_(sets).full, &tmp, ti->lastdump_cost );
1487 CLG_(add_diff_cost)( CLG_(sets).full, sum, ti->lastdump_cost,
1488 ti->states.entry[0]->cost);
1489 CLG_(copy_cost)( CLG_(sets).full, ti->lastdump_cost, tmp );
1490 mcost = CLG_(mappingcost_as_string)(CLG_(dumpmap), sum);
1491 VG_(gdb_printf)("events-%d: %s\n", t, mcost);
1492 VG_(free)(mcost);
1493 VG_(gdb_printf)("frames-%d: %d\n", t, CLG_(current_call_stack).sp);
1495 ce = 0;
1496 for(i = 0; i < CLG_(current_call_stack).sp; i++) {
1497 ce = CLG_(get_call_entry)(i);
1498 /* if this frame is skipped, we don't have counters */
1499 if (!ce->jcc) continue;
1501 from = ce->jcc->from;
1502 VG_(gdb_printf)("function-%d-%d: %s\n",t, i, from->cxt->fn[0]->name);
1503 VG_(gdb_printf)("calls-%d-%d: %llu\n",t, i, ce->jcc->call_counter);
1505 /* FIXME: EventSets! */
1506 CLG_(copy_cost)( CLG_(sets).full, sum, ce->jcc->cost );
1507 CLG_(copy_cost)( CLG_(sets).full, tmp, ce->enter_cost );
1508 CLG_(add_diff_cost)( CLG_(sets).full, sum,
1509 ce->enter_cost, CLG_(current_state).cost );
1510 CLG_(copy_cost)( CLG_(sets).full, ce->enter_cost, tmp );
1512 mcost = CLG_(mappingcost_as_string)(CLG_(dumpmap), sum);
1513 VG_(gdb_printf)("events-%d-%d: %s\n",t, i, mcost);
1514 VG_(free)(mcost);
1516 if (ce && ce->jcc) {
1517 to = ce->jcc->to;
1518 VG_(gdb_printf)("function-%d-%d: %s\n",t, i, to->cxt->fn[0]->name );
1522 /* Dump current state */
1523 static void dump_state_togdb(void)
1525 thread_info** th;
1526 int t;
1527 Int orig_tid = CLG_(current_tid);
1529 VG_(gdb_printf)("instrumentation: %s\n",
1530 CLG_(instrument_state) ? "on":"off");
1531 if (!CLG_(instrument_state)) return;
1533 VG_(gdb_printf)("executed-bbs: %llu\n", CLG_(stat).bb_executions);
1534 VG_(gdb_printf)("executed-calls: %llu\n", CLG_(stat).call_counter);
1535 VG_(gdb_printf)("distinct-bbs: %d\n", CLG_(stat).distinct_bbs);
1536 VG_(gdb_printf)("distinct-calls: %d\n", CLG_(stat).distinct_jccs);
1537 VG_(gdb_printf)("distinct-functions: %d\n", CLG_(stat).distinct_fns);
1538 VG_(gdb_printf)("distinct-contexts: %d\n", CLG_(stat).distinct_contexts);
1540 /* "events:" line. Given here because it will be dynamic in the future */
1541 HChar *evmap = CLG_(eventmapping_as_string)(CLG_(dumpmap));
1542 VG_(gdb_printf)("events: %s\n", evmap);
1543 VG_(free)(evmap);
1544 /* "part:" line (number of last part. Is 0 at start */
1545 VG_(gdb_printf)("part: %d\n", CLG_(get_dump_counter)());
1547 /* threads */
1548 th = CLG_(get_threads)();
1549 VG_(gdb_printf)("threads:");
1550 for(t=1;t<VG_N_THREADS;t++) {
1551 if (!th[t]) continue;
1552 VG_(gdb_printf)(" %d", t);
1554 VG_(gdb_printf)("\n");
1555 VG_(gdb_printf)("current-tid: %d\n", orig_tid);
1556 CLG_(forall_threads)(dump_state_of_thread_togdb);
1560 static void print_monitor_help ( void )
1562 VG_(gdb_printf) ("\n");
1563 VG_(gdb_printf) ("callgrind monitor commands:\n");
1564 VG_(gdb_printf) (" dump [<dump_hint>]\n");
1565 VG_(gdb_printf) (" dump counters\n");
1566 VG_(gdb_printf) (" zero\n");
1567 VG_(gdb_printf) (" zero counters\n");
1568 VG_(gdb_printf) (" status\n");
1569 VG_(gdb_printf) (" print status\n");
1570 VG_(gdb_printf) (" instrumentation [on|off]\n");
1571 VG_(gdb_printf) (" get/set (if on/off given) instrumentation state\n");
1572 VG_(gdb_printf) ("\n");
1575 /* return True if request recognised, False otherwise */
1576 static Bool handle_gdb_monitor_command (ThreadId tid, const HChar *req)
1578 HChar* wcmd;
1579 HChar s[VG_(strlen)(req) + 1]; /* copy for strtok_r */
1580 HChar *ssaveptr;
1582 VG_(strcpy) (s, req);
1584 wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
1585 switch (VG_(keyword_id) ("help dump zero status instrumentation",
1586 wcmd, kwd_report_duplicated_matches)) {
1587 case -2: /* multiple matches */
1588 return True;
1589 case -1: /* not found */
1590 return False;
1591 case 0: /* help */
1592 print_monitor_help();
1593 return True;
1594 case 1: { /* dump */
1595 CLG_(dump_profile)(req, False);
1596 return True;
1598 case 2: { /* zero */
1599 CLG_(zero_all_cost)(False);
1600 return True;
1603 case 3: { /* status */
1604 HChar* arg = VG_(strtok_r) (0, " ", &ssaveptr);
1605 if (arg && (VG_(strcmp)(arg, "internal") == 0)) {
1606 /* internal interface to callgrind_control */
1607 dump_state_togdb();
1608 return True;
1611 if (!CLG_(instrument_state)) {
1612 VG_(gdb_printf)("No status available as instrumentation is switched off\n");
1613 } else {
1614 // Status information to be improved ...
1615 thread_info** th = CLG_(get_threads)();
1616 Int t, tcount = 0;
1617 for(t=1;t<VG_N_THREADS;t++)
1618 if (th[t]) tcount++;
1619 VG_(gdb_printf)("%d thread(s) running.\n", tcount);
1621 return True;
1624 case 4: { /* instrumentation */
1625 HChar* arg = VG_(strtok_r) (0, " ", &ssaveptr);
1626 if (!arg) {
1627 VG_(gdb_printf)("instrumentation: %s\n",
1628 CLG_(instrument_state) ? "on":"off");
1630 else
1631 CLG_(set_instrument_state)("Command", VG_(strcmp)(arg,"off")!=0);
1632 return True;
1635 default:
1636 tl_assert(0);
1637 return False;
1641 static
1642 Bool CLG_(handle_client_request)(ThreadId tid, UWord *args, UWord *ret)
1644 if (!VG_IS_TOOL_USERREQ('C','T',args[0])
1645 && VG_USERREQ__GDB_MONITOR_COMMAND != args[0])
1646 return False;
1648 switch(args[0]) {
1649 case VG_USERREQ__DUMP_STATS:
1650 CLG_(dump_profile)("Client Request", True);
1651 *ret = 0; /* meaningless */
1652 break;
1654 case VG_USERREQ__DUMP_STATS_AT:
1656 const HChar *arg = (HChar*)args[1];
1657 HChar buf[30 + VG_(strlen)(arg)]; // large enough
1658 VG_(sprintf)(buf,"Client Request: %s", arg);
1659 CLG_(dump_profile)(buf, True);
1660 *ret = 0; /* meaningless */
1662 break;
1664 case VG_USERREQ__ZERO_STATS:
1665 CLG_(zero_all_cost)(True);
1666 *ret = 0; /* meaningless */
1667 break;
1669 case VG_USERREQ__TOGGLE_COLLECT:
1670 CLG_(current_state).collect = !CLG_(current_state).collect;
1671 CLG_DEBUG(2, "Client Request: toggled collection state to %s\n",
1672 CLG_(current_state).collect ? "ON" : "OFF");
1673 *ret = 0; /* meaningless */
1674 break;
1676 case VG_USERREQ__START_INSTRUMENTATION:
1677 CLG_(set_instrument_state)("Client Request", True);
1678 *ret = 0; /* meaningless */
1679 break;
1681 case VG_USERREQ__STOP_INSTRUMENTATION:
1682 CLG_(set_instrument_state)("Client Request", False);
1683 *ret = 0; /* meaningless */
1684 break;
1686 case VG_USERREQ__GDB_MONITOR_COMMAND: {
1687 Bool handled = handle_gdb_monitor_command (tid, (HChar*)args[1]);
1688 if (handled)
1689 *ret = 1;
1690 else
1691 *ret = 0;
1692 return handled;
1694 default:
1695 return False;
1698 return True;
1702 /* Syscall Timing */
1704 /* struct timeval syscalltime[VG_N_THREADS]; */
1705 #if CLG_MICROSYSTIME
1706 ULong *syscalltime;
1707 #else
1708 UInt *syscalltime;
1709 #endif
1711 static
1712 void CLG_(pre_syscalltime)(ThreadId tid, UInt syscallno,
1713 UWord* args, UInt nArgs)
1715 if (CLG_(clo).collect_systime) {
1716 #if CLG_MICROSYSTIME
1717 struct vki_timeval tv_now;
1718 VG_(gettimeofday)(&tv_now, NULL);
1719 syscalltime[tid] = tv_now.tv_sec * 1000000ULL + tv_now.tv_usec;
1720 #else
1721 syscalltime[tid] = VG_(read_millisecond_timer)();
1722 #endif
1726 static
1727 void CLG_(post_syscalltime)(ThreadId tid, UInt syscallno,
1728 UWord* args, UInt nArgs, SysRes res)
1730 if (CLG_(clo).collect_systime &&
1731 CLG_(current_state).bbcc) {
1732 Int o;
1733 #if CLG_MICROSYSTIME
1734 struct vki_timeval tv_now;
1735 ULong diff;
1737 VG_(gettimeofday)(&tv_now, NULL);
1738 diff = (tv_now.tv_sec * 1000000ULL + tv_now.tv_usec) - syscalltime[tid];
1739 #else
1740 UInt diff = VG_(read_millisecond_timer)() - syscalltime[tid];
1741 #endif
1743 /* offset o is for "SysCount", o+1 for "SysTime" */
1744 o = fullOffset(EG_SYS);
1745 CLG_ASSERT(o>=0);
1746 CLG_DEBUG(0," Time (Off %d) for Syscall %u: %llu\n", o, syscallno,
1747 (ULong)diff);
1749 CLG_(current_state).cost[o] ++;
1750 CLG_(current_state).cost[o+1] += diff;
1751 if (!CLG_(current_state).bbcc->skipped)
1752 CLG_(init_cost_lz)(CLG_(sets).full,
1753 &(CLG_(current_state).bbcc->skipped));
1754 CLG_(current_state).bbcc->skipped[o] ++;
1755 CLG_(current_state).bbcc->skipped[o+1] += diff;
1759 static UInt ULong_width(ULong n)
1761 UInt w = 0;
1762 while (n > 0) {
1763 n = n / 10;
1764 w++;
1766 if (w == 0) w = 1;
1767 return w + (w-1)/3; // add space for commas
1770 static
1771 void branchsim_printstat(int l1, int l2, int l3)
1773 static HChar fmt[128]; // large enough
1774 FullCost total;
1775 ULong Bc_total_b, Bc_total_mp, Bi_total_b, Bi_total_mp;
1776 ULong B_total_b, B_total_mp;
1778 total = CLG_(total_cost);
1779 Bc_total_b = total[ fullOffset(EG_BC) ];
1780 Bc_total_mp = total[ fullOffset(EG_BC)+1 ];
1781 Bi_total_b = total[ fullOffset(EG_BI) ];
1782 Bi_total_mp = total[ fullOffset(EG_BI)+1 ];
1784 /* Make format string, getting width right for numbers */
1785 VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu cond + %%,%dllu ind)\n",
1786 l1, l2, l3);
1788 if (0 == Bc_total_b) Bc_total_b = 1;
1789 if (0 == Bi_total_b) Bi_total_b = 1;
1790 B_total_b = Bc_total_b + Bi_total_b;
1791 B_total_mp = Bc_total_mp + Bi_total_mp;
1793 VG_(umsg)("\n");
1794 VG_(umsg)(fmt, "Branches: ",
1795 B_total_b, Bc_total_b, Bi_total_b);
1797 VG_(umsg)(fmt, "Mispredicts: ",
1798 B_total_mp, Bc_total_mp, Bi_total_mp);
1800 VG_(umsg)("Mispred rate: %*.1f%% (%*.1f%% + %*.1f%% )\n",
1801 l1, B_total_mp * 100.0 / B_total_b,
1802 l2, Bc_total_mp * 100.0 / Bc_total_b,
1803 l3, Bi_total_mp * 100.0 / Bi_total_b);
1806 static
1807 void clg_print_stats(void)
1809 int BB_lookups =
1810 CLG_(stat).full_debug_BBs +
1811 CLG_(stat).fn_name_debug_BBs +
1812 CLG_(stat).file_line_debug_BBs +
1813 CLG_(stat).no_debug_BBs;
1815 /* Hash table stats */
1816 VG_(message)(Vg_DebugMsg, "Distinct objects: %d\n",
1817 CLG_(stat).distinct_objs);
1818 VG_(message)(Vg_DebugMsg, "Distinct files: %d\n",
1819 CLG_(stat).distinct_files);
1820 VG_(message)(Vg_DebugMsg, "Distinct fns: %d\n",
1821 CLG_(stat).distinct_fns);
1822 VG_(message)(Vg_DebugMsg, "Distinct contexts:%d\n",
1823 CLG_(stat).distinct_contexts);
1824 VG_(message)(Vg_DebugMsg, "Distinct BBs: %d\n",
1825 CLG_(stat).distinct_bbs);
1826 VG_(message)(Vg_DebugMsg, "Cost entries: %u (Chunks %u)\n",
1827 CLG_(costarray_entries), CLG_(costarray_chunks));
1828 VG_(message)(Vg_DebugMsg, "Distinct BBCCs: %d\n",
1829 CLG_(stat).distinct_bbccs);
1830 VG_(message)(Vg_DebugMsg, "Distinct JCCs: %d\n",
1831 CLG_(stat).distinct_jccs);
1832 VG_(message)(Vg_DebugMsg, "Distinct skips: %d\n",
1833 CLG_(stat).distinct_skips);
1834 VG_(message)(Vg_DebugMsg, "BB lookups: %d\n",
1835 BB_lookups);
1836 if (BB_lookups>0) {
1837 VG_(message)(Vg_DebugMsg, "With full debug info:%3d%% (%d)\n",
1838 CLG_(stat).full_debug_BBs * 100 / BB_lookups,
1839 CLG_(stat).full_debug_BBs);
1840 VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)\n",
1841 CLG_(stat).file_line_debug_BBs * 100 / BB_lookups,
1842 CLG_(stat).file_line_debug_BBs);
1843 VG_(message)(Vg_DebugMsg, "With fn name debug info:%3d%% (%d)\n",
1844 CLG_(stat).fn_name_debug_BBs * 100 / BB_lookups,
1845 CLG_(stat).fn_name_debug_BBs);
1846 VG_(message)(Vg_DebugMsg, "With no debug info:%3d%% (%d)\n",
1847 CLG_(stat).no_debug_BBs * 100 / BB_lookups,
1848 CLG_(stat).no_debug_BBs);
1850 VG_(message)(Vg_DebugMsg, "BBCC Clones: %d\n",
1851 CLG_(stat).bbcc_clones);
1852 VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d\n",
1853 CLG_(stat).bb_retranslations);
1854 VG_(message)(Vg_DebugMsg, "Distinct instrs: %d\n",
1855 CLG_(stat).distinct_instrs);
1857 VG_(message)(Vg_DebugMsg, "LRU Contxt Misses: %d\n",
1858 CLG_(stat).cxt_lru_misses);
1859 VG_(message)(Vg_DebugMsg, "LRU BBCC Misses: %d\n",
1860 CLG_(stat).bbcc_lru_misses);
1861 VG_(message)(Vg_DebugMsg, "LRU JCC Misses: %d\n",
1862 CLG_(stat).jcc_lru_misses);
1863 VG_(message)(Vg_DebugMsg, "BBs Executed: %llu\n",
1864 CLG_(stat).bb_executions);
1865 VG_(message)(Vg_DebugMsg, "Calls: %llu\n",
1866 CLG_(stat).call_counter);
1867 VG_(message)(Vg_DebugMsg, "CondJMP followed: %llu\n",
1868 CLG_(stat).jcnd_counter);
1869 VG_(message)(Vg_DebugMsg, "Boring JMPs: %llu\n",
1870 CLG_(stat).jump_counter);
1871 VG_(message)(Vg_DebugMsg, "Recursive calls: %llu\n",
1872 CLG_(stat).rec_call_counter);
1873 VG_(message)(Vg_DebugMsg, "Returns: %llu\n",
1874 CLG_(stat).ret_counter);
1878 static
1879 void finish(void)
1881 HChar fmt[128]; // large enough
1882 Int l1, l2, l3;
1883 FullCost total;
1885 CLG_DEBUG(0, "finish()\n");
1887 (*CLG_(cachesim).finish)();
1889 /* pop all remaining items from CallStack for correct sum
1891 CLG_(forall_threads)(unwind_thread);
1893 CLG_(dump_profile)(0, False);
1895 if (VG_(clo_verbosity) == 0) return;
1897 if (VG_(clo_stats)) {
1898 VG_(message)(Vg_DebugMsg, "\n");
1899 clg_print_stats();
1900 VG_(message)(Vg_DebugMsg, "\n");
1903 HChar *evmap = CLG_(eventmapping_as_string)(CLG_(dumpmap));
1904 VG_(message)(Vg_UserMsg, "Events : %s\n", evmap);
1905 VG_(free)(evmap);
1906 HChar *mcost = CLG_(mappingcost_as_string)(CLG_(dumpmap), CLG_(total_cost));
1907 VG_(message)(Vg_UserMsg, "Collected : %s\n", mcost);
1908 VG_(free)(mcost);
1909 VG_(message)(Vg_UserMsg, "\n");
1911 /* determine value widths for statistics */
1912 total = CLG_(total_cost);
1913 l1 = ULong_width( total[fullOffset(EG_IR)] );
1914 l2 = l3 = 0;
1915 if (CLG_(clo).simulate_cache) {
1916 l2 = ULong_width( total[fullOffset(EG_DR)] );
1917 l3 = ULong_width( total[fullOffset(EG_DW)] );
1919 if (CLG_(clo).simulate_branch) {
1920 int l2b = ULong_width( total[fullOffset(EG_BC)] );
1921 int l3b = ULong_width( total[fullOffset(EG_BI)] );
1922 if (l2b > l2) l2 = l2b;
1923 if (l3b > l3) l3 = l3b;
1926 /* Make format string, getting width right for numbers */
1927 VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);
1929 /* Always print this */
1930 VG_(umsg)(fmt, "I refs: ", total[fullOffset(EG_IR)] );
1932 if (CLG_(clo).simulate_cache)
1933 (*CLG_(cachesim).printstat)(l1, l2, l3);
1935 if (CLG_(clo).simulate_branch)
1936 branchsim_printstat(l1, l2, l3);
1941 void CLG_(fini)(Int exitcode)
1943 finish();
1947 /*--------------------------------------------------------------------*/
1948 /*--- Setup ---*/
1949 /*--------------------------------------------------------------------*/
1951 static void clg_start_client_code_callback ( ThreadId tid, ULong blocks_done )
1953 static ULong last_blocks_done = 0;
1955 if (0)
1956 VG_(printf)("%d R %llu\n", (Int)tid, blocks_done);
1958 /* throttle calls to CLG_(run_thread) by number of BBs executed */
1959 if (blocks_done - last_blocks_done < 5000) return;
1960 last_blocks_done = blocks_done;
1962 CLG_(run_thread)( tid );
1965 static
1966 void CLG_(post_clo_init)(void)
1968 if (VG_(clo_vex_control).iropt_register_updates_default
1969 != VexRegUpdSpAtMemAccess) {
1970 CLG_DEBUG(1, " Using user specified value for "
1971 "--vex-iropt-register-updates\n");
1972 } else {
1973 CLG_DEBUG(1,
1974 " Using default --vex-iropt-register-updates="
1975 "sp-at-mem-access\n");
1978 if (CLG_(clo).collect_systime) {
1979 VG_(needs_syscall_wrapper)(CLG_(pre_syscalltime),
1980 CLG_(post_syscalltime));
1981 syscalltime = CLG_MALLOC("cl.main.pci.1",
1982 VG_N_THREADS * sizeof syscalltime[0]);
1983 for (UInt i = 0; i < VG_N_THREADS; ++i) {
1984 syscalltime[i] = 0;
1988 if (VG_(clo_px_file_backed) != VexRegUpdSpAtMemAccess) {
1989 CLG_DEBUG(1, " Using user specified value for "
1990 "--px-file-backed\n");
1991 } else {
1992 CLG_DEBUG(1,
1993 " Using default --px-file-backed="
1994 "sp-at-mem-access\n");
1997 if (VG_(clo_vex_control).iropt_unroll_thresh != 0) {
1998 VG_(message)(Vg_UserMsg,
1999 "callgrind only works with --vex-iropt-unroll-thresh=0\n"
2000 "=> resetting it back to 0\n");
2001 VG_(clo_vex_control).iropt_unroll_thresh = 0; // cannot be overridden.
2003 if (VG_(clo_vex_control).guest_chase_thresh != 0) {
2004 VG_(message)(Vg_UserMsg,
2005 "callgrind only works with --vex-guest-chase-thresh=0\n"
2006 "=> resetting it back to 0\n");
2007 VG_(clo_vex_control).guest_chase_thresh = 0; // cannot be overridden.
2010 CLG_DEBUG(1, " dump threads: %s\n", CLG_(clo).separate_threads ? "Yes":"No");
2011 CLG_DEBUG(1, " call sep. : %d\n", CLG_(clo).separate_callers);
2012 CLG_DEBUG(1, " rec. sep. : %d\n", CLG_(clo).separate_recursions);
2014 if (!CLG_(clo).dump_line && !CLG_(clo).dump_instr && !CLG_(clo).dump_bb) {
2015 VG_(message)(Vg_UserMsg, "Using source line as position.\n");
2016 CLG_(clo).dump_line = True;
2019 CLG_(init_dumps)();
2021 (*CLG_(cachesim).post_clo_init)();
2023 CLG_(init_eventsets)();
2024 CLG_(init_statistics)(& CLG_(stat));
2025 CLG_(init_cost_lz)( CLG_(sets).full, &CLG_(total_cost) );
2027 /* initialize hash tables */
2028 CLG_(init_obj_table)();
2029 CLG_(init_cxt_table)();
2030 CLG_(init_bb_hash)();
2032 CLG_(init_threads)();
2033 CLG_(run_thread)(1);
2035 CLG_(instrument_state) = CLG_(clo).instrument_atstart;
2037 if (VG_(clo_verbosity) > 0) {
2038 VG_(message)(Vg_UserMsg,
2039 "For interactive control, run 'callgrind_control%s%s -h'.\n",
2040 (VG_(arg_vgdb_prefix) ? " " : ""),
2041 (VG_(arg_vgdb_prefix) ? VG_(arg_vgdb_prefix) : ""));
2045 static
2046 void CLG_(pre_clo_init)(void)
2048 VG_(details_name) ("Callgrind");
2049 VG_(details_version) (NULL);
2050 VG_(details_description) ("a call-graph generating cache profiler");
2051 VG_(details_copyright_author)("Copyright (C) 2002-2017, and GNU GPL'd, "
2052 "by Josef Weidendorfer et al.");
2053 VG_(details_bug_reports_to) (VG_BUGS_TO);
2054 VG_(details_avg_translation_sizeB) ( 500 );
2056 VG_(clo_vex_control).iropt_register_updates_default
2057 = VG_(clo_px_file_backed)
2058 = VexRegUpdSpAtMemAccess; // overridable by the user.
2060 VG_(clo_vex_control).iropt_unroll_thresh = 0; // cannot be overridden.
2061 VG_(clo_vex_control).guest_chase_thresh = 0; // cannot be overridden.
2063 VG_(basic_tool_funcs) (CLG_(post_clo_init),
2064 CLG_(instrument),
2065 CLG_(fini));
2067 VG_(needs_superblock_discards)(clg_discard_superblock_info);
2070 VG_(needs_command_line_options)(CLG_(process_cmd_line_option),
2071 CLG_(print_usage),
2072 CLG_(print_debug_usage));
2074 VG_(needs_client_requests)(CLG_(handle_client_request));
2075 VG_(needs_print_stats) (clg_print_stats);
2077 VG_(track_start_client_code) ( & clg_start_client_code_callback );
2078 VG_(track_pre_deliver_signal) ( & CLG_(pre_signal) );
2079 VG_(track_post_deliver_signal)( & CLG_(post_signal) );
2081 CLG_(set_clo_defaults)();
2085 VG_DETERMINE_INTERFACE_VERSION(CLG_(pre_clo_init))
2087 /*--------------------------------------------------------------------*/
2088 /*--- end main.c ---*/
2089 /*--------------------------------------------------------------------*/