2 /*--------------------------------------------------------------------*/
3 /*--- Cachegrind: everything but the simulation itself. ---*/
5 /*--------------------------------------------------------------------*/
8 This file is part of Cachegrind, a Valgrind tool for cache
11 Copyright (C) 2002-2017 Nicholas Nethercote
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, see <http://www.gnu.org/licenses/>.
27 The GNU General Public License is contained in the file COPYING.
30 #include "pub_tool_basics.h"
31 #include "pub_tool_debuginfo.h"
32 #include "pub_tool_libcbase.h"
33 #include "pub_tool_libcassert.h"
34 #include "pub_tool_libcfile.h"
35 #include "pub_tool_libcprint.h"
36 #include "pub_tool_libcproc.h"
37 #include "pub_tool_mallocfree.h"
38 #include "pub_tool_options.h"
39 #include "pub_tool_oset.h"
40 #include "pub_tool_tooliface.h"
41 #include "pub_tool_xarray.h"
42 #include "pub_tool_clientstate.h"
43 #include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
47 #include "cg_branchpred.c"
49 /*------------------------------------------------------------*/
51 /*------------------------------------------------------------*/
53 /* Set to 1 for very verbose debugging */
56 /*------------------------------------------------------------*/
58 /*------------------------------------------------------------*/
60 static Bool clo_cache_sim
= False
; /* do cache simulation? */
61 static Bool clo_branch_sim
= False
; /* do branch simulation? */
62 static const HChar
* clo_cachegrind_out_file
= "cachegrind.out.%p";
64 /*------------------------------------------------------------*/
65 /*--- Cachesim configuration ---*/
66 /*------------------------------------------------------------*/
68 static Int min_line_size
= 0; /* min of L1 and LL cache line sizes */
70 /*------------------------------------------------------------*/
71 /*--- Types and Data Structures ---*/
72 /*------------------------------------------------------------*/
76 ULong a
; /* total # memory accesses of this kind */
77 ULong m1
; /* misses in the first level cache */
78 ULong mL
; /* misses in the second level cache */
84 ULong b
; /* total # branches of this kind */
85 ULong mp
; /* number of branches mispredicted */
89 //------------------------------------------------------------
90 // Primary data structure #1: CC table
91 // - Holds the per-source-line hit/miss stats, grouped by file/function/line.
92 // - an ordered set of CCs. CC indexing done by file/function/line (as
93 // determined from the instrAddr).
94 // - Traversed for dumping stats at end in file/func/line hierarchy.
104 CodeLoc loc
; /* Source location that these counts pertain to */
105 CacheCC Ir
; /* Insn read counts */
106 CacheCC Dr
; /* Data read counts */
107 CacheCC Dw
; /* Data write/modify counts */
108 BranchCC Bc
; /* Conditional branch counts */
109 BranchCC Bi
; /* Indirect branch counts */
112 // First compare file, then fn, then line.
113 static Word
cmp_CodeLoc_LineCC(const void *vloc
, const void *vcc
)
116 const CodeLoc
* a
= (const CodeLoc
*)vloc
;
117 const CodeLoc
* b
= &(((const LineCC
*)vcc
)->loc
);
119 res
= VG_(strcmp
)(a
->file
, b
->file
);
123 res
= VG_(strcmp
)(a
->fn
, b
->fn
);
127 return a
->line
- b
->line
;
130 static OSet
* CC_table
;
132 //------------------------------------------------------------
133 // Primary data structure #2: InstrInfo table
134 // - Holds the cached info about each instr that is used for simulation.
135 // - table(SB_start_addr, list(InstrInfo))
136 // - For each SB, each InstrInfo in the list holds info about the
137 // instruction (instrLen, instrAddr, etc), plus a pointer to its line
138 // CC. This node is what's passed to the simulation function.
139 // - When SBs are discarded the relevant list(instr_details) is freed.
141 typedef struct _InstrInfo InstrInfo
;
145 LineCC
* parent
; // parent line-CC
148 typedef struct _SB_info SB_info
;
150 Addr SB_addr
; // key; MUST BE FIRST
155 static OSet
* instrInfoTable
;
157 //------------------------------------------------------------
158 // Secondary data structure: string table
159 // - holds strings, avoiding dups
160 // - used for filenames and function names, each of which will be
161 // pointed to by one or more CCs.
162 // - it also allows equality checks just by pointer comparison, which
163 // is good when printing the output file at the end.
165 static OSet
* stringTable
;
167 //------------------------------------------------------------
169 static Int distinct_files
= 0;
170 static Int distinct_fns
= 0;
171 static Int distinct_lines
= 0;
172 static Int distinct_instrsGen
= 0;
173 static Int distinct_instrsNoX
= 0;
175 static Int full_debugs
= 0;
176 static Int file_line_debugs
= 0;
177 static Int fn_debugs
= 0;
178 static Int no_debugs
= 0;
180 /*------------------------------------------------------------*/
181 /*--- String table operations ---*/
182 /*------------------------------------------------------------*/
184 static Word
stringCmp( const void* key
, const void* elem
)
186 return VG_(strcmp
)(*(const HChar
*const *)key
, *(const HChar
*const *)elem
);
189 // Get a permanent string; either pull it out of the string table if it's
190 // been encountered before, or dup it and put it into the string table.
191 static HChar
* get_perm_string(const HChar
* s
)
193 HChar
** s_ptr
= VG_(OSetGen_Lookup
)(stringTable
, &s
);
197 HChar
** s_node
= VG_(OSetGen_AllocNode
)(stringTable
, sizeof(HChar
*));
198 *s_node
= VG_(strdup
)("cg.main.gps.1", s
);
199 VG_(OSetGen_Insert
)(stringTable
, s_node
);
204 /*------------------------------------------------------------*/
205 /*--- CC table operations ---*/
206 /*------------------------------------------------------------*/
208 static void get_debug_info(Addr instr_addr
, const HChar
**dir
,
209 const HChar
**file
, const HChar
**fn
, UInt
* line
)
211 DiEpoch ep
= VG_(current_DiEpoch
)();
212 Bool found_file_line
= VG_(get_filename_linenum
)(
218 Bool found_fn
= VG_(get_fnname
)(ep
, instr_addr
, fn
);
220 if (!found_file_line
) {
228 if (found_file_line
) {
229 if (found_fn
) full_debugs
++;
230 else file_line_debugs
++;
232 if (found_fn
) fn_debugs
++;
237 // Do a three step traversal: by file, then fn, then line.
238 // Returns a pointer to the line CC, creates a new one if necessary.
239 static LineCC
* get_lineCC(Addr origAddr
)
241 const HChar
*fn
, *file
, *dir
;
246 get_debug_info(origAddr
, &dir
, &file
, &fn
, &line
);
248 // Form an absolute pathname if a directory is available
249 HChar absfile
[VG_(strlen
)(dir
) + 1 + VG_(strlen
)(file
) + 1];
252 VG_(sprintf
)(absfile
, "%s/%s", dir
, file
);
254 VG_(sprintf
)(absfile
, "%s", file
);
261 lineCC
= VG_(OSetGen_Lookup
)(CC_table
, &loc
);
263 // Allocate and zero a new node.
264 lineCC
= VG_(OSetGen_AllocNode
)(CC_table
, sizeof(LineCC
));
265 lineCC
->loc
.file
= get_perm_string(loc
.file
);
266 lineCC
->loc
.fn
= get_perm_string(loc
.fn
);
267 lineCC
->loc
.line
= loc
.line
;
281 VG_(OSetGen_Insert
)(CC_table
, lineCC
);
287 /*------------------------------------------------------------*/
288 /*--- Cache simulation functions ---*/
289 /*------------------------------------------------------------*/
291 /* A common case for an instruction read event is that the
292 * bytes read belong to the same cache line in both L1I and LL
293 * (if cache line sizes of L1 and LL are the same).
294 * As this can be detected at instrumentation time, and results
295 * in faster simulation, special-casing is benefical.
297 * Abbreviations used in var/function names:
298 * IrNoX - instruction read does not cross cache lines
299 * IrGen - generic instruction read; not detected as IrNoX
300 * Ir - not known / not important whether it is an IrNoX
303 // Only used with --cache-sim=no.
305 void log_1Ir(InstrInfo
* n
)
310 // Only used with --cache-sim=no.
312 void log_2Ir(InstrInfo
* n
, InstrInfo
* n2
)
318 // Only used with --cache-sim=no.
320 void log_3Ir(InstrInfo
* n
, InstrInfo
* n2
, InstrInfo
* n3
)
327 // Generic case for instruction reads: may cross cache lines.
328 // All other Ir handlers expect IrNoX instruction reads.
330 void log_1IrGen_0D_cache_access(InstrInfo
* n
)
332 //VG_(printf)("1IrGen_0D : CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n",
333 // n, n->instr_addr, n->instr_len);
334 cachesim_I1_doref_Gen(n
->instr_addr
, n
->instr_len
,
335 &n
->parent
->Ir
.m1
, &n
->parent
->Ir
.mL
);
340 void log_1IrNoX_0D_cache_access(InstrInfo
* n
)
342 //VG_(printf)("1IrNoX_0D : CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n",
343 // n, n->instr_addr, n->instr_len);
344 cachesim_I1_doref_NoX(n
->instr_addr
, n
->instr_len
,
345 &n
->parent
->Ir
.m1
, &n
->parent
->Ir
.mL
);
350 void log_2IrNoX_0D_cache_access(InstrInfo
* n
, InstrInfo
* n2
)
352 //VG_(printf)("2IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
353 // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n",
354 // n, n->instr_addr, n->instr_len,
355 // n2, n2->instr_addr, n2->instr_len);
356 cachesim_I1_doref_NoX(n
->instr_addr
, n
->instr_len
,
357 &n
->parent
->Ir
.m1
, &n
->parent
->Ir
.mL
);
359 cachesim_I1_doref_NoX(n2
->instr_addr
, n2
->instr_len
,
360 &n2
->parent
->Ir
.m1
, &n2
->parent
->Ir
.mL
);
365 void log_3IrNoX_0D_cache_access(InstrInfo
* n
, InstrInfo
* n2
, InstrInfo
* n3
)
367 //VG_(printf)("3IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
368 // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n"
369 // " CC3addr=0x%010lx, i3addr=0x%010lx, i3size=%lu\n",
370 // n, n->instr_addr, n->instr_len,
371 // n2, n2->instr_addr, n2->instr_len,
372 // n3, n3->instr_addr, n3->instr_len);
373 cachesim_I1_doref_NoX(n
->instr_addr
, n
->instr_len
,
374 &n
->parent
->Ir
.m1
, &n
->parent
->Ir
.mL
);
376 cachesim_I1_doref_NoX(n2
->instr_addr
, n2
->instr_len
,
377 &n2
->parent
->Ir
.m1
, &n2
->parent
->Ir
.mL
);
379 cachesim_I1_doref_NoX(n3
->instr_addr
, n3
->instr_len
,
380 &n3
->parent
->Ir
.m1
, &n3
->parent
->Ir
.mL
);
385 void log_1IrNoX_1Dr_cache_access(InstrInfo
* n
, Addr data_addr
, Word data_size
)
387 //VG_(printf)("1IrNoX_1Dr: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n"
388 // " daddr=0x%010lx, dsize=%lu\n",
389 // n, n->instr_addr, n->instr_len, data_addr, data_size);
390 cachesim_I1_doref_NoX(n
->instr_addr
, n
->instr_len
,
391 &n
->parent
->Ir
.m1
, &n
->parent
->Ir
.mL
);
394 cachesim_D1_doref(data_addr
, data_size
,
395 &n
->parent
->Dr
.m1
, &n
->parent
->Dr
.mL
);
400 void log_1IrNoX_1Dw_cache_access(InstrInfo
* n
, Addr data_addr
, Word data_size
)
402 //VG_(printf)("1IrNoX_1Dw: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n"
403 // " daddr=0x%010lx, dsize=%lu\n",
404 // n, n->instr_addr, n->instr_len, data_addr, data_size);
405 cachesim_I1_doref_NoX(n
->instr_addr
, n
->instr_len
,
406 &n
->parent
->Ir
.m1
, &n
->parent
->Ir
.mL
);
409 cachesim_D1_doref(data_addr
, data_size
,
410 &n
->parent
->Dw
.m1
, &n
->parent
->Dw
.mL
);
414 /* Note that addEvent_D_guarded assumes that log_0Ir_1Dr_cache_access
415 and log_0Ir_1Dw_cache_access have exactly the same prototype. If
416 you change them, you must change addEvent_D_guarded too. */
418 void log_0Ir_1Dr_cache_access(InstrInfo
* n
, Addr data_addr
, Word data_size
)
420 //VG_(printf)("0Ir_1Dr: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
421 // n, data_addr, data_size);
422 cachesim_D1_doref(data_addr
, data_size
,
423 &n
->parent
->Dr
.m1
, &n
->parent
->Dr
.mL
);
427 /* See comment on log_0Ir_1Dr_cache_access. */
429 void log_0Ir_1Dw_cache_access(InstrInfo
* n
, Addr data_addr
, Word data_size
)
431 //VG_(printf)("0Ir_1Dw: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
432 // n, data_addr, data_size);
433 cachesim_D1_doref(data_addr
, data_size
,
434 &n
->parent
->Dw
.m1
, &n
->parent
->Dw
.mL
);
438 /* For branches, we consult two different predictors, one which
439 predicts taken/untaken for conditional branches, and the other
440 which predicts the branch target address for indirect branches
441 (jump-to-register style ones). */
444 void log_cond_branch(InstrInfo
* n
, Word taken
)
446 //VG_(printf)("cbrnch: CCaddr=0x%010lx, taken=0x%010lx\n",
450 += (1 & do_cond_branch_predict(n
->instr_addr
, taken
));
454 void log_ind_branch(InstrInfo
* n
, UWord actual_dst
)
456 //VG_(printf)("ibrnch: CCaddr=0x%010lx, dst=0x%010lx\n",
460 += (1 & do_ind_branch_predict(n
->instr_addr
, actual_dst
));
464 /*------------------------------------------------------------*/
465 /*--- Instrumentation types and structures ---*/
466 /*------------------------------------------------------------*/
468 /* Maintain an ordered list of memory events which are outstanding, in
469 the sense that no IR has yet been generated to do the relevant
470 helper calls. The BB is scanned top to bottom and memory events
471 are added to the end of the list, merging with the most recent
472 notified event where possible (Dw immediately following Dr and
473 having the same size and EA can be merged).
475 This merging is done so that for architectures which have
476 load-op-store instructions (x86, amd64), the insn is treated as if
477 it makes just one memory reference (a modify), rather than two (a
478 read followed by a write at the same address).
480 At various points the list will need to be flushed, that is, IR
481 generated from it. That must happen before any possible exit from
482 the block (the end, or an IRStmt_Exit). Flushing also takes place
483 when there is no space to add a new event.
485 If we require the simulation statistics to be up to date with
486 respect to possible memory exceptions, then the list would have to
487 be flushed before each memory reference. That would however lose
488 performance by inhibiting event-merging during flushing.
490 Flushing the list consists of walking it start to end and emitting
491 instrumentation IR for each event, in the order in which they
492 appear. It may be possible to emit a single call for two adjacent
493 events in order to reduce the number of helper function calls made.
494 For example, it could well be profitable to handle two adjacent Ir
495 events with a single helper call. */
503 Ev_IrNoX
, // Instruction read not crossing cache lines
504 Ev_IrGen
, // Generic Ir, not being detected as IrNoX
507 Ev_Dm
, // Data modify (read then write)
508 Ev_Bc
, // branch conditional
509 Ev_Bi
// branch indirect (to unknown destination)
535 IRAtom
* taken
; /* :: Ity_I1 */
544 static void init_Event ( Event
* ev
) {
545 VG_(memset
)(ev
, 0, sizeof(Event
));
548 static IRAtom
* get_Event_dea ( Event
* ev
) {
550 case Ev_Dr
: return ev
->Ev
.Dr
.ea
;
551 case Ev_Dw
: return ev
->Ev
.Dw
.ea
;
552 case Ev_Dm
: return ev
->Ev
.Dm
.ea
;
553 default: tl_assert(0);
557 static Int
get_Event_dszB ( Event
* ev
) {
559 case Ev_Dr
: return ev
->Ev
.Dr
.szB
;
560 case Ev_Dw
: return ev
->Ev
.Dw
.szB
;
561 case Ev_Dm
: return ev
->Ev
.Dm
.szB
;
562 default: tl_assert(0);
567 /* Up to this many unnotified events are allowed. Number is
568 arbitrary. Larger numbers allow more event merging to occur, but
569 potentially induce more spilling due to extending live ranges of
570 address temporaries. */
574 /* A struct which holds all the running state during instrumentation.
575 Mostly to avoid passing loads of parameters everywhere. */
578 /* The current outstanding-memory-event list. */
579 Event events
[N_EVENTS
];
582 /* The array of InstrInfo bins for the BB. */
585 /* Number InstrInfo bins 'used' so far. */
588 /* The output SB being constructed. */
594 /*------------------------------------------------------------*/
595 /*--- Instrumentation main ---*/
596 /*------------------------------------------------------------*/
598 // Note that origAddr is the real origAddr, not the address of the first
599 // instruction in the block (they can be different due to redirection).
601 SB_info
* get_SB_info(IRSB
* sbIn
, Addr origAddr
)
607 // Count number of original instrs in SB
609 for (i
= 0; i
< sbIn
->stmts_used
; i
++) {
611 if (Ist_IMark
== st
->tag
) n_instrs
++;
614 // Check that we don't have an entry for this BB in the instr-info table.
615 // If this assertion fails, there has been some screwup: some
616 // translations must have been discarded but Cachegrind hasn't discarded
617 // the corresponding entries in the instr-info table.
618 sbInfo
= VG_(OSetGen_Lookup
)(instrInfoTable
, &origAddr
);
619 tl_assert(NULL
== sbInfo
);
621 // BB never translated before (at this address, at least; could have
622 // been unloaded and then reloaded elsewhere in memory)
623 sbInfo
= VG_(OSetGen_AllocNode
)(instrInfoTable
,
624 sizeof(SB_info
) + n_instrs
*sizeof(InstrInfo
));
625 sbInfo
->SB_addr
= origAddr
;
626 sbInfo
->n_instrs
= n_instrs
;
627 VG_(OSetGen_Insert
)( instrInfoTable
, sbInfo
);
633 static void showEvent ( Event
* ev
)
637 VG_(printf
)("IrGen %p\n", ev
->inode
);
640 VG_(printf
)("IrNoX %p\n", ev
->inode
);
643 VG_(printf
)("Dr %p %d EA=", ev
->inode
, ev
->Ev
.Dr
.szB
);
644 ppIRExpr(ev
->Ev
.Dr
.ea
);
648 VG_(printf
)("Dw %p %d EA=", ev
->inode
, ev
->Ev
.Dw
.szB
);
649 ppIRExpr(ev
->Ev
.Dw
.ea
);
653 VG_(printf
)("Dm %p %d EA=", ev
->inode
, ev
->Ev
.Dm
.szB
);
654 ppIRExpr(ev
->Ev
.Dm
.ea
);
658 VG_(printf
)("Bc %p GA=", ev
->inode
);
659 ppIRExpr(ev
->Ev
.Bc
.taken
);
663 VG_(printf
)("Bi %p DST=", ev
->inode
);
664 ppIRExpr(ev
->Ev
.Bi
.dst
);
673 // Reserve and initialise an InstrInfo for the first mention of a new insn.
675 InstrInfo
* setup_InstrInfo ( CgState
* cgs
, Addr instr_addr
, UInt instr_len
)
678 tl_assert(cgs
->sbInfo_i
>= 0);
679 tl_assert(cgs
->sbInfo_i
< cgs
->sbInfo
->n_instrs
);
680 i_node
= &cgs
->sbInfo
->instrs
[ cgs
->sbInfo_i
];
681 i_node
->instr_addr
= instr_addr
;
682 i_node
->instr_len
= instr_len
;
683 i_node
->parent
= get_lineCC(instr_addr
);
689 /* Generate code for all outstanding memory events, and mark the queue
690 empty. Code is generated into cgs->bbOut, and this activity
691 'consumes' slots in cgs->sbInfo. */
693 static void flushEvents ( CgState
* cgs
)
696 const HChar
* helperName
;
706 while (i
< cgs
->events_used
) {
713 /* generate IR to notify event i and possibly the ones
714 immediately following it. */
715 tl_assert(i
>= 0 && i
< cgs
->events_used
);
717 ev
= &cgs
->events
[i
];
718 ev2
= ( i
< cgs
->events_used
-1 ? &cgs
->events
[i
+1] : NULL
);
719 ev3
= ( i
< cgs
->events_used
-2 ? &cgs
->events
[i
+2] : NULL
);
722 VG_(printf
)(" flush ");
726 i_node_expr
= mkIRExpr_HWord( (HWord
)ev
->inode
);
728 /* Decide on helper fn to call and args to pass it, and advance
732 /* Merge an IrNoX with a following Dr/Dm. */
733 if (ev2
&& (ev2
->tag
== Ev_Dr
|| ev2
->tag
== Ev_Dm
)) {
734 /* Why is this true? It's because we're merging an Ir
735 with a following Dr or Dm. The Ir derives from the
736 instruction's IMark and the Dr/Dm from data
737 references which follow it. In short it holds
738 because each insn starts with an IMark, hence an
739 Ev_Ir, and so these Dr/Dm must pertain to the
740 immediately preceding Ir. Same applies to analogous
741 assertions in the subsequent cases. */
742 tl_assert(ev2
->inode
== ev
->inode
);
743 helperName
= "log_1IrNoX_1Dr_cache_access";
744 helperAddr
= &log_1IrNoX_1Dr_cache_access
;
745 argv
= mkIRExprVec_3( i_node_expr
,
747 mkIRExpr_HWord( get_Event_dszB(ev2
) ) );
751 /* Merge an IrNoX with a following Dw. */
753 if (ev2
&& ev2
->tag
== Ev_Dw
) {
754 tl_assert(ev2
->inode
== ev
->inode
);
755 helperName
= "log_1IrNoX_1Dw_cache_access";
756 helperAddr
= &log_1IrNoX_1Dw_cache_access
;
757 argv
= mkIRExprVec_3( i_node_expr
,
759 mkIRExpr_HWord( get_Event_dszB(ev2
) ) );
763 /* Merge an IrNoX with two following IrNoX's. */
765 if (ev2
&& ev3
&& ev2
->tag
== Ev_IrNoX
&& ev3
->tag
== Ev_IrNoX
)
768 helperName
= "log_3IrNoX_0D_cache_access";
769 helperAddr
= &log_3IrNoX_0D_cache_access
;
771 helperName
= "log_3Ir";
772 helperAddr
= &log_3Ir
;
774 argv
= mkIRExprVec_3( i_node_expr
,
775 mkIRExpr_HWord( (HWord
)ev2
->inode
),
776 mkIRExpr_HWord( (HWord
)ev3
->inode
) );
780 /* Merge an IrNoX with one following IrNoX. */
782 if (ev2
&& ev2
->tag
== Ev_IrNoX
) {
784 helperName
= "log_2IrNoX_0D_cache_access";
785 helperAddr
= &log_2IrNoX_0D_cache_access
;
787 helperName
= "log_2Ir";
788 helperAddr
= &log_2Ir
;
790 argv
= mkIRExprVec_2( i_node_expr
,
791 mkIRExpr_HWord( (HWord
)ev2
->inode
) );
795 /* No merging possible; emit as-is. */
798 helperName
= "log_1IrNoX_0D_cache_access";
799 helperAddr
= &log_1IrNoX_0D_cache_access
;
801 helperName
= "log_1Ir";
802 helperAddr
= &log_1Ir
;
804 argv
= mkIRExprVec_1( i_node_expr
);
811 helperName
= "log_1IrGen_0D_cache_access";
812 helperAddr
= &log_1IrGen_0D_cache_access
;
814 helperName
= "log_1Ir";
815 helperAddr
= &log_1Ir
;
817 argv
= mkIRExprVec_1( i_node_expr
);
823 /* Data read or modify */
824 helperName
= "log_0Ir_1Dr_cache_access";
825 helperAddr
= &log_0Ir_1Dr_cache_access
;
826 argv
= mkIRExprVec_3( i_node_expr
,
828 mkIRExpr_HWord( get_Event_dszB(ev
) ) );
834 helperName
= "log_0Ir_1Dw_cache_access";
835 helperAddr
= &log_0Ir_1Dw_cache_access
;
836 argv
= mkIRExprVec_3( i_node_expr
,
838 mkIRExpr_HWord( get_Event_dszB(ev
) ) );
843 /* Conditional branch */
844 helperName
= "log_cond_branch";
845 helperAddr
= &log_cond_branch
;
846 argv
= mkIRExprVec_2( i_node_expr
, ev
->Ev
.Bc
.taken
);
851 /* Branch to an unknown destination */
852 helperName
= "log_ind_branch";
853 helperAddr
= &log_ind_branch
;
854 argv
= mkIRExprVec_2( i_node_expr
, ev
->Ev
.Bi
.dst
);
862 /* Add the helper. */
863 tl_assert(helperName
);
864 tl_assert(helperAddr
);
866 di
= unsafeIRDirty_0_N( regparms
,
867 helperName
, VG_(fnptr_to_fnentry
)( helperAddr
),
869 addStmtToIRSB( cgs
->sbOut
, IRStmt_Dirty(di
) );
872 cgs
->events_used
= 0;
875 static void addEvent_Ir ( CgState
* cgs
, InstrInfo
* inode
)
878 if (cgs
->events_used
== N_EVENTS
)
880 tl_assert(cgs
->events_used
>= 0 && cgs
->events_used
< N_EVENTS
);
881 evt
= &cgs
->events
[cgs
->events_used
];
884 if (cachesim_is_IrNoX(inode
->instr_addr
, inode
->instr_len
)) {
886 distinct_instrsNoX
++;
889 distinct_instrsGen
++;
895 void addEvent_Dr ( CgState
* cgs
, InstrInfo
* inode
, Int datasize
, IRAtom
* ea
)
898 tl_assert(isIRAtom(ea
));
899 tl_assert(datasize
>= 1 && datasize
<= min_line_size
);
902 if (cgs
->events_used
== N_EVENTS
)
904 tl_assert(cgs
->events_used
>= 0 && cgs
->events_used
< N_EVENTS
);
905 evt
= &cgs
->events
[cgs
->events_used
];
909 evt
->Ev
.Dr
.szB
= datasize
;
915 void addEvent_Dw ( CgState
* cgs
, InstrInfo
* inode
, Int datasize
, IRAtom
* ea
)
919 tl_assert(isIRAtom(ea
));
920 tl_assert(datasize
>= 1 && datasize
<= min_line_size
);
925 /* Is it possible to merge this write with the preceding read? */
926 if (cgs
->events_used
> 0) {
927 Event
* lastEvt
= &cgs
->events
[cgs
->events_used
-1];
928 if ( lastEvt
->tag
== Ev_Dr
929 && lastEvt
->Ev
.Dr
.szB
== datasize
930 && lastEvt
->inode
== inode
931 && eqIRAtom(lastEvt
->Ev
.Dr
.ea
, ea
))
933 lastEvt
->tag
= Ev_Dm
;
938 /* No. Add as normal. */
939 if (cgs
->events_used
== N_EVENTS
)
941 tl_assert(cgs
->events_used
>= 0 && cgs
->events_used
< N_EVENTS
);
942 evt
= &cgs
->events
[cgs
->events_used
];
946 evt
->Ev
.Dw
.szB
= datasize
;
952 void addEvent_D_guarded ( CgState
* cgs
, InstrInfo
* inode
,
953 Int datasize
, IRAtom
* ea
, IRAtom
* guard
,
956 tl_assert(isIRAtom(ea
));
958 tl_assert(isIRAtom(guard
));
959 tl_assert(datasize
>= 1 && datasize
<= min_line_size
);
964 /* Adding guarded memory actions and merging them with the existing
965 queue is too complex. Simply flush the queue and add this
966 action immediately. Since guarded loads and stores are pretty
967 rare, this is not thought likely to cause any noticeable
968 performance loss as a result of the loss of event-merging
970 tl_assert(cgs
->events_used
>= 0);
972 tl_assert(cgs
->events_used
== 0);
973 /* Same as case Ev_Dw / case Ev_Dr in flushEvents, except with guard */
975 const HChar
* helperName
;
980 i_node_expr
= mkIRExpr_HWord( (HWord
)inode
);
981 helperName
= isWrite
? "log_0Ir_1Dw_cache_access"
982 : "log_0Ir_1Dr_cache_access";
983 helperAddr
= isWrite
? &log_0Ir_1Dw_cache_access
984 : &log_0Ir_1Dr_cache_access
;
985 argv
= mkIRExprVec_3( i_node_expr
,
986 ea
, mkIRExpr_HWord( datasize
) );
988 di
= unsafeIRDirty_0_N(
990 helperName
, VG_(fnptr_to_fnentry
)( helperAddr
),
993 addStmtToIRSB( cgs
->sbOut
, IRStmt_Dirty(di
) );
998 void addEvent_Bc ( CgState
* cgs
, InstrInfo
* inode
, IRAtom
* guard
)
1001 tl_assert(isIRAtom(guard
));
1002 tl_assert(typeOfIRExpr(cgs
->sbOut
->tyenv
, guard
)
1003 == (sizeof(RegWord
)==4 ? Ity_I32
: Ity_I64
));
1004 if (!clo_branch_sim
)
1006 if (cgs
->events_used
== N_EVENTS
)
1008 tl_assert(cgs
->events_used
>= 0 && cgs
->events_used
< N_EVENTS
);
1009 evt
= &cgs
->events
[cgs
->events_used
];
1013 evt
->Ev
.Bc
.taken
= guard
;
1018 void addEvent_Bi ( CgState
* cgs
, InstrInfo
* inode
, IRAtom
* whereTo
)
1021 tl_assert(isIRAtom(whereTo
));
1022 tl_assert(typeOfIRExpr(cgs
->sbOut
->tyenv
, whereTo
)
1023 == (sizeof(RegWord
)==4 ? Ity_I32
: Ity_I64
));
1024 if (!clo_branch_sim
)
1026 if (cgs
->events_used
== N_EVENTS
)
1028 tl_assert(cgs
->events_used
>= 0 && cgs
->events_used
< N_EVENTS
);
1029 evt
= &cgs
->events
[cgs
->events_used
];
1033 evt
->Ev
.Bi
.dst
= whereTo
;
1037 ////////////////////////////////////////////////////////////
1041 IRSB
* cg_instrument ( VgCallbackClosure
* closure
,
1043 const VexGuestLayout
* layout
,
1044 const VexGuestExtents
* vge
,
1045 const VexArchInfo
* archinfo_host
,
1046 IRType gWordTy
, IRType hWordTy
)
1051 Addr cia
; /* address of current insn */
1053 IRTypeEnv
* tyenv
= sbIn
->tyenv
;
1054 InstrInfo
* curr_inode
= NULL
;
1056 if (gWordTy
!= hWordTy
) {
1057 /* We don't currently support this case. */
1058 VG_(tool_panic
)("host/guest word size mismatch");
1062 cgs
.sbOut
= deepCopyIRSBExceptStmts(sbIn
);
1064 // Copy verbatim any IR preamble preceding the first IMark
1066 while (i
< sbIn
->stmts_used
&& sbIn
->stmts
[i
]->tag
!= Ist_IMark
) {
1067 addStmtToIRSB( cgs
.sbOut
, sbIn
->stmts
[i
] );
1071 // Get the first statement, and initial cia from it
1072 tl_assert(sbIn
->stmts_used
> 0);
1073 tl_assert(i
< sbIn
->stmts_used
);
1074 st
= sbIn
->stmts
[i
];
1075 tl_assert(Ist_IMark
== st
->tag
);
1077 cia
= st
->Ist
.IMark
.addr
;
1078 isize
= st
->Ist
.IMark
.len
;
1079 // If Vex fails to decode an instruction, the size will be zero.
1080 // Pretend otherwise.
1081 if (isize
== 0) isize
= VG_MIN_INSTR_SZB
;
1083 // Set up running state and get block info
1084 tl_assert(closure
->readdr
== vge
->base
[0]);
1085 cgs
.events_used
= 0;
1086 cgs
.sbInfo
= get_SB_info(sbIn
, (Addr
)closure
->readdr
);
1090 VG_(printf
)("\n\n---------- cg_instrument ----------\n");
1092 // Traverse the block, initialising inodes, adding events and flushing as
1094 for (/*use current i*/; i
< sbIn
->stmts_used
; i
++) {
1096 st
= sbIn
->stmts
[i
];
1097 tl_assert(isFlatIRStmt(st
));
1108 cia
= st
->Ist
.IMark
.addr
;
1109 isize
= st
->Ist
.IMark
.len
;
1111 // If Vex fails to decode an instruction, the size will be zero.
1112 // Pretend otherwise.
1113 if (isize
== 0) isize
= VG_MIN_INSTR_SZB
;
1115 // Sanity-check size.
1116 tl_assert( (VG_MIN_INSTR_SZB
<= isize
&& isize
<= VG_MAX_INSTR_SZB
)
1117 || VG_CLREQ_SZB
== isize
);
1119 // Get space for and init the inode, record it as the current one.
1120 // Subsequent Dr/Dw/Dm events from the same instruction will
1122 curr_inode
= setup_InstrInfo(&cgs
, cia
, isize
);
1124 addEvent_Ir( &cgs
, curr_inode
);
1128 IRExpr
* data
= st
->Ist
.WrTmp
.data
;
1129 if (data
->tag
== Iex_Load
) {
1130 IRExpr
* aexpr
= data
->Iex
.Load
.addr
;
1131 // Note also, endianness info is ignored. I guess
1132 // that's not interesting.
1133 addEvent_Dr( &cgs
, curr_inode
, sizeofIRType(data
->Iex
.Load
.ty
),
1140 IRExpr
* data
= st
->Ist
.Store
.data
;
1141 IRExpr
* aexpr
= st
->Ist
.Store
.addr
;
1142 addEvent_Dw( &cgs
, curr_inode
,
1143 sizeofIRType(typeOfIRExpr(tyenv
, data
)), aexpr
);
1148 IRStoreG
* sg
= st
->Ist
.StoreG
.details
;
1149 IRExpr
* data
= sg
->data
;
1150 IRExpr
* addr
= sg
->addr
;
1151 IRType type
= typeOfIRExpr(tyenv
, data
);
1152 tl_assert(type
!= Ity_INVALID
);
1153 addEvent_D_guarded( &cgs
, curr_inode
,
1154 sizeofIRType(type
), addr
, sg
->guard
,
1160 IRLoadG
* lg
= st
->Ist
.LoadG
.details
;
1161 IRType type
= Ity_INVALID
; /* loaded type */
1162 IRType typeWide
= Ity_INVALID
; /* after implicit widening */
1163 IRExpr
* addr
= lg
->addr
;
1164 typeOfIRLoadGOp(lg
->cvt
, &typeWide
, &type
);
1165 tl_assert(type
!= Ity_INVALID
);
1166 addEvent_D_guarded( &cgs
, curr_inode
,
1167 sizeofIRType(type
), addr
, lg
->guard
,
1168 False
/*!isWrite*/ );
1174 IRDirty
* d
= st
->Ist
.Dirty
.details
;
1175 if (d
->mFx
!= Ifx_None
) {
1176 /* This dirty helper accesses memory. Collect the details. */
1177 tl_assert(d
->mAddr
!= NULL
);
1178 tl_assert(d
->mSize
!= 0);
1179 dataSize
= d
->mSize
;
1180 // Large (eg. 28B, 108B, 512B on x86) data-sized
1181 // instructions will be done inaccurately, but they're
1182 // very rare and this avoids errors from hitting more
1183 // than two cache lines in the simulation.
1184 if (dataSize
> min_line_size
)
1185 dataSize
= min_line_size
;
1186 if (d
->mFx
== Ifx_Read
|| d
->mFx
== Ifx_Modify
)
1187 addEvent_Dr( &cgs
, curr_inode
, dataSize
, d
->mAddr
);
1188 if (d
->mFx
== Ifx_Write
|| d
->mFx
== Ifx_Modify
)
1189 addEvent_Dw( &cgs
, curr_inode
, dataSize
, d
->mAddr
);
1191 tl_assert(d
->mAddr
== NULL
);
1192 tl_assert(d
->mSize
== 0);
1198 /* We treat it as a read and a write of the location. I
1199 think that is the same behaviour as it was before IRCAS
1200 was introduced, since prior to that point, the Vex
1201 front ends would translate a lock-prefixed instruction
1202 into a (normal) read followed by a (normal) write. */
1204 IRCAS
* cas
= st
->Ist
.CAS
.details
;
1205 tl_assert(cas
->addr
!= NULL
);
1206 tl_assert(cas
->dataLo
!= NULL
);
1207 dataSize
= sizeofIRType(typeOfIRExpr(tyenv
, cas
->dataLo
));
1208 if (cas
->dataHi
!= NULL
)
1209 dataSize
*= 2; /* since it's a doubleword-CAS */
1210 /* I don't think this can ever happen, but play safe. */
1211 if (dataSize
> min_line_size
)
1212 dataSize
= min_line_size
;
1213 addEvent_Dr( &cgs
, curr_inode
, dataSize
, cas
->addr
);
1214 addEvent_Dw( &cgs
, curr_inode
, dataSize
, cas
->addr
);
1220 if (st
->Ist
.LLSC
.storedata
== NULL
) {
1222 dataTy
= typeOfIRTemp(tyenv
, st
->Ist
.LLSC
.result
);
1223 addEvent_Dr( &cgs
, curr_inode
,
1224 sizeofIRType(dataTy
), st
->Ist
.LLSC
.addr
);
1225 /* flush events before LL, should help SC to succeed */
1226 flushEvents( &cgs
);
1229 dataTy
= typeOfIRExpr(tyenv
, st
->Ist
.LLSC
.storedata
);
1230 addEvent_Dw( &cgs
, curr_inode
,
1231 sizeofIRType(dataTy
), st
->Ist
.LLSC
.addr
);
1237 // call branch predictor only if this is a branch in guest code
1238 if ( (st
->Ist
.Exit
.jk
== Ijk_Boring
) ||
1239 (st
->Ist
.Exit
.jk
== Ijk_Call
) ||
1240 (st
->Ist
.Exit
.jk
== Ijk_Ret
) )
1242 /* Stuff to widen the guard expression to a host word, so
1243 we can pass it to the branch predictor simulation
1244 functions easily. */
1248 IRType tyW
= hWordTy
;
1249 IROp widen
= tyW
==Ity_I32
? Iop_1Uto32
: Iop_1Uto64
;
1250 IROp opXOR
= tyW
==Ity_I32
? Iop_Xor32
: Iop_Xor64
;
1251 IRTemp guard1
= newIRTemp(cgs
.sbOut
->tyenv
, Ity_I1
);
1252 IRTemp guardW
= newIRTemp(cgs
.sbOut
->tyenv
, tyW
);
1253 IRTemp guard
= newIRTemp(cgs
.sbOut
->tyenv
, tyW
);
1254 IRExpr
* one
= tyW
==Ity_I32
? IRExpr_Const(IRConst_U32(1))
1255 : IRExpr_Const(IRConst_U64(1));
1257 /* First we need to figure out whether the side exit got
1258 inverted by the ir optimiser. To do that, figure out
1259 the next (fallthrough) instruction's address and the
1260 side exit address and see if they are the same. */
1263 /* Side exit address */
1264 dst
= st
->Ist
.Exit
.dst
;
1265 if (tyW
== Ity_I32
) {
1266 tl_assert(dst
->tag
== Ico_U32
);
1269 tl_assert(tyW
== Ity_I64
);
1270 tl_assert(dst
->tag
== Ico_U64
);
1274 inverted
= nia
== sea
;
1276 /* Widen the guard expression. */
1277 addStmtToIRSB( cgs
.sbOut
,
1278 IRStmt_WrTmp( guard1
, st
->Ist
.Exit
.guard
));
1279 addStmtToIRSB( cgs
.sbOut
,
1280 IRStmt_WrTmp( guardW
,
1282 IRExpr_RdTmp(guard1
))) );
1283 /* If the exit is inverted, invert the sense of the guard. */
1288 inverted
? IRExpr_Binop(opXOR
, IRExpr_RdTmp(guardW
), one
)
1289 : IRExpr_RdTmp(guardW
)
1291 /* And post the event. */
1292 addEvent_Bc( &cgs
, curr_inode
, IRExpr_RdTmp(guard
) );
1295 /* We may never reach the next statement, so need to flush
1296 all outstanding transactions now. */
1297 flushEvents( &cgs
);
1307 /* Copy the original statement */
1308 addStmtToIRSB( cgs
.sbOut
, st
);
1316 /* Deal with branches to unknown destinations. Except ignore ones
1317 which are function returns as we assume the return stack
1318 predictor never mispredicts. */
1319 if ((sbIn
->jumpkind
== Ijk_Boring
) || (sbIn
->jumpkind
== Ijk_Call
)) {
1320 if (0) { ppIRExpr( sbIn
->next
); VG_(printf
)("\n"); }
1321 switch (sbIn
->next
->tag
) {
1323 break; /* boring - branch to known address */
1325 /* looks like an indirect branch (branch to unknown) */
1326 addEvent_Bi( &cgs
, curr_inode
, sbIn
->next
);
1329 /* shouldn't happen - if the incoming IR is properly
1330 flattened, should only have tmp and const cases to
1336 /* At the end of the bb. Flush outstandings. */
1337 flushEvents( &cgs
);
1339 /* done. stay sane ... */
1340 tl_assert(cgs
.sbInfo_i
== cgs
.sbInfo
->n_instrs
);
1343 VG_(printf
)( "goto {");
1344 ppIRJumpKind(sbIn
->jumpkind
);
1346 ppIRExpr( sbIn
->next
);
1347 VG_(printf
)( "}\n");
1353 /*------------------------------------------------------------*/
1354 /*--- Cache configuration ---*/
1355 /*------------------------------------------------------------*/
/* User-requested cache geometries.  UNDEFINED_CACHE means "not set on the
   command line"; the real geometry is then chosen later in
   cg_post_clo_init() via VG_(post_clo_init_configure_caches)().
   NOTE(review): presumably filled in from --I1/--D1/--LL options through
   VG_(str_clo_cache_opt) in cg_process_cmd_line_option -- confirm. */
1357 static cache_t clo_I1_cache
= UNDEFINED_CACHE
;
1358 static cache_t clo_D1_cache
= UNDEFINED_CACHE
;
1359 static cache_t clo_LL_cache
= UNDEFINED_CACHE
;
1361 /*------------------------------------------------------------*/
1362 /*--- cg_fini() and related function ---*/
1363 /*------------------------------------------------------------*/
1365 // Total reads/writes/misses. Calculated during CC traversal at the end.
/* Whole-program totals: instruction reads (Ir), data reads (Dr), data
   writes (Dw), conditional branches (Bc) and indirect branches (Bi).
   They are accumulated while fprint_CC_table_and_calc_totals() walks the
   per-line CC table, and then printed by cg_fini(). */
1367 static CacheCC Ir_total
;
1368 static CacheCC Dr_total
;
1369 static CacheCC Dw_total
;
1370 static BranchCC Bc_total
;
1371 static BranchCC Bi_total
;
/* Write the cachegrind output file (desc:/cmd:/events: header, then one
   line per per-source-line cost centre, then a summary: line) and, as a
   side effect, accumulate the global Ir/Dr/Dw/Bc/Bi totals used by
   cg_fini().  Which event columns are emitted depends on clo_cache_sim
   and clo_branch_sim.
   NOTE(review): this extraction is missing several original source lines
   (e.g. the fp declaration, the fp==NULL early-return branch, closing
   braces); the visible tokens are preserved verbatim below. */
1373 static void fprint_CC_table_and_calc_totals(void)
1377 HChar
*currFile
= NULL
;
1378 const HChar
*currFn
= NULL
;
1381 // Setup output filename. Nb: it's important to do this now, ie. as late
1382 // as possible. If we do it at start-up and the program forks and the
1383 // output file format string contains a %p (pid) specifier, both the
1384 // parent and child will incorrectly write to the same file; this
1385 // happened in 3.3.0.
1386 HChar
* cachegrind_out_file
=
1387 VG_(expand_file_name
)("--cachegrind-out-file", clo_cachegrind_out_file
);
// Open for writing, create/truncate, user read+write permissions only.
1389 fp
= VG_(fopen
)(cachegrind_out_file
, VKI_O_CREAT
|VKI_O_TRUNC
|VKI_O_WRONLY
,
1390 VKI_S_IRUSR
|VKI_S_IWUSR
);
1392 // If the file can't be opened for whatever reason (conflict
1393 // between multiple cachegrinded processes?), give up now.
1394 VG_(umsg
)("error: can't open output data file '%s'\n",
1395 cachegrind_out_file
);
1396 VG_(umsg
)(" ... so detailed results will be missing.\n");
1397 VG_(free
)(cachegrind_out_file
);
// Success path: filename string is no longer needed once the file is open.
1400 VG_(free
)(cachegrind_out_file
);
// "desc:" header lines describing the simulated cache geometry.
1403 if (clo_cache_sim
) {
1404 // "desc:" lines (giving I1/D1/LL cache configuration). The spaces after
1405 // the 2nd colon makes cg_annotate's output look nicer.
1406 VG_(fprintf
)(fp
, "desc: I1 cache: %s\n"
1407 "desc: D1 cache: %s\n"
1408 "desc: LL cache: %s\n",
1409 I1
.desc_line
, D1
.desc_line
, LL
.desc_line
);
// "cmd:" line: the profiled executable plus its client arguments.
1413 VG_(fprintf
)(fp
, "cmd: %s", VG_(args_the_exename
));
1414 for (i
= 0; i
< VG_(sizeXA
)( VG_(args_for_client
) ); i
++) {
1415 HChar
* arg
= * (HChar
**) VG_(indexXA
)( VG_(args_for_client
), i
);
1416 VG_(fprintf
)(fp
, " %s", arg
);
// "events:" line: the column names for the per-line counts that follow.
// Which set is printed depends on the two simulation flags.
1419 if (clo_cache_sim
&& clo_branch_sim
) {
1420 VG_(fprintf
)(fp
, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
1423 else if (clo_cache_sim
&& !clo_branch_sim
) {
1424 VG_(fprintf
)(fp
, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
1427 else if (!clo_cache_sim
&& clo_branch_sim
) {
1428 VG_(fprintf
)(fp
, "\nevents: Ir Bc Bcm Bi Bim\n");
1431 VG_(fprintf
)(fp
, "\nevents: Ir\n");
1434 // Traverse every lineCC
1435 VG_(OSetGen_ResetIter
)(CC_table
);
1436 while ( (lineCC
= VG_(OSetGen_Next
)(CC_table
)) ) {
1437 Bool just_hit_a_new_file
= False
;
1438 // If we've hit a new file, print a "fl=" line. Note that because
1439 // each string is stored exactly once in the string table, we can use
1440 // pointer comparison rather than strcmp() to test for equality, which
1441 // is good because most of the time the comparisons are equal and so
1442 // the whole strings would have to be checked.
1443 if ( lineCC
->loc
.file
!= currFile
) {
1444 currFile
= lineCC
->loc
.file
;
1445 VG_(fprintf
)(fp
, "fl=%s\n", currFile
);
1447 just_hit_a_new_file
= True
;
1449 // If we've hit a new function, print a "fn=" line. We know to do
1450 // this when the function name changes, and also every time we hit a
1451 // new file (in which case the new function name might be the same as
1452 // in the old file, hence the just_hit_a_new_file test).
1453 if ( just_hit_a_new_file
|| lineCC
->loc
.fn
!= currFn
) {
1454 currFn
= lineCC
->loc
.fn
;
1455 VG_(fprintf
)(fp
, "fn=%s\n", currFn
);
// Per-line counts, one line per CC, columns matching the events: line.
1460 if (clo_cache_sim
&& clo_branch_sim
) {
1461 VG_(fprintf
)(fp
, "%d %llu %llu %llu"
1464 " %llu %llu %llu %llu\n",
1466 lineCC
->Ir
.a
, lineCC
->Ir
.m1
, lineCC
->Ir
.mL
,
1467 lineCC
->Dr
.a
, lineCC
->Dr
.m1
, lineCC
->Dr
.mL
,
1468 lineCC
->Dw
.a
, lineCC
->Dw
.m1
, lineCC
->Dw
.mL
,
1469 lineCC
->Bc
.b
, lineCC
->Bc
.mp
,
1470 lineCC
->Bi
.b
, lineCC
->Bi
.mp
);
1472 else if (clo_cache_sim
&& !clo_branch_sim
) {
1473 VG_(fprintf
)(fp
, "%d %llu %llu %llu"
1475 " %llu %llu %llu\n",
1477 lineCC
->Ir
.a
, lineCC
->Ir
.m1
, lineCC
->Ir
.mL
,
1478 lineCC
->Dr
.a
, lineCC
->Dr
.m1
, lineCC
->Dr
.mL
,
1479 lineCC
->Dw
.a
, lineCC
->Dw
.m1
, lineCC
->Dw
.mL
);
1481 else if (!clo_cache_sim
&& clo_branch_sim
) {
1482 VG_(fprintf
)(fp
, "%d %llu"
1483 " %llu %llu %llu %llu\n",
1486 lineCC
->Bc
.b
, lineCC
->Bc
.mp
,
1487 lineCC
->Bi
.b
, lineCC
->Bi
.mp
);
// Neither simulation enabled: only the Ir count is emitted.
1490 VG_(fprintf
)(fp
, "%d %llu\n",
1495 // Update summary stats
1496 Ir_total
.a
+= lineCC
->Ir
.a
;
1497 Ir_total
.m1
+= lineCC
->Ir
.m1
;
1498 Ir_total
.mL
+= lineCC
->Ir
.mL
;
1499 Dr_total
.a
+= lineCC
->Dr
.a
;
1500 Dr_total
.m1
+= lineCC
->Dr
.m1
;
1501 Dr_total
.mL
+= lineCC
->Dr
.mL
;
1502 Dw_total
.a
+= lineCC
->Dw
.a
;
1503 Dw_total
.m1
+= lineCC
->Dw
.m1
;
1504 Dw_total
.mL
+= lineCC
->Dw
.mL
;
1505 Bc_total
.b
+= lineCC
->Bc
.b
;
1506 Bc_total
.mp
+= lineCC
->Bc
.mp
;
1507 Bi_total
.b
+= lineCC
->Bi
.b
;
1508 Bi_total
.mp
+= lineCC
->Bi
.mp
;
1513 // Summary stats must come after rest of table, since we calculate them
1514 // during traversal. */
1515 if (clo_cache_sim
&& clo_branch_sim
) {
1516 VG_(fprintf
)(fp
, "summary:"
1520 " %llu %llu %llu %llu\n",
1521 Ir_total
.a
, Ir_total
.m1
, Ir_total
.mL
,
1522 Dr_total
.a
, Dr_total
.m1
, Dr_total
.mL
,
1523 Dw_total
.a
, Dw_total
.m1
, Dw_total
.mL
,
1524 Bc_total
.b
, Bc_total
.mp
,
1525 Bi_total
.b
, Bi_total
.mp
);
1527 else if (clo_cache_sim
&& !clo_branch_sim
) {
1528 VG_(fprintf
)(fp
, "summary:"
1531 " %llu %llu %llu\n",
1532 Ir_total
.a
, Ir_total
.m1
, Ir_total
.mL
,
1533 Dr_total
.a
, Dr_total
.m1
, Dr_total
.mL
,
1534 Dw_total
.a
, Dw_total
.m1
, Dw_total
.mL
);
1536 else if (!clo_cache_sim
&& clo_branch_sim
) {
1537 VG_(fprintf
)(fp
, "summary:"
1539 " %llu %llu %llu %llu\n",
1541 Bc_total
.b
, Bc_total
.mp
,
1542 Bi_total
.b
, Bi_total
.mp
);
// Fallback summary when neither simulation is enabled (Ir only).
1545 VG_(fprintf
)(fp
, "summary:"
/* Return the number of characters needed to print n in decimal,
   including room for thousands-separator commas (one comma per
   three digits, hence w + (w-1)/3).
   NOTE(review): the loop that computes the digit count w (original
   lines 1554-1560) is missing from this extraction. */
1553 static UInt
ULong_width(ULong n
)
1561 return w
+ (w
-1)/3; // add space for commas
/* Tool finalisation: write the output file (via
   fprint_CC_table_and_calc_totals), then print the human-readable
   I/D/LL cache and branch summaries to the user, plus internal stats
   when --stats=yes.  exitcode is unused here.
   NOTE(review): this extraction is missing several original lines
   (local declarations such as l1/l2/l3/D_total/B_total, some closing
   braces, and the early return after the verbosity check); visible
   tokens are preserved verbatim below. */
1564 static void cg_fini(Int exitcode
)
1566 static HChar fmt
[128]; // OK; large enough
1570 ULong LL_total_m
, LL_total_mr
, LL_total_mw
,
1571 LL_total
, LL_total_r
, LL_total_w
;
1574 fprint_CC_table_and_calc_totals();
// Quiet mode: user asked for no console output.
1576 if (VG_(clo_verbosity
) == 0)
1579 // Nb: this isn't called "MAX" because that overshadows a global on Darwin.
1580 #define CG_MAX(a, b) ((a) >= (b) ? (a) : (b))
1582 /* I cache results. Use the I_refs value to determine the first column
// Column widths are sized from the largest number in each column.
1584 l1
= ULong_width(Ir_total
.a
);
1585 l2
= ULong_width(CG_MAX(Dr_total
.a
, Bc_total
.b
));
1586 l3
= ULong_width(CG_MAX(Dw_total
.a
, Bi_total
.b
));
1588 /* Make format string, getting width right for numbers */
1589 VG_(sprintf
)(fmt
, "%%s %%,%dllu\n", l1
);
1591 /* Always print this */
1592 VG_(umsg
)(fmt
, "I refs: ", Ir_total
.a
);
1594 /* If cache profiling is enabled, show D access numbers and all
1596 if (clo_cache_sim
) {
1597 VG_(umsg
)(fmt
, "I1 misses: ", Ir_total
.m1
);
1598 VG_(umsg
)(fmt
, "LLi misses: ", Ir_total
.mL
);
// Avoid division by zero in the miss-rate computations below.
1600 if (0 == Ir_total
.a
) Ir_total
.a
= 1;
1601 VG_(umsg
)("I1 miss rate: %*.2f%%\n", l1
,
1602 Ir_total
.m1
* 100.0 / Ir_total
.a
);
1603 VG_(umsg
)("LLi miss rate: %*.2f%%\n", l1
,
1604 Ir_total
.mL
* 100.0 / Ir_total
.a
);
1607 /* D cache results. Use the D_refs.rd and D_refs.wr values to
1608 * determine the width of columns 2 & 3. */
1609 D_total
.a
= Dr_total
.a
+ Dw_total
.a
;
1610 D_total
.m1
= Dr_total
.m1
+ Dw_total
.m1
;
1611 D_total
.mL
= Dr_total
.mL
+ Dw_total
.mL
;
1613 /* Make format string, getting width right for numbers */
1614 VG_(sprintf
)(fmt
, "%%s %%,%dllu (%%,%dllu rd + %%,%dllu wr)\n",
1617 VG_(umsg
)(fmt
, "D refs: ",
1618 D_total
.a
, Dr_total
.a
, Dw_total
.a
);
1619 VG_(umsg
)(fmt
, "D1 misses: ",
1620 D_total
.m1
, Dr_total
.m1
, Dw_total
.m1
);
1621 VG_(umsg
)(fmt
, "LLd misses: ",
1622 D_total
.mL
, Dr_total
.mL
, Dw_total
.mL
);
// Again clamp denominators to at least 1 before computing rates.
1624 if (0 == D_total
.a
) D_total
.a
= 1;
1625 if (0 == Dr_total
.a
) Dr_total
.a
= 1;
1626 if (0 == Dw_total
.a
) Dw_total
.a
= 1;
1627 VG_(umsg
)("D1 miss rate: %*.1f%% (%*.1f%% + %*.1f%% )\n",
1628 l1
, D_total
.m1
* 100.0 / D_total
.a
,
1629 l2
, Dr_total
.m1
* 100.0 / Dr_total
.a
,
1630 l3
, Dw_total
.m1
* 100.0 / Dw_total
.a
);
1631 VG_(umsg
)("LLd miss rate: %*.1f%% (%*.1f%% + %*.1f%% )\n",
1632 l1
, D_total
.mL
* 100.0 / D_total
.a
,
1633 l2
, Dr_total
.mL
* 100.0 / Dr_total
.a
,
1634 l3
, Dw_total
.mL
* 100.0 / Dw_total
.a
);
1637 /* LL overall results */
// LL refs are the L1 misses (I and D combined); LL misses are the
// accesses that also missed in LL.
1639 LL_total
= Dr_total
.m1
+ Dw_total
.m1
+ Ir_total
.m1
;
1640 LL_total_r
= Dr_total
.m1
+ Ir_total
.m1
;
1641 LL_total_w
= Dw_total
.m1
;
1642 VG_(umsg
)(fmt
, "LL refs: ",
1643 LL_total
, LL_total_r
, LL_total_w
);
1645 LL_total_m
= Dr_total
.mL
+ Dw_total
.mL
+ Ir_total
.mL
;
1646 LL_total_mr
= Dr_total
.mL
+ Ir_total
.mL
;
1647 LL_total_mw
= Dw_total
.mL
;
1648 VG_(umsg
)(fmt
, "LL misses: ",
1649 LL_total_m
, LL_total_mr
, LL_total_mw
);
1651 VG_(umsg
)("LL miss rate: %*.1f%% (%*.1f%% + %*.1f%% )\n",
1652 l1
, LL_total_m
* 100.0 / (Ir_total
.a
+ D_total
.a
),
1653 l2
, LL_total_mr
* 100.0 / (Ir_total
.a
+ Dr_total
.a
),
1654 l3
, LL_total_mw
* 100.0 / Dw_total
.a
);
1657 /* If branch profiling is enabled, show branch overall results. */
1658 if (clo_branch_sim
) {
1659 /* Make format string, getting width right for numbers */
1660 VG_(sprintf
)(fmt
, "%%s %%,%dllu (%%,%dllu cond + %%,%dllu ind)\n",
// Clamp branch counts to avoid divide-by-zero in the rate lines.
1663 if (0 == Bc_total
.b
) Bc_total
.b
= 1;
1664 if (0 == Bi_total
.b
) Bi_total
.b
= 1;
1665 B_total
.b
= Bc_total
.b
+ Bi_total
.b
;
1666 B_total
.mp
= Bc_total
.mp
+ Bi_total
.mp
;
1669 VG_(umsg
)(fmt
, "Branches: ",
1670 B_total
.b
, Bc_total
.b
, Bi_total
.b
);
1672 VG_(umsg
)(fmt
, "Mispredicts: ",
1673 B_total
.mp
, Bc_total
.mp
, Bi_total
.mp
);
1675 VG_(umsg
)("Mispred rate: %*.1f%% (%*.1f%% + %*.1f%% )\n",
1676 l1
, B_total
.mp
* 100.0 / B_total
.b
,
1677 l2
, Bc_total
.mp
* 100.0 / Bc_total
.b
,
1678 l3
, Bi_total
.mp
* 100.0 / Bi_total
.b
);
// Internal statistics, printed only with --stats=yes.
1682 if (VG_(clo_stats
)) {
1683 Int debug_lookups
= full_debugs
+ fn_debugs
+
1684 file_line_debugs
+ no_debugs
;
1687 VG_(dmsg
)("cachegrind: distinct files : %d\n", distinct_files
);
1688 VG_(dmsg
)("cachegrind: distinct functions : %d\n", distinct_fns
);
1689 VG_(dmsg
)("cachegrind: distinct lines : %d\n", distinct_lines
);
1690 VG_(dmsg
)("cachegrind: distinct instrs NoX: %d\n", distinct_instrsNoX
);
1691 VG_(dmsg
)("cachegrind: distinct instrs Gen: %d\n", distinct_instrsGen
);
1692 VG_(dmsg
)("cachegrind: debug lookups : %d\n", debug_lookups
);
1694 VG_(dmsg
)("cachegrind: with full info:%6.1f%% (%d)\n",
1695 full_debugs
* 100.0 / debug_lookups
, full_debugs
);
1696 VG_(dmsg
)("cachegrind: with file/line info:%6.1f%% (%d)\n",
1697 file_line_debugs
* 100.0 / debug_lookups
, file_line_debugs
);
1698 VG_(dmsg
)("cachegrind: with fn name info:%6.1f%% (%d)\n",
1699 fn_debugs
* 100.0 / debug_lookups
, fn_debugs
);
1700 VG_(dmsg
)("cachegrind: with zero info:%6.1f%% (%d)\n",
1701 no_debugs
* 100.0 / debug_lookups
, no_debugs
);
1703 VG_(dmsg
)("cachegrind: string table size: %u\n",
1704 VG_(OSetGen_Size
)(stringTable
));
1705 VG_(dmsg
)("cachegrind: CC table size: %u\n",
1706 VG_(OSetGen_Size
)(CC_table
));
1707 VG_(dmsg
)("cachegrind: InstrInfo table size: %u\n",
1708 VG_(OSetGen_Size
)(instrInfoTable
));
1712 /*--------------------------------------------------------------------*/
1713 /*--- Discarding BB info ---*/
1714 /*--------------------------------------------------------------------*/
1716 // Called when a translation is removed from the translation cache for
1717 // any reason at all: to free up space, because the guest code was
1718 // unmapped or modified, or for any arbitrary reason.
/* Callback invoked when a translation is discarded from the translation
   cache: remove and free the corresponding SB_info from instrInfoTable,
   keyed by the superblock's original address (vge.base[0], not
   orig_addr64).  Asserts that an entry exists -- every translated SB
   must have been registered.
   NOTE(review): the extraction is missing some lines here (e.g. the
   DEBUG_CG guard around the printf and the braces); visible tokens are
   preserved verbatim. */
1720 void cg_discard_superblock_info ( Addr orig_addr64
, VexGuestExtents vge
)
1723 Addr orig_addr
= vge
.base
[0];
1725 tl_assert(vge
.n_used
> 0);
1728 VG_(printf
)( "discard_basic_block_info: %p, %p, %llu\n",
1730 (void*)vge
.base
[0], (ULong
)vge
.len
[0]);
1732 // Get BB info, remove from table, free BB info. Simple! Note that we
1733 // use orig_addr, not the first instruction address in vge.
1734 sbInfo
= VG_(OSetGen_Remove
)(instrInfoTable
, &orig_addr
);
1735 tl_assert(NULL
!= sbInfo
);
1736 VG_(OSetGen_FreeNode
)(instrInfoTable
, sbInfo
);
1739 /*--------------------------------------------------------------------*/
1740 /*--- Command line processing ---*/
1741 /*--------------------------------------------------------------------*/
/* Parse one Cachegrind command-line option.  Cache geometry options are
   delegated to VG_(str_clo_cache_opt); the remaining options set the
   output file name and the two simulation flags.
   NOTE(review): the success/failure return statements (original lines
   1746-1756) are missing from this extraction. */
1743 static Bool
cg_process_cmd_line_option(const HChar
* arg
)
1745 if (VG_(str_clo_cache_opt
)(arg
,
1750 else if VG_STR_CLO( arg
, "--cachegrind-out-file", clo_cachegrind_out_file
) {}
1751 else if VG_BOOL_CLO(arg
, "--cache-sim", clo_cache_sim
) {}
1752 else if VG_BOOL_CLO(arg
, "--branch-sim", clo_branch_sim
) {}
/* Print Cachegrind's user-visible command-line options: the shared cache
   geometry options first, then the tool-specific ones.
   NOTE(review): the VG_(printf) call wrapping the string literals is
   missing from this extraction. */
1759 static void cg_print_usage(void)
1761 VG_(print_cache_clo_opts
)();
1763 " --cache-sim=yes|no collect cache stats? [yes]\n"
1764 " --branch-sim=yes|no collect branch prediction stats? [no]\n"
1765 " --cachegrind-out-file=<file> output file name [cachegrind.out.%%p]\n"
/* Print debug-only command-line options.  Body not visible in this
   extraction (original lines 1770-1775 missing). */
1769 static void cg_print_debug_usage(void)
1776 /*--------------------------------------------------------------------*/
1778 /*--------------------------------------------------------------------*/
1780 static void cg_post_clo_init(void); /* just below */
/* Pre-command-line-option tool initialisation: register tool details and
   the callbacks (post-clo init, SB discard, option handling/usage) with
   the Valgrind core.
   NOTE(review): some argument lines of VG_(basic_tool_funcs) and
   VG_(needs_command_line_options) (original lines 1797-1802) are missing
   from this extraction. */
1782 static void cg_pre_clo_init(void)
1784 VG_(details_name
) ("Cachegrind");
1785 VG_(details_version
) (NULL
);
1786 VG_(details_description
) ("a cache and branch-prediction profiler");
1787 VG_(details_copyright_author
)(
1788 "Copyright (C) 2002-2017, and GNU GPL'd, by Nicholas Nethercote et al.");
1789 VG_(details_bug_reports_to
) (VG_BUGS_TO
);
1790 VG_(details_avg_translation_sizeB
) ( 500 );
// Default register-update policy; the user can override this.
1792 VG_(clo_vex_control
).iropt_register_updates_default
1793 = VG_(clo_px_file_backed
)
1794 = VexRegUpdSpAtMemAccess
; // overridable by the user.
1796 VG_(basic_tool_funcs
) (cg_post_clo_init
,
1800 VG_(needs_superblock_discards
)(cg_discard_superblock_info
);
1801 VG_(needs_command_line_options
)(cg_process_cmd_line_option
,
1803 cg_print_debug_usage
);
/* Post-command-line-option initialisation: create the three OSet tables
   (line CCs, string table, instr-info table), resolve the final cache
   geometries, compute min_line_size, sanity-check it against the largest
   guest register, and initialise the cache simulator.
   NOTE(review): several lines are missing from this extraction (the
   lvalues receiving the VG_(OSetGen_Create) results, comparison/free
   callbacks, the remaining configure_caches arguments, and the
   VG_(exit) call in the error path); visible tokens are preserved
   verbatim. */
1806 static void cg_post_clo_init(void)
1808 cache_t I1c
, D1c
, LLc
;
// Per-source-line cost-centre table, keyed on the CodeLoc at 'loc'.
1811 VG_(OSetGen_Create
)(offsetof(LineCC
, loc
),
1813 VG_(malloc
), "cg.main.cpci.1",
// String table (interned file/function names; keyed at offset 0).
1816 VG_(OSetGen_Create
)(/*keyOff*/0,
1818 VG_(malloc
), "cg.main.cpci.2",
// Per-superblock instruction-info table (keyed at offset 0).
1821 VG_(OSetGen_Create
)(/*keyOff*/0,
1823 VG_(malloc
), "cg.main.cpci.3",
// Resolve final I1/D1/LL geometries (auto-detected or from clo_*_cache).
1826 VG_(post_clo_init_configure_caches
)(&I1c
, &D1c
, &LLc
,
1831 // min_line_size is used to make sure that we never feed
1832 // accesses to the simulator straddling more than two
1833 // cache lines at any cache level
1834 min_line_size
= (I1c
.line_size
< D1c
.line_size
) ? I1c
.line_size
: D1c
.line_size
;
1835 min_line_size
= (LLc
.line_size
< min_line_size
) ? LLc
.line_size
: min_line_size
;
1837 Int largest_load_or_store_size
1838 = VG_(machine_get_size_of_largest_guest_register
)();
1839 if (min_line_size
< largest_load_or_store_size
) {
1840 /* We can't continue, because the cache simulation might
1841 straddle more than 2 lines, and it will assert. So let's
1842 just stop before we start. */
1843 VG_(umsg
)("Cachegrind: cannot continue: the minimum line size (%d)\n",
1844 (Int
)min_line_size
);
1845 VG_(umsg
)(" must be equal to or larger than the maximum register size (%d)\n",
1846 largest_load_or_store_size
);
1847 VG_(umsg
)(" but it is not. Exiting now.\n");
1851 cachesim_initcaches(I1c
, D1c
, LLc
);
1854 VG_DETERMINE_INTERFACE_VERSION(cg_pre_clo_init
)
1856 /*--------------------------------------------------------------------*/
1858 /*--------------------------------------------------------------------*/