/* Bug 473604 - Fix bug472219.c compile failure with Clang 16
   [valgrind.git] / cachegrind / cg_main.c
   blob ef3ea03ebca79cd492a208552cd5e2221d455539 */
/*--------------------------------------------------------------------*/
/*--- Cachegrind: everything but the simulation itself.            ---*/
/*---                                                    cg_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Cachegrind, a Valgrind tool for cache
   profiling programs.

   Copyright (C) 2002-2017 Nicholas Nethercote
      njn@valgrind.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.
*/
#include "pub_tool_basics.h"
#include "pub_tool_debuginfo.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcfile.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_libcproc.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_options.h"
#include "pub_tool_oset.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_xarray.h"
#include "pub_tool_clientstate.h"
#include "pub_tool_machine.h"      // VG_(fnptr_to_fnentry)

#include "cg_arch.h"
#include "cg_sim.c"
#include "cg_branchpred.c"
49 /*------------------------------------------------------------*/
50 /*--- Constants ---*/
51 /*------------------------------------------------------------*/
53 /* Set to 1 for very verbose debugging */
54 #define DEBUG_CG 0
56 /*------------------------------------------------------------*/
57 /*--- Options ---*/
58 /*------------------------------------------------------------*/
60 static Bool clo_cache_sim = False; /* do cache simulation? */
61 static Bool clo_branch_sim = False; /* do branch simulation? */
62 static const HChar* clo_cachegrind_out_file = "cachegrind.out.%p";
64 /*------------------------------------------------------------*/
65 /*--- Cachesim configuration ---*/
66 /*------------------------------------------------------------*/
68 static Int min_line_size = 0; /* min of L1 and LL cache line sizes */
70 /*------------------------------------------------------------*/
71 /*--- Types and Data Structures ---*/
72 /*------------------------------------------------------------*/
74 typedef
75 struct {
76 ULong a; /* total # memory accesses of this kind */
77 ULong m1; /* misses in the first level cache */
78 ULong mL; /* misses in the second level cache */
80 CacheCC;
82 typedef
83 struct {
84 ULong b; /* total # branches of this kind */
85 ULong mp; /* number of branches mispredicted */
87 BranchCC;
89 //------------------------------------------------------------
90 // Primary data structure #1: CC table
91 // - Holds the per-source-line hit/miss stats, grouped by file/function/line.
92 // - an ordered set of CCs. CC indexing done by file/function/line (as
93 // determined from the instrAddr).
94 // - Traversed for dumping stats at end in file/func/line hierarchy.
96 typedef struct {
97 HChar* file;
98 const HChar* fn;
99 Int line;
101 CodeLoc;
103 typedef struct {
104 CodeLoc loc; /* Source location that these counts pertain to */
105 CacheCC Ir; /* Insn read counts */
106 CacheCC Dr; /* Data read counts */
107 CacheCC Dw; /* Data write/modify counts */
108 BranchCC Bc; /* Conditional branch counts */
109 BranchCC Bi; /* Indirect branch counts */
110 } LineCC;
112 // First compare file, then fn, then line.
113 static Word cmp_CodeLoc_LineCC(const void *vloc, const void *vcc)
115 Word res;
116 const CodeLoc* a = (const CodeLoc*)vloc;
117 const CodeLoc* b = &(((const LineCC*)vcc)->loc);
119 res = VG_(strcmp)(a->file, b->file);
120 if (0 != res)
121 return res;
123 res = VG_(strcmp)(a->fn, b->fn);
124 if (0 != res)
125 return res;
127 return a->line - b->line;
130 static OSet* CC_table;
132 //------------------------------------------------------------
133 // Primary data structure #2: InstrInfo table
134 // - Holds the cached info about each instr that is used for simulation.
135 // - table(SB_start_addr, list(InstrInfo))
136 // - For each SB, each InstrInfo in the list holds info about the
137 // instruction (instrLen, instrAddr, etc), plus a pointer to its line
138 // CC. This node is what's passed to the simulation function.
139 // - When SBs are discarded the relevant list(instr_details) is freed.
141 typedef struct _InstrInfo InstrInfo;
142 struct _InstrInfo {
143 Addr instr_addr;
144 UChar instr_len;
145 LineCC* parent; // parent line-CC
148 typedef struct _SB_info SB_info;
149 struct _SB_info {
150 Addr SB_addr; // key; MUST BE FIRST
151 Int n_instrs;
152 InstrInfo instrs[0];
155 static OSet* instrInfoTable;
157 //------------------------------------------------------------
158 // Secondary data structure: string table
159 // - holds strings, avoiding dups
160 // - used for filenames and function names, each of which will be
161 // pointed to by one or more CCs.
162 // - it also allows equality checks just by pointer comparison, which
163 // is good when printing the output file at the end.
165 static OSet* stringTable;
167 //------------------------------------------------------------
168 // Stats
169 static Int distinct_files = 0;
170 static Int distinct_fns = 0;
171 static Int distinct_lines = 0;
172 static Int distinct_instrsGen = 0;
173 static Int distinct_instrsNoX = 0;
175 static Int full_debugs = 0;
176 static Int file_line_debugs = 0;
177 static Int fn_debugs = 0;
178 static Int no_debugs = 0;
180 /*------------------------------------------------------------*/
181 /*--- String table operations ---*/
182 /*------------------------------------------------------------*/
184 static Word stringCmp( const void* key, const void* elem )
186 return VG_(strcmp)(*(const HChar *const *)key, *(const HChar *const *)elem);
189 // Get a permanent string; either pull it out of the string table if it's
190 // been encountered before, or dup it and put it into the string table.
191 static HChar* get_perm_string(const HChar* s)
193 HChar** s_ptr = VG_(OSetGen_Lookup)(stringTable, &s);
194 if (s_ptr) {
195 return *s_ptr;
196 } else {
197 HChar** s_node = VG_(OSetGen_AllocNode)(stringTable, sizeof(HChar*));
198 *s_node = VG_(strdup)("cg.main.gps.1", s);
199 VG_(OSetGen_Insert)(stringTable, s_node);
200 return *s_node;
204 /*------------------------------------------------------------*/
205 /*--- CC table operations ---*/
206 /*------------------------------------------------------------*/
208 static void get_debug_info(Addr instr_addr, const HChar **dir,
209 const HChar **file, const HChar **fn, UInt* line)
211 DiEpoch ep = VG_(current_DiEpoch)();
212 Bool found_file_line = VG_(get_filename_linenum)(
214 instr_addr,
215 file, dir,
216 line
218 Bool found_fn = VG_(get_fnname)(ep, instr_addr, fn);
220 if (!found_file_line) {
221 *file = "???";
222 *line = 0;
224 if (!found_fn) {
225 *fn = "???";
228 if (found_file_line) {
229 if (found_fn) full_debugs++;
230 else file_line_debugs++;
231 } else {
232 if (found_fn) fn_debugs++;
233 else no_debugs++;
237 // Do a three step traversal: by file, then fn, then line.
238 // Returns a pointer to the line CC, creates a new one if necessary.
239 static LineCC* get_lineCC(Addr origAddr)
241 const HChar *fn, *file, *dir;
242 UInt line;
243 CodeLoc loc;
244 LineCC* lineCC;
246 get_debug_info(origAddr, &dir, &file, &fn, &line);
248 // Form an absolute pathname if a directory is available
249 HChar absfile[VG_(strlen)(dir) + 1 + VG_(strlen)(file) + 1];
251 if (dir[0]) {
252 VG_(sprintf)(absfile, "%s/%s", dir, file);
253 } else {
254 VG_(sprintf)(absfile, "%s", file);
257 loc.file = absfile;
258 loc.fn = fn;
259 loc.line = line;
261 lineCC = VG_(OSetGen_Lookup)(CC_table, &loc);
262 if (!lineCC) {
263 // Allocate and zero a new node.
264 lineCC = VG_(OSetGen_AllocNode)(CC_table, sizeof(LineCC));
265 lineCC->loc.file = get_perm_string(loc.file);
266 lineCC->loc.fn = get_perm_string(loc.fn);
267 lineCC->loc.line = loc.line;
268 lineCC->Ir.a = 0;
269 lineCC->Ir.m1 = 0;
270 lineCC->Ir.mL = 0;
271 lineCC->Dr.a = 0;
272 lineCC->Dr.m1 = 0;
273 lineCC->Dr.mL = 0;
274 lineCC->Dw.a = 0;
275 lineCC->Dw.m1 = 0;
276 lineCC->Dw.mL = 0;
277 lineCC->Bc.b = 0;
278 lineCC->Bc.mp = 0;
279 lineCC->Bi.b = 0;
280 lineCC->Bi.mp = 0;
281 VG_(OSetGen_Insert)(CC_table, lineCC);
284 return lineCC;
287 /*------------------------------------------------------------*/
288 /*--- Cache simulation functions ---*/
289 /*------------------------------------------------------------*/
291 /* A common case for an instruction read event is that the
292 * bytes read belong to the same cache line in both L1I and LL
293 * (if cache line sizes of L1 and LL are the same).
294 * As this can be detected at instrumentation time, and results
295 * in faster simulation, special-casing is benefical.
297 * Abbreviations used in var/function names:
298 * IrNoX - instruction read does not cross cache lines
299 * IrGen - generic instruction read; not detected as IrNoX
300 * Ir - not known / not important whether it is an IrNoX
303 // Only used with --cache-sim=no.
304 static VG_REGPARM(1)
305 void log_1Ir(InstrInfo* n)
307 n->parent->Ir.a++;
310 // Only used with --cache-sim=no.
311 static VG_REGPARM(2)
312 void log_2Ir(InstrInfo* n, InstrInfo* n2)
314 n->parent->Ir.a++;
315 n2->parent->Ir.a++;
318 // Only used with --cache-sim=no.
319 static VG_REGPARM(3)
320 void log_3Ir(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
322 n->parent->Ir.a++;
323 n2->parent->Ir.a++;
324 n3->parent->Ir.a++;
327 // Generic case for instruction reads: may cross cache lines.
328 // All other Ir handlers expect IrNoX instruction reads.
329 static VG_REGPARM(1)
330 void log_1IrGen_0D_cache_access(InstrInfo* n)
332 //VG_(printf)("1IrGen_0D : CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n",
333 // n, n->instr_addr, n->instr_len);
334 cachesim_I1_doref_Gen(n->instr_addr, n->instr_len,
335 &n->parent->Ir.m1, &n->parent->Ir.mL);
336 n->parent->Ir.a++;
339 static VG_REGPARM(1)
340 void log_1IrNoX_0D_cache_access(InstrInfo* n)
342 //VG_(printf)("1IrNoX_0D : CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n",
343 // n, n->instr_addr, n->instr_len);
344 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
345 &n->parent->Ir.m1, &n->parent->Ir.mL);
346 n->parent->Ir.a++;
349 static VG_REGPARM(2)
350 void log_2IrNoX_0D_cache_access(InstrInfo* n, InstrInfo* n2)
352 //VG_(printf)("2IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
353 // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n",
354 // n, n->instr_addr, n->instr_len,
355 // n2, n2->instr_addr, n2->instr_len);
356 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
357 &n->parent->Ir.m1, &n->parent->Ir.mL);
358 n->parent->Ir.a++;
359 cachesim_I1_doref_NoX(n2->instr_addr, n2->instr_len,
360 &n2->parent->Ir.m1, &n2->parent->Ir.mL);
361 n2->parent->Ir.a++;
364 static VG_REGPARM(3)
365 void log_3IrNoX_0D_cache_access(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
367 //VG_(printf)("3IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
368 // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n"
369 // " CC3addr=0x%010lx, i3addr=0x%010lx, i3size=%lu\n",
370 // n, n->instr_addr, n->instr_len,
371 // n2, n2->instr_addr, n2->instr_len,
372 // n3, n3->instr_addr, n3->instr_len);
373 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
374 &n->parent->Ir.m1, &n->parent->Ir.mL);
375 n->parent->Ir.a++;
376 cachesim_I1_doref_NoX(n2->instr_addr, n2->instr_len,
377 &n2->parent->Ir.m1, &n2->parent->Ir.mL);
378 n2->parent->Ir.a++;
379 cachesim_I1_doref_NoX(n3->instr_addr, n3->instr_len,
380 &n3->parent->Ir.m1, &n3->parent->Ir.mL);
381 n3->parent->Ir.a++;
384 static VG_REGPARM(3)
385 void log_1IrNoX_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
387 //VG_(printf)("1IrNoX_1Dr: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n"
388 // " daddr=0x%010lx, dsize=%lu\n",
389 // n, n->instr_addr, n->instr_len, data_addr, data_size);
390 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
391 &n->parent->Ir.m1, &n->parent->Ir.mL);
392 n->parent->Ir.a++;
394 cachesim_D1_doref(data_addr, data_size,
395 &n->parent->Dr.m1, &n->parent->Dr.mL);
396 n->parent->Dr.a++;
399 static VG_REGPARM(3)
400 void log_1IrNoX_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
402 //VG_(printf)("1IrNoX_1Dw: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n"
403 // " daddr=0x%010lx, dsize=%lu\n",
404 // n, n->instr_addr, n->instr_len, data_addr, data_size);
405 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
406 &n->parent->Ir.m1, &n->parent->Ir.mL);
407 n->parent->Ir.a++;
409 cachesim_D1_doref(data_addr, data_size,
410 &n->parent->Dw.m1, &n->parent->Dw.mL);
411 n->parent->Dw.a++;
414 /* Note that addEvent_D_guarded assumes that log_0Ir_1Dr_cache_access
415 and log_0Ir_1Dw_cache_access have exactly the same prototype. If
416 you change them, you must change addEvent_D_guarded too. */
417 static VG_REGPARM(3)
418 void log_0Ir_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
420 //VG_(printf)("0Ir_1Dr: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
421 // n, data_addr, data_size);
422 cachesim_D1_doref(data_addr, data_size,
423 &n->parent->Dr.m1, &n->parent->Dr.mL);
424 n->parent->Dr.a++;
427 /* See comment on log_0Ir_1Dr_cache_access. */
428 static VG_REGPARM(3)
429 void log_0Ir_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
431 //VG_(printf)("0Ir_1Dw: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
432 // n, data_addr, data_size);
433 cachesim_D1_doref(data_addr, data_size,
434 &n->parent->Dw.m1, &n->parent->Dw.mL);
435 n->parent->Dw.a++;
438 /* For branches, we consult two different predictors, one which
439 predicts taken/untaken for conditional branches, and the other
440 which predicts the branch target address for indirect branches
441 (jump-to-register style ones). */
443 static VG_REGPARM(2)
444 void log_cond_branch(InstrInfo* n, Word taken)
446 //VG_(printf)("cbrnch: CCaddr=0x%010lx, taken=0x%010lx\n",
447 // n, taken);
448 n->parent->Bc.b++;
449 n->parent->Bc.mp
450 += (1 & do_cond_branch_predict(n->instr_addr, taken));
453 static VG_REGPARM(2)
454 void log_ind_branch(InstrInfo* n, UWord actual_dst)
456 //VG_(printf)("ibrnch: CCaddr=0x%010lx, dst=0x%010lx\n",
457 // n, actual_dst);
458 n->parent->Bi.b++;
459 n->parent->Bi.mp
460 += (1 & do_ind_branch_predict(n->instr_addr, actual_dst));
464 /*------------------------------------------------------------*/
465 /*--- Instrumentation types and structures ---*/
466 /*------------------------------------------------------------*/
468 /* Maintain an ordered list of memory events which are outstanding, in
469 the sense that no IR has yet been generated to do the relevant
470 helper calls. The BB is scanned top to bottom and memory events
471 are added to the end of the list, merging with the most recent
472 notified event where possible (Dw immediately following Dr and
473 having the same size and EA can be merged).
475 This merging is done so that for architectures which have
476 load-op-store instructions (x86, amd64), the insn is treated as if
477 it makes just one memory reference (a modify), rather than two (a
478 read followed by a write at the same address).
480 At various points the list will need to be flushed, that is, IR
481 generated from it. That must happen before any possible exit from
482 the block (the end, or an IRStmt_Exit). Flushing also takes place
483 when there is no space to add a new event.
485 If we require the simulation statistics to be up to date with
486 respect to possible memory exceptions, then the list would have to
487 be flushed before each memory reference. That would however lose
488 performance by inhibiting event-merging during flushing.
490 Flushing the list consists of walking it start to end and emitting
491 instrumentation IR for each event, in the order in which they
492 appear. It may be possible to emit a single call for two adjacent
493 events in order to reduce the number of helper function calls made.
494 For example, it could well be profitable to handle two adjacent Ir
495 events with a single helper call. */
497 typedef
498 IRExpr
499 IRAtom;
501 typedef
502 enum {
503 Ev_IrNoX, // Instruction read not crossing cache lines
504 Ev_IrGen, // Generic Ir, not being detected as IrNoX
505 Ev_Dr, // Data read
506 Ev_Dw, // Data write
507 Ev_Dm, // Data modify (read then write)
508 Ev_Bc, // branch conditional
509 Ev_Bi // branch indirect (to unknown destination)
511 EventTag;
513 typedef
514 struct {
515 EventTag tag;
516 InstrInfo* inode;
517 union {
518 struct {
519 } IrGen;
520 struct {
521 } IrNoX;
522 struct {
523 IRAtom* ea;
524 Int szB;
525 } Dr;
526 struct {
527 IRAtom* ea;
528 Int szB;
529 } Dw;
530 struct {
531 IRAtom* ea;
532 Int szB;
533 } Dm;
534 struct {
535 IRAtom* taken; /* :: Ity_I1 */
536 } Bc;
537 struct {
538 IRAtom* dst;
539 } Bi;
540 } Ev;
542 Event;
544 static void init_Event ( Event* ev ) {
545 VG_(memset)(ev, 0, sizeof(Event));
548 static IRAtom* get_Event_dea ( Event* ev ) {
549 switch (ev->tag) {
550 case Ev_Dr: return ev->Ev.Dr.ea;
551 case Ev_Dw: return ev->Ev.Dw.ea;
552 case Ev_Dm: return ev->Ev.Dm.ea;
553 default: tl_assert(0);
557 static Int get_Event_dszB ( Event* ev ) {
558 switch (ev->tag) {
559 case Ev_Dr: return ev->Ev.Dr.szB;
560 case Ev_Dw: return ev->Ev.Dw.szB;
561 case Ev_Dm: return ev->Ev.Dm.szB;
562 default: tl_assert(0);
567 /* Up to this many unnotified events are allowed. Number is
568 arbitrary. Larger numbers allow more event merging to occur, but
569 potentially induce more spilling due to extending live ranges of
570 address temporaries. */
571 #define N_EVENTS 16
574 /* A struct which holds all the running state during instrumentation.
575 Mostly to avoid passing loads of parameters everywhere. */
576 typedef
577 struct {
578 /* The current outstanding-memory-event list. */
579 Event events[N_EVENTS];
580 Int events_used;
582 /* The array of InstrInfo bins for the BB. */
583 SB_info* sbInfo;
585 /* Number InstrInfo bins 'used' so far. */
586 Int sbInfo_i;
588 /* The output SB being constructed. */
589 IRSB* sbOut;
591 CgState;
594 /*------------------------------------------------------------*/
595 /*--- Instrumentation main ---*/
596 /*------------------------------------------------------------*/
598 // Note that origAddr is the real origAddr, not the address of the first
599 // instruction in the block (they can be different due to redirection).
600 static
601 SB_info* get_SB_info(IRSB* sbIn, Addr origAddr)
603 Int i, n_instrs;
604 IRStmt* st;
605 SB_info* sbInfo;
607 // Count number of original instrs in SB
608 n_instrs = 0;
609 for (i = 0; i < sbIn->stmts_used; i++) {
610 st = sbIn->stmts[i];
611 if (Ist_IMark == st->tag) n_instrs++;
614 // Check that we don't have an entry for this BB in the instr-info table.
615 // If this assertion fails, there has been some screwup: some
616 // translations must have been discarded but Cachegrind hasn't discarded
617 // the corresponding entries in the instr-info table.
618 sbInfo = VG_(OSetGen_Lookup)(instrInfoTable, &origAddr);
619 tl_assert(NULL == sbInfo);
621 // BB never translated before (at this address, at least; could have
622 // been unloaded and then reloaded elsewhere in memory)
623 sbInfo = VG_(OSetGen_AllocNode)(instrInfoTable,
624 sizeof(SB_info) + n_instrs*sizeof(InstrInfo));
625 sbInfo->SB_addr = origAddr;
626 sbInfo->n_instrs = n_instrs;
627 VG_(OSetGen_Insert)( instrInfoTable, sbInfo );
629 return sbInfo;
633 static void showEvent ( Event* ev )
635 switch (ev->tag) {
636 case Ev_IrGen:
637 VG_(printf)("IrGen %p\n", ev->inode);
638 break;
639 case Ev_IrNoX:
640 VG_(printf)("IrNoX %p\n", ev->inode);
641 break;
642 case Ev_Dr:
643 VG_(printf)("Dr %p %d EA=", ev->inode, ev->Ev.Dr.szB);
644 ppIRExpr(ev->Ev.Dr.ea);
645 VG_(printf)("\n");
646 break;
647 case Ev_Dw:
648 VG_(printf)("Dw %p %d EA=", ev->inode, ev->Ev.Dw.szB);
649 ppIRExpr(ev->Ev.Dw.ea);
650 VG_(printf)("\n");
651 break;
652 case Ev_Dm:
653 VG_(printf)("Dm %p %d EA=", ev->inode, ev->Ev.Dm.szB);
654 ppIRExpr(ev->Ev.Dm.ea);
655 VG_(printf)("\n");
656 break;
657 case Ev_Bc:
658 VG_(printf)("Bc %p GA=", ev->inode);
659 ppIRExpr(ev->Ev.Bc.taken);
660 VG_(printf)("\n");
661 break;
662 case Ev_Bi:
663 VG_(printf)("Bi %p DST=", ev->inode);
664 ppIRExpr(ev->Ev.Bi.dst);
665 VG_(printf)("\n");
666 break;
667 default:
668 tl_assert(0);
669 break;
673 // Reserve and initialise an InstrInfo for the first mention of a new insn.
674 static
675 InstrInfo* setup_InstrInfo ( CgState* cgs, Addr instr_addr, UInt instr_len )
677 InstrInfo* i_node;
678 tl_assert(cgs->sbInfo_i >= 0);
679 tl_assert(cgs->sbInfo_i < cgs->sbInfo->n_instrs);
680 i_node = &cgs->sbInfo->instrs[ cgs->sbInfo_i ];
681 i_node->instr_addr = instr_addr;
682 i_node->instr_len = instr_len;
683 i_node->parent = get_lineCC(instr_addr);
684 cgs->sbInfo_i++;
685 return i_node;
689 /* Generate code for all outstanding memory events, and mark the queue
690 empty. Code is generated into cgs->bbOut, and this activity
691 'consumes' slots in cgs->sbInfo. */
693 static void flushEvents ( CgState* cgs )
695 Int i, regparms;
696 const HChar* helperName;
697 void* helperAddr;
698 IRExpr** argv;
699 IRExpr* i_node_expr;
700 IRDirty* di;
701 Event* ev;
702 Event* ev2;
703 Event* ev3;
705 i = 0;
706 while (i < cgs->events_used) {
708 helperName = NULL;
709 helperAddr = NULL;
710 argv = NULL;
711 regparms = 0;
713 /* generate IR to notify event i and possibly the ones
714 immediately following it. */
715 tl_assert(i >= 0 && i < cgs->events_used);
717 ev = &cgs->events[i];
718 ev2 = ( i < cgs->events_used-1 ? &cgs->events[i+1] : NULL );
719 ev3 = ( i < cgs->events_used-2 ? &cgs->events[i+2] : NULL );
721 if (DEBUG_CG) {
722 VG_(printf)(" flush ");
723 showEvent( ev );
726 i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );
728 /* Decide on helper fn to call and args to pass it, and advance
729 i appropriately. */
730 switch (ev->tag) {
731 case Ev_IrNoX:
732 /* Merge an IrNoX with a following Dr/Dm. */
733 if (ev2 && (ev2->tag == Ev_Dr || ev2->tag == Ev_Dm)) {
734 /* Why is this true? It's because we're merging an Ir
735 with a following Dr or Dm. The Ir derives from the
736 instruction's IMark and the Dr/Dm from data
737 references which follow it. In short it holds
738 because each insn starts with an IMark, hence an
739 Ev_Ir, and so these Dr/Dm must pertain to the
740 immediately preceding Ir. Same applies to analogous
741 assertions in the subsequent cases. */
742 tl_assert(ev2->inode == ev->inode);
743 helperName = "log_1IrNoX_1Dr_cache_access";
744 helperAddr = &log_1IrNoX_1Dr_cache_access;
745 argv = mkIRExprVec_3( i_node_expr,
746 get_Event_dea(ev2),
747 mkIRExpr_HWord( get_Event_dszB(ev2) ) );
748 regparms = 3;
749 i += 2;
751 /* Merge an IrNoX with a following Dw. */
752 else
753 if (ev2 && ev2->tag == Ev_Dw) {
754 tl_assert(ev2->inode == ev->inode);
755 helperName = "log_1IrNoX_1Dw_cache_access";
756 helperAddr = &log_1IrNoX_1Dw_cache_access;
757 argv = mkIRExprVec_3( i_node_expr,
758 get_Event_dea(ev2),
759 mkIRExpr_HWord( get_Event_dszB(ev2) ) );
760 regparms = 3;
761 i += 2;
763 /* Merge an IrNoX with two following IrNoX's. */
764 else
765 if (ev2 && ev3 && ev2->tag == Ev_IrNoX && ev3->tag == Ev_IrNoX)
767 if (clo_cache_sim) {
768 helperName = "log_3IrNoX_0D_cache_access";
769 helperAddr = &log_3IrNoX_0D_cache_access;
770 } else {
771 helperName = "log_3Ir";
772 helperAddr = &log_3Ir;
774 argv = mkIRExprVec_3( i_node_expr,
775 mkIRExpr_HWord( (HWord)ev2->inode ),
776 mkIRExpr_HWord( (HWord)ev3->inode ) );
777 regparms = 3;
778 i += 3;
780 /* Merge an IrNoX with one following IrNoX. */
781 else
782 if (ev2 && ev2->tag == Ev_IrNoX) {
783 if (clo_cache_sim) {
784 helperName = "log_2IrNoX_0D_cache_access";
785 helperAddr = &log_2IrNoX_0D_cache_access;
786 } else {
787 helperName = "log_2Ir";
788 helperAddr = &log_2Ir;
790 argv = mkIRExprVec_2( i_node_expr,
791 mkIRExpr_HWord( (HWord)ev2->inode ) );
792 regparms = 2;
793 i += 2;
795 /* No merging possible; emit as-is. */
796 else {
797 if (clo_cache_sim) {
798 helperName = "log_1IrNoX_0D_cache_access";
799 helperAddr = &log_1IrNoX_0D_cache_access;
800 } else {
801 helperName = "log_1Ir";
802 helperAddr = &log_1Ir;
804 argv = mkIRExprVec_1( i_node_expr );
805 regparms = 1;
806 i++;
808 break;
809 case Ev_IrGen:
810 if (clo_cache_sim) {
811 helperName = "log_1IrGen_0D_cache_access";
812 helperAddr = &log_1IrGen_0D_cache_access;
813 } else {
814 helperName = "log_1Ir";
815 helperAddr = &log_1Ir;
817 argv = mkIRExprVec_1( i_node_expr );
818 regparms = 1;
819 i++;
820 break;
821 case Ev_Dr:
822 case Ev_Dm:
823 /* Data read or modify */
824 helperName = "log_0Ir_1Dr_cache_access";
825 helperAddr = &log_0Ir_1Dr_cache_access;
826 argv = mkIRExprVec_3( i_node_expr,
827 get_Event_dea(ev),
828 mkIRExpr_HWord( get_Event_dszB(ev) ) );
829 regparms = 3;
830 i++;
831 break;
832 case Ev_Dw:
833 /* Data write */
834 helperName = "log_0Ir_1Dw_cache_access";
835 helperAddr = &log_0Ir_1Dw_cache_access;
836 argv = mkIRExprVec_3( i_node_expr,
837 get_Event_dea(ev),
838 mkIRExpr_HWord( get_Event_dszB(ev) ) );
839 regparms = 3;
840 i++;
841 break;
842 case Ev_Bc:
843 /* Conditional branch */
844 helperName = "log_cond_branch";
845 helperAddr = &log_cond_branch;
846 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
847 regparms = 2;
848 i++;
849 break;
850 case Ev_Bi:
851 /* Branch to an unknown destination */
852 helperName = "log_ind_branch";
853 helperAddr = &log_ind_branch;
854 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
855 regparms = 2;
856 i++;
857 break;
858 default:
859 tl_assert(0);
862 /* Add the helper. */
863 tl_assert(helperName);
864 tl_assert(helperAddr);
865 tl_assert(argv);
866 di = unsafeIRDirty_0_N( regparms,
867 helperName, VG_(fnptr_to_fnentry)( helperAddr ),
868 argv );
869 addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
872 cgs->events_used = 0;
875 static void addEvent_Ir ( CgState* cgs, InstrInfo* inode )
877 Event* evt;
878 if (cgs->events_used == N_EVENTS)
879 flushEvents(cgs);
880 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
881 evt = &cgs->events[cgs->events_used];
882 init_Event(evt);
883 evt->inode = inode;
884 if (cachesim_is_IrNoX(inode->instr_addr, inode->instr_len)) {
885 evt->tag = Ev_IrNoX;
886 distinct_instrsNoX++;
887 } else {
888 evt->tag = Ev_IrGen;
889 distinct_instrsGen++;
891 cgs->events_used++;
894 static
895 void addEvent_Dr ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
897 tl_assert(isIRAtom(ea));
899 if (!clo_cache_sim)
900 return;
902 tl_assert(datasize >= 1 && datasize <= min_line_size);
904 if (cgs->events_used == N_EVENTS) {
905 flushEvents(cgs);
907 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
908 Event* evt = &cgs->events[cgs->events_used];
909 init_Event(evt);
910 evt->tag = Ev_Dr;
911 evt->inode = inode;
912 evt->Ev.Dr.szB = datasize;
913 evt->Ev.Dr.ea = ea;
914 cgs->events_used++;
917 static
918 void addEvent_Dw ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
920 tl_assert(isIRAtom(ea));
922 if (!clo_cache_sim)
923 return;
925 tl_assert(datasize >= 1 && datasize <= min_line_size);
927 /* Is it possible to merge this write with the preceding read? */
928 if (cgs->events_used > 0) {
929 Event* lastEvt = &cgs->events[cgs->events_used-1];
930 if ( lastEvt->tag == Ev_Dr
931 && lastEvt->Ev.Dr.szB == datasize
932 && lastEvt->inode == inode
933 && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
935 lastEvt->tag = Ev_Dm;
936 return;
940 /* No. Add as normal. */
941 if (cgs->events_used == N_EVENTS)
942 flushEvents(cgs);
943 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
944 Event* evt = &cgs->events[cgs->events_used];
945 init_Event(evt);
946 evt->tag = Ev_Dw;
947 evt->inode = inode;
948 evt->Ev.Dw.szB = datasize;
949 evt->Ev.Dw.ea = ea;
950 cgs->events_used++;
953 static
954 void addEvent_D_guarded ( CgState* cgs, InstrInfo* inode,
955 Int datasize, IRAtom* ea, IRAtom* guard,
956 Bool isWrite )
958 tl_assert(isIRAtom(ea));
959 tl_assert(guard);
960 tl_assert(isIRAtom(guard));
962 if (!clo_cache_sim)
963 return;
965 tl_assert(datasize >= 1 && datasize <= min_line_size);
967 /* Adding guarded memory actions and merging them with the existing
968 queue is too complex. Simply flush the queue and add this
969 action immediately. Since guarded loads and stores are pretty
970 rare, this is not thought likely to cause any noticeable
971 performance loss as a result of the loss of event-merging
972 opportunities. */
973 tl_assert(cgs->events_used >= 0);
974 flushEvents(cgs);
975 tl_assert(cgs->events_used == 0);
976 /* Same as case Ev_Dw / case Ev_Dr in flushEvents, except with guard */
977 IRExpr* i_node_expr;
978 const HChar* helperName;
979 void* helperAddr;
980 IRExpr** argv;
981 Int regparms;
982 IRDirty* di;
983 i_node_expr = mkIRExpr_HWord( (HWord)inode );
984 helperName = isWrite ? "log_0Ir_1Dw_cache_access"
985 : "log_0Ir_1Dr_cache_access";
986 helperAddr = isWrite ? &log_0Ir_1Dw_cache_access
987 : &log_0Ir_1Dr_cache_access;
988 argv = mkIRExprVec_3( i_node_expr,
989 ea, mkIRExpr_HWord( datasize ) );
990 regparms = 3;
991 di = unsafeIRDirty_0_N(
992 regparms,
993 helperName, VG_(fnptr_to_fnentry)( helperAddr ),
994 argv );
995 di->guard = guard;
996 addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
1000 static
1001 void addEvent_Bc ( CgState* cgs, InstrInfo* inode, IRAtom* guard )
1003 Event* evt;
1004 tl_assert(isIRAtom(guard));
1005 tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, guard)
1006 == (sizeof(RegWord)==4 ? Ity_I32 : Ity_I64));
1007 if (!clo_branch_sim)
1008 return;
1009 if (cgs->events_used == N_EVENTS)
1010 flushEvents(cgs);
1011 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
1012 evt = &cgs->events[cgs->events_used];
1013 init_Event(evt);
1014 evt->tag = Ev_Bc;
1015 evt->inode = inode;
1016 evt->Ev.Bc.taken = guard;
1017 cgs->events_used++;
1020 static
1021 void addEvent_Bi ( CgState* cgs, InstrInfo* inode, IRAtom* whereTo )
1023 Event* evt;
1024 tl_assert(isIRAtom(whereTo));
1025 tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, whereTo)
1026 == (sizeof(RegWord)==4 ? Ity_I32 : Ity_I64));
1027 if (!clo_branch_sim)
1028 return;
1029 if (cgs->events_used == N_EVENTS)
1030 flushEvents(cgs);
1031 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
1032 evt = &cgs->events[cgs->events_used];
1033 init_Event(evt);
1034 evt->tag = Ev_Bi;
1035 evt->inode = inode;
1036 evt->Ev.Bi.dst = whereTo;
1037 cgs->events_used++;
1040 ////////////////////////////////////////////////////////////
/* Instrument a single superblock.

   Walks the incoming IR statements once, in order.  Each IMark starts a
   new guest instruction: an InstrInfo is set up for it and an
   instruction-read (Ir) event is queued.  Loads, stores, guarded
   loads/stores, memory-touching dirty helpers, CAS and LL/SC statements
   queue data read/write events against the current instruction.
   Conditional side exits and the block's final indirect jump queue
   branch events (only when branch simulation is enabled).  Queued
   events are turned into simulator calls by flushEvents().  Every
   original statement is copied through unchanged, with the simulator
   calls interleaved ahead of it. */
static
IRSB* cg_instrument ( VgCallbackClosure* closure,
                      IRSB* sbIn,
                      const VexGuestLayout* layout,
                      const VexGuestExtents* vge,
                      const VexArchInfo* archinfo_host,
                      IRType gWordTy, IRType hWordTy )
{
   Int        i;
   UInt       isize;
   IRStmt*    st;
   Addr       cia;               /* address of current insn */
   CgState    cgs;
   IRTypeEnv* tyenv = sbIn->tyenv;
   InstrInfo* curr_inode = NULL;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   // Set up new SB
   cgs.sbOut = deepCopyIRSBExceptStmts(sbIn);

   // Copy verbatim any IR preamble preceding the first IMark
   i = 0;
   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
      addStmtToIRSB( cgs.sbOut, sbIn->stmts[i] );
      i++;
   }

   // Get the first statement, and initial cia from it
   tl_assert(sbIn->stmts_used > 0);
   tl_assert(i < sbIn->stmts_used);
   st = sbIn->stmts[i];
   tl_assert(Ist_IMark == st->tag);

   cia   = st->Ist.IMark.addr;
   isize = st->Ist.IMark.len;
   // If Vex fails to decode an instruction, the size will be zero.
   // Pretend otherwise.
   if (isize == 0) isize = VG_MIN_INSTR_SZB;

   // Set up running state and get block info
   tl_assert(closure->readdr == vge->base[0]);
   cgs.events_used = 0;
   cgs.sbInfo      = get_SB_info(sbIn, (Addr)closure->readdr);
   cgs.sbInfo_i    = 0;

   if (DEBUG_CG)
      VG_(printf)("\n\n---------- cg_instrument ----------\n");

   // Traverse the block, initialising inodes, adding events and flushing as
   // necessary.
   for (/*use current i*/; i < sbIn->stmts_used; i++) {

      st = sbIn->stmts[i];
      tl_assert(isFlatIRStmt(st));

      switch (st->tag) {
         // None of these touch guest memory; they are simply copied
         // through below.
         case Ist_NoOp:
         case Ist_AbiHint:
         case Ist_Put:
         case Ist_PutI:
         case Ist_MBE:
            break;

         case Ist_IMark:
            cia   = st->Ist.IMark.addr;
            isize = st->Ist.IMark.len;

            // If Vex fails to decode an instruction, the size will be zero.
            // Pretend otherwise.
            if (isize == 0) isize = VG_MIN_INSTR_SZB;

            // Sanity-check size.
            tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
                       || VG_CLREQ_SZB == isize );

            // Get space for and init the inode, record it as the current one.
            // Subsequent Dr/Dw/Dm events from the same instruction will
            // also use it.
            curr_inode = setup_InstrInfo(&cgs, cia, isize);

            addEvent_Ir( &cgs, curr_inode );
            break;

         case Ist_WrTmp: {
            // Only loads on the rhs are of interest here.
            IRExpr* data = st->Ist.WrTmp.data;
            if (data->tag == Iex_Load) {
               IRExpr* aexpr = data->Iex.Load.addr;
               // Note also, endianness info is ignored. I guess
               // that's not interesting.
               addEvent_Dr( &cgs, curr_inode, sizeofIRType(data->Iex.Load.ty),
                            aexpr );
            }
            break;
         }

         case Ist_Store: {
            IRExpr* data = st->Ist.Store.data;
            IRExpr* aexpr = st->Ist.Store.addr;
            addEvent_Dw( &cgs, curr_inode,
                         sizeofIRType(typeOfIRExpr(tyenv, data)), aexpr );
            break;
         }

         case Ist_StoreG: {
            // Conditional store: counted only when the guard holds.
            IRStoreG* sg = st->Ist.StoreG.details;
            IRExpr* data = sg->data;
            IRExpr* addr = sg->addr;
            IRType type = typeOfIRExpr(tyenv, data);
            tl_assert(type != Ity_INVALID);
            addEvent_D_guarded( &cgs, curr_inode,
                                sizeofIRType(type), addr, sg->guard,
                                True/*isWrite*/ );
            break;
         }

         case Ist_LoadG: {
            // Conditional load: the access size is the in-memory
            // (pre-widening) type.
            IRLoadG* lg = st->Ist.LoadG.details;
            IRType type = Ity_INVALID; /* loaded type */
            IRType typeWide = Ity_INVALID; /* after implicit widening */
            IRExpr* addr = lg->addr;
            typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
            tl_assert(type != Ity_INVALID);
            addEvent_D_guarded( &cgs, curr_inode,
                                sizeofIRType(type), addr, lg->guard,
                                False/*!isWrite*/ );
            break;
         }

         case Ist_Dirty: {
            Int dataSize;
            IRDirty* d = st->Ist.Dirty.details;
            if (d->mFx != Ifx_None) {
               /* This dirty helper accesses memory. Collect the details. */
               tl_assert(d->mAddr != NULL);
               tl_assert(d->mSize != 0);
               dataSize = d->mSize;
               // Large (eg. 28B, 108B, 512B on x86) data-sized
               // instructions will be done inaccurately, but they're
               // very rare and this avoids errors from hitting more
               // than two cache lines in the simulation.
               if (dataSize > min_line_size)
                  dataSize = min_line_size;
               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
                  addEvent_Dr( &cgs, curr_inode, dataSize, d->mAddr );
               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
                  addEvent_Dw( &cgs, curr_inode, dataSize, d->mAddr );
            } else {
               tl_assert(d->mAddr == NULL);
               tl_assert(d->mSize == 0);
            }
            break;
         }

         case Ist_CAS: {
            /* We treat it as a read and a write of the location. I
               think that is the same behaviour as it was before IRCAS
               was introduced, since prior to that point, the Vex
               front ends would translate a lock-prefixed instruction
               into a (normal) read followed by a (normal) write. */
            Int dataSize;
            IRCAS* cas = st->Ist.CAS.details;
            tl_assert(cas->addr != NULL);
            tl_assert(cas->dataLo != NULL);
            dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
            if (cas->dataHi != NULL)
               dataSize *= 2; /* since it's a doubleword-CAS */
            /* I don't think this can ever happen, but play safe. */
            if (dataSize > min_line_size)
               dataSize = min_line_size;
            addEvent_Dr( &cgs, curr_inode, dataSize, cas->addr );
            addEvent_Dw( &cgs, curr_inode, dataSize, cas->addr );
            break;
         }

         case Ist_LLSC: {
            IRType dataTy;
            if (st->Ist.LLSC.storedata == NULL) {
               /* LL */
               dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
               addEvent_Dr( &cgs, curr_inode,
                            sizeofIRType(dataTy), st->Ist.LLSC.addr );
               /* flush events before LL, should help SC to succeed */
               flushEvents( &cgs );
            } else {
               /* SC */
               dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
               addEvent_Dw( &cgs, curr_inode,
                            sizeofIRType(dataTy), st->Ist.LLSC.addr );
            }
            break;
         }

         case Ist_Exit: {
            // call branch predictor only if this is a branch in guest code
            if ( (st->Ist.Exit.jk == Ijk_Boring) ||
                 (st->Ist.Exit.jk == Ijk_Call) ||
                 (st->Ist.Exit.jk == Ijk_Ret) )
            {
               /* Stuff to widen the guard expression to a host word, so
                  we can pass it to the branch predictor simulation
                  functions easily. */
               Bool inverted;
               Addr nia, sea;
               IRConst* dst;
               IRType tyW = hWordTy;
               IROp widen = tyW==Ity_I32 ? Iop_1Uto32 : Iop_1Uto64;
               IROp opXOR = tyW==Ity_I32 ? Iop_Xor32 : Iop_Xor64;
               IRTemp guard1 = newIRTemp(cgs.sbOut->tyenv, Ity_I1);
               IRTemp guardW = newIRTemp(cgs.sbOut->tyenv, tyW);
               IRTemp guard = newIRTemp(cgs.sbOut->tyenv, tyW);
               IRExpr* one = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
                                          : IRExpr_Const(IRConst_U64(1));

               /* First we need to figure out whether the side exit got
                  inverted by the ir optimiser. To do that, figure out
                  the next (fallthrough) instruction's address and the
                  side exit address and see if they are the same. */
               nia = cia + isize;

               /* Side exit address */
               dst = st->Ist.Exit.dst;
               if (tyW == Ity_I32) {
                  tl_assert(dst->tag == Ico_U32);
                  sea = dst->Ico.U32;
               } else {
                  tl_assert(tyW == Ity_I64);
                  tl_assert(dst->tag == Ico_U64);
                  sea = dst->Ico.U64;
               }

               inverted = nia == sea;

               /* Widen the guard expression. */
               addStmtToIRSB( cgs.sbOut,
                              IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
               addStmtToIRSB( cgs.sbOut,
                              IRStmt_WrTmp( guardW,
                                            IRExpr_Unop(widen,
                                                        IRExpr_RdTmp(guard1))) );
               /* If the exit is inverted, invert the sense of the guard. */
               addStmtToIRSB(
                  cgs.sbOut,
                  IRStmt_WrTmp(
                     guard,
                     inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
                              : IRExpr_RdTmp(guardW)
                  ));
               /* And post the event. */
               addEvent_Bc( &cgs, curr_inode, IRExpr_RdTmp(guard) );
            }

            /* We may never reach the next statement, so need to flush
               all outstanding transactions now. */
            flushEvents( &cgs );
            break;
         }

         default:
            ppIRStmt(st);
            tl_assert(0);
            break;
      }

      /* Copy the original statement */
      addStmtToIRSB( cgs.sbOut, st );

      if (DEBUG_CG) {
         ppIRStmt(st);
         VG_(printf)("\n");
      }
   }

   /* Deal with branches to unknown destinations. Except ignore ones
      which are function returns as we assume the return stack
      predictor never mispredicts. */
   if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
      if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
      switch (sbIn->next->tag) {
         case Iex_Const:
            break; /* boring - branch to known address */
         case Iex_RdTmp:
            /* looks like an indirect branch (branch to unknown) */
            addEvent_Bi( &cgs, curr_inode, sbIn->next );
            break;
         default:
            /* shouldn't happen - if the incoming IR is properly
               flattened, should only have tmp and const cases to
               consider. */
            tl_assert(0);
      }
   }

   /* At the end of the bb. Flush outstandings. */
   flushEvents( &cgs );

   /* done. stay sane ... */
   tl_assert(cgs.sbInfo_i == cgs.sbInfo->n_instrs);

   if (DEBUG_CG) {
      VG_(printf)( "goto {");
      ppIRJumpKind(sbIn->jumpkind);
      VG_(printf)( "} ");
      ppIRExpr( sbIn->next );
      VG_(printf)( "}\n");
   }

   return cgs.sbOut;
}
1356 /*------------------------------------------------------------*/
1357 /*--- Cache configuration ---*/
1358 /*------------------------------------------------------------*/
/* Cache geometries as given on the command line (filled in by
   VG_(str_clo_cache_opt) from cg_process_cmd_line_option).  They stay
   UNDEFINED_CACHE when not specified, in which case cg_post_clo_init
   lets VG_(post_clo_init_configure_caches) choose the configuration. */
static cache_t clo_I1_cache = UNDEFINED_CACHE;
static cache_t clo_D1_cache = UNDEFINED_CACHE;
static cache_t clo_LL_cache = UNDEFINED_CACHE;
1364 /*------------------------------------------------------------*/
1365 /*--- cg_fini() and related function ---*/
1366 /*------------------------------------------------------------*/
// Total reads/writes/misses. Calculated during CC traversal at the end.
// All auto-zeroed.
static CacheCC Ir_total;     // instruction fetches and I1/LL misses
static CacheCC Dr_total;     // data reads and D1/LL misses
static CacheCC Dw_total;     // data writes and D1/LL misses
static BranchCC Bc_total;    // conditional branches and mispredicts
static BranchCC Bi_total;    // indirect branches and mispredicts
/* Write the per-line cost-centre table to the cachegrind output file
   (in the format consumed by cg_annotate), and, as a side effect of
   the traversal, accumulate the Ir/Dr/Dw/Bc/Bi_total summaries used
   later by cg_fini.  Which event columns are emitted depends on
   clo_cache_sim / clo_branch_sim.  Gives up quietly (summaries still
   zero) if the output file cannot be opened. */
static void fprint_CC_table_and_calc_totals(void)
{
   Int i;
   VgFile *fp;
   HChar *currFile = NULL;
   const HChar *currFn = NULL;
   LineCC* lineCC;

   // Setup output filename. Nb: it's important to do this now, ie. as late
   // as possible. If we do it at start-up and the program forks and the
   // output file format string contains a %p (pid) specifier, both the
   // parent and child will incorrectly write to the same file; this
   // happened in 3.3.0.
   HChar* cachegrind_out_file =
      VG_(expand_file_name)("--cachegrind-out-file", clo_cachegrind_out_file);

   fp = VG_(fopen)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
                                        VKI_S_IRUSR|VKI_S_IWUSR);
   if (fp == NULL) {
      // If the file can't be opened for whatever reason (conflict
      // between multiple cachegrinded processes?), give up now.
      VG_(umsg)("error: can't open output data file '%s'\n",
                cachegrind_out_file );
      VG_(umsg)(" ... so detailed results will be missing.\n");
      VG_(free)(cachegrind_out_file);
      return;
   } else {
      VG_(free)(cachegrind_out_file);
   }

   if (clo_cache_sim) {
      // "desc:" lines (giving I1/D1/LL cache configuration). The spaces after
      // the 2nd colon makes cg_annotate's output look nicer.
      VG_(fprintf)(fp, "desc: I1 cache: %s\n"
                       "desc: D1 cache: %s\n"
                       "desc: LL cache: %s\n",
                       I1.desc_line, D1.desc_line, LL.desc_line);
   }

   // "cmd:" line
   VG_(fprintf)(fp, "cmd: %s", VG_(args_the_exename));
   for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
      HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
      VG_(fprintf)(fp, " %s", arg);
   }
   // "events:" line -- column set depends on which simulations ran.
   if (clo_cache_sim && clo_branch_sim) {
      VG_(fprintf)(fp, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
                          "Bc Bcm Bi Bim\n");
   }
   else if (clo_cache_sim && !clo_branch_sim) {
      VG_(fprintf)(fp, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
                          "\n");
   }
   else if (!clo_cache_sim && clo_branch_sim) {
      VG_(fprintf)(fp, "\nevents: Ir Bc Bcm Bi Bim\n");
   }
   else {
      VG_(fprintf)(fp, "\nevents: Ir\n");
   }

   // Traverse every lineCC
   VG_(OSetGen_ResetIter)(CC_table);
   while ( (lineCC = VG_(OSetGen_Next)(CC_table)) ) {
      Bool just_hit_a_new_file = False;
      // If we've hit a new file, print a "fl=" line. Note that because
      // each string is stored exactly once in the string table, we can use
      // pointer comparison rather than strcmp() to test for equality, which
      // is good because most of the time the comparisons are equal and so
      // the whole strings would have to be checked.
      if ( lineCC->loc.file != currFile ) {
         currFile = lineCC->loc.file;
         VG_(fprintf)(fp, "fl=%s\n", currFile);
         distinct_files++;
         just_hit_a_new_file = True;
      }
      // If we've hit a new function, print a "fn=" line. We know to do
      // this when the function name changes, and also every time we hit a
      // new file (in which case the new function name might be the same as
      // in the old file, hence the just_hit_a_new_file test).
      if ( just_hit_a_new_file || lineCC->loc.fn != currFn ) {
         currFn = lineCC->loc.fn;
         VG_(fprintf)(fp, "fn=%s\n", currFn);
         distinct_fns++;
      }

      // Print the LineCC -- same column order as the "events:" line above.
      if (clo_cache_sim && clo_branch_sim) {
         VG_(fprintf)(fp, "%d %llu %llu %llu"
                           " %llu %llu %llu"
                           " %llu %llu %llu"
                           " %llu %llu %llu %llu\n",
                           lineCC->loc.line,
                           lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
                           lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
                           lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL,
                           lineCC->Bc.b, lineCC->Bc.mp,
                           lineCC->Bi.b, lineCC->Bi.mp);
      }
      else if (clo_cache_sim && !clo_branch_sim) {
         VG_(fprintf)(fp, "%d %llu %llu %llu"
                           " %llu %llu %llu"
                           " %llu %llu %llu\n",
                           lineCC->loc.line,
                           lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
                           lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
                           lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL);
      }
      else if (!clo_cache_sim && clo_branch_sim) {
         VG_(fprintf)(fp, "%d %llu"
                           " %llu %llu %llu %llu\n",
                           lineCC->loc.line,
                           lineCC->Ir.a,
                           lineCC->Bc.b, lineCC->Bc.mp,
                           lineCC->Bi.b, lineCC->Bi.mp);
      }
      else {
         VG_(fprintf)(fp, "%d %llu\n",
                           lineCC->loc.line,
                           lineCC->Ir.a);
      }

      // Update summary stats
      Ir_total.a += lineCC->Ir.a;
      Ir_total.m1 += lineCC->Ir.m1;
      Ir_total.mL += lineCC->Ir.mL;
      Dr_total.a += lineCC->Dr.a;
      Dr_total.m1 += lineCC->Dr.m1;
      Dr_total.mL += lineCC->Dr.mL;
      Dw_total.a += lineCC->Dw.a;
      Dw_total.m1 += lineCC->Dw.m1;
      Dw_total.mL += lineCC->Dw.mL;
      Bc_total.b += lineCC->Bc.b;
      Bc_total.mp += lineCC->Bc.mp;
      Bi_total.b += lineCC->Bi.b;
      Bi_total.mp += lineCC->Bi.mp;

      distinct_lines++;
   }

   // Summary stats must come after rest of table, since we calculate them
   // during traversal.
   if (clo_cache_sim && clo_branch_sim) {
      VG_(fprintf)(fp, "summary:"
                       " %llu %llu %llu"
                       " %llu %llu %llu"
                       " %llu %llu %llu"
                       " %llu %llu %llu %llu\n",
                       Ir_total.a, Ir_total.m1, Ir_total.mL,
                       Dr_total.a, Dr_total.m1, Dr_total.mL,
                       Dw_total.a, Dw_total.m1, Dw_total.mL,
                       Bc_total.b, Bc_total.mp,
                       Bi_total.b, Bi_total.mp);
   }
   else if (clo_cache_sim && !clo_branch_sim) {
      VG_(fprintf)(fp, "summary:"
                       " %llu %llu %llu"
                       " %llu %llu %llu"
                       " %llu %llu %llu\n",
                       Ir_total.a, Ir_total.m1, Ir_total.mL,
                       Dr_total.a, Dr_total.m1, Dr_total.mL,
                       Dw_total.a, Dw_total.m1, Dw_total.mL);
   }
   else if (!clo_cache_sim && clo_branch_sim) {
      VG_(fprintf)(fp, "summary:"
                       " %llu"
                       " %llu %llu %llu %llu\n",
                       Ir_total.a,
                       Bc_total.b, Bc_total.mp,
                       Bi_total.b, Bi_total.mp);
   }
   else {
      VG_(fprintf)(fp, "summary:"
                       " %llu\n",
                       Ir_total.a);
   }

   VG_(fclose)(fp);
}
1556 static UInt ULong_width(ULong n)
1558 UInt w = 0;
1559 while (n > 0) {
1560 n = n / 10;
1561 w++;
1563 if (w == 0) w = 1;
1564 return w + (w-1)/3; // add space for commas
/* Tool finalisation: dump the per-line CC table to file (which also
   computes the *_total summaries), then print the human-readable
   summary to the log, plus internal stats under --stats=yes.  Note the
   *_total.a / *.b fields are bumped to 1 when zero purely to avoid
   divide-by-zero in the rate calculations below. */
static void cg_fini(Int exitcode)
{
   static HChar fmt[128]; // OK; large enough

   CacheCC D_total;
   BranchCC B_total;
   ULong LL_total_m, LL_total_mr, LL_total_mw,
         LL_total, LL_total_r, LL_total_w;
   Int l1, l2, l3;

   fprint_CC_table_and_calc_totals();

   if (VG_(clo_verbosity) == 0)
      return;

   // Nb: this isn't called "MAX" because that overshadows a global on Darwin.
   #define CG_MAX(a, b) ((a) >= (b) ? (a) : (b))

   /* I cache results. Use the I_refs value to determine the first column
    * width. */
   l1 = ULong_width(Ir_total.a);
   l2 = ULong_width(CG_MAX(Dr_total.a, Bc_total.b));
   l3 = ULong_width(CG_MAX(Dw_total.a, Bi_total.b));

   /* Make format string, getting width right for numbers */
   VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);

   /* Always print this */
   VG_(umsg)(fmt, "I refs: ", Ir_total.a);

   /* If cache profiling is enabled, show D access numbers and all
      miss numbers */
   if (clo_cache_sim) {
      VG_(umsg)(fmt, "I1 misses: ", Ir_total.m1);
      VG_(umsg)(fmt, "LLi misses: ", Ir_total.mL);

      if (0 == Ir_total.a) Ir_total.a = 1;
      VG_(umsg)("I1 miss rate: %*.2f%%\n", l1,
                Ir_total.m1 * 100.0 / Ir_total.a);
      VG_(umsg)("LLi miss rate: %*.2f%%\n", l1,
                Ir_total.mL * 100.0 / Ir_total.a);
      VG_(umsg)("\n");

      /* D cache results. Use the D_refs.rd and D_refs.wr values to
       * determine the width of columns 2 & 3. */
      D_total.a = Dr_total.a + Dw_total.a;
      D_total.m1 = Dr_total.m1 + Dw_total.m1;
      D_total.mL = Dr_total.mL + Dw_total.mL;

      /* Make format string, getting width right for numbers */
      VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu rd + %%,%dllu wr)\n",
                        l1, l2, l3);

      VG_(umsg)(fmt, "D refs: ",
                D_total.a, Dr_total.a, Dw_total.a);
      VG_(umsg)(fmt, "D1 misses: ",
                D_total.m1, Dr_total.m1, Dw_total.m1);
      VG_(umsg)(fmt, "LLd misses: ",
                D_total.mL, Dr_total.mL, Dw_total.mL);

      if (0 == D_total.a) D_total.a = 1;
      if (0 == Dr_total.a) Dr_total.a = 1;
      if (0 == Dw_total.a) Dw_total.a = 1;
      VG_(umsg)("D1 miss rate: %*.1f%% (%*.1f%% + %*.1f%% )\n",
                l1, D_total.m1 * 100.0 / D_total.a,
                l2, Dr_total.m1 * 100.0 / Dr_total.a,
                l3, Dw_total.m1 * 100.0 / Dw_total.a);
      VG_(umsg)("LLd miss rate: %*.1f%% (%*.1f%% + %*.1f%% )\n",
                l1, D_total.mL * 100.0 / D_total.a,
                l2, Dr_total.mL * 100.0 / Dr_total.a,
                l3, Dw_total.mL * 100.0 / Dw_total.a);
      VG_(umsg)("\n");

      /* LL overall results */

      /* LL refs are D1 + I1 misses; LL misses are the LL-level misses. */
      LL_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
      LL_total_r = Dr_total.m1 + Ir_total.m1;
      LL_total_w = Dw_total.m1;
      VG_(umsg)(fmt, "LL refs: ",
                LL_total, LL_total_r, LL_total_w);

      LL_total_m = Dr_total.mL + Dw_total.mL + Ir_total.mL;
      LL_total_mr = Dr_total.mL + Ir_total.mL;
      LL_total_mw = Dw_total.mL;
      VG_(umsg)(fmt, "LL misses: ",
                LL_total_m, LL_total_mr, LL_total_mw);

      VG_(umsg)("LL miss rate: %*.1f%% (%*.1f%% + %*.1f%% )\n",
                l1, LL_total_m * 100.0 / (Ir_total.a + D_total.a),
                l2, LL_total_mr * 100.0 / (Ir_total.a + Dr_total.a),
                l3, LL_total_mw * 100.0 / Dw_total.a);
   }

   /* If branch profiling is enabled, show branch overall results. */
   if (clo_branch_sim) {
      /* Make format string, getting width right for numbers */
      VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu cond + %%,%dllu ind)\n",
                        l1, l2, l3);

      if (0 == Bc_total.b) Bc_total.b = 1;
      if (0 == Bi_total.b) Bi_total.b = 1;
      B_total.b = Bc_total.b + Bi_total.b;
      B_total.mp = Bc_total.mp + Bi_total.mp;

      VG_(umsg)("\n");
      VG_(umsg)(fmt, "Branches: ",
                B_total.b, Bc_total.b, Bi_total.b);

      VG_(umsg)(fmt, "Mispredicts: ",
                B_total.mp, Bc_total.mp, Bi_total.mp);

      VG_(umsg)("Mispred rate: %*.1f%% (%*.1f%% + %*.1f%% )\n",
                l1, B_total.mp * 100.0 / B_total.b,
                l2, Bc_total.mp * 100.0 / Bc_total.b,
                l3, Bi_total.mp * 100.0 / Bi_total.b);
   }

   // Various stats
   if (VG_(clo_stats)) {
      Int debug_lookups = full_debugs + fn_debugs +
                          file_line_debugs + no_debugs;
      // NOTE(review): debug_lookups is not guarded against zero before
      // the percentage divisions below -- presumably it is always
      // nonzero once anything was instrumented; confirm.

      VG_(dmsg)("\n");
      VG_(dmsg)("cachegrind: distinct files : %d\n", distinct_files);
      VG_(dmsg)("cachegrind: distinct functions : %d\n", distinct_fns);
      VG_(dmsg)("cachegrind: distinct lines : %d\n", distinct_lines);
      VG_(dmsg)("cachegrind: distinct instrs NoX: %d\n", distinct_instrsNoX);
      VG_(dmsg)("cachegrind: distinct instrs Gen: %d\n", distinct_instrsGen);
      VG_(dmsg)("cachegrind: debug lookups : %d\n", debug_lookups);

      VG_(dmsg)("cachegrind: with full info:%6.1f%% (%d)\n",
                full_debugs * 100.0 / debug_lookups, full_debugs);
      VG_(dmsg)("cachegrind: with file/line info:%6.1f%% (%d)\n",
                file_line_debugs * 100.0 / debug_lookups, file_line_debugs);
      VG_(dmsg)("cachegrind: with fn name info:%6.1f%% (%d)\n",
                fn_debugs * 100.0 / debug_lookups, fn_debugs);
      VG_(dmsg)("cachegrind: with zero info:%6.1f%% (%d)\n",
                no_debugs * 100.0 / debug_lookups, no_debugs);

      VG_(dmsg)("cachegrind: string table size: %u\n",
                VG_(OSetGen_Size)(stringTable));
      VG_(dmsg)("cachegrind: CC table size: %u\n",
                VG_(OSetGen_Size)(CC_table));
      VG_(dmsg)("cachegrind: InstrInfo table size: %u\n",
                VG_(OSetGen_Size)(instrInfoTable));
   }
}
1715 /*--------------------------------------------------------------------*/
1716 /*--- Discarding BB info ---*/
1717 /*--------------------------------------------------------------------*/
1719 // Called when a translation is removed from the translation cache for
1720 // any reason at all: to free up space, because the guest code was
1721 // unmapped or modified, or for any arbitrary reason.
1722 static
1723 void cg_discard_superblock_info ( Addr orig_addr64, VexGuestExtents vge )
1725 SB_info* sbInfo;
1726 Addr orig_addr = vge.base[0];
1728 tl_assert(vge.n_used > 0);
1730 if (DEBUG_CG)
1731 VG_(printf)( "discard_basic_block_info: %p, %p, %llu\n",
1732 (void*)orig_addr,
1733 (void*)vge.base[0], (ULong)vge.len[0]);
1735 // Get BB info, remove from table, free BB info. Simple! Note that we
1736 // use orig_addr, not the first instruction address in vge.
1737 sbInfo = VG_(OSetGen_Remove)(instrInfoTable, &orig_addr);
1738 tl_assert(NULL != sbInfo);
1739 VG_(OSetGen_FreeNode)(instrInfoTable, sbInfo);
1742 /*--------------------------------------------------------------------*/
1743 /*--- Command line processing ---*/
1744 /*--------------------------------------------------------------------*/
/* Handle one tool-specific command-line option.  Returns True iff the
   option was recognised.  Note the unusual shape: the VG_STR_CLO /
   VG_BOOL_CLO macros expand to an 'if' head, hence the empty {}
   bodies; cache-geometry options are delegated to
   VG_(str_clo_cache_opt). */
static Bool cg_process_cmd_line_option(const HChar* arg)
{
   if (VG_(str_clo_cache_opt)(arg,
                              &clo_I1_cache,
                              &clo_D1_cache,
                              &clo_LL_cache)) {}

   else if VG_STR_CLO( arg, "--cachegrind-out-file", clo_cachegrind_out_file) {}
   else if VG_BOOL_CLO(arg, "--cache-sim", clo_cache_sim) {}
   else if VG_BOOL_CLO(arg, "--branch-sim", clo_branch_sim) {}
   else
      return False;

   return True;
}
/* Print the tool-specific part of --help, then the generic
   cache-geometry options via VG_(print_cache_clo_opts). */
static void cg_print_usage(void)
{
   VG_(printf)(
" --cachegrind-out-file=<file> output file name [cachegrind.out.%%p]\n"
" --cache-sim=yes|no collect cache stats? [yes]\n"
" --branch-sim=yes|no collect branch prediction stats? [no]\n"
   );
   VG_(print_cache_clo_opts)();
}
/* Print tool-specific debug options for --help-debug (there are none). */
static void cg_print_debug_usage(void)
{
   VG_(printf)(
" (none)\n"
   );
}
1779 /*--------------------------------------------------------------------*/
1780 /*--- Setup ---*/
1781 /*--------------------------------------------------------------------*/
1783 static void cg_post_clo_init(void); /* just below */
/* Pre-command-line-option initialisation: register tool identity,
   the main tool callbacks (cg_post_clo_init / cg_instrument /
   cg_fini), and the optional "needs" callbacks with the Valgrind
   core. */
static void cg_pre_clo_init(void)
{
   VG_(details_name) ("Cachegrind");
   VG_(details_version) (NULL);
   VG_(details_description) ("a cache and branch-prediction profiler");
   VG_(details_copyright_author)(
      "Copyright (C) 2002-2017, and GNU GPL'd, by Nicholas Nethercote et al.");
   VG_(details_bug_reports_to) (VG_BUGS_TO);
   VG_(details_avg_translation_sizeB) ( 500 );

   /* Chained assignment: set both the IR-optimiser default and the
      px-file-backed setting to SP-at-mem-access precision. */
   VG_(clo_vex_control).iropt_register_updates_default
      = VG_(clo_px_file_backed)
      = VexRegUpdSpAtMemAccess; // overridable by the user.

   VG_(basic_tool_funcs) (cg_post_clo_init,
                          cg_instrument,
                          cg_fini);

   VG_(needs_superblock_discards)(cg_discard_superblock_info);
   VG_(needs_command_line_options)(cg_process_cmd_line_option,
                                   cg_print_usage,
                                   cg_print_debug_usage);
}
/* Post-command-line-option initialisation: create the three OSets
   (per-line CCs keyed on CodeLoc, per-SB InstrInfo keyed on address,
   and the interned string table), then, if cache simulation is on,
   determine the cache geometries and initialise the simulator. */
static void cg_post_clo_init(void)
{
   cache_t I1c, D1c, LLc;

   CC_table =
      VG_(OSetGen_Create)(offsetof(LineCC, loc),
                          cmp_CodeLoc_LineCC,
                          VG_(malloc), "cg.main.cpci.1",
                          VG_(free));
   instrInfoTable =
      VG_(OSetGen_Create)(/*keyOff*/0,
                          NULL,
                          VG_(malloc), "cg.main.cpci.2",
                          VG_(free));
   stringTable =
      VG_(OSetGen_Create)(/*keyOff*/0,
                          stringCmp,
                          VG_(malloc), "cg.main.cpci.3",
                          VG_(free));

   if (clo_cache_sim) {
      VG_(post_clo_init_configure_caches)(&I1c, &D1c, &LLc,
                                          &clo_I1_cache,
                                          &clo_D1_cache,
                                          &clo_LL_cache);

      // min_line_size is used to make sure that we never feed
      // accesses to the simulator straddling more than two
      // cache lines at any cache level
      min_line_size = (I1c.line_size < D1c.line_size) ? I1c.line_size : D1c.line_size;
      min_line_size = (LLc.line_size < min_line_size) ? LLc.line_size : min_line_size;

      Int largest_load_or_store_size
         = VG_(machine_get_size_of_largest_guest_register)();
      if (min_line_size < largest_load_or_store_size) {
         /* We can't continue, because the cache simulation might
            straddle more than 2 lines, and it will assert. So let's
            just stop before we start. */
         VG_(umsg)("Cachegrind: cannot continue: the minimum line size (%d)\n",
                   (Int)min_line_size);
         VG_(umsg)(" must be equal to or larger than the maximum register size (%d)\n",
                   largest_load_or_store_size );
         VG_(umsg)(" but it is not. Exiting now.\n");
         VG_(exit)(1);
      }

      cachesim_initcaches(I1c, D1c, LLc);
   }
}
1859 VG_DETERMINE_INTERFACE_VERSION(cg_pre_clo_init)
1861 /*--------------------------------------------------------------------*/
1862 /*--- end ---*/
1863 /*--------------------------------------------------------------------*/