/*--------------------------------------------------------------------*/
/*--- An example Valgrind tool.                          lk_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Lackey, an example Valgrind tool that does
   some simple program measurement and tracing.

   Copyright (C) 2002-2017 Nicholas Nethercote
      njn@valgrind.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.
*/

// This tool shows how to do some basic instrumentation.
//
// There are four kinds of instrumentation it can do.  They can be turned
// on/off independently with command line options:
//
// * --basic-counts   : do basic counts, eg. number of instructions
//                      executed, jumps executed, etc.
// * --detailed-counts: do more detailed counts:  number of loads, stores
//                      and ALU operations of different sizes.
// * --trace-mem=yes:   trace all (data) memory accesses.
// * --trace-superblocks=yes:
//                      trace all superblock entries.  Mostly of interest
//                      to the Valgrind developers.
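//
// For example, to run "ls" under Lackey with two of these kinds of
// instrumentation enabled (assuming a built valgrind is on your PATH;
// the program being traced is arbitrary):
//
//    valgrind --tool=lackey --basic-counts=yes --trace-mem=yes ls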
//
// The code for each kind of instrumentation is guarded by a clo_* variable:
// clo_basic_counts, clo_detailed_counts, clo_trace_mem and clo_trace_sbs.
//
// If you want to modify any of the instrumentation code, look for the code
// that is guarded by the relevant clo_* variable (eg. clo_trace_mem).
// If you're not interested in the other kinds of instrumentation you can
// remove them.  If you want to do more complex modifications, please read
// VEX/pub/libvex_ir.h to understand the intermediate representation.
//
// Specific Details about --trace-mem=yes
// --------------------------------------
// Lackey's --trace-mem code is a good starting point for building Valgrind
// tools that act on memory loads and stores.  It could also be used as is,
// with its output used as input to a post-mortem processing step.  However,
// because memory traces can be very large, online analysis is generally
// better.
//
// It prints memory data access traces that look like this:
//
//   I  0023C790,2  # instruction read at 0x0023C790 of size 2
//   I  0023C792,5
//    S BE80199C,4  # data store at 0xBE80199C of size 4
//   I  0025242B,3
//    L BE801950,4  # data load at 0xBE801950 of size 4
//   I  0023D476,7
//    M 0025747C,1  # data modify at 0x0025747C of size 1
//   I  0023DC20,2
//    L 00254962,1
//    L BE801FB3,1
//   I  00252305,1
//    L 00254AEB,1
//    S 00257998,1
//
// Every instruction executed has an "instr" event representing it.
// Instructions that do memory accesses are followed by one or more "load",
// "store" or "modify" events.  Some instructions do more than one load or
// store, as in the last two examples in the above trace.
//
// Here are some examples of x86 instructions that do different combinations
// of loads, stores, and modifies.
//
//    Instruction          Memory accesses                  Event sequence
//    -----------          ---------------                  --------------
//    add %eax, %ebx       No loads or stores               instr
//
//    movl (%eax), %ebx    loads (%eax)                     instr, load
//
//    movl %eax, (%ebx)    stores (%ebx)                    instr, store
//
//    incl (%ecx)          modifies (%ecx)                  instr, modify
//
//    cmpsb                loads (%esi), loads (%edi)       instr, load, load
//
//    call*l (%edx)        loads (%edx), stores -4(%esp)    instr, load, store
//    pushl (%edx)         loads (%edx), stores -4(%esp)    instr, load, store
//    movsw                loads (%esi), stores (%edi)      instr, load, store
//
// Instructions using x86 "rep" prefixes are traced as if they are repeated
// N times.
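//
// As a concrete example of the post-mortem processing mentioned above, a
// standalone reader for this trace format could be as small as the
// following (hypothetical) C program.  It tallies load and modify events,
// and assumes the trace has been captured to a file (eg. via --log-file)
// with any non-trace lines (eg. Valgrind's ==PID== messages) stripped
// first:
//
//    #include <stdio.h>
//
//    int main(void)
//    {
//       char kind;                          // 'I', 'L', 'S' or 'M'
//       unsigned long addr, size, loads = 0;
//       // Each trace line is "<kind> <addr-in-hex>,<size>".
//       while (scanf(" %c %lx,%lu", &kind, &addr, &size) == 3) {
//          if (kind == 'L' || kind == 'M')
//             loads++;
//       }
//       printf("loads+modifies: %lu\n", loads);
//       return 0;
//    }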
//
// Lackey with --trace-mem gives good traces, but they are not perfect, for
// the following reasons:
//
// - It does not trace into the OS kernel, so system calls and other kernel
//   operations (eg. some scheduling and signal handling code) are ignored.
//
// - It could model loads and stores done at the system call boundary using
//   the pre_mem_read/post_mem_write events.  For example, if you call
//   fstat() you know that the passed in buffer has been written.  But it
//   currently does not do this.
//
// - Valgrind replaces some code (not much) with its own, notably parts of
//   code for scheduling operations and signal handling.  This code is not
//   traced.
//
// - There is no consideration of virtual-to-physical address mapping.
//   This may not matter for many purposes.
//
// - Valgrind modifies the instruction stream in some very minor ways.  For
//   example, on x86 the bts, btc, btr instructions are incorrectly
//   considered to always touch memory (this is a consequence of these
//   instructions being very difficult to simulate).
//
// - Valgrind tools lay out memory differently from normal programs, so the
//   addresses you get will not be typical.  Thus Lackey (and all Valgrind
//   tools) is suitable for getting relative memory traces -- eg. if you
//   want to analyse locality of memory accesses -- but is not good if
//   absolute addresses are important.
//
// Despite all these warnings, Lackey's results should be good enough for a
// wide range of purposes.  For example, Cachegrind shares all the above
// shortcomings and it is still useful.
//
// For further inspiration, you should look at cachegrind/cg_main.c which
// uses the same basic technique for tracing memory accesses, but also groups
// events together for processing into twos and threes so that fewer C calls
// are made and things run faster.
//
// Specific Details about --trace-superblocks=yes
// ----------------------------------------------
// Valgrind splits code up into single entry, multiple exit blocks
// known as superblocks.  By itself, --trace-superblocks=yes just
// prints a message as each superblock is run:
//
//  SB 04013170
//  SB 04013177
//  SB 04013173
//  SB 04013177
//
// The hex number is the address of the first instruction in the
// superblock.  You can see the relationship more obviously if you use
// --trace-superblocks=yes and --trace-mem=yes together.  Then an "SB"
// message at address X is immediately followed by an instruction-read
// ("I") line for that address, as the first instruction in the block
// is executed, for example:
//
//  SB 04014073
//  I  04014073,3
//   L 7FEFFF7F8,8
//  I  04014076,4
//  I  0401407A,3
//  I  0401407D,3
//  I  04014080,3
//  I  04014083,6

#include "pub_tool_basics.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_debuginfo.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_options.h"
#include "pub_tool_machine.h"     // VG_(fnptr_to_fnentry)

/*------------------------------------------------------------*/
/*--- Command line options                                 ---*/
/*------------------------------------------------------------*/

/* Command line options controlling instrumentation kinds, as described at
 * the top of this file. */
static Bool clo_basic_counts    = True;
static Bool clo_detailed_counts = False;
static Bool clo_trace_mem       = False;
static Bool clo_trace_sbs       = False;

/* The name of the function of which the number of calls (under
 * --basic-counts=yes) is to be counted, with default.  Override with command
 * line option --fnname. */
static const HChar* clo_fnname = "main";

static Bool lk_process_cmd_line_option(const HChar* arg)
{
   if VG_STR_CLO(arg, "--fnname", clo_fnname) {}
   else if VG_BOOL_CLO(arg, "--basic-counts",      clo_basic_counts) {}
   else if VG_BOOL_CLO(arg, "--detailed-counts",   clo_detailed_counts) {}
   else if VG_BOOL_CLO(arg, "--trace-mem",         clo_trace_mem) {}
   else if VG_BOOL_CLO(arg, "--trace-superblocks", clo_trace_sbs) {}
   else
      return False;

   tl_assert(clo_fnname);
   tl_assert(clo_fnname[0]);
   return True;
}

static void lk_print_usage(void)
{
   VG_(printf)(
"    --basic-counts=no|yes       count instructions, jumps, etc. [yes]\n"
"    --detailed-counts=no|yes    count loads, stores and alu ops [no]\n"
"    --trace-mem=no|yes          trace all loads and stores [no]\n"
"    --trace-superblocks=no|yes  trace all superblock entries [no]\n"
"    --fnname=<name>             count calls to <name> (only used if\n"
"                                --basic-counts=yes)  [main]\n"
   );
}

static void lk_print_debug_usage(void)
{
   VG_(printf)(
"    (none)\n"
   );
}

/*------------------------------------------------------------*/
/*--- Stuff for --basic-counts                             ---*/
/*------------------------------------------------------------*/

/* Nb: use ULongs because the numbers can get very big */
static ULong n_func_calls    = 0;
static ULong n_SBs_entered   = 0;
static ULong n_SBs_completed = 0;
static ULong n_IRStmts       = 0;
static ULong n_guest_instrs  = 0;
static ULong n_Jccs          = 0;
static ULong n_Jccs_untaken  = 0;
static ULong n_IJccs         = 0;
static ULong n_IJccs_untaken = 0;

static void add_one_func_call(void)             { n_func_calls++; }
static void add_one_SB_entered(void)            { n_SBs_entered++; }
static void add_one_SB_completed(void)          { n_SBs_completed++; }
static void add_one_IRStmt(void)                { n_IRStmts++; }
static void add_one_guest_instr(void)           { n_guest_instrs++; }
static void add_one_Jcc(void)                   { n_Jccs++; }
static void add_one_Jcc_untaken(void)           { n_Jccs_untaken++; }
static void add_one_inverted_Jcc(void)          { n_IJccs++; }
static void add_one_inverted_Jcc_untaken(void)  { n_IJccs_untaken++; }

/*------------------------------------------------------------*/
/*--- Stuff for --detailed-counts                          ---*/
/*------------------------------------------------------------*/

typedef
   IRExpr
   IRAtom;

/* --- Operations --- */

typedef enum { OpLoad=0, OpStore=1, OpAlu=2 } Op;

#define N_OPS 3


/* --- Types --- */

#define N_TYPES 14

static Int type2index ( IRType ty )
{
   switch (ty) {
      case Ity_I1:   return 0;
      case Ity_I8:   return 1;
      case Ity_I16:  return 2;
      case Ity_I32:  return 3;
      case Ity_I64:  return 4;
      case Ity_I128: return 5;
      case Ity_F32:  return 6;
      case Ity_F64:  return 7;
      case Ity_F128: return 8;
      case Ity_V128: return 9;
      case Ity_V256: return 10;
      case Ity_D32:  return 11;
      case Ity_D64:  return 12;
      case Ity_D128: return 13;
      default: tl_assert(0);
   }
}

static const HChar* nameOfTypeIndex ( Int i )
{
   switch (i) {
      case 0:  return "I1";
      case 1:  return "I8";
      case 2:  return "I16";
      case 3:  return "I32";
      case 4:  return "I64";
      case 5:  return "I128";
      case 6:  return "F32";
      case 7:  return "F64";
      case 8:  return "F128";
      case 9:  return "V128";
      case 10: return "V256";
      case 11: return "D32";
      case 12: return "D64";
      case 13: return "D128";
      default: tl_assert(0);
   }
}

/* --- Counts --- */

static ULong detailCounts[N_OPS][N_TYPES];

/* The helper that is called from the instrumented code. */
static VG_REGPARM(1)
void increment_detail(ULong* detail)
{
   (*detail)++;
}

/* A helper that adds the instrumentation for a detail.  guard ::
   Ity_I1 is the guarding condition for the event.  If NULL it is
   assumed to mean "always True". */
static void instrument_detail(IRSB* sb, Op op, IRType type, IRAtom* guard)
{
   IRDirty* di;
   IRExpr** argv;
   const UInt typeIx = type2index(type);

   tl_assert(op < N_OPS);
   tl_assert(typeIx < N_TYPES);

   argv = mkIRExprVec_1( mkIRExpr_HWord( (HWord)&detailCounts[op][typeIx] ) );
   di = unsafeIRDirty_0_N( 1, "increment_detail",
                              VG_(fnptr_to_fnentry)( &increment_detail ),
                              argv);
   if (guard) di->guard = guard;
   addStmtToIRSB( sb, IRStmt_Dirty(di) );
}
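
/* For example, instrument_detail(sb, OpLoad, Ity_I32, NULL) emits an
   unconditional dirty call that bumps detailCounts[OpLoad][3], since
   type2index(Ity_I32) == 3. */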

/* Summarize and print the details. */
static void print_details ( void )
{
   Int typeIx;
   VG_(umsg)("   Type        Loads       Stores       AluOps\n");
   VG_(umsg)("   -------------------------------------------\n");
   for (typeIx = 0; typeIx < N_TYPES; typeIx++) {
      VG_(umsg)("   %-4s %'12llu %'12llu %'12llu\n",
                nameOfTypeIndex( typeIx ),
                detailCounts[OpLoad ][typeIx],
                detailCounts[OpStore][typeIx],
                detailCounts[OpAlu  ][typeIx]
      );
   }
}


/*------------------------------------------------------------*/
/*--- Stuff for --trace-mem                                ---*/
/*------------------------------------------------------------*/

#define MAX_DSIZE    512

typedef
   enum { Event_Ir, Event_Dr, Event_Dw, Event_Dm }
   EventKind;

typedef
   struct {
      IRAtom*    addr;
      EventKind  ekind;
      Int        size;
      IRAtom*    guard; /* :: Ity_I1, or NULL=="always True" */
   }
   Event;

/* Up to this many unnotified events are allowed.  Must be at least two,
   so that reads and writes to the same address can be merged into a modify.
   Beyond that, larger numbers just potentially induce more spilling due to
   extending live ranges of address temporaries. */
#define N_EVENTS 4

/* Maintain an ordered list of memory events which are outstanding, in
   the sense that no IR has yet been generated to do the relevant
   helper calls.  The SB is scanned top to bottom and memory events
   are added to the end of the list, merging with the most recent
   notified event where possible (Dw immediately following Dr and
   having the same size and EA can be merged).

   This merging is done so that for architectures which have
   load-op-store instructions (x86, amd64), the instr is treated as if
   it makes just one memory reference (a modify), rather than two (a
   read followed by a write at the same address).

   At various points the list will need to be flushed, that is, IR
   generated from it.  That must happen before any possible exit from
   the block (the end, or an IRStmt_Exit).  Flushing also takes place
   when there is no space to add a new event, and before entering a
   RMW (read-modify-write) section on processors supporting LL/SC.

   If we require the simulation statistics to be up to date with
   respect to possible memory exceptions, then the list would have to
   be flushed before each memory reference.  That's a pain so we don't
   bother.

   Flushing the list consists of walking it start to end and emitting
   instrumentation IR for each event, in the order in which they
   appear. */
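
/* For example, x86 "incl (%ecx)" arrives here as a load followed by a
   store of the same size at the same address.  addEvent_Dw (below)
   notices the immediately preceding Event_Dr and rewrites it in place,
   so the pending list goes from

      { Ir(pc,len), Dr(ecx,4) }    to    { Ir(pc,len), Dm(ecx,4) }

   and a single trace_modify call is emitted at flush time. */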

static Event events[N_EVENTS];
static Int   events_used = 0;


static VG_REGPARM(2) void trace_instr(Addr addr, SizeT size)
{
   VG_(printf)("I  %08lx,%lu\n", addr, size);
}

static VG_REGPARM(2) void trace_load(Addr addr, SizeT size)
{
   VG_(printf)(" L %08lx,%lu\n", addr, size);
}

static VG_REGPARM(2) void trace_store(Addr addr, SizeT size)
{
   VG_(printf)(" S %08lx,%lu\n", addr, size);
}

static VG_REGPARM(2) void trace_modify(Addr addr, SizeT size)
{
   VG_(printf)(" M %08lx,%lu\n", addr, size);
}


static void flushEvents(IRSB* sb)
{
   Int          i;
   const HChar* helperName;
   void*        helperAddr;
   IRExpr**     argv;
   IRDirty*     di;
   Event*       ev;

   for (i = 0; i < events_used; i++) {

      ev = &events[i];

      // Decide on helper fn to call and args to pass it.
      switch (ev->ekind) {
         case Event_Ir: helperName = "trace_instr";
                        helperAddr =  trace_instr;  break;

         case Event_Dr: helperName = "trace_load";
                        helperAddr =  trace_load;   break;

         case Event_Dw: helperName = "trace_store";
                        helperAddr =  trace_store;  break;

         case Event_Dm: helperName = "trace_modify";
                        helperAddr =  trace_modify; break;
         default:
            tl_assert(0);
      }

      // Add the helper.
      argv = mkIRExprVec_2( ev->addr, mkIRExpr_HWord( ev->size ) );
      di   = unsafeIRDirty_0_N( /*regparms*/2,
                                helperName, VG_(fnptr_to_fnentry)( helperAddr ),
                                argv );
      if (ev->guard) {
         di->guard = ev->guard;
      }
      addStmtToIRSB( sb, IRStmt_Dirty(di) );
   }

   events_used = 0;
}

// WARNING:  If you aren't interested in instruction reads, you can omit the
// code that adds calls to trace_instr() in flushEvents().  However, you
// must still call this function, addEvent_Ir() -- it is necessary to add
// the Ir events to the events list so that merging of paired load/store
// events into modify events works correctly.
static void addEvent_Ir ( IRSB* sb, IRAtom* iaddr, UInt isize )
{
   Event* evt;
   tl_assert(clo_trace_mem);
   tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
            || VG_CLREQ_SZB == isize );
   if (events_used == N_EVENTS)
      flushEvents(sb);
   tl_assert(events_used >= 0 && events_used < N_EVENTS);
   evt = &events[events_used];
   evt->ekind = Event_Ir;
   evt->addr  = iaddr;
   evt->size  = isize;
   evt->guard = NULL;
   events_used++;
}

/* Add a guarded read event. */
static
void addEvent_Dr_guarded ( IRSB* sb, IRAtom* daddr, Int dsize, IRAtom* guard )
{
   Event* evt;
   tl_assert(clo_trace_mem);
   tl_assert(isIRAtom(daddr));
   tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
   if (events_used == N_EVENTS)
      flushEvents(sb);
   tl_assert(events_used >= 0 && events_used < N_EVENTS);
   evt = &events[events_used];
   evt->ekind = Event_Dr;
   evt->addr  = daddr;
   evt->size  = dsize;
   evt->guard = guard;
   events_used++;
}

/* Add an ordinary read event, by adding a guarded read event with an
   always-true guard. */
static
void addEvent_Dr ( IRSB* sb, IRAtom* daddr, Int dsize )
{
   addEvent_Dr_guarded(sb, daddr, dsize, NULL);
}

/* Add a guarded write event. */
static
void addEvent_Dw_guarded ( IRSB* sb, IRAtom* daddr, Int dsize, IRAtom* guard )
{
   Event* evt;
   tl_assert(clo_trace_mem);
   tl_assert(isIRAtom(daddr));
   tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
   if (events_used == N_EVENTS)
      flushEvents(sb);
   tl_assert(events_used >= 0 && events_used < N_EVENTS);
   evt = &events[events_used];
   evt->ekind = Event_Dw;
   evt->addr  = daddr;
   evt->size  = dsize;
   evt->guard = guard;
   events_used++;
}

/* Add an ordinary write event.  Try to merge it with an immediately
   preceding ordinary read event of the same size to the same
   address. */
static
void addEvent_Dw ( IRSB* sb, IRAtom* daddr, Int dsize )
{
   Event* lastEvt;
   Event* evt;
   tl_assert(clo_trace_mem);
   tl_assert(isIRAtom(daddr));
   tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);

   // Is it possible to merge this write with the preceding read?
   lastEvt = &events[events_used-1];
   if (events_used > 0
       && lastEvt->ekind == Event_Dr
       && lastEvt->size  == dsize
       && lastEvt->guard == NULL
       && eqIRAtom(lastEvt->addr, daddr))
   {
      lastEvt->ekind = Event_Dm;
      return;
   }

   // No.  Add as normal.
   if (events_used == N_EVENTS)
      flushEvents(sb);
   tl_assert(events_used >= 0 && events_used < N_EVENTS);
   evt = &events[events_used];
   evt->ekind = Event_Dw;
   evt->size  = dsize;
   evt->addr  = daddr;
   evt->guard = NULL;
   events_used++;
}


/*------------------------------------------------------------*/
/*--- Stuff for --trace-superblocks                        ---*/
/*------------------------------------------------------------*/

static void trace_superblock(Addr addr)
{
   VG_(printf)("SB %08lx\n", addr);
}


/*------------------------------------------------------------*/
/*--- Basic tool functions                                 ---*/
/*------------------------------------------------------------*/

static void lk_post_clo_init(void)
{
   Int op, tyIx;

   if (clo_detailed_counts) {
      for (op = 0; op < N_OPS; op++)
         for (tyIx = 0; tyIx < N_TYPES; tyIx++)
            detailCounts[op][tyIx] = 0;
   }
}

static
IRSB* lk_instrument ( VgCallbackClosure* closure,
                      IRSB* sbIn,
                      const VexGuestLayout* layout,
                      const VexGuestExtents* vge,
                      const VexArchInfo* archinfo_host,
                      IRType gWordTy, IRType hWordTy )
{
   IRDirty*   di;
   Int        i;
   IRSB*      sbOut;
   IRTypeEnv* tyenv = sbIn->tyenv;
   Addr       iaddr = 0, dst;
   UInt       ilen = 0;
   Bool       condition_inverted = False;
   DiEpoch    ep = VG_(current_DiEpoch)();

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   /* Set up SB */
   sbOut = deepCopyIRSBExceptStmts(sbIn);

   // Copy verbatim any IR preamble preceding the first IMark
   i = 0;
   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
      addStmtToIRSB( sbOut, sbIn->stmts[i] );
      i++;
   }

   if (clo_basic_counts) {
      /* Count this superblock. */
      di = unsafeIRDirty_0_N( 0, "add_one_SB_entered",
                                 VG_(fnptr_to_fnentry)( &add_one_SB_entered ),
                                 mkIRExprVec_0() );
      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   }

   if (clo_trace_sbs) {
      /* Print this superblock's address. */
      di = unsafeIRDirty_0_N(
              0, "trace_superblock",
              VG_(fnptr_to_fnentry)( &trace_superblock ),
              mkIRExprVec_1( mkIRExpr_HWord( vge->base[0] ) )
           );
      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   }

   if (clo_trace_mem) {
      events_used = 0;
   }

   for (/*use current i*/; i < sbIn->stmts_used; i++) {
      IRStmt* st = sbIn->stmts[i];
      if (!st || st->tag == Ist_NoOp) continue;

      if (clo_basic_counts) {
         /* Count one VEX statement. */
         di = unsafeIRDirty_0_N( 0, "add_one_IRStmt",
                                    VG_(fnptr_to_fnentry)( &add_one_IRStmt ),
                                    mkIRExprVec_0() );
         addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
      }

      switch (st->tag) {
         case Ist_NoOp:
         case Ist_AbiHint:
         case Ist_Put:
         case Ist_PutI:
         case Ist_MBE:
            addStmtToIRSB( sbOut, st );
            break;

         case Ist_IMark:
            if (clo_basic_counts) {
               /* Needed to be able to check for inverted condition in
                  Ist_Exit */
               iaddr = st->Ist.IMark.addr;
               ilen  = st->Ist.IMark.len;

               /* Count guest instruction. */
               di = unsafeIRDirty_0_N( 0, "add_one_guest_instr",
                       VG_(fnptr_to_fnentry)( &add_one_guest_instr ),
                       mkIRExprVec_0() );
               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );

               /* An unconditional branch to a known destination in the
                * guest's instructions can be represented, in the IRSB to
                * instrument, by the VEX statements that are the
                * translation of that known destination.  This feature is
                * called 'SB chasing' and can be influenced by command
                * line option --vex-guest-chase=[yes|no].
                *
                * To get an accurate count of the calls to a specific
                * function, taking SB chasing into account, we need to
                * check for each guest instruction (Ist_IMark) if it is
                * the entry point of a function.
                */
               tl_assert(clo_fnname);
               tl_assert(clo_fnname[0]);
               const HChar *fnname;
               if (VG_(get_fnname_if_entry)(ep, st->Ist.IMark.addr,
                                            &fnname)
                   && 0 == VG_(strcmp)(fnname, clo_fnname)) {
                  di = unsafeIRDirty_0_N(
                          0, "add_one_func_call",
                          VG_(fnptr_to_fnentry)( &add_one_func_call ),
                          mkIRExprVec_0() );
                  addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
               }
            }
            if (clo_trace_mem) {
               // WARNING: do not remove this function call, even if you
               // aren't interested in instruction reads.  See the comment
               // above the function itself for more detail.
               addEvent_Ir( sbOut, mkIRExpr_HWord( (HWord)st->Ist.IMark.addr ),
                            st->Ist.IMark.len );
            }
            addStmtToIRSB( sbOut, st );
            break;

         case Ist_WrTmp:
            // Add a call to trace_load() if --trace-mem=yes.
            if (clo_trace_mem) {
               IRExpr* data = st->Ist.WrTmp.data;
               if (data->tag == Iex_Load) {
                  addEvent_Dr( sbOut, data->Iex.Load.addr,
                               sizeofIRType(data->Iex.Load.ty) );
               }
            }
            if (clo_detailed_counts) {
               IRExpr* expr = st->Ist.WrTmp.data;
               IRType  type = typeOfIRExpr(sbOut->tyenv, expr);
               tl_assert(type != Ity_INVALID);
               switch (expr->tag) {
                  case Iex_Load:
                     instrument_detail( sbOut, OpLoad, type, NULL/*guard*/ );
                     break;
                  case Iex_Unop:
                  case Iex_Binop:
                  case Iex_Triop:
                  case Iex_Qop:
                  case Iex_ITE:
                     instrument_detail( sbOut, OpAlu, type, NULL/*guard*/ );
                     break;
                  default:
                     break;
               }
            }
            addStmtToIRSB( sbOut, st );
            break;

         case Ist_Store: {
            IRExpr* data = st->Ist.Store.data;
            IRType  type = typeOfIRExpr(tyenv, data);
            tl_assert(type != Ity_INVALID);
            if (clo_trace_mem) {
               addEvent_Dw( sbOut, st->Ist.Store.addr,
                            sizeofIRType(type) );
            }
            if (clo_detailed_counts) {
               instrument_detail( sbOut, OpStore, type, NULL/*guard*/ );
            }
            addStmtToIRSB( sbOut, st );
            break;
         }

         case Ist_StoreG: {
            IRStoreG* sg   = st->Ist.StoreG.details;
            IRExpr*   data = sg->data;
            IRType    type = typeOfIRExpr(tyenv, data);
            tl_assert(type != Ity_INVALID);
            if (clo_trace_mem) {
               addEvent_Dw_guarded( sbOut, sg->addr,
                                    sizeofIRType(type), sg->guard );
            }
            if (clo_detailed_counts) {
               instrument_detail( sbOut, OpStore, type, sg->guard );
            }
            addStmtToIRSB( sbOut, st );
            break;
         }

         case Ist_LoadG: {
            IRLoadG* lg       = st->Ist.LoadG.details;
            IRType   type     = Ity_INVALID; /* loaded type */
            IRType   typeWide = Ity_INVALID; /* after implicit widening */
            typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
            tl_assert(type != Ity_INVALID);
            if (clo_trace_mem) {
               addEvent_Dr_guarded( sbOut, lg->addr,
                                    sizeofIRType(type), lg->guard );
            }
            if (clo_detailed_counts) {
               instrument_detail( sbOut, OpLoad, type, lg->guard );
            }
            addStmtToIRSB( sbOut, st );
            break;
         }

         case Ist_Dirty: {
            if (clo_trace_mem) {
               Int      dsize;
               IRDirty* d = st->Ist.Dirty.details;
               if (d->mFx != Ifx_None) {
                  // This dirty helper accesses memory.  Collect the details.
                  tl_assert(d->mAddr != NULL);
                  tl_assert(d->mSize != 0);
                  dsize = d->mSize;
                  if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
                     addEvent_Dr( sbOut, d->mAddr, dsize );
                  if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
                     addEvent_Dw( sbOut, d->mAddr, dsize );
               } else {
                  tl_assert(d->mAddr == NULL);
                  tl_assert(d->mSize == 0);
               }
            }
            addStmtToIRSB( sbOut, st );
            break;
         }

         case Ist_CAS: {
            /* We treat it as a read and a write of the location.  I
               think that is the same behaviour as it was before IRCAS
               was introduced, since prior to that point, the Vex
               front ends would translate a lock-prefixed instruction
               into a (normal) read followed by a (normal) write. */
            Int    dataSize;
            IRType dataTy;
            IRCAS* cas = st->Ist.CAS.details;
            tl_assert(cas->addr != NULL);
            tl_assert(cas->dataLo != NULL);
            dataTy   = typeOfIRExpr(tyenv, cas->dataLo);
            dataSize = sizeofIRType(dataTy);
            if (cas->dataHi != NULL)
               dataSize *= 2; /* since it's a doubleword-CAS */
            if (clo_trace_mem) {
               addEvent_Dr( sbOut, cas->addr, dataSize );
               addEvent_Dw( sbOut, cas->addr, dataSize );
            }
            if (clo_detailed_counts) {
               instrument_detail( sbOut, OpLoad, dataTy, NULL/*guard*/ );
               if (cas->dataHi != NULL) /* dcas */
                  instrument_detail( sbOut, OpLoad, dataTy, NULL/*guard*/ );
               instrument_detail( sbOut, OpStore, dataTy, NULL/*guard*/ );
               if (cas->dataHi != NULL) /* dcas */
                  instrument_detail( sbOut, OpStore, dataTy, NULL/*guard*/ );
            }
            addStmtToIRSB( sbOut, st );
            break;
         }

         case Ist_LLSC: {
            IRType dataTy;
            if (st->Ist.LLSC.storedata == NULL) {
               /* LL */
               dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
               if (clo_trace_mem) {
                  addEvent_Dr( sbOut, st->Ist.LLSC.addr,
                               sizeofIRType(dataTy) );
                  /* flush events before LL, helps SC to succeed */
                  flushEvents(sbOut);
               }
               if (clo_detailed_counts)
                  instrument_detail( sbOut, OpLoad, dataTy, NULL/*guard*/ );
            } else {
               /* SC */
               dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
               if (clo_trace_mem)
                  addEvent_Dw( sbOut, st->Ist.LLSC.addr,
                               sizeofIRType(dataTy) );
               if (clo_detailed_counts)
                  instrument_detail( sbOut, OpStore, dataTy, NULL/*guard*/ );
            }
            addStmtToIRSB( sbOut, st );
            break;
         }

         case Ist_Exit:
            if (clo_basic_counts) {
               // The condition of a branch was inverted by VEX if a taken
               // branch is in fact a fall-through according to the client
               // address.
               tl_assert(iaddr != 0);
               dst = (sizeof(Addr) == 4) ? st->Ist.Exit.dst->Ico.U32 :
                                           st->Ist.Exit.dst->Ico.U64;
               condition_inverted = (dst == iaddr + ilen);

               /* Count Jcc */
               if (!condition_inverted)
                  di = unsafeIRDirty_0_N( 0, "add_one_Jcc",
                          VG_(fnptr_to_fnentry)( &add_one_Jcc ),
                          mkIRExprVec_0() );
               else
                  di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc",
                          VG_(fnptr_to_fnentry)( &add_one_inverted_Jcc ),
                          mkIRExprVec_0() );

               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
            }
            if (clo_trace_mem) {
               flushEvents(sbOut);
            }

            addStmtToIRSB( sbOut, st );      // Original statement

            if (clo_basic_counts) {
               /* Count non-taken Jcc */
               if (!condition_inverted)
                  di = unsafeIRDirty_0_N( 0, "add_one_Jcc_untaken",
                          VG_(fnptr_to_fnentry)( &add_one_Jcc_untaken ),
                          mkIRExprVec_0() );
               else
                  di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc_untaken",
                          VG_(fnptr_to_fnentry)(
                             &add_one_inverted_Jcc_untaken ),
                          mkIRExprVec_0() );

               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
            }
            break;

         default:
            ppIRStmt(st);
            tl_assert(0);
      }
   }

   if (clo_basic_counts) {
      /* Count this basic block. */
      di = unsafeIRDirty_0_N( 0, "add_one_SB_completed",
                                 VG_(fnptr_to_fnentry)( &add_one_SB_completed ),
                                 mkIRExprVec_0() );
      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   }

   if (clo_trace_mem) {
      /* At the end of the sbIn.  Flush outstandings. */
      flushEvents(sbOut);
   }

   return sbOut;
}

static void lk_fini(Int exitcode)
{
   tl_assert(clo_fnname);
   tl_assert(clo_fnname[0]);

   if (clo_basic_counts) {
      ULong total_Jccs = n_Jccs + n_IJccs;
      ULong taken_Jccs = (n_Jccs - n_Jccs_untaken) + n_IJccs_untaken;

      VG_(umsg)("Counted %'llu call%s to %s()\n",
                n_func_calls, ( n_func_calls==1 ? "" : "s" ), clo_fnname);

      VG_(umsg)("\n");
      VG_(umsg)("Jccs:\n");
      VG_(umsg)("  total:         %'llu\n", total_Jccs);
      VG_(umsg)("  taken:         %'llu (%.0f%%)\n",
                taken_Jccs, taken_Jccs * 100.0 / (total_Jccs ? total_Jccs : 1));

      VG_(umsg)("\n");
      VG_(umsg)("Executed:\n");
      VG_(umsg)("  SBs entered:   %'llu\n", n_SBs_entered);
      VG_(umsg)("  SBs completed: %'llu\n", n_SBs_completed);
      VG_(umsg)("  guest instrs:  %'llu\n", n_guest_instrs);
      VG_(umsg)("  IRStmts:       %'llu\n", n_IRStmts);

      VG_(umsg)("\n");
      VG_(umsg)("Ratios:\n");
      tl_assert(n_SBs_entered); // Paranoia time.
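      /* Nb: the ratios are printed as "N : 10" with the numerator
         pre-multiplied by 10, which gives one digit of fractional
         precision without needing floating point. */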
      VG_(umsg)("  guest instrs : SB entered  = %'llu : 10\n",
                10 * n_guest_instrs / n_SBs_entered);
      VG_(umsg)("  IRStmts : SB entered       = %'llu : 10\n",
                10 * n_IRStmts / n_SBs_entered);
      tl_assert(n_guest_instrs); // Paranoia time.
      VG_(umsg)("  IRStmts : guest instr      = %'llu : 10\n",
                10 * n_IRStmts / n_guest_instrs);
   }

   if (clo_detailed_counts) {
      VG_(umsg)("\n");
      VG_(umsg)("IR-level counts by type:\n");
      print_details();
   }

   if (clo_basic_counts) {
      VG_(umsg)("\n");
      VG_(umsg)("Exit code:       %d\n", exitcode);
   }
}

static void lk_pre_clo_init(void)
{
   VG_(details_name)            ("Lackey");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("an example Valgrind tool");
   VG_(details_copyright_author)(
      "Copyright (C) 2002-2017, and GNU GPL'd, by Nicholas Nethercote.");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);
   VG_(details_avg_translation_sizeB) ( 200 );

   VG_(basic_tool_funcs)          (lk_post_clo_init,
                                   lk_instrument,
                                   lk_fini);
   VG_(needs_command_line_options)(lk_process_cmd_line_option,
                                   lk_print_usage,
                                   lk_print_debug_usage);
}

VG_DETERMINE_INTERFACE_VERSION(lk_pre_clo_init)

/*--------------------------------------------------------------------*/
/*--- end                                                lk_main.c ---*/
/*--------------------------------------------------------------------*/