/*--------------------------------------------------------------------*/
/*--- An example Valgrind tool.                          lk_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Lackey, an example Valgrind tool that does
   some simple program measurement and tracing.

   Copyright (C) 2002-2017 Nicholas Nethercote
      njn@valgrind.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.
*/

// This tool shows how to do some basic instrumentation.
//
// There are four kinds of instrumentation it can do.  They can be turned
// on/off independently with command line options:
//
// * --basic-counts   : do basic counts, eg. number of instructions
//                      executed, jumps executed, etc.
// * --detailed-counts: do more detailed counts:  number of loads, stores
//                      and ALU operations of different sizes.
// * --trace-mem=yes:   trace all (data) memory accesses.
// * --trace-superblocks=yes:
//                      trace all superblock entries.  Mostly of interest
//                      to the Valgrind developers.
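//
// For example, to run "ls" under Lackey with two of these kinds of
// instrumentation enabled (assuming a built valgrind is on your PATH;
// the program being traced is arbitrary):
//
//    valgrind --tool=lackey --basic-counts=yes --trace-mem=yes ls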
//
// The code for each kind of instrumentation is guarded by a clo_* variable:
// clo_basic_counts, clo_detailed_counts, clo_trace_mem and clo_trace_sbs.
//
// If you want to modify any of the instrumentation code, look for the code
// that is guarded by the relevant clo_* variable (eg. clo_trace_mem).
// If you're not interested in the other kinds of instrumentation you can
// remove them.  If you want to do more complex modifications, please read
// VEX/pub/libvex_ir.h to understand the intermediate representation.
//
// Specific Details about --trace-mem=yes
// --------------------------------------
// Lackey's --trace-mem code is a good starting point for building Valgrind
// tools that act on memory loads and stores.  It could also be used as is,
// with its output used as input to a post-mortem processing step.  However,
// because memory traces can be very large, online analysis is generally
// better.
//
// It prints memory data access traces that look like this:
//
//   I  0023C790,2  # instruction read at 0x0023C790 of size 2
//   I  0023C792,5
//    S BE80199C,4  # data store at 0xBE80199C of size 4
//   I  0025242B,3
//    L BE801950,4  # data load at 0xBE801950 of size 4
//   I  0023D476,7
//    M 0025747C,1  # data modify at 0x0025747C of size 1
//   I  0023DC20,2
//    L 00254962,1
//    L BE801FB3,1
//   I  00252305,1
//    L 00254AEB,1
//    S 00257998,1
//
// Every instruction executed has an "instr" event representing it.
// Instructions that do memory accesses are followed by one or more "load",
// "store" or "modify" events.  Some instructions do more than one load or
// store, as in the last two examples in the above trace.
//
// Here are some examples of x86 instructions that do different combinations
// of loads, stores, and modifies.
//
//    Instruction          Memory accesses                  Event sequence
//    -----------          ---------------                  --------------
//    add %eax, %ebx       No loads or stores               instr
//
//    movl (%eax), %ebx    loads (%eax)                     instr, load
//
//    movl %eax, (%ebx)    stores (%ebx)                    instr, store
//
//    incl (%ecx)          modifies (%ecx)                  instr, modify
//
//    cmpsb                loads (%esi), loads (%edi)       instr, load, load
//
//    call*l (%edx)        loads (%edx), stores -4(%esp)    instr, load, store
//    pushl (%edx)         loads (%edx), stores -4(%esp)    instr, load, store
//    movsw                loads (%esi), stores (%edi)      instr, load, store
//
// Instructions using x86 "rep" prefixes are traced as if they are repeated
// N times.
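//
// As a concrete example of the post-mortem processing mentioned above, a
// standalone reader for this trace format could be as small as the
// following (hypothetical) C program.  It tallies load and modify events,
// and assumes the trace has been captured to a file (eg. via --log-file)
// with any non-trace lines (eg. Valgrind's ==PID== messages) stripped
// first:
//
//    #include <stdio.h>
//
//    int main(void)
//    {
//       char kind;                          // 'I', 'L', 'S' or 'M'
//       unsigned long addr, size, loads = 0;
//       // Each trace line is "<kind> <addr-in-hex>,<size>".
//       while (scanf(" %c %lx,%lu", &kind, &addr, &size) == 3) {
//          if (kind == 'L' || kind == 'M')
//             loads++;
//       }
//       printf("loads+modifies: %lu\n", loads);
//       return 0;
//    }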
//
// Lackey with --trace-mem gives good traces, but they are not perfect, for
// the following reasons:
//
// - It does not trace into the OS kernel, so system calls and other kernel
//   operations (eg. some scheduling and signal handling code) are ignored.
//
// - It could model loads and stores done at the system call boundary using
//   the pre_mem_read/post_mem_write events.  For example, if you call
//   fstat() you know that the passed in buffer has been written.  But it
//   currently does not do this.
//
// - Valgrind replaces some code (not much) with its own, notably parts of
//   code for scheduling operations and signal handling.  This code is not
//   traced.
//
// - There is no consideration of virtual-to-physical address mapping.
//   This may not matter for many purposes.
//
// - Valgrind modifies the instruction stream in some very minor ways.  For
//   example, on x86 the bts, btc, btr instructions are incorrectly
//   considered to always touch memory (this is a consequence of these
//   instructions being very difficult to simulate).
//
// - Valgrind tools lay out memory differently from normal programs, so the
//   addresses you get will not be typical.  Thus Lackey (and all Valgrind
//   tools) is suitable for getting relative memory traces -- eg. if you
//   want to analyse locality of memory accesses -- but is not good if
//   absolute addresses are important.
//
// Despite all these warnings, Lackey's results should be good enough for a
// wide range of purposes.  For example, Cachegrind shares all the above
// shortcomings and it is still useful.
//
// For further inspiration, you should look at cachegrind/cg_main.c which
// uses the same basic technique for tracing memory accesses, but also groups
// events together for processing into twos and threes so that fewer C calls
// are made and things run faster.
//
// Specific Details about --trace-superblocks=yes
// ----------------------------------------------
// Valgrind splits code up into single entry, multiple exit blocks
// known as superblocks.  By itself, --trace-superblocks=yes just
// prints a message as each superblock is run:
//
//  SB 04013170
//  SB 04013177
//  SB 04013173
//  SB 04013177
//
// The hex number is the address of the first instruction in the
// superblock.  You can see the relationship more obviously if you use
// --trace-superblocks=yes and --trace-mem=yes together.  Then an "SB"
// message at address X is immediately followed by an instruction-read
// ("I") line for that address, as the first instruction in the block
// is executed, for example:
//
//  SB 04014073
//  I  04014073,3
//   L 7FEFFF7F8,8
//  I  04014076,4
//  I  0401407A,3
//  I  0401407D,3
//  I  04014080,3
//  I  04014083,6

#include "pub_tool_basics.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_debuginfo.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_options.h"
#include "pub_tool_machine.h"     // VG_(fnptr_to_fnentry)

/*------------------------------------------------------------*/
/*--- Command line options                                 ---*/
/*------------------------------------------------------------*/

/* Command line options controlling instrumentation kinds, as described at
 * the top of this file. */
static Bool clo_basic_counts    = True;
static Bool clo_detailed_counts = False;
static Bool clo_trace_mem       = False;
static Bool clo_trace_sbs       = False;

/* The name of the function of which the number of calls (under
 * --basic-counts=yes) is to be counted, with default.  Override with command
 * line option --fnname. */
static const HChar* clo_fnname = "main";

static Bool lk_process_cmd_line_option(const HChar* arg)
{
   if VG_STR_CLO(arg, "--fnname", clo_fnname) {}
   else if VG_BOOL_CLO(arg, "--basic-counts",      clo_basic_counts) {}
   else if VG_BOOL_CLO(arg, "--detailed-counts",   clo_detailed_counts) {}
   else if VG_BOOL_CLO(arg, "--trace-mem",         clo_trace_mem) {}
   else if VG_BOOL_CLO(arg, "--trace-superblocks", clo_trace_sbs) {}
   else
      return False;

   tl_assert(clo_fnname);
   tl_assert(clo_fnname[0]);
   return True;
}

static void lk_print_usage(void)
{
   VG_(printf)(
"    --basic-counts=no|yes       count instructions, jumps, etc. [yes]\n"
"    --detailed-counts=no|yes    count loads, stores and alu ops [no]\n"
"    --trace-mem=no|yes          trace all loads and stores [no]\n"
"    --trace-superblocks=no|yes  trace all superblock entries [no]\n"
"    --fnname=<name>             count calls to <name> (only used if\n"
"                                --basic-counts=yes)  [main]\n"
   );
}

static void lk_print_debug_usage(void)
{
   VG_(printf)(
"    (none)\n"
   );
}

/*------------------------------------------------------------*/
/*--- Stuff for --basic-counts                             ---*/
/*------------------------------------------------------------*/

/* Nb: use ULongs because the numbers can get very big */
static ULong n_func_calls    = 0;
static ULong n_SBs_entered   = 0;
static ULong n_SBs_completed = 0;
static ULong n_IRStmts       = 0;
static ULong n_guest_instrs  = 0;
static ULong n_Jccs          = 0;
static ULong n_Jccs_untaken  = 0;
static ULong n_IJccs         = 0;
static ULong n_IJccs_untaken = 0;

static void add_one_func_call(void)             { n_func_calls++; }
static void add_one_SB_entered(void)            { n_SBs_entered++; }
static void add_one_SB_completed(void)          { n_SBs_completed++; }
static void add_one_IRStmt(void)                { n_IRStmts++; }
static void add_one_guest_instr(void)           { n_guest_instrs++; }
static void add_one_Jcc(void)                   { n_Jccs++; }
static void add_one_Jcc_untaken(void)           { n_Jccs_untaken++; }
static void add_one_inverted_Jcc(void)          { n_IJccs++; }
static void add_one_inverted_Jcc_untaken(void)  { n_IJccs_untaken++; }

/*------------------------------------------------------------*/
/*--- Stuff for --detailed-counts                          ---*/
/*------------------------------------------------------------*/

typedef
   IRExpr
   IRAtom;

/* --- Operations --- */

typedef enum { OpLoad=0, OpStore=1, OpAlu=2 } Op;

#define N_OPS 3


/* --- Types --- */

#define N_TYPES 14

static Int type2index ( IRType ty )
{
   switch (ty) {
      case Ity_I1:   return 0;
      case Ity_I8:   return 1;
      case Ity_I16:  return 2;
      case Ity_I32:  return 3;
      case Ity_I64:  return 4;
      case Ity_I128: return 5;
      case Ity_F32:  return 6;
      case Ity_F64:  return 7;
      case Ity_F128: return 8;
      case Ity_V128: return 9;
      case Ity_V256: return 10;
      case Ity_D32:  return 11;
      case Ity_D64:  return 12;
      case Ity_D128: return 13;
      default: tl_assert(0);
   }
}

static const HChar* nameOfTypeIndex ( Int i )
{
   switch (i) {
      case 0:  return "I1";
      case 1:  return "I8";
      case 2:  return "I16";
      case 3:  return "I32";
      case 4:  return "I64";
      case 5:  return "I128";
      case 6:  return "F32";
      case 7:  return "F64";
      case 8:  return "F128";
      case 9:  return "V128";
      case 10: return "V256";
      case 11: return "D32";
      case 12: return "D64";
      case 13: return "D128";
      default: tl_assert(0);
   }
}

/* --- Counts --- */

static ULong detailCounts[N_OPS][N_TYPES];

/* The helper that is called from the instrumented code. */
static VG_REGPARM(1)
void increment_detail(ULong* detail)
{
   (*detail)++;
}

/* A helper that adds the instrumentation for a detail.  guard ::
   Ity_I1 is the guarding condition for the event.  If NULL it is
   assumed to mean "always True". */
static void instrument_detail(IRSB* sb, Op op, IRType type, IRAtom* guard)
{
   IRDirty* di;
   IRExpr** argv;
   const UInt typeIx = type2index(type);

   tl_assert(op < N_OPS);
   tl_assert(typeIx < N_TYPES);

   argv = mkIRExprVec_1( mkIRExpr_HWord( (HWord)&detailCounts[op][typeIx] ) );
   di = unsafeIRDirty_0_N( 1, "increment_detail",
                              VG_(fnptr_to_fnentry)( &increment_detail ),
                              argv);
   if (guard) di->guard = guard;
   addStmtToIRSB( sb, IRStmt_Dirty(di) );
}
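
/* For example, instrument_detail(sb, OpLoad, Ity_I32, NULL) emits an
   unconditional dirty call that bumps detailCounts[OpLoad][3], since
   type2index(Ity_I32) == 3. */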

/* Summarize and print the details. */
static void print_details ( void )
{
   Int typeIx;
   VG_(umsg)("   Type        Loads       Stores       AluOps\n");
   VG_(umsg)("   -------------------------------------------\n");
   for (typeIx = 0; typeIx < N_TYPES; typeIx++) {
      VG_(umsg)("   %-4s %'12llu %'12llu %'12llu\n",
                nameOfTypeIndex( typeIx ),
                detailCounts[OpLoad ][typeIx],
                detailCounts[OpStore][typeIx],
                detailCounts[OpAlu  ][typeIx]
      );
   }
}


/*------------------------------------------------------------*/
/*--- Stuff for --trace-mem                                ---*/
/*------------------------------------------------------------*/

#define MAX_DSIZE    512

typedef
   enum { Event_Ir, Event_Dr, Event_Dw, Event_Dm }
   EventKind;

typedef
   struct {
      IRAtom*    addr;
      EventKind  ekind;
      Int        size;
      IRAtom*    guard; /* :: Ity_I1, or NULL=="always True" */
   }
   Event;

/* Up to this many unnotified events are allowed.  Must be at least two,
   so that reads and writes to the same address can be merged into a modify.
   Beyond that, larger numbers just potentially induce more spilling due to
   extending live ranges of address temporaries. */
#define N_EVENTS 4

/* Maintain an ordered list of memory events which are outstanding, in
   the sense that no IR has yet been generated to do the relevant
   helper calls.  The SB is scanned top to bottom and memory events
   are added to the end of the list, merging with the most recent
   notified event where possible (Dw immediately following Dr and
   having the same size and EA can be merged).

   This merging is done so that for architectures which have
   load-op-store instructions (x86, amd64), the instr is treated as if
   it makes just one memory reference (a modify), rather than two (a
   read followed by a write at the same address).

   At various points the list will need to be flushed, that is, IR
   generated from it.  That must happen before any possible exit from
   the block (the end, or an IRStmt_Exit).  Flushing also takes place
   when there is no space to add a new event, and before entering a
   RMW (read-modify-write) section on processors supporting LL/SC.

   If we require the simulation statistics to be up to date with
   respect to possible memory exceptions, then the list would have to
   be flushed before each memory reference.  That's a pain so we don't
   bother.

   Flushing the list consists of walking it start to end and emitting
   instrumentation IR for each event, in the order in which they
   appear. */
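
/* For example, x86 "incl (%ecx)" arrives here as a load followed by a
   store of the same size at the same address.  addEvent_Dw (below)
   notices the immediately preceding Event_Dr and rewrites it in place,
   so the pending list goes from

      { Ir(pc,len), Dr(ecx,4) }    to    { Ir(pc,len), Dm(ecx,4) }

   and a single trace_modify call is emitted at flush time. */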

static Event events[N_EVENTS];
static Int   events_used = 0;


static VG_REGPARM(2) void trace_instr(Addr addr, SizeT size)
{
   VG_(printf)("I  %08lx,%lu\n", addr, size);
}

static VG_REGPARM(2) void trace_load(Addr addr, SizeT size)
{
   VG_(printf)(" L %08lx,%lu\n", addr, size);
}

static VG_REGPARM(2) void trace_store(Addr addr, SizeT size)
{
   VG_(printf)(" S %08lx,%lu\n", addr, size);
}

static VG_REGPARM(2) void trace_modify(Addr addr, SizeT size)
{
   VG_(printf)(" M %08lx,%lu\n", addr, size);
}


static void flushEvents(IRSB* sb)
{
   Int          i;
   const HChar* helperName;
   void*        helperAddr;
   IRExpr**     argv;
   IRDirty*     di;
   Event*       ev;

   for (i = 0; i < events_used; i++) {

      ev = &events[i];

      // Decide on helper fn to call and args to pass it.
      switch (ev->ekind) {
         case Event_Ir: helperName = "trace_instr";
                        helperAddr =  trace_instr;  break;

         case Event_Dr: helperName = "trace_load";
                        helperAddr =  trace_load;   break;

         case Event_Dw: helperName = "trace_store";
                        helperAddr =  trace_store;  break;

         case Event_Dm: helperName = "trace_modify";
                        helperAddr =  trace_modify; break;
         default:
            tl_assert(0);
      }

      // Add the helper.
      argv = mkIRExprVec_2( ev->addr, mkIRExpr_HWord( ev->size ) );
      di   = unsafeIRDirty_0_N( /*regparms*/2,
                                helperName, VG_(fnptr_to_fnentry)( helperAddr ),
                                argv );
      if (ev->guard) {
         di->guard = ev->guard;
      }
      addStmtToIRSB( sb, IRStmt_Dirty(di) );
   }

   events_used = 0;
}

// WARNING:  If you aren't interested in instruction reads, you can omit the
// code that adds calls to trace_instr() in flushEvents().  However, you
// must still call this function, addEvent_Ir() -- it is necessary to add
// the Ir events to the events list so that merging of paired load/store
// events into modify events works correctly.
static void addEvent_Ir ( IRSB* sb, IRAtom* iaddr, UInt isize )
{
   Event* evt;
   tl_assert(clo_trace_mem);
   tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
            || VG_CLREQ_SZB == isize );
   if (events_used == N_EVENTS)
      flushEvents(sb);
   tl_assert(events_used >= 0 && events_used < N_EVENTS);
   evt = &events[events_used];
   evt->ekind = Event_Ir;
   evt->addr  = iaddr;
   evt->size  = isize;
   evt->guard = NULL;
   events_used++;
}

/* Add a guarded read event. */
static
void addEvent_Dr_guarded ( IRSB* sb, IRAtom* daddr, Int dsize, IRAtom* guard )
{
   Event* evt;
   tl_assert(clo_trace_mem);
   tl_assert(isIRAtom(daddr));
   tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
   if (events_used == N_EVENTS)
      flushEvents(sb);
   tl_assert(events_used >= 0 && events_used < N_EVENTS);
   evt = &events[events_used];
   evt->ekind = Event_Dr;
   evt->addr  = daddr;
   evt->size  = dsize;
   evt->guard = guard;
   events_used++;
}

/* Add an ordinary read event, by adding a guarded read event with an
   always-true guard. */
static
void addEvent_Dr ( IRSB* sb, IRAtom* daddr, Int dsize )
{
   addEvent_Dr_guarded(sb, daddr, dsize, NULL);
}

/* Add a guarded write event. */
static
void addEvent_Dw_guarded ( IRSB* sb, IRAtom* daddr, Int dsize, IRAtom* guard )
{
   Event* evt;
   tl_assert(clo_trace_mem);
   tl_assert(isIRAtom(daddr));
   tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
   if (events_used == N_EVENTS)
      flushEvents(sb);
   tl_assert(events_used >= 0 && events_used < N_EVENTS);
   evt = &events[events_used];
   evt->ekind = Event_Dw;
   evt->addr  = daddr;
   evt->size  = dsize;
   evt->guard = guard;
   events_used++;
}

/* Add an ordinary write event.  Try to merge it with an immediately
   preceding ordinary read event of the same size to the same
   address. */
static
void addEvent_Dw ( IRSB* sb, IRAtom* daddr, Int dsize )
{
   Event* lastEvt;
   Event* evt;
   tl_assert(clo_trace_mem);
   tl_assert(isIRAtom(daddr));
   tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);

   // Is it possible to merge this write with the preceding read?
   lastEvt = &events[events_used-1];
   if (events_used > 0
       && lastEvt->ekind == Event_Dr
       && lastEvt->size  == dsize
       && lastEvt->guard == NULL
       && eqIRAtom(lastEvt->addr, daddr))
   {
      lastEvt->ekind = Event_Dm;
      return;
   }

   // No.  Add as normal.
   if (events_used == N_EVENTS)
      flushEvents(sb);
   tl_assert(events_used >= 0 && events_used < N_EVENTS);
   evt = &events[events_used];
   evt->ekind = Event_Dw;
   evt->size  = dsize;
   evt->addr  = daddr;
   evt->guard = NULL;
   events_used++;
}


/*------------------------------------------------------------*/
/*--- Stuff for --trace-superblocks                        ---*/
/*------------------------------------------------------------*/

static void trace_superblock(Addr addr)
{
   VG_(printf)("SB %08lx\n", addr);
}


/*------------------------------------------------------------*/
/*--- Basic tool functions                                 ---*/
/*------------------------------------------------------------*/

static void lk_post_clo_init(void)
{
   Int op, tyIx;

   if (clo_detailed_counts) {
      for (op = 0; op < N_OPS; op++)
         for (tyIx = 0; tyIx < N_TYPES; tyIx++)
            detailCounts[op][tyIx] = 0;
   }
}

static
IRSB* lk_instrument ( VgCallbackClosure* closure,
                      IRSB* sbIn,
                      const VexGuestLayout* layout,
                      const VexGuestExtents* vge,
                      const VexArchInfo* archinfo_host,
                      IRType gWordTy, IRType hWordTy )
{
   IRDirty*   di;
   Int        i;
   IRSB*      sbOut;
   IRTypeEnv* tyenv = sbIn->tyenv;
   Addr       iaddr = 0, dst;
   UInt       ilen = 0;
   Bool       condition_inverted = False;
   DiEpoch    ep = VG_(current_DiEpoch)();

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   /* Set up SB */
   sbOut = deepCopyIRSBExceptStmts(sbIn);

   // Copy verbatim any IR preamble preceding the first IMark
   i = 0;
   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
      addStmtToIRSB( sbOut, sbIn->stmts[i] );
      i++;
   }

   if (clo_basic_counts) {
      /* Count this superblock. */
      di = unsafeIRDirty_0_N( 0, "add_one_SB_entered",
                                 VG_(fnptr_to_fnentry)( &add_one_SB_entered ),
                                 mkIRExprVec_0() );
      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   }

   if (clo_trace_sbs) {
      /* Print this superblock's address. */
      di = unsafeIRDirty_0_N(
              0, "trace_superblock",
              VG_(fnptr_to_fnentry)( &trace_superblock ),
              mkIRExprVec_1( mkIRExpr_HWord( vge->base[0] ) )
           );
      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   }

   if (clo_trace_mem) {
      events_used = 0;
   }

   for (/*use current i*/; i < sbIn->stmts_used; i++) {
      IRStmt* st = sbIn->stmts[i];
      if (!st || st->tag == Ist_NoOp) continue;

      if (clo_basic_counts) {
         /* Count one VEX statement. */
         di = unsafeIRDirty_0_N( 0, "add_one_IRStmt",
                                    VG_(fnptr_to_fnentry)( &add_one_IRStmt ),
                                    mkIRExprVec_0() );
         addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
      }

      switch (st->tag) {
         case Ist_NoOp:
         case Ist_AbiHint:
         case Ist_Put:
         case Ist_PutI:
         case Ist_MBE:
            addStmtToIRSB( sbOut, st );
            break;

         case Ist_IMark:
            if (clo_basic_counts) {
               /* Needed to be able to check for inverted condition in
                  Ist_Exit */
               iaddr = st->Ist.IMark.addr;
               ilen  = st->Ist.IMark.len;

               /* Count guest instruction. */
               di = unsafeIRDirty_0_N( 0, "add_one_guest_instr",
                       VG_(fnptr_to_fnentry)( &add_one_guest_instr ),
                       mkIRExprVec_0() );
               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );

               /* An unconditional branch to a known destination in the
                * guest's instructions can be represented, in the IRSB to
                * instrument, by the VEX statements that are the
                * translation of that known destination.  This feature is
                * called 'SB chasing' and can be influenced by command
                * line option --vex-guest-chase=[yes|no].
                *
                * To get an accurate count of the calls to a specific
                * function, taking SB chasing into account, we need to
                * check for each guest instruction (Ist_IMark) if it is
                * the entry point of a function.
                */
               tl_assert(clo_fnname);
               tl_assert(clo_fnname[0]);
               const HChar *fnname;
               if (VG_(get_fnname_if_entry)(ep, st->Ist.IMark.addr,
                                            &fnname)
                   && 0 == VG_(strcmp)(fnname, clo_fnname)) {
                  di = unsafeIRDirty_0_N(
                          0, "add_one_func_call",
                          VG_(fnptr_to_fnentry)( &add_one_func_call ),
                          mkIRExprVec_0() );
                  addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
               }
            }
            if (clo_trace_mem) {
               // WARNING: do not remove this function call, even if you
               // aren't interested in instruction reads.  See the comment
               // above the function itself for more detail.
               addEvent_Ir( sbOut, mkIRExpr_HWord( (HWord)st->Ist.IMark.addr ),
                            st->Ist.IMark.len );
            }
            addStmtToIRSB( sbOut, st );
            break;

         case Ist_WrTmp:
            // Add a call to trace_load() if --trace-mem=yes.
            if (clo_trace_mem) {
               IRExpr* data = st->Ist.WrTmp.data;
               if (data->tag == Iex_Load) {
                  addEvent_Dr( sbOut, data->Iex.Load.addr,
                               sizeofIRType(data->Iex.Load.ty) );
               }
            }
            if (clo_detailed_counts) {
               IRExpr* expr = st->Ist.WrTmp.data;
               IRType  type = typeOfIRExpr(sbOut->tyenv, expr);
               tl_assert(type != Ity_INVALID);
               switch (expr->tag) {
                  case Iex_Load:
                     instrument_detail( sbOut, OpLoad, type, NULL/*guard*/ );
                     break;
                  case Iex_Unop:
                  case Iex_Binop:
                  case Iex_Triop:
                  case Iex_Qop:
                  case Iex_ITE:
                     instrument_detail( sbOut, OpAlu, type, NULL/*guard*/ );
                     break;
                  default:
                     break;
               }
            }
            addStmtToIRSB( sbOut, st );
            break;

         case Ist_Store: {
            IRExpr* data = st->Ist.Store.data;
            IRType  type = typeOfIRExpr(tyenv, data);
            tl_assert(type != Ity_INVALID);
            if (clo_trace_mem) {
               addEvent_Dw( sbOut, st->Ist.Store.addr,
                            sizeofIRType(type) );
            }
            if (clo_detailed_counts) {
               instrument_detail( sbOut, OpStore, type, NULL/*guard*/ );
            }
            addStmtToIRSB( sbOut, st );
            break;
         }

         case Ist_StoreG: {
            IRStoreG* sg   = st->Ist.StoreG.details;
            IRExpr*   data = sg->data;
            IRType    type = typeOfIRExpr(tyenv, data);
            tl_assert(type != Ity_INVALID);
            if (clo_trace_mem) {
               addEvent_Dw_guarded( sbOut, sg->addr,
                                    sizeofIRType(type), sg->guard );
            }
            if (clo_detailed_counts) {
               instrument_detail( sbOut, OpStore, type, sg->guard );
            }
            addStmtToIRSB( sbOut, st );
            break;
         }

         case Ist_LoadG: {
            IRLoadG* lg       = st->Ist.LoadG.details;
            IRType   type     = Ity_INVALID; /* loaded type */
            IRType   typeWide = Ity_INVALID; /* after implicit widening */
            typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
            tl_assert(type != Ity_INVALID);
            if (clo_trace_mem) {
               addEvent_Dr_guarded( sbOut, lg->addr,
                                    sizeofIRType(type), lg->guard );
            }
            if (clo_detailed_counts) {
               instrument_detail( sbOut, OpLoad, type, lg->guard );
            }
            addStmtToIRSB( sbOut, st );
            break;
         }

         case Ist_Dirty: {
            if (clo_trace_mem) {
               Int      dsize;
               IRDirty* d = st->Ist.Dirty.details;
               if (d->mFx != Ifx_None) {
                  // This dirty helper accesses memory.  Collect the details.
                  tl_assert(d->mAddr != NULL);
                  tl_assert(d->mSize != 0);
                  dsize = d->mSize;
                  if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
                     addEvent_Dr( sbOut, d->mAddr, dsize );
                  if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
                     addEvent_Dw( sbOut, d->mAddr, dsize );
               } else {
                  tl_assert(d->mAddr == NULL);
                  tl_assert(d->mSize == 0);
               }
            }
            addStmtToIRSB( sbOut, st );
            break;
         }

         case Ist_CAS: {
            /* We treat it as a read and a write of the location.  I
               think that is the same behaviour as it was before IRCAS
               was introduced, since prior to that point, the Vex
               front ends would translate a lock-prefixed instruction
               into a (normal) read followed by a (normal) write. */
            Int    dataSize;
            IRType dataTy;
            IRCAS* cas = st->Ist.CAS.details;
            tl_assert(cas->addr != NULL);
            tl_assert(cas->dataLo != NULL);
            dataTy   = typeOfIRExpr(tyenv, cas->dataLo);
            dataSize = sizeofIRType(dataTy);
            if (cas->dataHi != NULL)
               dataSize *= 2; /* since it's a doubleword-CAS */
            if (clo_trace_mem) {
               addEvent_Dr( sbOut, cas->addr, dataSize );
               addEvent_Dw( sbOut, cas->addr, dataSize );
            }
            if (clo_detailed_counts) {
               instrument_detail( sbOut, OpLoad, dataTy, NULL/*guard*/ );
               if (cas->dataHi != NULL) /* dcas */
                  instrument_detail( sbOut, OpLoad, dataTy, NULL/*guard*/ );
               instrument_detail( sbOut, OpStore, dataTy, NULL/*guard*/ );
               if (cas->dataHi != NULL) /* dcas */
                  instrument_detail( sbOut, OpStore, dataTy, NULL/*guard*/ );
            }
            addStmtToIRSB( sbOut, st );
            break;
         }

         case Ist_LLSC: {
            IRType dataTy;
            if (st->Ist.LLSC.storedata == NULL) {
               /* LL */
               dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
               if (clo_trace_mem) {
                  addEvent_Dr( sbOut, st->Ist.LLSC.addr,
                               sizeofIRType(dataTy) );
                  /* flush events before LL, helps SC to succeed */
                  flushEvents(sbOut);
               }
               if (clo_detailed_counts)
                  instrument_detail( sbOut, OpLoad, dataTy, NULL/*guard*/ );
            } else {
               /* SC */
               dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
               if (clo_trace_mem)
                  addEvent_Dw( sbOut, st->Ist.LLSC.addr,
                               sizeofIRType(dataTy) );
               if (clo_detailed_counts)
                  instrument_detail( sbOut, OpStore, dataTy, NULL/*guard*/ );
            }
            addStmtToIRSB( sbOut, st );
            break;
         }

         case Ist_Exit:
            if (clo_basic_counts) {
               // The condition of a branch was inverted by VEX if a taken
               // branch is in fact a fall-through according to the client
               // address.
               tl_assert(iaddr != 0);
               dst = (sizeof(Addr) == 4) ? st->Ist.Exit.dst->Ico.U32 :
                                           st->Ist.Exit.dst->Ico.U64;
               condition_inverted = (dst == iaddr + ilen);

               /* Count Jcc */
               if (!condition_inverted)
                  di = unsafeIRDirty_0_N( 0, "add_one_Jcc",
                          VG_(fnptr_to_fnentry)( &add_one_Jcc ),
                          mkIRExprVec_0() );
               else
                  di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc",
                          VG_(fnptr_to_fnentry)( &add_one_inverted_Jcc ),
                          mkIRExprVec_0() );

               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
            }
            if (clo_trace_mem) {
               flushEvents(sbOut);
            }

            addStmtToIRSB( sbOut, st );      // Original statement

            if (clo_basic_counts) {
               /* Count non-taken Jcc */
               if (!condition_inverted)
                  di = unsafeIRDirty_0_N( 0, "add_one_Jcc_untaken",
                          VG_(fnptr_to_fnentry)( &add_one_Jcc_untaken ),
                          mkIRExprVec_0() );
               else
                  di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc_untaken",
                          VG_(fnptr_to_fnentry)(
                             &add_one_inverted_Jcc_untaken ),
                          mkIRExprVec_0() );

               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
            }
            break;

         default:
            ppIRStmt(st);
            tl_assert(0);
      }
   }

   if (clo_basic_counts) {
      /* Count this basic block. */
      di = unsafeIRDirty_0_N( 0, "add_one_SB_completed",
                                 VG_(fnptr_to_fnentry)( &add_one_SB_completed ),
                                 mkIRExprVec_0() );
      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   }

   if (clo_trace_mem) {
      /* At the end of the sbIn.  Flush outstandings. */
      flushEvents(sbOut);
   }

   return sbOut;
}

static void lk_fini(Int exitcode)
{
   tl_assert(clo_fnname);
   tl_assert(clo_fnname[0]);

   if (clo_basic_counts) {
      ULong total_Jccs = n_Jccs + n_IJccs;
      ULong taken_Jccs = (n_Jccs - n_Jccs_untaken) + n_IJccs_untaken;

      VG_(umsg)("Counted %'llu call%s to %s()\n",
                n_func_calls, ( n_func_calls==1 ? "" : "s" ), clo_fnname);

      VG_(umsg)("\n");
      VG_(umsg)("Jccs:\n");
      VG_(umsg)("  total:         %'llu\n", total_Jccs);
      VG_(umsg)("  taken:         %'llu (%.0f%%)\n",
                taken_Jccs, taken_Jccs * 100.0 / (total_Jccs ? total_Jccs : 1));

      VG_(umsg)("\n");
      VG_(umsg)("Executed:\n");
      VG_(umsg)("  SBs entered:   %'llu\n", n_SBs_entered);
      VG_(umsg)("  SBs completed: %'llu\n", n_SBs_completed);
      VG_(umsg)("  guest instrs:  %'llu\n", n_guest_instrs);
      VG_(umsg)("  IRStmts:       %'llu\n", n_IRStmts);

      VG_(umsg)("\n");
      VG_(umsg)("Ratios:\n");
      tl_assert(n_SBs_entered); // Paranoia time.
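      /* Nb: the ratios are printed as "N : 10" with the numerator
         pre-multiplied by 10, which gives one digit of fractional
         precision without needing floating point. */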
      VG_(umsg)("  guest instrs : SB entered  = %'llu : 10\n",
                10 * n_guest_instrs / n_SBs_entered);
      VG_(umsg)("  IRStmts : SB entered       = %'llu : 10\n",
                10 * n_IRStmts / n_SBs_entered);
      tl_assert(n_guest_instrs); // Paranoia time.
      VG_(umsg)("  IRStmts : guest instr      = %'llu : 10\n",
                10 * n_IRStmts / n_guest_instrs);
   }

   if (clo_detailed_counts) {
      VG_(umsg)("\n");
      VG_(umsg)("IR-level counts by type:\n");
      print_details();
   }

   if (clo_basic_counts) {
      VG_(umsg)("\n");
      VG_(umsg)("Exit code:       %d\n", exitcode);
   }
}

static void lk_pre_clo_init(void)
{
   VG_(details_name)            ("Lackey");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("an example Valgrind tool");
   VG_(details_copyright_author)(
      "Copyright (C) 2002-2017, and GNU GPL'd, by Nicholas Nethercote.");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);
   VG_(details_avg_translation_sizeB) ( 200 );

   VG_(basic_tool_funcs)          (lk_post_clo_init,
                                   lk_instrument,
                                   lk_fini);
   VG_(needs_command_line_options)(lk_process_cmd_line_option,
                                   lk_print_usage,
                                   lk_print_debug_usage);
}

VG_DETERMINE_INTERFACE_VERSION(lk_pre_clo_init)

/*--------------------------------------------------------------------*/
/*--- end                                                lk_main.c ---*/
/*--------------------------------------------------------------------*/