perf/bigcode.c

   1 // This artificial program runs a lot of code.  The exact amount depends on
   2 // the command line -- if an arg "0" is given, it does exactly
   3 // the same amount of work, but using four times as much code.
   4 // If an arg >= 1 is given, the amount of code is multiplied by this arg.
   5 //
   6 // It's a stress test for Valgrind's translation speed;  natively the two
   7 // modes run in about the same time (the I-cache effects aren't big enough
   8 // to make a difference), but under Valgrind the one running more code is
   9 // significantly slower due to the extra translation time.
  10
  11 // 31 Aug 2015: this only "works" on x86/amd64/s390 by accident; the
  12 // test is essentially kludged.  This "generates" code into memory
  13 // (the mmap'd area) and then executes it.  But historically and even
  14 // after this commit (r15601), the test has been run without
  15 // --smc-check=all or all-non-file.  That just happens to work because
  16 // the "generated" code is never modified, so there's never a
  17 // translated-vs-reality coherence problem.  Really we ought to run
  18 // with the new-as-of-r15601 default --smc-check=all-non-file, but that
  19 // hugely slows it down and makes the results non-comparable with
  20 // pre r15601 results, so instead the .vgperf files now specify the
  21 // old default value --smc-check=stack explicitly.
  22
  23
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <limits.h>
#include <assert.h>
#if defined(__mips__)
#include <asm/cachectl.h>
#include <sys/syscall.h>
#endif
#include "tests/sys_mman.h"
  33
  34 #define FN_SIZE   1280     // Must be big enough to hold the compiled f()
  35                            // and any literal pool that might be used
  36 #define N_LOOPS   20000    // Should be divisible by four
  37 #define RATIO     4        // Ratio of code sizes between the two modes
  38
  39 int f(int x, int y)
  40 {
  41    int i;
  42    for (i = 0; i < 5000; i++) {
  43       switch (x % 8) {
  44        case 1:  y += 3;
  45        case 2:  y += x;
  46        case 3:  y *= 2;
  47        default: y--;
  48       }
  49    }
  50    return y;
  51 }
  52
  53 int main(int argc, char* argv[])
  54 {
  55    int h, i, sum1 = 0, sum2 = 0, sum3 = 0, sum4 = 0;
  56    int n_fns, n_reps;
  57
  58    if (argc <= 1) {
  59       // Mode 1: not so much code
  60       n_fns  = N_LOOPS / RATIO;
  61       n_reps = RATIO;
  62       printf("mode 1: ");
  63    } else {
  64       // Mode 2: lots of code
  65       const int mul = atoi(argv[1]);
  66       if (mul == 0)
  67          n_fns = N_LOOPS;
  68       else
  69          n_fns = N_LOOPS * mul;
  70       n_reps = 1;
  71       printf("mode 1: ");
  72    }
  73    printf("%d copies of f(), %d reps\n", n_fns, n_reps);
  74
  75    char* a = mmap(0, FN_SIZE * n_fns,
  76                      PROT_EXEC|PROT_WRITE|PROT_READ,
  77                      MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
  78    assert(a != (char*)MAP_FAILED);
  79
  80    // Make a whole lot of copies of f().  FN_SIZE is much bigger than f()
  81    // will ever be (we hope).
  82    for (i = 0; i < n_fns; i++) {
  83       memcpy(&a[FN_SIZE*i], f, FN_SIZE);
  84    }
  85
  86 #if defined(__mips__)
  87    syscall(__NR_cacheflush, a, FN_SIZE * n_fns, ICACHE);
  88 #endif
  89
  90    for (h = 0; h < n_reps; h += 1) {
  91       for (i = 0; i < n_fns; i += 4) {
  92          int(*f1)(int,int) = (void*)&a[FN_SIZE*(i+0)];
  93          int(*f2)(int,int) = (void*)&a[FN_SIZE*(i+1)];
  94          int(*f3)(int,int) = (void*)&a[FN_SIZE*(i+2)];
  95          int(*f4)(int,int) = (void*)&a[FN_SIZE*(i+3)];
  96          sum1 += f1(i+0, n_fns-i+0);
  97          sum2 += f2(i+1, n_fns-i+1);
  98          sum3 += f3(i+2, n_fns-i+2);
  99          sum4 += f4(i+3, n_fns-i+3);
 100          if (i % 1000 == 0)
 101             printf(".");
 102       }
 103    }
 104    printf("result = %d\n", sum1 + sum2 + sum3 + sum4);
 105    return 0;
 106 }