1 // This artificial program runs a lot of code. The exact amount depends on
2 // the command line -- if an arg "0" is given, it does exactly the same
3 // amount of work as with no arg, but using four times as much code.
4 // If an arg >= 1 is given, the amount of code is multiplied by this arg.
6 // It's a stress test for Valgrind's translation speed; natively the two
7 // modes run in about the same time (the I-cache effects aren't big enough
8 // to make a difference), but under Valgrind the one running more code is
9 // significantly slower due to the extra translation time.
11 // 31 Aug 2015: this only "works" on x86/amd64/s390 by accident; the
12 // test is essentially kludged. This "generates" code into memory
13 // (the mmap'd area) and then executes it. But historically and even
14 // after this commit (r15601), the test has been run without
15 // --smc-check=all or all-non-file. That just happens to work because
16 // the "generated" code is never modified, so there's never a
17 // translated-vs-reality coherence problem. Really we ought to run
18 // with the new-as-of-r15601 default --smc-check=all-non-file, but that
19 // hugely slows it down and makes the results non-comparable with
20 // pre-r15601 results, so instead the .vgperf files now specify the
21 // old default value --smc-check=stack explicitly.
29 #include <asm/cachectl.h>
30 #include <sys/syscall.h>
32 #include "tests/sys_mman.h"
34 #define FN_SIZE 1280 // Must be big enough to hold the compiled f()
35 // and any literal pool that might be used
36 #define N_LOOPS 20000 // Should be divisible by four
37 #define RATIO 4 // Ratio of code sizes between the two modes
42 for (i
= 0; i
< 5000; i
++) {
53 int main(int argc
, char* argv
[])
55 int h
, i
, sum1
= 0, sum2
= 0, sum3
= 0, sum4
= 0;
59 // Mode 1: not so much code
60 n_fns
= N_LOOPS
/ RATIO
;
64 // Mode 2: lots of code
65 const int mul
= atoi(argv
[1]);
69 n_fns
= N_LOOPS
* mul
;
73 printf("%d copies of f(), %d reps\n", n_fns
, n_reps
);
75 char* a
= mmap(0, FN_SIZE
* n_fns
,
76 PROT_EXEC
|PROT_WRITE
|PROT_READ
,
77 MAP_PRIVATE
|MAP_ANONYMOUS
, -1,0);
78 assert(a
!= (char*)MAP_FAILED
);
80 // Make a whole lot of copies of f(). FN_SIZE is much bigger than f()
81 // will ever be (we hope).
82 for (i
= 0; i
< n_fns
; i
++) {
83 memcpy(&a
[FN_SIZE
*i
], f
, FN_SIZE
);
87 syscall(__NR_cacheflush
, a
, FN_SIZE
* n_fns
, ICACHE
);
88 #elif defined(__nanomips__)
89 __builtin___clear_cache(a
, (char*)a
+ FN_SIZE
* n_fns
);
92 for (h
= 0; h
< n_reps
; h
+= 1) {
93 for (i
= 0; i
< n_fns
; i
+= 4) {
94 int(*f1
)(int,int) = (void*)&a
[FN_SIZE
*(i
+0)];
95 int(*f2
)(int,int) = (void*)&a
[FN_SIZE
*(i
+1)];
96 int(*f3
)(int,int) = (void*)&a
[FN_SIZE
*(i
+2)];
97 int(*f4
)(int,int) = (void*)&a
[FN_SIZE
*(i
+3)];
98 sum1
+= f1(i
+0, n_fns
-i
+0);
99 sum2
+= f2(i
+1, n_fns
-i
+1);
100 sum3
+= f3(i
+2, n_fns
-i
+2);
101 sum4
+= f4(i
+3, n_fns
-i
+3);
106 printf("result = %d\n", sum1
+ sum2
+ sum3
+ sum4
);